< prev index next >

src/jdk.incubator.vector/share/classes/jdk/incubator/vector/IntVector.java

Print this page
*** 27,11 ***
  import java.nio.ByteBuffer;
  import java.nio.ByteOrder;
  import java.nio.ReadOnlyBufferException;
  import java.util.Arrays;
  import java.util.Objects;
- import java.util.function.BinaryOperator;
  import java.util.function.Function;
  import java.util.function.UnaryOperator;
  
  import jdk.internal.misc.ScopedMemoryAccess;
  import jdk.internal.misc.Unsafe;
--- 27,10 ---

*** 171,10 ***
--- 170,13 ---
                               FUnOp f);
      @ForceInline
      final
      IntVector uOpTemplate(VectorMask<Integer> m,
                                       FUnOp f) {
+         if (m == null) {
+             return uOpTemplate(f);
+         }
          int[] vec = vec();
          int[] res = new int[length()];
          boolean[] mbits = ((AbstractMask<Integer>)m).getBits();
          for (int i = 0; i < res.length; i++) {
              res[i] = mbits[i] ? f.apply(i, vec[i]) : vec[i];

*** 214,10 ***
--- 216,13 ---
      @ForceInline
      final
      IntVector bOpTemplate(Vector<Integer> o,
                                       VectorMask<Integer> m,
                                       FBinOp f) {
+         if (m == null) {
+             return bOpTemplate(o, f);
+         }
          int[] res = new int[length()];
          int[] vec1 = this.vec();
          int[] vec2 = ((IntVector)o).vec();
          boolean[] mbits = ((AbstractMask<Integer>)m).getBits();
          for (int i = 0; i < res.length; i++) {

*** 263,10 ***
--- 268,13 ---
      final
      IntVector tOpTemplate(Vector<Integer> o1,
                                       Vector<Integer> o2,
                                       VectorMask<Integer> m,
                                       FTriOp f) {
+         if (m == null) {
+             return tOpTemplate(o1, o2, f);
+         }
          int[] res = new int[length()];
          int[] vec1 = this.vec();
          int[] vec2 = ((IntVector)o1).vec();
          int[] vec3 = ((IntVector)o2).vec();
          boolean[] mbits = ((AbstractMask<Integer>)m).getBits();

*** 278,11 ***
  
      // Reduction operator
  
      /*package-private*/
      abstract
!     int rOp(int v, FBinOp f);
      @ForceInline
      final
      int rOpTemplate(int v, FBinOp f) {
          int[] vec = vec();
          for (int i = 0; i < vec.length; i++) {
--- 286,26 ---
  
      // Reduction operator
  
      /*package-private*/
      abstract
!     int rOp(int v, VectorMask<Integer> m, FBinOp f);
+ 
+     @ForceInline
+     final
+     int rOpTemplate(int v, VectorMask<Integer> m, FBinOp f) {
          // Masked reduction template: folds f over the lanes of vec(),
          // starting from the accumulator v, and skips every lane whose
          // mask bit is false.
+         if (m == null) {
              // No mask supplied: delegate to the unmasked overload.
+             return rOpTemplate(v, f);
+         }
+         int[] vec = vec();
+         boolean[] mbits = ((AbstractMask<Integer>)m).getBits();
+         for (int i = 0; i < vec.length; i++) {
              // An unset lane leaves the accumulator unchanged.
+             v = mbits[i] ? f.apply(i, v, vec[i]) : v;
+         }
+         return v;
+     }
+ 
      @ForceInline
      final
      int rOpTemplate(int v, FBinOp f) {
          int[] vec = vec();
          for (int i = 0; i < vec.length; i++) {

*** 547,41 ***
          if (opKind(op, VO_SPECIAL)) {
              if (op == ZOMO) {
                  return blend(broadcast(-1), compare(NE, 0));
              }
              if (op == NOT) {
!                 return broadcast(-1).lanewiseTemplate(XOR, this);
              } else if (op == NEG) {
                  // FIXME: Support this in the JIT.
!                 return broadcast(0).lanewiseTemplate(SUB, this);
              }
          }
          int opc = opCode(op);
          return VectorSupport.unaryOp(
!             opc, getClass(), int.class, length(),
!             this,
!             UN_IMPL.find(op, opc, (opc_) -> {
-               switch (opc_) {
-                 case VECTOR_OP_NEG: return v0 ->
-                         v0.uOp((i, a) -> (int) -a);
-                 case VECTOR_OP_ABS: return v0 ->
-                         v0.uOp((i, a) -> (int) Math.abs(a));
-                 default: return null;
-               }}));
      }
-     private static final
-     ImplCache<Unary,UnaryOperator<IntVector>> UN_IMPL
-         = new ImplCache<>(Unary.class, IntVector.class);
  
      /**
       * {@inheritDoc} <!--workaround-->
       */
!     @ForceInline
!     public final
      IntVector lanewise(VectorOperators.Unary op,
!                                   VectorMask<Integer> m) {
!         return blend(lanewise(op), m);
      }
  
      // Binary lanewise support
  
      /**
--- 570,65 ---
          if (opKind(op, VO_SPECIAL)) {
              if (op == ZOMO) {
                  return blend(broadcast(-1), compare(NE, 0));
              }
              if (op == NOT) {
!                 return broadcast(-1).lanewise(XOR, this);
              } else if (op == NEG) {
                  // FIXME: Support this in the JIT.
!                 return broadcast(0).lanewise(SUB, this);
              }
          }
          int opc = opCode(op);
          return VectorSupport.unaryOp(
!             opc, getClass(), null, int.class, length(),
!             this, null,
!             UN_IMPL.find(op, opc, IntVector::unaryOperations));
      }
  
      /**
       * {@inheritDoc} <!--workaround-->
       */
!     @Override
!     public abstract
      IntVector lanewise(VectorOperators.Unary op,
!                                   VectorMask<Integer> m);
!     @ForceInline
+     final
+     IntVector lanewiseTemplate(VectorOperators.Unary op,
+                                           Class<? extends VectorMask<Integer>> maskClass,
+                                           VectorMask<Integer> m) {
          // Masked unary lanewise template: checks the mask, rewrites the
          // VO_SPECIAL operations in terms of other masked operations, and
          // passes every other operation to VectorSupport.unaryOp with m.
+         m.check(maskClass, this);
+         if (opKind(op, VO_SPECIAL)) {
+             if (op == ZOMO) {
                  // Blend -1 into this vector under the mask produced by
                  // the masked compare(NE, 0, m).
+                 return blend(broadcast(-1), compare(NE, 0, m));
+             }
+             if (op == NOT) {
                  // NOT is expressed as a masked XOR with all-ones (-1).
+                 return lanewise(XOR, broadcast(-1), m);
+             } else if (op == NEG) {
                  // NEG is expressed as masked NOT followed by masked ADD 1
                  // (two's complement identity: -a == ~a + 1).
+                 return lanewise(NOT, m).lanewise(ADD, broadcast(1), m);
+             }
+         }
+         int opc = opCode(op);
          // The implementation lambda is looked up through UN_IMPL.find,
          // with unaryOperations supplied as the factory for this opcode.
+         return VectorSupport.unaryOp(
+             opc, getClass(), maskClass, int.class, length(),
+             this, m,
+             UN_IMPL.find(op, opc, IntVector::unaryOperations));
+     }
+ 
+     private static final
+     ImplCache<Unary, UnaryOperation<IntVector, VectorMask<Integer>>>
+         UN_IMPL = new ImplCache<>(Unary.class, IntVector.class);
+ 
+     private static UnaryOperation<IntVector, VectorMask<Integer>> unaryOperations(int opc_) {
          // Maps a unary operation code to a lambda that applies the
          // operation through the masked uOp(m, ...). Returns null for any
          // code that is not listed here.
+         switch (opc_) {
+             case VECTOR_OP_NEG: return (v0, m) ->
+                     v0.uOp(m, (i, a) -> (int) -a);
+             case VECTOR_OP_ABS: return (v0, m) ->
+                     v0.uOp(m, (i, a) -> (int) Math.abs(a));
+             default: return null;
+         }
      }
  
      // Binary lanewise support
  
      /**

*** 597,10 ***
--- 644,11 ---
      final
      IntVector lanewiseTemplate(VectorOperators.Binary op,
                                            Vector<Integer> v) {
          IntVector that = (IntVector) v;
          that.check(this);
+ 
          if (opKind(op, VO_SPECIAL  | VO_SHIFT)) {
              if (op == FIRST_NONZERO) {
                  // FIXME: Support this in the JIT.
                  VectorMask<Integer> thisNZ
                      = this.viewAsIntegralLanes().compare(NE, (int) 0);

*** 615,78 ***
              if (op == AND_NOT) {
                  // FIXME: Support this in the JIT.
                  that = that.lanewise(NOT);
                  op = AND;
              } else if (op == DIV) {
!                 VectorMask<Integer> eqz = that.eq((int)0);
                  if (eqz.anyTrue()) {
                      throw that.divZeroException();
                  }
              }
          }
          int opc = opCode(op);
          return VectorSupport.binaryOp(
!             opc, getClass(), int.class, length(),
!             this, that,
!             BIN_IMPL.find(op, opc, (opc_) -> {
-               switch (opc_) {
-                 case VECTOR_OP_ADD: return (v0, v1) ->
-                         v0.bOp(v1, (i, a, b) -> (int)(a + b));
-                 case VECTOR_OP_SUB: return (v0, v1) ->
-                         v0.bOp(v1, (i, a, b) -> (int)(a - b));
-                 case VECTOR_OP_MUL: return (v0, v1) ->
-                         v0.bOp(v1, (i, a, b) -> (int)(a * b));
-                 case VECTOR_OP_DIV: return (v0, v1) ->
-                         v0.bOp(v1, (i, a, b) -> (int)(a / b));
-                 case VECTOR_OP_MAX: return (v0, v1) ->
-                         v0.bOp(v1, (i, a, b) -> (int)Math.max(a, b));
-                 case VECTOR_OP_MIN: return (v0, v1) ->
-                         v0.bOp(v1, (i, a, b) -> (int)Math.min(a, b));
-                 case VECTOR_OP_AND: return (v0, v1) ->
-                         v0.bOp(v1, (i, a, b) -> (int)(a & b));
-                 case VECTOR_OP_OR: return (v0, v1) ->
-                         v0.bOp(v1, (i, a, b) -> (int)(a | b));
-                 case VECTOR_OP_XOR: return (v0, v1) ->
-                         v0.bOp(v1, (i, a, b) -> (int)(a ^ b));
-                 case VECTOR_OP_LSHIFT: return (v0, v1) ->
-                         v0.bOp(v1, (i, a, n) -> (int)(a << n));
-                 case VECTOR_OP_RSHIFT: return (v0, v1) ->
-                         v0.bOp(v1, (i, a, n) -> (int)(a >> n));
-                 case VECTOR_OP_URSHIFT: return (v0, v1) ->
-                         v0.bOp(v1, (i, a, n) -> (int)((a & LSHR_SETUP_MASK) >>> n));
-                 case VECTOR_OP_LROTATE: return (v0, v1) ->
-                         v0.bOp(v1, (i, a, n) -> rotateLeft(a, (int)n));
-                 case VECTOR_OP_RROTATE: return (v0, v1) ->
-                         v0.bOp(v1, (i, a, n) -> rotateRight(a, (int)n));
-                 default: return null;
-                 }}));
      }
-     private static final
-     ImplCache<Binary,BinaryOperator<IntVector>> BIN_IMPL
-         = new ImplCache<>(Binary.class, IntVector.class);
  
      /**
       * {@inheritDoc} <!--workaround-->
       * @see #lanewise(VectorOperators.Binary,int,VectorMask)
       */
!     @ForceInline
!     public final
      IntVector lanewise(VectorOperators.Binary op,
                                    Vector<Integer> v,
!                                   VectorMask<Integer> m) {
          IntVector that = (IntVector) v;
!         if (op == DIV) {
!             VectorMask<Integer> eqz = that.eq((int)0);
!             if (eqz.and(m).anyTrue()) {
!                 throw that.divZeroException();
              }
-             // suppress div/0 exceptions in unset lanes
-             that = that.lanewise(NOT, eqz);
-             return blend(lanewise(DIV, that), m);
          }
!         return blend(lanewise(op, v), m);
      }
      // FIXME: Maybe all of the public final methods in this file (the
      // simple ones that just call lanewise) should be pushed down to
      // the X-VectorBits template.  They can't optimize properly at
      // this level, and must rely on inlining.  Does it work?
      // (If it works, of course keep the code here.)
--- 663,114 ---
              if (op == AND_NOT) {
                  // FIXME: Support this in the JIT.
                  that = that.lanewise(NOT);
                  op = AND;
              } else if (op == DIV) {
!                 VectorMask<Integer> eqz = that.eq((int) 0);
                  if (eqz.anyTrue()) {
                      throw that.divZeroException();
                  }
              }
          }
+ 
          int opc = opCode(op);
          return VectorSupport.binaryOp(
!             opc, getClass(), null, int.class, length(),
!             this, that, null,
!             BIN_IMPL.find(op, opc, IntVector::binaryOperations));
      }
  
      /**
       * {@inheritDoc} <!--workaround-->
       * @see #lanewise(VectorOperators.Binary,int,VectorMask)
       */
!     @Override
!     public abstract
      IntVector lanewise(VectorOperators.Binary op,
                                    Vector<Integer> v,
!                                   VectorMask<Integer> m);
+     @ForceInline
+     final
+     IntVector lanewiseTemplate(VectorOperators.Binary op,
+                                           Class<? extends VectorMask<Integer>> maskClass,
+                                           Vector<Integer> v, VectorMask<Integer> m) {
          // Masked binary lanewise template: checks both the second operand
          // and the mask against this vector, pre-processes the special and
          // shift operations, then passes op, operands and m to
          // VectorSupport.binaryOp.
          IntVector that = (IntVector) v;
!         that.check(this);
!         m.check(maskClass, this);
! 
!         if (opKind(op, VO_SPECIAL  | VO_SHIFT)) {
+             if (op == FIRST_NONZERO) {
+                 // FIXME: Support this in the JIT.
                  // Blend 0 into 'that' under the mask of lanes where this
                  // vector is nonzero, then continue as OR_UNCHECKED.
+                 VectorMask<Integer> thisNZ
+                     = this.viewAsIntegralLanes().compare(NE, (int) 0);
+                 that = that.blend((int) 0, thisNZ.cast(vspecies()));
+                 op = OR_UNCHECKED;
+             }
+             if (opKind(op, VO_SHIFT)) {
+                 // As per shift specification for Java, mask the shift count.
+                 // This allows the JIT to ignore some ISA details.
+                 that = that.lanewise(AND, SHIFT_MASK);
+             }
+             if (op == AND_NOT) {
+                 // FIXME: Support this in the JIT.
                  // AND_NOT becomes AND with the complemented second operand.
+                 that = that.lanewise(NOT);
+                 op = AND;
+             } else if (op == DIV) {
                  // Only a zero divisor in a lane selected by m throws.
+                 VectorMask<Integer> eqz = that.eq((int)0);
+                 if (eqz.and(m).anyTrue()) {
+                     throw that.divZeroException();
+                 }
+                 // suppress div/0 exceptions in unset lanes
                  // (the masked NOT is applied under eqz, i.e. to the
                  // zero-valued divisor lanes)
+                 that = that.lanewise(NOT, eqz);
              }
          }
! 
+         int opc = opCode(op);
          // The implementation lambda is looked up through BIN_IMPL.find,
          // with binaryOperations supplied as the factory for this opcode.
+         return VectorSupport.binaryOp(
+             opc, getClass(), maskClass, int.class, length(),
+             this, that, m,
+             BIN_IMPL.find(op, opc, IntVector::binaryOperations));
+     }
+ 
+     private static final
+     ImplCache<Binary, BinaryOperation<IntVector, VectorMask<Integer>>>
+         BIN_IMPL = new ImplCache<>(Binary.class, IntVector.class);
+ 
+     private static BinaryOperation<IntVector, VectorMask<Integer>> binaryOperations(int opc_) {
          // Maps a binary operation code to a lambda that applies the
          // operation lane by lane through the masked bOp(v1, vm, ...).
          // Returns null for any code that is not listed here.
+         switch (opc_) {
+             case VECTOR_OP_ADD: return (v0, v1, vm) ->
+                     v0.bOp(v1, vm, (i, a, b) -> (int)(a + b));
+             case VECTOR_OP_SUB: return (v0, v1, vm) ->
+                     v0.bOp(v1, vm, (i, a, b) -> (int)(a - b));
+             case VECTOR_OP_MUL: return (v0, v1, vm) ->
+                     v0.bOp(v1, vm, (i, a, b) -> (int)(a * b));
+             case VECTOR_OP_DIV: return (v0, v1, vm) ->
+                     v0.bOp(v1, vm, (i, a, b) -> (int)(a / b));
+             case VECTOR_OP_MAX: return (v0, v1, vm) ->
+                     v0.bOp(v1, vm, (i, a, b) -> (int)Math.max(a, b));
+             case VECTOR_OP_MIN: return (v0, v1, vm) ->
+                     v0.bOp(v1, vm, (i, a, b) -> (int)Math.min(a, b));
+             case VECTOR_OP_AND: return (v0, v1, vm) ->
+                     v0.bOp(v1, vm, (i, a, b) -> (int)(a & b));
+             case VECTOR_OP_OR: return (v0, v1, vm) ->
+                     v0.bOp(v1, vm, (i, a, b) -> (int)(a | b));
+             case VECTOR_OP_XOR: return (v0, v1, vm) ->
+                     v0.bOp(v1, vm, (i, a, b) -> (int)(a ^ b));
              // For the shift and rotate cases the second lane value n is
              // the shift or rotate count.
+             case VECTOR_OP_LSHIFT: return (v0, v1, vm) ->
+                     v0.bOp(v1, vm, (i, a, n) -> (int)(a << n));
+             case VECTOR_OP_RSHIFT: return (v0, v1, vm) ->
+                     v0.bOp(v1, vm, (i, a, n) -> (int)(a >> n));
              // The value is ANDed with LSHR_SETUP_MASK before the unsigned
              // shift; the constant is defined outside this view.
+             case VECTOR_OP_URSHIFT: return (v0, v1, vm) ->
+                     v0.bOp(v1, vm, (i, a, n) -> (int)((a & LSHR_SETUP_MASK) >>> n));
+             case VECTOR_OP_LROTATE: return (v0, v1, vm) ->
+                     v0.bOp(v1, vm, (i, a, n) -> rotateLeft(a, (int)n));
+             case VECTOR_OP_RROTATE: return (v0, v1, vm) ->
+                     v0.bOp(v1, vm, (i, a, n) -> rotateRight(a, (int)n));
+             default: return null;
+         }
      }
+ 
      // FIXME: Maybe all of the public final methods in this file (the
      // simple ones that just call lanewise) should be pushed down to
      // the X-VectorBits template.  They can't optimize properly at
      // this level, and must rely on inlining.  Does it work?
      // (If it works, of course keep the code here.)

*** 745,11 ***
      @ForceInline
      public final
      IntVector lanewise(VectorOperators.Binary op,
                                    int e,
                                    VectorMask<Integer> m) {
!         return blend(lanewise(op, e), m);
      }
  
      /**
       * {@inheritDoc} <!--workaround-->
       * @apiNote
--- 829,17 ---
      @ForceInline
      public final
      IntVector lanewise(VectorOperators.Binary op,
                                    int e,
                                    VectorMask<Integer> m) {
          // Masked binary operation against a scalar operand e.
          // Shift operations take the scalar-count path (lanewiseShift).
          // NOTE(review): e is already an int, so (int)(int)e == e is always
          // true here; the double cast looks like a leftover from a template
          // shared with other lane types - confirm.
!         if (opKind(op, VO_SHIFT) && (int)(int)e == e) {
+             return lanewiseShift(op, (int) e, m);
+         }
+         if (op == AND_NOT) {
              // a AND_NOT e is computed as a AND (~e).
+             op = AND; e = (int) ~e;
+         }
          // Every other operation broadcasts e and uses the vector overload.
+         return lanewise(op, broadcast(e), m);
      }
  
      /**
       * {@inheritDoc} <!--workaround-->
       * @apiNote

*** 765,12 ***
      IntVector lanewise(VectorOperators.Binary op,
                                    long e) {
          int e1 = (int) e;
          if ((long)e1 != e
              // allow shift ops to clip down their int parameters
!             && !(opKind(op, VO_SHIFT) && (int)e1 == e)
-             ) {
              vspecies().checkValue(e);  // for exception
          }
          return lanewise(op, e1);
      }
  
--- 855,11 ---
      IntVector lanewise(VectorOperators.Binary op,
                                    long e) {
          int e1 = (int) e;
          if ((long)e1 != e
              // allow shift ops to clip down their int parameters
!             && !(opKind(op, VO_SHIFT) && (int)e1 == e)) {
              vspecies().checkValue(e);  // for exception
          }
          return lanewise(op, e1);
      }
  

*** 786,11 ***
       */
      @ForceInline
      public final
      IntVector lanewise(VectorOperators.Binary op,
                                    long e, VectorMask<Integer> m) {
!         return blend(lanewise(op, e), m);
      }
  
      /*package-private*/
      abstract IntVector
      lanewiseShift(VectorOperators.Binary op, int e);
--- 875,17 ---
       */
      @ForceInline
      public final
      IntVector lanewise(VectorOperators.Binary op,
                                    long e, VectorMask<Integer> m) {
          // Masked binary operation against a long scalar: narrows e to int,
          // calls checkValue when the narrowing changed the value, and then
          // delegates to the (op, int, mask) overload.
!         int e1 = (int) e;
+         if ((long)e1 != e
+             // allow shift ops to clip down their int parameters
              // NOTE(review): e1 is an int, so (int)e1 == e repeats the test
              // above; once (long)e1 != e holds it is false, which means the
              // shift exemption never applies in this specialization - confirm
              // against the shared template.
+             && !(opKind(op, VO_SHIFT) && (int)e1 == e)) {
+             vspecies().checkValue(e);  // for exception
+         }
+         return lanewise(op, e1, m);
      }
  
      /*package-private*/
      abstract IntVector
      lanewiseShift(VectorOperators.Binary op, int e);

*** 803,31 ***
          assert(opKind(op, VO_SHIFT));
          // As per shift specification for Java, mask the shift count.
          e &= SHIFT_MASK;
          int opc = opCode(op);
          return VectorSupport.broadcastInt(
!             opc, getClass(), int.class, length(),
!             this, e,
!             BIN_INT_IMPL.find(op, opc, (opc_) -> {
-               switch (opc_) {
-                 case VECTOR_OP_LSHIFT: return (v, n) ->
-                         v.uOp((i, a) -> (int)(a << n));
-                 case VECTOR_OP_RSHIFT: return (v, n) ->
-                         v.uOp((i, a) -> (int)(a >> n));
-                 case VECTOR_OP_URSHIFT: return (v, n) ->
-                         v.uOp((i, a) -> (int)((a & LSHR_SETUP_MASK) >>> n));
-                 case VECTOR_OP_LROTATE: return (v, n) ->
-                         v.uOp((i, a) -> rotateLeft(a, (int)n));
-                 case VECTOR_OP_RROTATE: return (v, n) ->
-                         v.uOp((i, a) -> rotateRight(a, (int)n));
-                 default: return null;
-                 }}));
      }
      private static final
!     ImplCache<Binary,VectorBroadcastIntOp<IntVector>> BIN_INT_IMPL
          = new ImplCache<>(Binary.class, IntVector.class);
  
      // As per shift specification for Java, mask the shift count.
      // We mask 0X3F (long), 0X1F (int), 0x0F (short), 0x7 (byte).
      // The latter two maskings go beyond the JLS, but seem reasonable
      // since our lane types are first-class types, not just dressed
      // up ints.
--- 898,56 ---
          assert(opKind(op, VO_SHIFT));
          // As per shift specification for Java, mask the shift count.
          e &= SHIFT_MASK;
          int opc = opCode(op);
          return VectorSupport.broadcastInt(
!             opc, getClass(), null, int.class, length(),
!             this, e, null,
!             BIN_INT_IMPL.find(op, opc, IntVector::broadcastIntOperations));
      }
+ 
+     /*package-private*/
+     abstract IntVector
+     lanewiseShift(VectorOperators.Binary op, int e, VectorMask<Integer> m);
+ 
+     /*package-private*/
      // Masked shift/rotate by a scalar count e: checks the mask, reduces
      // the count with SHIFT_MASK, and passes op, this, e and m to
      // VectorSupport.broadcastInt.
+     @ForceInline
+     final IntVector
+     lanewiseShiftTemplate(VectorOperators.Binary op,
+                           Class<? extends VectorMask<Integer>> maskClass,
+                           int e, VectorMask<Integer> m) {
+         m.check(maskClass, this);
          // Callers must pass a VO_SHIFT operation; this is only asserted.
+         assert(opKind(op, VO_SHIFT));
+         // As per shift specification for Java, mask the shift count.
+         e &= SHIFT_MASK;
+         int opc = opCode(op);
          // The implementation lambda is looked up through BIN_INT_IMPL.find,
          // with broadcastIntOperations supplied as the factory.
+         return VectorSupport.broadcastInt(
+             opc, getClass(), maskClass, int.class, length(),
+             this, e, m,
+             BIN_INT_IMPL.find(op, opc, IntVector::broadcastIntOperations));
+     }
+ 
      private static final
!     ImplCache<Binary,VectorBroadcastIntOp<IntVector, VectorMask<Integer>>> BIN_INT_IMPL
          = new ImplCache<>(Binary.class, IntVector.class);
  
+     private static VectorBroadcastIntOp<IntVector, VectorMask<Integer>> broadcastIntOperations(int opc_) {
          // Maps a shift or rotate operation code to a lambda that applies
          // it through the masked uOp(m, ...), using the single scalar count
          // n for every lane. Returns null for any code not listed here.
+         switch (opc_) {
+             case VECTOR_OP_LSHIFT: return (v, n, m) ->
+                     v.uOp(m, (i, a) -> (int)(a << n));
+             case VECTOR_OP_RSHIFT: return (v, n, m) ->
+                     v.uOp(m, (i, a) -> (int)(a >> n));
              // The value is ANDed with LSHR_SETUP_MASK before the unsigned
              // shift; the constant is defined outside this view.
+             case VECTOR_OP_URSHIFT: return (v, n, m) ->
+                     v.uOp(m, (i, a) -> (int)((a & LSHR_SETUP_MASK) >>> n));
+             case VECTOR_OP_LROTATE: return (v, n, m) ->
+                     v.uOp(m, (i, a) -> rotateLeft(a, (int)n));
+             case VECTOR_OP_RROTATE: return (v, n, m) ->
+                     v.uOp(m, (i, a) -> rotateRight(a, (int)n));
+             default: return null;
+         }
+     }
+ 
      // As per shift specification for Java, mask the shift count.
      // We mask 0X3F (long), 0X1F (int), 0x0F (short), 0x7 (byte).
      // The latter two maskings go beyond the JLS, but seem reasonable
      // since our lane types are first-class types, not just dressed
      // up ints.

*** 875,34 ***
              that = this.lanewise(XOR, that).lanewise(AND, tother);
              return this.lanewise(XOR, that);
          }
          int opc = opCode(op);
          return VectorSupport.ternaryOp(
!             opc, getClass(), int.class, length(),
!             this, that, tother,
!             TERN_IMPL.find(op, opc, (opc_) -> {
-               switch (opc_) {
-                 default: return null;
-                 }}));
      }
-     private static final
-     ImplCache<Ternary,TernaryOperation<IntVector>> TERN_IMPL
-         = new ImplCache<>(Ternary.class, IntVector.class);
  
      /**
       * {@inheritDoc} <!--workaround-->
       * @see #lanewise(VectorOperators.Ternary,int,int,VectorMask)
       * @see #lanewise(VectorOperators.Ternary,Vector,int,VectorMask)
       * @see #lanewise(VectorOperators.Ternary,int,Vector,VectorMask)
       */
!     @ForceInline
!     public final
      IntVector lanewise(VectorOperators.Ternary op,
                                    Vector<Integer> v1,
                                    Vector<Integer> v2,
!                                   VectorMask<Integer> m) {
!         return blend(lanewise(op, v1, v2), m);
      }
  
      /**
       * Combines the lane values of this vector
       * with the values of two broadcast scalars.
--- 995,63 ---
              that = this.lanewise(XOR, that).lanewise(AND, tother);
              return this.lanewise(XOR, that);
          }
          int opc = opCode(op);
          return VectorSupport.ternaryOp(
!             opc, getClass(), null, int.class, length(),
!             this, that, tother, null,
!             TERN_IMPL.find(op, opc, IntVector::ternaryOperations));
      }
  
      /**
       * {@inheritDoc} <!--workaround-->
       * @see #lanewise(VectorOperators.Ternary,int,int,VectorMask)
       * @see #lanewise(VectorOperators.Ternary,Vector,int,VectorMask)
       * @see #lanewise(VectorOperators.Ternary,int,Vector,VectorMask)
       */
!     @Override
!     public abstract
      IntVector lanewise(VectorOperators.Ternary op,
                                    Vector<Integer> v1,
                                    Vector<Integer> v2,
!                                   VectorMask<Integer> m);
!     @ForceInline
+     final
+     IntVector lanewiseTemplate(VectorOperators.Ternary op,
+                                           Class<? extends VectorMask<Integer>> maskClass,
+                                           Vector<Integer> v1,
+                                           Vector<Integer> v2,
+                                           VectorMask<Integer> m) {
          // Masked ternary lanewise template: checks both extra operands and
          // the mask against this vector, handles BITWISE_BLEND inline, and
          // passes any other operation to VectorSupport.ternaryOp with m.
+         IntVector that = (IntVector) v1;
+         IntVector tother = (IntVector) v2;
+         // It's a word: https://www.dictionary.com/browse/tother
+         // See also Chapter 11 of Dickens, Our Mutual Friend:
+         // "Totherest Governor," replied Mr Riderhood...
+         that.check(this);
+         tother.check(this);
+         m.check(maskClass, this);
+ 
+         if (op == BITWISE_BLEND) {
+             // FIXME: Support this in the JIT.
              // Computes this ^ ((this ^ that) & tother). The inner XOR and
              // AND are unmasked; only the final XOR is applied under m.
+             that = this.lanewise(XOR, that).lanewise(AND, tother);
+             return this.lanewise(XOR, that, m);
+         }
+         int opc = opCode(op);
          // The implementation lambda is looked up through TERN_IMPL.find,
          // with ternaryOperations supplied as the factory for this opcode.
+         return VectorSupport.ternaryOp(
+             opc, getClass(), maskClass, int.class, length(),
+             this, that, tother, m,
+             TERN_IMPL.find(op, opc, IntVector::ternaryOperations));
+     }
+ 
+     private static final
+     ImplCache<Ternary, TernaryOperation<IntVector, VectorMask<Integer>>>
+         TERN_IMPL = new ImplCache<>(Ternary.class, IntVector.class);
+ 
+     private static TernaryOperation<IntVector, VectorMask<Integer>> ternaryOperations(int opc_) {
          // Factory for ternary operation lambdas. The switch has no cases
          // besides default, so it returns null for every operation code.
+         switch (opc_) {
+             default: return null;
+         }
      }
  
      /**
       * Combines the lane values of this vector
       * with the values of two broadcast scalars.

*** 955,11 ***
      public final
      IntVector lanewise(VectorOperators.Ternary op, //(op,e1,e2,m)
                                    int e1,
                                    int e2,
                                    VectorMask<Integer> m) {
!         return blend(lanewise(op, e1, e2), m);
      }
  
      /**
       * Combines the lane values of this vector
       * with the values of another vector and a broadcast scalar.
--- 1104,11 ---
      public final
      IntVector lanewise(VectorOperators.Ternary op, //(op,e1,e2,m)
                                    int e1,
                                    int e2,
                                    VectorMask<Integer> m) {
!         return lanewise(op, broadcast(e1), broadcast(e2), m);
      }
  
      /**
       * Combines the lane values of this vector
       * with the values of another vector and a broadcast scalar.

*** 1013,11 ***
      public final
      IntVector lanewise(VectorOperators.Ternary op, //(op,v1,e2,m)
                                    Vector<Integer> v1,
                                    int e2,
                                    VectorMask<Integer> m) {
!         return blend(lanewise(op, v1, e2), m);
      }
  
      /**
       * Combines the lane values of this vector
       * with the values of another vector and a broadcast scalar.
--- 1162,11 ---
      public final
      IntVector lanewise(VectorOperators.Ternary op, //(op,v1,e2,m)
                                    Vector<Integer> v1,
                                    int e2,
                                    VectorMask<Integer> m) {
!         return lanewise(op, v1, broadcast(e2), m);
      }
  
      /**
       * Combines the lane values of this vector
       * with the values of another vector and a broadcast scalar.

*** 1070,11 ***
      public final
      IntVector lanewise(VectorOperators.Ternary op, //(op,e1,v2,m)
                                    int e1,
                                    Vector<Integer> v2,
                                    VectorMask<Integer> m) {
!         return blend(lanewise(op, e1, v2), m);
      }
  
      // (Thus endeth the Great and Mighty Ternary Ogdoad.)
      // https://en.wikipedia.org/wiki/Ogdoad
  
--- 1219,11 ---
      public final
      IntVector lanewise(VectorOperators.Ternary op, //(op,e1,v2,m)
                                    int e1,
                                    Vector<Integer> v2,
                                    VectorMask<Integer> m) {
!         return lanewise(op, broadcast(e1), v2, m);
      }
  
      // (Thus endeth the Great and Mighty Ternary Ogdoad.)
      // https://en.wikipedia.org/wiki/Ogdoad
  

*** 1742,28 ***
      /*package-private*/
      @ForceInline
      final
      <M extends VectorMask<Integer>>
      M compareTemplate(Class<M> maskType, Comparison op, Vector<Integer> v) {
-         Objects.requireNonNull(v);
-         IntSpecies vsp = vspecies();
          IntVector that = (IntVector) v;
          that.check(this);
          int opc = opCode(op);
          return VectorSupport.compare(
              opc, getClass(), maskType, int.class, length(),
!             this, that,
!             (cond, v0, v1) -> {
                  AbstractMask<Integer> m
                      = v0.bTest(cond, v1, (cond_, i, a, b)
                                 -> compareWithOp(cond, a, b));
                  @SuppressWarnings("unchecked")
                  M m2 = (M) m;
                  return m2;
              });
      }
  
      @ForceInline
      private static boolean compareWithOp(int cond, int a, int b) {
          return switch (cond) {
              case BT_eq -> a == b;
              case BT_ne -> a != b;
--- 1891,48 ---
      /*package-private*/
      // Unmasked comparison template: checks the second operand against this
      // vector and passes the comparison to VectorSupport.compare with a
      // null mask argument.
      @ForceInline
      final
      <M extends VectorMask<Integer>>
      M compareTemplate(Class<M> maskType, Comparison op, Vector<Integer> v) {
          IntVector that = (IntVector) v;
          that.check(this);
          int opc = opCode(op);
          return VectorSupport.compare(
              opc, getClass(), maskType, int.class, length(),
!             this, that, null,
              // The lambda ignores its mask parameter m1 and builds the
              // result from bTest alone. The inner lambda reads the outer
              // cond rather than its own cond_ parameter.
!             (cond, v0, v1, m1) -> {
                  AbstractMask<Integer> m
                      = v0.bTest(cond, v1, (cond_, i, a, b)
                                 -> compareWithOp(cond, a, b));
                  @SuppressWarnings("unchecked")
                  M m2 = (M) m;
                  return m2;
              });
      }
  
+     /*package-private*/
      // Masked comparison template: checks the second operand and the mask
      // against this vector and passes the comparison to
      // VectorSupport.compare together with m.
+     @ForceInline
+     final
+     <M extends VectorMask<Integer>>
+     M compareTemplate(Class<M> maskType, Comparison op, Vector<Integer> v, M m) {
+         IntVector that = (IntVector) v;
+         that.check(this);
+         m.check(maskType, this);
+         int opc = opCode(op);
+         return VectorSupport.compare(
+             opc, getClass(), maskType, int.class, length(),
+             this, that, m,
              // The lambda runs the full lane-by-lane test via bTest and then
              // ANDs the result with the mask m1 it receives. The inner
              // lambda reads the outer cond rather than its own cond_.
+             (cond, v0, v1, m1) -> {
+                 AbstractMask<Integer> cmpM
+                     = v0.bTest(cond, v1, (cond_, i, a, b)
+                                -> compareWithOp(cond, a, b));
+                 @SuppressWarnings("unchecked")
+                 M m2 = (M) cmpM.and(m1);
+                 return m2;
+             });
+     }
+ 
      @ForceInline
      private static boolean compareWithOp(int cond, int a, int b) {
          return switch (cond) {
              case BT_eq -> a == b;
              case BT_ne -> a != b;

*** 1777,22 ***
              case BT_uge -> Integer.compareUnsigned(a, b) >= 0;
              default -> throw new AssertionError();
          };
      }
  
-     /**
-      * {@inheritDoc} <!--workaround-->
-      */
-     @Override
-     @ForceInline
-     public final
-     VectorMask<Integer> compare(VectorOperators.Comparison op,
-                                   Vector<Integer> v,
-                                   VectorMask<Integer> m) {
-         return compare(op, v).and(m);
-     }
- 
      /**
       * Tests this vector by comparing it with an input scalar,
       * according to the given comparison operation.
       *
       * This is a lane-wise binary test operation which applies
--- 1946,10 ---

*** 1847,11 ***
       */
      @ForceInline
      public final VectorMask<Integer> compare(VectorOperators.Comparison op,
                                                 int e,
                                                 VectorMask<Integer> m) {
!         return compare(op, e).and(m);
      }
  
      /**
       * {@inheritDoc} <!--workaround-->
       */
--- 2004,11 ---
       */
      @ForceInline
      public final VectorMask<Integer> compare(VectorOperators.Comparison op,
                                                 int e,
                                                 VectorMask<Integer> m) {
!         return compare(op, broadcast(e), m);
      }
  
      /**
       * {@inheritDoc} <!--workaround-->
       */

*** 2098,13 ***
      final
      <S extends VectorShuffle<Integer>>
      IntVector rearrangeTemplate(Class<S> shuffletype, S shuffle) {
          shuffle.checkIndexes();
          return VectorSupport.rearrangeOp(
!             getClass(), shuffletype, int.class, length(),
!             this, shuffle,
!             (v1, s_) -> v1.uOp((i, a) -> {
                  int ei = s_.laneSource(i);
                  return v1.lane(ei);
              }));
      }
  
--- 2255,13 ---
      final
      <S extends VectorShuffle<Integer>>
      IntVector rearrangeTemplate(Class<S> shuffletype, S shuffle) {
          shuffle.checkIndexes();
          return VectorSupport.rearrangeOp(
!             getClass(), shuffletype, null, int.class, length(),
!             this, shuffle, null,
!             (v1, s_, m_) -> v1.uOp((i, a) -> {
                  int ei = s_.laneSource(i);
                  return v1.lane(ei);
              }));
      }
  

*** 2117,28 ***
                                     VectorMask<Integer> m);
  
      /*package-private*/
      @ForceInline
      final
!     <S extends VectorShuffle<Integer>>
      IntVector rearrangeTemplate(Class<S> shuffletype,
                                             S shuffle,
!                                            VectorMask<Integer> m) {
!         IntVector unmasked =
!             VectorSupport.rearrangeOp(
-                 getClass(), shuffletype, int.class, length(),
-                 this, shuffle,
-                 (v1, s_) -> v1.uOp((i, a) -> {
-                     int ei = s_.laneSource(i);
-                     return ei < 0 ? 0 : v1.lane(ei);
-                 }));
          VectorMask<Integer> valid = shuffle.laneIsValid();
          if (m.andNot(valid).anyTrue()) {
              shuffle.checkIndexes();
              throw new AssertionError();
          }
!         return broadcast((int)0).blend(unmasked, m);
      }
  
      /**
       * {@inheritDoc} <!--workaround-->
       */
--- 2274,29 ---
                                     VectorMask<Integer> m);
  
      /*package-private*/
      @ForceInline
      final
!     <S extends VectorShuffle<Integer>, M extends VectorMask<Integer>>
      IntVector rearrangeTemplate(Class<S> shuffletype,
+                                            Class<M> masktype,
                                             S shuffle,
!                                            M m) {
! 
!         m.check(masktype, this);
          VectorMask<Integer> valid = shuffle.laneIsValid();
          if (m.andNot(valid).anyTrue()) {
              shuffle.checkIndexes();
              throw new AssertionError();
          }
!         return VectorSupport.rearrangeOp(
+                    getClass(), shuffletype, masktype, int.class, length(),
+                    this, shuffle, m,
+                    (v1, s_, m_) -> v1.uOp((i, a) -> {
+                         int ei = s_.laneSource(i);
+                         return ei < 0  || !m_.laneIsSet(i) ? 0 : v1.lane(ei);
+                    }));
      }
  
      /**
       * {@inheritDoc} <!--workaround-->
       */

*** 2157,21 ***
          VectorMask<Integer> valid = shuffle.laneIsValid();
          @SuppressWarnings("unchecked")
          S ws = (S) shuffle.wrapIndexes();
          IntVector r0 =
              VectorSupport.rearrangeOp(
!                 getClass(), shuffletype, int.class, length(),
!                 this, ws,
!                 (v0, s_) -> v0.uOp((i, a) -> {
                      int ei = s_.laneSource(i);
                      return v0.lane(ei);
                  }));
          IntVector r1 =
              VectorSupport.rearrangeOp(
!                 getClass(), shuffletype, int.class, length(),
!                 v, ws,
!                 (v1, s_) -> v1.uOp((i, a) -> {
                      int ei = s_.laneSource(i);
                      return v1.lane(ei);
                  }));
          return r1.blend(r0, valid);
      }
--- 2315,21 ---
          VectorMask<Integer> valid = shuffle.laneIsValid();
          @SuppressWarnings("unchecked")
          S ws = (S) shuffle.wrapIndexes();
          IntVector r0 =
              VectorSupport.rearrangeOp(
!                 getClass(), shuffletype, null, int.class, length(),
!                 this, ws, null,
!                 (v0, s_, m_) -> v0.uOp((i, a) -> {
                      int ei = s_.laneSource(i);
                      return v0.lane(ei);
                  }));
          IntVector r1 =
              VectorSupport.rearrangeOp(
!                 getClass(), shuffletype, null, int.class, length(),
!                 v, ws, null,
!                 (v1, s_, m_) -> v1.uOp((i, a) -> {
                      int ei = s_.laneSource(i);
                      return v1.lane(ei);
                  }));
          return r1.blend(r0, valid);
      }

*** 2430,13 ***
  
      /*package-private*/
      @ForceInline
      final
      int reduceLanesTemplate(VectorOperators.Associative op,
                                 VectorMask<Integer> m) {
!         IntVector v = reduceIdentityVector(op).blend(this, m);
!         return v.reduceLanesTemplate(op);
      }
  
      /*package-private*/
      @ForceInline
      final
--- 2588,22 ---
  
      /*package-private*/
      @ForceInline
      final
      int reduceLanesTemplate(VectorOperators.Associative op,
+                                Class<? extends VectorMask<Integer>> maskClass,
                                 VectorMask<Integer> m) {
!         m.check(maskClass, this);
!         if (op == FIRST_NONZERO) {
+             IntVector v = reduceIdentityVector(op).blend(this, m);
+             return v.reduceLanesTemplate(op);
+         }
+         int opc = opCode(op);
+         return fromBits(VectorSupport.reductionCoerced(
+             opc, getClass(), maskClass, int.class, length(),
+             this, m,
+             REDUCE_IMPL.find(op, opc, IntVector::reductionOperations)));
      }
  
      /*package-private*/
      @ForceInline
      final

*** 2447,34 ***
                  = this.viewAsIntegralLanes().compare(NE, (int) 0);
              return this.lane(thisNZ.firstTrue());
          }
          int opc = opCode(op);
          return fromBits(VectorSupport.reductionCoerced(
!             opc, getClass(), int.class, length(),
!             this,
!             REDUCE_IMPL.find(op, opc, (opc_) -> {
-               switch (opc_) {
-               case VECTOR_OP_ADD: return v ->
-                       toBits(v.rOp((int)0, (i, a, b) -> (int)(a + b)));
-               case VECTOR_OP_MUL: return v ->
-                       toBits(v.rOp((int)1, (i, a, b) -> (int)(a * b)));
-               case VECTOR_OP_MIN: return v ->
-                       toBits(v.rOp(MAX_OR_INF, (i, a, b) -> (int) Math.min(a, b)));
-               case VECTOR_OP_MAX: return v ->
-                       toBits(v.rOp(MIN_OR_INF, (i, a, b) -> (int) Math.max(a, b)));
-               case VECTOR_OP_AND: return v ->
-                       toBits(v.rOp((int)-1, (i, a, b) -> (int)(a & b)));
-               case VECTOR_OP_OR: return v ->
-                       toBits(v.rOp((int)0, (i, a, b) -> (int)(a | b)));
-               case VECTOR_OP_XOR: return v ->
-                       toBits(v.rOp((int)0, (i, a, b) -> (int)(a ^ b)));
-               default: return null;
-               }})));
      }
      private static final
!     ImplCache<Associative,Function<IntVector,Long>> REDUCE_IMPL
!         = new ImplCache<>(Associative.class, IntVector.class);
  
      private
      @ForceInline
      IntVector reduceIdentityVector(VectorOperators.Associative op) {
          int opc = opCode(op);
--- 2614,38 ---
                  = this.viewAsIntegralLanes().compare(NE, (int) 0);
              return this.lane(thisNZ.firstTrue());
          }
          int opc = opCode(op);
          return fromBits(VectorSupport.reductionCoerced(
!             opc, getClass(), null, int.class, length(),
!             this, null,
!             REDUCE_IMPL.find(op, opc, IntVector::reductionOperations)));
      }
+ 
      private static final
!     ImplCache<Associative, ReductionOperation<IntVector, VectorMask<Integer>>>
!         REDUCE_IMPL = new ImplCache<>(Associative.class, IntVector.class);
+ 
+     private static ReductionOperation<IntVector, VectorMask<Integer>> reductionOperations(int opc_) {
+         switch (opc_) {
+             case VECTOR_OP_ADD: return (v, m) ->
+                     toBits(v.rOp((int)0, m, (i, a, b) -> (int)(a + b)));
+             case VECTOR_OP_MUL: return (v, m) ->
+                     toBits(v.rOp((int)1, m, (i, a, b) -> (int)(a * b)));
+             case VECTOR_OP_MIN: return (v, m) ->
+                     toBits(v.rOp(MAX_OR_INF, m, (i, a, b) -> (int) Math.min(a, b)));
+             case VECTOR_OP_MAX: return (v, m) ->
+                     toBits(v.rOp(MIN_OR_INF, m, (i, a, b) -> (int) Math.max(a, b)));
+             case VECTOR_OP_AND: return (v, m) ->
+                     toBits(v.rOp((int)-1, m, (i, a, b) -> (int)(a & b)));
+             case VECTOR_OP_OR: return (v, m) ->
+                     toBits(v.rOp((int)0, m, (i, a, b) -> (int)(a | b)));
+             case VECTOR_OP_XOR: return (v, m) ->
+                     toBits(v.rOp((int)0, m, (i, a, b) -> (int)(a ^ b)));
+             default: return null;
+         }
+     }
  
      private
      @ForceInline
      IntVector reduceIdentityVector(VectorOperators.Associative op) {
          int opc = opCode(op);

*** 2689,13 ***
                                         byte[] a, int offset,
                                         ByteOrder bo,
                                         VectorMask<Integer> m) {
          IntSpecies vsp = (IntSpecies) species;
          if (offset >= 0 && offset <= (a.length - species.vectorByteSize())) {
!             IntVector zero = vsp.zero();
-             IntVector v = zero.fromByteArray0(a, offset);
-             return zero.blend(v.maybeSwap(bo), m);
          }
  
          // FIXME: optimize
          checkMaskFromIndexSize(offset, vsp, m, 4, a.length);
          ByteBuffer wb = wrapper(a, bo);
--- 2860,11 ---
                                         byte[] a, int offset,
                                         ByteOrder bo,
                                         VectorMask<Integer> m) {
          IntSpecies vsp = (IntSpecies) species;
          if (offset >= 0 && offset <= (a.length - species.vectorByteSize())) {
!             return vsp.dummyVector().fromByteArray0(a, offset, m).maybeSwap(bo);
          }
  
          // FIXME: optimize
          checkMaskFromIndexSize(offset, vsp, m, 4, a.length);
          ByteBuffer wb = wrapper(a, bo);

*** 2753,12 ***
      IntVector fromArray(VectorSpecies<Integer> species,
                                     int[] a, int offset,
                                     VectorMask<Integer> m) {
          IntSpecies vsp = (IntSpecies) species;
          if (offset >= 0 && offset <= (a.length - species.length())) {
!             IntVector zero = vsp.zero();
-             return zero.blend(zero.fromArray0(a, offset), m);
          }
  
          // FIXME: optimize
          checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
          return vsp.vOp(m, i -> a[offset + i]);
--- 2922,11 ---
      IntVector fromArray(VectorSpecies<Integer> species,
                                     int[] a, int offset,
                                     VectorMask<Integer> m) {
          IntSpecies vsp = (IntSpecies) species;
          if (offset >= 0 && offset <= (a.length - species.length())) {
!             return vsp.dummyVector().fromArray0(a, offset, m);
          }
  
          // FIXME: optimize
          checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
          return vsp.vOp(m, i -> a[offset + i]);

*** 2812,17 ***
              .add(offset);
  
          vix = VectorIntrinsics.checkIndex(vix, a.length);
  
          return VectorSupport.loadWithMap(
!             vectorType, int.class, vsp.laneCount(),
!             IntVector.species(vsp.indexShape()).vectorType(),
!             a, ARRAY_BASE, vix,
              a, offset, indexMap, mapOffset, vsp,
!             (int[] c, int idx, int[] iMap, int idy, IntSpecies s) ->
              s.vOp(n -> c[idx + iMap[idy+n]]));
!         }
  
      /**
       * Gathers a new vector composed of elements from an array of type
       * {@code int[]},
       * under the control of a mask, and
--- 2980,17 ---
              .add(offset);
  
          vix = VectorIntrinsics.checkIndex(vix, a.length);
  
          return VectorSupport.loadWithMap(
!             vectorType, null, int.class, vsp.laneCount(),
!             isp.vectorType(),
!             a, ARRAY_BASE, vix, null,
              a, offset, indexMap, mapOffset, vsp,
!             (c, idx, iMap, idy, s, vm) ->
              s.vOp(n -> c[idx + iMap[idy+n]]));
!     }
  
      /**
       * Gathers a new vector composed of elements from an array of type
       * {@code int[]},
       * under the control of a mask, and

*** 2866,13 ***
                                     VectorMask<Integer> m) {
          if (m.allTrue()) {
              return fromArray(species, a, offset, indexMap, mapOffset);
          }
          else {
-             // FIXME: Cannot vectorize yet, if there's a mask.
              IntSpecies vsp = (IntSpecies) species;
!             return vsp.vOp(m, n -> a[offset + indexMap[mapOffset + n]]);
          }
      }
  
  
  
--- 3034,12 ---
                                     VectorMask<Integer> m) {
          if (m.allTrue()) {
              return fromArray(species, a, offset, indexMap, mapOffset);
          }
          else {
              IntSpecies vsp = (IntSpecies) species;
!             return vsp.dummyVector().fromArray0(a, offset, indexMap, mapOffset, m);
          }
      }
  
  
  

*** 2962,13 ***
                                          ByteBuffer bb, int offset,
                                          ByteOrder bo,
                                          VectorMask<Integer> m) {
          IntSpecies vsp = (IntSpecies) species;
          if (offset >= 0 && offset <= (bb.limit() - species.vectorByteSize())) {
!             IntVector zero = vsp.zero();
-             IntVector v = zero.fromByteBuffer0(bb, offset);
-             return zero.blend(v.maybeSwap(bo), m);
          }
  
          // FIXME: optimize
          checkMaskFromIndexSize(offset, vsp, m, 4, bb.limit());
          ByteBuffer wb = wrapper(bb, bo);
--- 3129,11 ---
                                          ByteBuffer bb, int offset,
                                          ByteOrder bo,
                                          VectorMask<Integer> m) {
          IntSpecies vsp = (IntSpecies) species;
          if (offset >= 0 && offset <= (bb.limit() - species.vectorByteSize())) {
!             return vsp.dummyVector().fromByteBuffer0(bb, offset, m).maybeSwap(bo);
          }
  
          // FIXME: optimize
          checkMaskFromIndexSize(offset, vsp, m, 4, bb.limit());
          ByteBuffer wb = wrapper(bb, bo);

*** 3036,14 ***
      void intoArray(int[] a, int offset,
                     VectorMask<Integer> m) {
          if (m.allTrue()) {
              intoArray(a, offset);
          } else {
-             // FIXME: optimize
              IntSpecies vsp = vspecies();
              checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
!             stOp(a, offset, m, (arr, off, i, v) -> arr[off+i] = v);
          }
      }
  
      /**
       * Scatters this vector into an array of type {@code int[]}
--- 3201,13 ---
      void intoArray(int[] a, int offset,
                     VectorMask<Integer> m) {
          if (m.allTrue()) {
              intoArray(a, offset);
          } else {
              IntSpecies vsp = vspecies();
              checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
!             intoArray0(a, offset, m);
          }
      }
  
      /**
       * Scatters this vector into an array of type {@code int[]}

*** 3083,16 ***
              .add(offset);
  
          vix = VectorIntrinsics.checkIndex(vix, a.length);
  
          VectorSupport.storeWithMap(
!             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
              isp.vectorType(),
              a, arrayAddress(a, 0), vix,
!             this,
              a, offset, indexMap, mapOffset,
!             (arr, off, v, map, mo)
              -> v.stOp(arr, off,
                        (arr_, off_, i, e) -> {
                            int j = map[mo + i];
                            arr[off + j] = e;
                        }));
--- 3247,16 ---
              .add(offset);
  
          vix = VectorIntrinsics.checkIndex(vix, a.length);
  
          VectorSupport.storeWithMap(
!             vsp.vectorType(), null, vsp.elementType(), vsp.laneCount(),
              isp.vectorType(),
              a, arrayAddress(a, 0), vix,
!             this, null,
              a, offset, indexMap, mapOffset,
!             (arr, off, v, map, mo, vm)
              -> v.stOp(arr, off,
                        (arr_, off_, i, e) -> {
                            int j = map[mo + i];
                            arr[off + j] = e;
                        }));

*** 3135,16 ***
                     VectorMask<Integer> m) {
          if (m.allTrue()) {
              intoArray(a, offset, indexMap, mapOffset);
          }
          else {
!             // FIXME: Cannot vectorize yet, if there's a mask.
-             stOp(a, offset, m,
-                  (arr, off, i, e) -> {
-                      int j = indexMap[mapOffset + i];
-                      arr[off + j] = e;
-                  });
          }
      }
  
  
  
--- 3299,11 ---
                     VectorMask<Integer> m) {
          if (m.allTrue()) {
              intoArray(a, offset, indexMap, mapOffset);
          }
          else {
!             intoArray0(a, offset, indexMap, mapOffset, m);
          }
      }
  
  
  

*** 3170,16 ***
                         ByteOrder bo,
                         VectorMask<Integer> m) {
          if (m.allTrue()) {
              intoByteArray(a, offset, bo);
          } else {
-             // FIXME: optimize
              IntSpecies vsp = vspecies();
              checkMaskFromIndexSize(offset, vsp, m, 4, a.length);
!             ByteBuffer wb = wrapper(a, bo);
-             this.stOp(wb, offset, m,
-                     (wb_, o, i, e) -> wb_.putInt(o + i * 4, e));
          }
      }
  
      /**
       * {@inheritDoc} <!--workaround-->
--- 3329,13 ---
                         ByteOrder bo,
                         VectorMask<Integer> m) {
          if (m.allTrue()) {
              intoByteArray(a, offset, bo);
          } else {
              IntSpecies vsp = vspecies();
              checkMaskFromIndexSize(offset, vsp, m, 4, a.length);
!             maybeSwap(bo).intoByteArray0(a, offset, m);
          }
      }
  
      /**
       * {@inheritDoc} <!--workaround-->

*** 3187,11 ***
      @Override
      @ForceInline
      public final
      void intoByteBuffer(ByteBuffer bb, int offset,
                          ByteOrder bo) {
!         if (bb.isReadOnly()) {
              throw new ReadOnlyBufferException();
          }
          offset = checkFromIndexSize(offset, byteSize(), bb.limit());
          maybeSwap(bo).intoByteBuffer0(bb, offset);
      }
--- 3343,11 ---
      @Override
      @ForceInline
      public final
      void intoByteBuffer(ByteBuffer bb, int offset,
                          ByteOrder bo) {
!         if (ScopedMemoryAccess.isReadOnly(bb)) {
              throw new ReadOnlyBufferException();
          }
          offset = checkFromIndexSize(offset, byteSize(), bb.limit());
          maybeSwap(bo).intoByteBuffer0(bb, offset);
      }

*** 3206,19 ***
                          ByteOrder bo,
                          VectorMask<Integer> m) {
          if (m.allTrue()) {
              intoByteBuffer(bb, offset, bo);
          } else {
-             // FIXME: optimize
              if (bb.isReadOnly()) {
                  throw new ReadOnlyBufferException();
              }
              IntSpecies vsp = vspecies();
              checkMaskFromIndexSize(offset, vsp, m, 4, bb.limit());
!             ByteBuffer wb = wrapper(bb, bo);
-             this.stOp(wb, offset, m,
-                     (wb_, o, i, e) -> wb_.putInt(o + i * 4, e));
          }
      }
  
      // ================================================
  
--- 3362,16 ---
                          ByteOrder bo,
                          VectorMask<Integer> m) {
          if (m.allTrue()) {
              intoByteBuffer(bb, offset, bo);
          } else {
              if (bb.isReadOnly()) {
                  throw new ReadOnlyBufferException();
              }
              IntSpecies vsp = vspecies();
              checkMaskFromIndexSize(offset, vsp, m, 4, bb.limit());
!             maybeSwap(bo).intoByteBuffer0(bb, offset, m);
          }
      }
  
      // ================================================
  

*** 3252,10 ***
--- 3405,61 ---
              a, offset, vsp,
              (arr, off, s) -> s.ldOp(arr, off,
                                      (arr_, off_, i) -> arr_[off_ + i]));
      }
  
+     /*package-private*/
+     abstract
+     IntVector fromArray0(int[] a, int offset, VectorMask<Integer> m);
+     @ForceInline
+     final
+     <M extends VectorMask<Integer>>
+     IntVector fromArray0Template(Class<M> maskClass, int[] a, int offset, M m) {
+         m.check(species());
+         IntSpecies vsp = vspecies();
+         return VectorSupport.loadMasked(
+             vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
+             a, arrayAddress(a, offset), m,
+             a, offset, vsp,
+             (arr, off, s, vm) -> s.ldOp(arr, off, vm,
+                                         (arr_, off_, i) -> arr_[off_ + i]));
+     }
+ 
+     /*package-private*/
+     abstract
+     IntVector fromArray0(int[] a, int offset,
+                                     int[] indexMap, int mapOffset,
+                                     VectorMask<Integer> m);
+     @ForceInline
+     final
+     <M extends VectorMask<Integer>>
+     IntVector fromArray0Template(Class<M> maskClass, int[] a, int offset,
+                                             int[] indexMap, int mapOffset, M m) {
+         IntSpecies vsp = vspecies();
+         IntVector.IntSpecies isp = IntVector.species(vsp.indexShape());
+         Objects.requireNonNull(a);
+         Objects.requireNonNull(indexMap);
+         m.check(vsp);
+         Class<? extends IntVector> vectorType = vsp.vectorType();
+ 
+         // Index vector: vix[0:n] = k -> offset + indexMap[mapOffset + k]
+         IntVector vix = IntVector
+             .fromArray(isp, indexMap, mapOffset)
+             .add(offset);
+ 
+         // FIXME: Check index under mask controlling.
+         vix = VectorIntrinsics.checkIndex(vix, a.length);
+ 
+         return VectorSupport.loadWithMap(
+             vectorType, maskClass, int.class, vsp.laneCount(),
+             isp.vectorType(),
+             a, ARRAY_BASE, vix, m,
+             a, offset, indexMap, mapOffset, vsp,
+             (c, idx, iMap, idy, s, vm) ->
+             s.vOp(vm, n -> c[idx + iMap[idy+n]]));
+     }
+ 
  
  
      @Override
      abstract
      IntVector fromByteArray0(byte[] a, int offset);

*** 3272,10 ***
--- 3476,29 ---
                  return s.ldOp(wb, off,
                          (wb_, o, i) -> wb_.getInt(o + i * 4));
              });
      }
  
+     abstract
+     IntVector fromByteArray0(byte[] a, int offset, VectorMask<Integer> m);
+     @ForceInline
+     final
+     <M extends VectorMask<Integer>>
+     IntVector fromByteArray0Template(Class<M> maskClass, byte[] a, int offset, M m) {
+         IntSpecies vsp = vspecies();
+         m.check(vsp);
+         return VectorSupport.loadMasked(
+             vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
+             a, byteArrayAddress(a, offset), m,
+             a, offset, vsp,
+             (arr, off, s, vm) -> {
+                 ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
+                 return s.ldOp(wb, off, vm,
+                         (wb_, o, i) -> wb_.getInt(o + i * 4));
+             });
+     }
+ 
      abstract
      IntVector fromByteBuffer0(ByteBuffer bb, int offset);
      @ForceInline
      final
      IntVector fromByteBuffer0Template(ByteBuffer bb, int offset) {

*** 3288,10 ***
--- 3511,28 ---
                      return s.ldOp(wb, off,
                              (wb_, o, i) -> wb_.getInt(o + i * 4));
                  });
      }
  
+     abstract
+     IntVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Integer> m);
+     @ForceInline
+     final
+     <M extends VectorMask<Integer>>
+     IntVector fromByteBuffer0Template(Class<M> maskClass, ByteBuffer bb, int offset, M m) {
+         IntSpecies vsp = vspecies();
+         m.check(vsp);
+         return ScopedMemoryAccess.loadFromByteBufferMasked(
+                 vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
+                 bb, offset, m, vsp,
+                 (buf, off, s, vm) -> {
+                     ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
+                     return s.ldOp(wb, off, vm,
+                             (wb_, o, i) -> wb_.getInt(o + i * 4));
+                 });
+     }
+ 
      // Unchecked storing operations in native byte order.
      // Caller is responsible for applying index checks, masking, and
      // byte swapping.
  
      abstract

*** 3307,10 ***
--- 3548,62 ---
              (arr, off, v)
              -> v.stOp(arr, off,
                        (arr_, off_, i, e) -> arr_[off_+i] = e));
      }
  
+     abstract
+     void intoArray0(int[] a, int offset, VectorMask<Integer> m);
+     @ForceInline
+     final
+     <M extends VectorMask<Integer>>
+     void intoArray0Template(Class<M> maskClass, int[] a, int offset, M m) {
+         m.check(species());
+         IntSpecies vsp = vspecies();
+         VectorSupport.storeMasked(
+             vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
+             a, arrayAddress(a, offset),
+             this, m, a, offset,
+             (arr, off, v, vm)
+             -> v.stOp(arr, off, vm,
+                       (arr_, off_, i, e) -> arr_[off_ + i] = e));
+     }
+ 
+     abstract
+     void intoArray0(int[] a, int offset,
+                     int[] indexMap, int mapOffset,
+                     VectorMask<Integer> m);
+     @ForceInline
+     final
+     <M extends VectorMask<Integer>>
+     void intoArray0Template(Class<M> maskClass, int[] a, int offset,
+                             int[] indexMap, int mapOffset, M m) {
+         m.check(species());
+         IntSpecies vsp = vspecies();
+         IntVector.IntSpecies isp = IntVector.species(vsp.indexShape());
+         // Index vector: vix[0:n] = i -> offset + indexMap[mo + i]
+         IntVector vix = IntVector
+             .fromArray(isp, indexMap, mapOffset)
+             .add(offset);
+ 
+         // FIXME: Check index under mask controlling.
+         vix = VectorIntrinsics.checkIndex(vix, a.length);
+ 
+         VectorSupport.storeWithMap(
+             vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
+             isp.vectorType(),
+             a, arrayAddress(a, 0), vix,
+             this, m,
+             a, offset, indexMap, mapOffset,
+             (arr, off, v, map, mo, vm)
+             -> v.stOp(arr, off, vm,
+                       (arr_, off_, i, e) -> {
+                           int j = map[mo + i];
+                           arr[off + j] = e;
+                       }));
+     }
+ 
+ 
      abstract
      void intoByteArray0(byte[] a, int offset);
      @ForceInline
      final
      void intoByteArray0Template(byte[] a, int offset) {

*** 3324,10 ***
--- 3617,29 ---
                  v.stOp(wb, off,
                          (tb_, o, i, e) -> tb_.putInt(o + i * 4, e));
              });
      }
  
+     abstract
+     void intoByteArray0(byte[] a, int offset, VectorMask<Integer> m);
+     @ForceInline
+     final
+     <M extends VectorMask<Integer>>
+     void intoByteArray0Template(Class<M> maskClass, byte[] a, int offset, M m) {
+         IntSpecies vsp = vspecies();
+         m.check(vsp);
+         VectorSupport.storeMasked(
+             vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
+             a, byteArrayAddress(a, offset),
+             this, m, a, offset,
+             (arr, off, v, vm) -> {
+                 ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
+                 v.stOp(wb, off, vm,
+                         (tb_, o, i, e) -> tb_.putInt(o + i * 4, e));
+             });
+     }
+ 
      @ForceInline
      final
      void intoByteBuffer0(ByteBuffer bb, int offset) {
          IntSpecies vsp = vspecies();
          ScopedMemoryAccess.storeIntoByteBuffer(

*** 3338,10 ***
--- 3650,29 ---
                      v.stOp(wb, off,
                              (wb_, o, i, e) -> wb_.putInt(o + i * 4, e));
                  });
      }
  
+     abstract
+     void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Integer> m);
+     @ForceInline
+     final
+     <M extends VectorMask<Integer>>
+     void intoByteBuffer0Template(Class<M> maskClass, ByteBuffer bb, int offset, M m) {
+         IntSpecies vsp = vspecies();
+         m.check(vsp);
+         ScopedMemoryAccess.storeIntoByteBufferMasked(
+                 vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
+                 this, m, bb, offset,
+                 (buf, off, v, vm) -> {
+                     ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
+                     v.stOp(wb, off, vm,
+                             (wb_, o, i, e) -> wb_.putInt(o + i * 4, e));
+                 });
+     }
+ 
+ 
      // End of low-level memory operations.
  
      private static
      void checkMaskFromIndexSize(int offset,
                                  IntSpecies vsp,

*** 3655,11 ***
          }
  
          /*package-private*/
          @ForceInline
          <M> IntVector ldOp(M memory, int offset,
!                                       AbstractMask<Integer> m,
                                        FLdOp<M> f) {
              return dummyVector().ldOp(memory, offset, m, f);
          }
  
          /*package-private*/
--- 3986,11 ---
          }
  
          /*package-private*/
          @ForceInline
          <M> IntVector ldOp(M memory, int offset,
!                                       VectorMask<Integer> m,
                                        FLdOp<M> f) {
              return dummyVector().ldOp(memory, offset, m, f);
          }
  
          /*package-private*/
< prev index next >