
src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ByteVector.java

@@ -22,18 +22,18 @@
   * or visit www.oracle.com if you need additional information or have any
   * questions.
   */
  package jdk.incubator.vector;
  
- import java.nio.ByteBuffer;
  import java.nio.ByteOrder;
- import java.nio.ReadOnlyBufferException;
  import java.util.Arrays;
  import java.util.Objects;
  import java.util.function.Function;
- import java.util.function.UnaryOperator;
  
+ import jdk.incubator.foreign.MemorySegment;
+ import jdk.incubator.foreign.ValueLayout;
+ import jdk.internal.access.foreign.MemorySegmentProxy;
  import jdk.internal.misc.ScopedMemoryAccess;
  import jdk.internal.misc.Unsafe;
  import jdk.internal.vm.annotation.ForceInline;
  import jdk.internal.vm.vector.VectorSupport;
  

@@ -55,10 +55,12 @@
          super(vec);
      }
  
      static final int FORBID_OPCODE_KIND = VO_ONLYFP;
  
+     static final ValueLayout.OfByte ELEMENT_LAYOUT = ValueLayout.JAVA_BYTE.withBitAlignment(8);
+ 
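For byte lanes the withBitAlignment(8) call changes nothing, since ValueLayout.JAVA_BYTE is already 8-bit aligned; in the sibling classes generated from the same template it relaxes the element type's natural alignment so that loads at arbitrary byte offsets remain legal. A quick check of that assumption (illustrative only, not part of this patch):

    assert ValueLayout.JAVA_BYTE.bitAlignment() == 8;   // byte's natural alignment
    assert ELEMENT_LAYOUT.bitAlignment() == 8;          // unchanged by withBitAlignment(8)
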
      @ForceInline
      static int opCode(Operator op) {
          return VectorOperators.opCode(op, VO_OPCODE_VALID, FORBID_OPCODE_KIND);
      }
      @ForceInline

@@ -349,10 +351,49 @@
              }
          }
          return vectorFactory(res);
      }
  
+     /*package-private*/
+     interface FLdLongOp {
+         byte apply(MemorySegment memory, long offset, int i);
+     }
+ 
+     /*package-private*/
+     @ForceInline
+     final
+     ByteVector ldLongOp(MemorySegment memory, long offset,
+                                   FLdLongOp f) {
+         // no vec = vec(); an unmasked load builds every lane from the segment
+         byte[] res = new byte[length()];
+         for (int i = 0; i < res.length; i++) {
+             res[i] = f.apply(memory, offset, i);
+         }
+         return vectorFactory(res);
+     }
+ 
+     /*package-private*/
+     @ForceInline
+     final
+     ByteVector ldLongOp(MemorySegment memory, long offset,
+                                   VectorMask<Byte> m,
+                                   FLdLongOp f) {
+         // no vec = vec(); unset lanes keep the default zero value
+         byte[] res = new byte[length()];
+         boolean[] mbits = ((AbstractMask<Byte>)m).getBits();
+         for (int i = 0; i < res.length; i++) {
+             if (mbits[i]) {
+                 res[i] = f.apply(memory, offset, i);
+             }
+         }
+         return vectorFactory(res);
+     }
+ 
+     static byte memorySegmentGet(MemorySegment ms, long o, int i) {
+         return ms.get(ELEMENT_LAYOUT, o + i * 1L);
+     }
+ 
      interface FStOp<M> {
          void apply(M memory, int offset, int i, byte a);
      }
  
      /*package-private*/

@@ -379,10 +420,44 @@
                  f.apply(memory, offset, i, vec[i]);
              }
          }
      }
  
+     interface FStLongOp {
+         void apply(MemorySegment memory, long offset, int i, byte a);
+     }
+ 
+     /*package-private*/
+     @ForceInline
+     final
+     void stLongOp(MemorySegment memory, long offset,
+                   FStLongOp f) {
+         byte[] vec = vec();
+         for (int i = 0; i < vec.length; i++) {
+             f.apply(memory, offset, i, vec[i]);
+         }
+     }
+ 
+     /*package-private*/
+     @ForceInline
+     final
+     void stLongOp(MemorySegment memory, long offset,
+                   VectorMask<Byte> m,
+                   FStLongOp f) {
+         byte[] vec = vec();
+         boolean[] mbits = ((AbstractMask<Byte>)m).getBits();
+         for (int i = 0; i < vec.length; i++) {
+             if (mbits[i]) {
+                 f.apply(memory, offset, i, vec[i]);
+             }
+         }
+     }
+ 
+     static void memorySegmentSet(MemorySegment ms, long o, int i, byte e) {
+         ms.set(ELEMENT_LAYOUT, o + i * 1L, e);
+     }
+ 
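The ldLongOp/stLongOp loops together with memorySegmentGet/memorySegmentSet form the scalar fallback used when the VectorSupport intrinsic does not apply; each lane reduces to one layout-based segment access at offset + laneIndex. A small round-trip through the two accessors (illustrative sketch, only meaningful within this package):

    byte[] backing = new byte[16];
    MemorySegment seg = MemorySegment.ofArray(backing);    // heap segment over byte[]
    ByteVector.memorySegmentSet(seg, 0L, 3, (byte) 42);    // writes backing[3]
    byte b = ByteVector.memorySegmentGet(seg, 0L, 3);      // reads 42 back
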
      // Binary test
  
      /*package-private*/
      interface FBinTest {
          boolean apply(int cond, int i, byte a, byte b);

@@ -429,10 +504,40 @@
      @ForceInline
      static byte fromBits(long bits) {
          return ((byte)bits);
      }
  
+     static ByteVector expandHelper(Vector<Byte> v, VectorMask<Byte> m) {
+         VectorSpecies<Byte> vsp = m.vectorSpecies();
+         ByteVector r  = (ByteVector) vsp.zero();
+         ByteVector vi = (ByteVector) v;
+         if (m.allTrue()) {
+             return vi;
+         }
+         for (int i = 0, j = 0; i < vsp.length(); i++) {
+             if (m.laneIsSet(i)) {
+                 r = r.withLane(i, vi.lane(j++));
+             }
+         }
+         return r;
+     }
+ 
+     static ByteVector compressHelper(Vector<Byte> v, VectorMask<Byte> m) {
+         VectorSpecies<Byte> vsp = m.vectorSpecies();
+         ByteVector r  = (ByteVector) vsp.zero();
+         ByteVector vi = (ByteVector) v;
+         if (m.allTrue()) {
+             return vi;
+         }
+         for (int i = 0, j = 0; i < vsp.length(); i++) {
+             if (m.laneIsSet(i)) {
+                 r = r.withLane(j++, vi.lane(i));
+             }
+         }
+         return r;
+     }
+ 
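Concretely, with an 8-lane species, input lanes [10, 20, 30, 40, 50, 60, 70, 80] and the mask F T F T F T F T, compressHelper gathers the set lanes into the low end, and expandHelper scatters the low input lanes into the set positions. A sketch of those values (illustrative, package-internal use of the helpers):

    VectorSpecies<Byte> S = ByteVector.SPECIES_64;               // 8 byte lanes
    ByteVector v = ByteVector.fromArray(S, new byte[]{10, 20, 30, 40, 50, 60, 70, 80}, 0);
    VectorMask<Byte> m = VectorMask.fromArray(S, new boolean[]{false, true, false, true,
                                                               false, true, false, true}, 0);
    ByteVector packed = compressHelper(v, m);   // lanes: 20, 40, 60, 80, 0, 0, 0, 0
    ByteVector spread = expandHelper(v, m);     // lanes:  0, 10,  0, 20, 0, 30, 0, 40
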
      // Static factories (other than memory operations)
  
      // Note: A surprising behavior in javadoc
      // sometimes makes a lone /** {@inheritDoc} */
      // comment drop the method altogether,

@@ -618,10 +723,20 @@
          switch (opc_) {
              case VECTOR_OP_NEG: return (v0, m) ->
                      v0.uOp(m, (i, a) -> (byte) -a);
              case VECTOR_OP_ABS: return (v0, m) ->
                      v0.uOp(m, (i, a) -> (byte) Math.abs(a));
+             case VECTOR_OP_BIT_COUNT: return (v0, m) ->
+                     v0.uOp(m, (i, a) -> (byte) bitCount(a));
+             case VECTOR_OP_TZ_COUNT: return (v0, m) ->
+                     v0.uOp(m, (i, a) -> (byte) numberOfTrailingZeros(a));
+             case VECTOR_OP_LZ_COUNT: return (v0, m) ->
+                     v0.uOp(m, (i, a) -> (byte) numberOfLeadingZeros(a));
+             case VECTOR_OP_REVERSE: return (v0, m) ->
+                     v0.uOp(m, (i, a) -> reverse(a));
+             case VECTOR_OP_REVERSE_BYTES: return (v0, m) ->
+                     v0.uOp(m, (i, a) -> a);
              default: return null;
          }
      }
  
      // Binary lanewise support

@@ -1744,10 +1859,29 @@
      public final
      ByteVector abs() {
          return lanewise(ABS);
      }
  
+     static int bitCount(byte a) {
+         return Integer.bitCount((int)a & 0xFF);
+     }
+     static int numberOfTrailingZeros(byte a) {
+         return a != 0 ? Integer.numberOfTrailingZeros(a) : 8;
+     }
+     static int numberOfLeadingZeros(byte a) {
+         return a >= 0 ? Integer.numberOfLeadingZeros(a) - 24 : 0;
+     }
+ 
+     static byte reverse(byte a) {
+         if (a == 0 || a == -1) return a;
+ 
+         byte b = rotateLeft(a, 4);
+         b = (byte) (((b & 0x55) << 1) | ((b & 0xAA) >>> 1));
+         b = (byte) (((b & 0x33) << 2) | ((b & 0xCC) >>> 2));
+         return b;
+     }
+ 
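For example, for a = (byte) 0b0001_0100 (decimal 20): bitCount is 2, numberOfTrailingZeros is 2, numberOfLeadingZeros is 3 (counted within the 8-bit lane), and reverse gives (byte) 0b0010_1000 (decimal 40). As a quick illustration:

    byte a = 0b0001_0100;                        // 20
    assert bitCount(a) == 2;
    assert numberOfTrailingZeros(a) == 2;
    assert numberOfLeadingZeros(a) == 3;         // zeros above the highest set bit, 8-bit view
    assert reverse(a) == (byte) 0b0010_1000;     // 40
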
      // not (~)
      /**
       * Computes the bitwise logical complement ({@code ~})
       * of this vector.
       *

@@ -2370,10 +2504,49 @@
                                       shuffleType, byte.class, length(),
                                       this, vsp,
                                       ByteVector::toShuffle0);
      }
  
+     /**
+      * {@inheritDoc} <!--workaround-->
+      * @since 19
+      */
+     @Override
+     public abstract
+     ByteVector compress(VectorMask<Byte> m);
+ 
+     /*package-private*/
+     @ForceInline
+     final
+     <M extends AbstractMask<Byte>>
+     ByteVector compressTemplate(Class<M> masktype, M m) {
+         m.check(masktype, this);
+         return (ByteVector) VectorSupport.comExpOp(VectorSupport.VECTOR_OP_COMPRESS, getClass(), masktype,
+                                                      byte.class, length(), this, m,
+                                                      (v1, m1) -> compressHelper(v1, m1));
+     }
+ 
+     /**
+      * {@inheritDoc} <!--workaround-->
+      * @since 19
+      */
+     @Override
+     public abstract
+     ByteVector expand(VectorMask<Byte> m);
+ 
+     /*package-private*/
+     @ForceInline
+     final
+     <M extends AbstractMask<Byte>>
+     ByteVector expandTemplate(Class<M> masktype, M m) {
+         m.check(masktype, this);
+         return (ByteVector) VectorSupport.comExpOp(VectorSupport.VECTOR_OP_EXPAND, getClass(), masktype,
+                                                      byte.class, length(), this, m,
+                                                      (v1, m1) -> expandHelper(v1, m1));
+     }
+ 
+ 
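In client code the two methods left-pack or spread lanes under a mask; a hypothetical use, packing the positive bytes of each full vector chunk (sketch only; scalar tail handling omitted, and out is assumed at least as long as in):

    static int packPositives(byte[] in, byte[] out) {
        VectorSpecies<Byte> S = ByteVector.SPECIES_PREFERRED;
        byte[] tmp = new byte[S.length()];
        int written = 0;
        for (int i = 0; i <= in.length - S.length(); i += S.length()) {
            ByteVector v = ByteVector.fromArray(S, in, i);
            VectorMask<Byte> m = v.compare(VectorOperators.GT, (byte) 0);
            v.compress(m).intoArray(tmp, 0);                      // packed lanes, zero-filled tail
            System.arraycopy(tmp, 0, out, written, m.trueCount());
            written += m.trueCount();
        }
        return written;
    }
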
      /**
       * {@inheritDoc} <!--workaround-->
       */
      @Override
      public abstract

@@ -2782,94 +2955,10 @@
              res[i] = (double) a[i];
          }
          return res;
      }
  
-     /**
-      * Loads a vector from a byte array starting at an offset.
-      * Bytes are composed into primitive lane elements according
-      * to the specified byte order.
-      * The vector is arranged into lanes according to
-      * <a href="Vector.html#lane-order">memory ordering</a>.
-      * <p>
-      * This method behaves as if it returns the result of calling
-      * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
-      * fromByteBuffer()} as follows:
-      * <pre>{@code
-      * var bb = ByteBuffer.wrap(a);
-      * var m = species.maskAll(true);
-      * return fromByteBuffer(species, bb, offset, bo, m);
-      * }</pre>
-      *
-      * @param species species of desired vector
-      * @param a the byte array
-      * @param offset the offset into the array
-      * @param bo the intended byte order
-      * @return a vector loaded from a byte array
-      * @throws IndexOutOfBoundsException
-      *         if {@code offset+N*ESIZE < 0}
-      *         or {@code offset+(N+1)*ESIZE > a.length}
-      *         for any lane {@code N} in the vector
-      */
-     @ForceInline
-     public static
-     ByteVector fromByteArray(VectorSpecies<Byte> species,
-                                        byte[] a, int offset,
-                                        ByteOrder bo) {
-         offset = checkFromIndexSize(offset, species.vectorByteSize(), a.length);
-         ByteSpecies vsp = (ByteSpecies) species;
-         return vsp.dummyVector().fromByteArray0(a, offset).maybeSwap(bo);
-     }
- 
-     /**
-      * Loads a vector from a byte array starting at an offset
-      * and using a mask.
-      * Lanes where the mask is unset are filled with the default
-      * value of {@code byte} (zero).
-      * Bytes are composed into primitive lane elements according
-      * to the specified byte order.
-      * The vector is arranged into lanes according to
-      * <a href="Vector.html#lane-order">memory ordering</a>.
-      * <p>
-      * This method behaves as if it returns the result of calling
-      * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
-      * fromByteBuffer()} as follows:
-      * <pre>{@code
-      * var bb = ByteBuffer.wrap(a);
-      * return fromByteBuffer(species, bb, offset, bo, m);
-      * }</pre>
-      *
-      * @param species species of desired vector
-      * @param a the byte array
-      * @param offset the offset into the array
-      * @param bo the intended byte order
-      * @param m the mask controlling lane selection
-      * @return a vector loaded from a byte array
-      * @throws IndexOutOfBoundsException
-      *         if {@code offset+N*ESIZE < 0}
-      *         or {@code offset+(N+1)*ESIZE > a.length}
-      *         for any lane {@code N} in the vector
-      *         where the mask is set
-      */
-     @ForceInline
-     public static
-     ByteVector fromByteArray(VectorSpecies<Byte> species,
-                                        byte[] a, int offset,
-                                        ByteOrder bo,
-                                        VectorMask<Byte> m) {
-         ByteSpecies vsp = (ByteSpecies) species;
-         if (offset >= 0 && offset <= (a.length - species.vectorByteSize())) {
-             return vsp.dummyVector().fromByteArray0(a, offset, m).maybeSwap(bo);
-         }
- 
-         // FIXME: optimize
-         checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
-         ByteBuffer wb = wrapper(a, bo);
-         return vsp.ldOp(wb, offset, (AbstractMask<Byte>)m,
-                    (wb_, o, i)  -> wb_.get(o + i * 1));
-     }
- 
      /**
       * Loads a vector from an array of type {@code byte[]}
       * starting at an offset.
       * For each vector lane, where {@code N} is the vector lane index, the
       * array element at index {@code offset + N} is placed into the

@@ -3172,99 +3261,106 @@
          ByteSpecies vsp = (ByteSpecies) species;
          return vsp.vOp(m, n -> (byte) (a[offset + indexMap[mapOffset + n]] ? 1 : 0));
      }
  
      /**
-      * Loads a vector from a {@linkplain ByteBuffer byte buffer}
-      * starting at an offset into the byte buffer.
+      * Loads a vector from a {@linkplain MemorySegment memory segment}
+      * starting at an offset into the memory segment.
       * Bytes are composed into primitive lane elements according
       * to the specified byte order.
       * The vector is arranged into lanes according to
       * <a href="Vector.html#lane-order">memory ordering</a>.
       * <p>
       * This method behaves as if it returns the result of calling
-      * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
-      * fromByteBuffer()} as follows:
+      * {@link #fromMemorySegment(VectorSpecies,MemorySegment,long,ByteOrder,VectorMask)
+      * fromMemorySegment()} as follows:
       * <pre>{@code
       * var m = species.maskAll(true);
-      * return fromByteBuffer(species, bb, offset, bo, m);
+      * return fromMemorySegment(species, ms, offset, bo, m);
       * }</pre>
       *
       * @param species species of desired vector
-      * @param bb the byte buffer
-      * @param offset the offset into the byte buffer
+      * @param ms the memory segment
+      * @param offset the offset into the memory segment
       * @param bo the intended byte order
-      * @return a vector loaded from a byte buffer
+      * @return a vector loaded from the memory segment
       * @throws IndexOutOfBoundsException
       *         if {@code offset+N*1 < 0}
-      *         or {@code offset+N*1 >= bb.limit()}
+      *         or {@code offset+N*1 >= ms.byteSize()}
       *         for any lane {@code N} in the vector
+      * @throws IllegalArgumentException if the memory segment is a heap segment that is
+      *         not backed by a {@code byte[]} array.
+      * @throws IllegalStateException if the memory segment's session is not alive,
+      *         or if access occurs from a thread other than the thread owning the session.
+      * @since 19
       */
      @ForceInline
      public static
-     ByteVector fromByteBuffer(VectorSpecies<Byte> species,
-                                         ByteBuffer bb, int offset,
-                                         ByteOrder bo) {
-         offset = checkFromIndexSize(offset, species.vectorByteSize(), bb.limit());
+     ByteVector fromMemorySegment(VectorSpecies<Byte> species,
+                                            MemorySegment ms, long offset,
+                                            ByteOrder bo) {
+         offset = checkFromIndexSize(offset, species.vectorByteSize(), ms.byteSize());
          ByteSpecies vsp = (ByteSpecies) species;
-         return vsp.dummyVector().fromByteBuffer0(bb, offset).maybeSwap(bo);
+         return vsp.dummyVector().fromMemorySegment0(ms, offset).maybeSwap(bo);
      }
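A minimal usage sketch for the unmasked load, assuming a heap segment wrapped over a byte[] (client code, not part of this class):

    byte[] data = new byte[64];
    MemorySegment ms = MemorySegment.ofArray(data);            // heap segment backed by byte[]
    VectorSpecies<Byte> S = ByteVector.SPECIES_128;            // 16 byte lanes
    ByteVector v = ByteVector.fromMemorySegment(S, ms, 16L, ByteOrder.nativeOrder());
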
  
      /**
-      * Loads a vector from a {@linkplain ByteBuffer byte buffer}
-      * starting at an offset into the byte buffer
+      * Loads a vector from a {@linkplain MemorySegment memory segment}
+      * starting at an offset into the memory segment
       * and using a mask.
       * Lanes where the mask is unset are filled with the default
       * value of {@code byte} (zero).
       * Bytes are composed into primitive lane elements according
       * to the specified byte order.
       * The vector is arranged into lanes according to
       * <a href="Vector.html#lane-order">memory ordering</a>.
       * <p>
       * The following pseudocode illustrates the behavior:
       * <pre>{@code
-      * ByteBuffer eb = bb.duplicate()
-      *     .position(offset);
+      * var slice = ms.asSlice(offset);
       * byte[] ar = new byte[species.length()];
       * for (int n = 0; n < ar.length; n++) {
       *     if (m.laneIsSet(n)) {
-      *         ar[n] = eb.get(n);
+      *         ar[n] = slice.getAtIndex(ValueLayout.JAVA_BYTE.withBitAlignment(8), n);
       *     }
       * }
       * ByteVector r = ByteVector.fromArray(species, ar, 0);
       * }</pre>
       * @implNote
       * The byte order argument is ignored.
       *
       * @param species species of desired vector
-      * @param bb the byte buffer
-      * @param offset the offset into the byte buffer
+      * @param ms the memory segment
+      * @param offset the offset into the memory segment
       * @param bo the intended byte order
       * @param m the mask controlling lane selection
-      * @return a vector loaded from a byte buffer
+      * @return a vector loaded from the memory segment
       * @throws IndexOutOfBoundsException
       *         if {@code offset+N*1 < 0}
-      *         or {@code offset+N*1 >= bb.limit()}
+      *         or {@code offset+N*1 >= ms.byteSize()}
       *         for any lane {@code N} in the vector
       *         where the mask is set
+      * @throws IllegalArgumentException if the memory segment is a heap segment that is
+      *         not backed by a {@code byte[]} array.
+      * @throws IllegalStateException if the memory segment's session is not alive,
+      *         or if access occurs from a thread other than the thread owning the session.
+      * @since 19
       */
      @ForceInline
      public static
-     ByteVector fromByteBuffer(VectorSpecies<Byte> species,
-                                         ByteBuffer bb, int offset,
-                                         ByteOrder bo,
-                                         VectorMask<Byte> m) {
+     ByteVector fromMemorySegment(VectorSpecies<Byte> species,
+                                            MemorySegment ms, long offset,
+                                            ByteOrder bo,
+                                            VectorMask<Byte> m) {
          ByteSpecies vsp = (ByteSpecies) species;
-         if (offset >= 0 && offset <= (bb.limit() - species.vectorByteSize())) {
-             return vsp.dummyVector().fromByteBuffer0(bb, offset, m).maybeSwap(bo);
+         if (offset >= 0 && offset <= (ms.byteSize() - species.vectorByteSize())) {
+             return vsp.dummyVector().fromMemorySegment0(ms, offset, m).maybeSwap(bo);
          }
  
          // FIXME: optimize
-         checkMaskFromIndexSize(offset, vsp, m, 1, bb.limit());
-         ByteBuffer wb = wrapper(bb, bo);
-         return vsp.ldOp(wb, offset, (AbstractMask<Byte>)m,
-                    (wb_, o, i)  -> wb_.get(o + i * 1));
+         checkMaskFromIndexSize(offset, vsp, m, 1, ms.byteSize());
+         return vsp.ldLongOp(ms, offset, m, ByteVector::memorySegmentGet);
      }
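The masked form is the usual way to load a tail shorter than one vector; a sketch (assumed client code) that builds the mask with indexInRange:

    static ByteVector loadTail(MemorySegment ms, int offset) {
        VectorSpecies<Byte> S = ByteVector.SPECIES_PREFERRED;
        VectorMask<Byte> m = S.indexInRange(offset, (int) ms.byteSize());
        return ByteVector.fromMemorySegment(S, ms, offset, ByteOrder.nativeOrder(), m);
    }
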
  
      // Memory store operations
  
      /**

@@ -3290,11 +3386,11 @@
              vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
              a, arrayAddress(a, offset),
              this,
              a, offset,
              (arr, off, v)
-             -> v.stOp(arr, off,
+             -> v.stOp(arr, (int) off,
                        (arr_, off_, i, e) -> arr_[off_ + i] = e));
      }
  
      /**
       * Stores this vector into an array of type {@code byte[]}

@@ -3441,11 +3537,11 @@
              vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
              a, booleanArrayAddress(a, offset),
              normalized,
              a, offset,
              (arr, off, v)
-             -> v.stOp(arr, off,
+             -> v.stOp(arr, (int) off,
                        (arr_, off_, i, e) -> arr_[off_ + i] = (e & 1) != 0));
      }
  
      /**
       * Stores this vector into an array of type {@code boolean[]}

@@ -3579,71 +3675,44 @@
               });
      }
  
      /**
       * {@inheritDoc} <!--workaround-->
+      * @since 19
       */
      @Override
      @ForceInline
      public final
-     void intoByteArray(byte[] a, int offset,
-                        ByteOrder bo) {
-         offset = checkFromIndexSize(offset, byteSize(), a.length);
-         maybeSwap(bo).intoByteArray0(a, offset);
-     }
- 
-     /**
-      * {@inheritDoc} <!--workaround-->
-      */
-     @Override
-     @ForceInline
-     public final
-     void intoByteArray(byte[] a, int offset,
-                        ByteOrder bo,
-                        VectorMask<Byte> m) {
-         if (m.allTrue()) {
-             intoByteArray(a, offset, bo);
-         } else {
-             ByteSpecies vsp = vspecies();
-             checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
-             maybeSwap(bo).intoByteArray0(a, offset, m);
+     void intoMemorySegment(MemorySegment ms, long offset,
+                            ByteOrder bo) {
+         if (ms.isReadOnly()) {
+             throw new UnsupportedOperationException("Attempt to write a read-only segment");
          }
-     }
  
-     /**
-      * {@inheritDoc} <!--workaround-->
-      */
-     @Override
-     @ForceInline
-     public final
-     void intoByteBuffer(ByteBuffer bb, int offset,
-                         ByteOrder bo) {
-         if (ScopedMemoryAccess.isReadOnly(bb)) {
-             throw new ReadOnlyBufferException();
-         }
-         offset = checkFromIndexSize(offset, byteSize(), bb.limit());
-         maybeSwap(bo).intoByteBuffer0(bb, offset);
+         offset = checkFromIndexSize(offset, byteSize(), ms.byteSize());
+         maybeSwap(bo).intoMemorySegment0(ms, offset);
      }
  
      /**
       * {@inheritDoc} <!--workaround-->
+      * @since 19
       */
      @Override
      @ForceInline
      public final
-     void intoByteBuffer(ByteBuffer bb, int offset,
-                         ByteOrder bo,
-                         VectorMask<Byte> m) {
+     void intoMemorySegment(MemorySegment ms, long offset,
+                            ByteOrder bo,
+                            VectorMask<Byte> m) {
          if (m.allTrue()) {
-             intoByteBuffer(bb, offset, bo);
+             intoMemorySegment(ms, offset, bo);
          } else {
-             if (bb.isReadOnly()) {
-                 throw new ReadOnlyBufferException();
+             if (ms.isReadOnly()) {
+                 throw new UnsupportedOperationException("Attempt to write a read-only segment");
              }
              ByteSpecies vsp = vspecies();
-             checkMaskFromIndexSize(offset, vsp, m, 1, bb.limit());
-             maybeSwap(bo).intoByteBuffer0(bb, offset, m);
+             checkMaskFromIndexSize(offset, vsp, m, 1, ms.byteSize());
+             maybeSwap(bo).intoMemorySegment0(ms, offset, m);
          }
      }
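Together with fromMemorySegment these stores support a straightforward segment-to-segment copy; a sketch (assumed client code; the byte order is irrelevant for byte lanes):

    static void copy(MemorySegment src, MemorySegment dst) {
        VectorSpecies<Byte> S = ByteVector.SPECIES_PREFERRED;
        int n = (int) Math.min(src.byteSize(), dst.byteSize());
        int i = 0;
        for (; i <= n - S.length(); i += S.length()) {
            ByteVector.fromMemorySegment(S, src, i, ByteOrder.nativeOrder())
                      .intoMemorySegment(dst, i, ByteOrder.nativeOrder());
        }
        VectorMask<Byte> m = S.indexInRange(i, n);                          // masked tail
        ByteVector.fromMemorySegment(S, src, i, ByteOrder.nativeOrder(), m)
                  .intoMemorySegment(dst, i, ByteOrder.nativeOrder(), m);
    }
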
  
      // ================================================
  

@@ -3673,11 +3742,11 @@
          ByteSpecies vsp = vspecies();
          return VectorSupport.load(
              vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
              a, arrayAddress(a, offset),
              a, offset, vsp,
-             (arr, off, s) -> s.ldOp(arr, off,
+             (arr, off, s) -> s.ldOp(arr, (int) off,
                                      (arr_, off_, i) -> arr_[off_ + i]));
      }
  
      /*package-private*/
      abstract

@@ -3690,11 +3759,11 @@
          ByteSpecies vsp = vspecies();
          return VectorSupport.loadMasked(
              vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
              a, arrayAddress(a, offset), m,
              a, offset, vsp,
-             (arr, off, s, vm) -> s.ldOp(arr, off, vm,
+             (arr, off, s, vm) -> s.ldOp(arr, (int) off, vm,
                                          (arr_, off_, i) -> arr_[off_ + i]));
      }
  
  
  

@@ -3707,11 +3776,11 @@
          ByteSpecies vsp = vspecies();
          return VectorSupport.load(
              vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
              a, booleanArrayAddress(a, offset),
              a, offset, vsp,
-             (arr, off, s) -> s.ldOp(arr, off,
+             (arr, off, s) -> s.ldOp(arr, (int) off,
                                      (arr_, off_, i) -> (byte) (arr_[off_ + i] ? 1 : 0)));
      }
  
      /*package-private*/
      abstract

@@ -3724,82 +3793,41 @@
          ByteSpecies vsp = vspecies();
          return VectorSupport.loadMasked(
              vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
              a, booleanArrayAddress(a, offset), m,
              a, offset, vsp,
-             (arr, off, s, vm) -> s.ldOp(arr, off, vm,
+             (arr, off, s, vm) -> s.ldOp(arr, (int) off, vm,
                                          (arr_, off_, i) -> (byte) (arr_[off_ + i] ? 1 : 0)));
      }
  
-     @Override
      abstract
-     ByteVector fromByteArray0(byte[] a, int offset);
+     ByteVector fromMemorySegment0(MemorySegment ms, long offset);
      @ForceInline
      final
-     ByteVector fromByteArray0Template(byte[] a, int offset) {
+     ByteVector fromMemorySegment0Template(MemorySegment ms, long offset) {
          ByteSpecies vsp = vspecies();
-         return VectorSupport.load(
-             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
-             a, byteArrayAddress(a, offset),
-             a, offset, vsp,
-             (arr, off, s) -> {
-                 ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
-                 return s.ldOp(wb, off,
-                         (wb_, o, i) -> wb_.get(o + i * 1));
-             });
-     }
- 
-     abstract
-     ByteVector fromByteArray0(byte[] a, int offset, VectorMask<Byte> m);
-     @ForceInline
-     final
-     <M extends VectorMask<Byte>>
-     ByteVector fromByteArray0Template(Class<M> maskClass, byte[] a, int offset, M m) {
-         ByteSpecies vsp = vspecies();
-         m.check(vsp);
-         return VectorSupport.loadMasked(
-             vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
-             a, byteArrayAddress(a, offset), m,
-             a, offset, vsp,
-             (arr, off, s, vm) -> {
-                 ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
-                 return s.ldOp(wb, off, vm,
-                         (wb_, o, i) -> wb_.get(o + i * 1));
-             });
-     }
- 
-     abstract
-     ByteVector fromByteBuffer0(ByteBuffer bb, int offset);
-     @ForceInline
-     final
-     ByteVector fromByteBuffer0Template(ByteBuffer bb, int offset) {
-         ByteSpecies vsp = vspecies();
-         return ScopedMemoryAccess.loadFromByteBuffer(
+         return ScopedMemoryAccess.loadFromMemorySegment(
                  vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
-                 bb, offset, vsp,
-                 (buf, off, s) -> {
-                     ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
-                     return s.ldOp(wb, off,
-                             (wb_, o, i) -> wb_.get(o + i * 1));
+                 (MemorySegmentProxy) ms, offset, vsp,
+                 (msp, off, s) -> {
+                     return s.ldLongOp((MemorySegment) msp, off, ByteVector::memorySegmentGet);
                  });
      }
  
      abstract
-     ByteVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Byte> m);
+     ByteVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Byte> m);
      @ForceInline
      final
      <M extends VectorMask<Byte>>
-     ByteVector fromByteBuffer0Template(Class<M> maskClass, ByteBuffer bb, int offset, M m) {
+     ByteVector fromMemorySegment0Template(Class<M> maskClass, MemorySegment ms, long offset, M m) {
          ByteSpecies vsp = vspecies();
          m.check(vsp);
-         return ScopedMemoryAccess.loadFromByteBufferMasked(
+         return ScopedMemoryAccess.loadFromMemorySegmentMasked(
                  vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
-                 bb, offset, m, vsp,
-                 (buf, off, s, vm) -> {
-                     ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
-                     return s.ldOp(wb, off, vm,
-                             (wb_, o, i) -> wb_.get(o + i * 1));
+                 (MemorySegmentProxy) ms, offset, m, vsp,
+                 (msp, off, s, vm) -> {
+                     return s.ldLongOp((MemorySegment) msp, off, vm, ByteVector::memorySegmentGet);
                  });
      }
  
      // Unchecked storing operations in native byte order.
      // Caller is responsible for applying index checks, masking, and

@@ -3814,11 +3842,11 @@
          VectorSupport.store(
              vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
              a, arrayAddress(a, offset),
              this, a, offset,
              (arr, off, v)
-             -> v.stOp(arr, off,
+             -> v.stOp(arr, (int) off,
                        (arr_, off_, i, e) -> arr_[off_+i] = e));
      }
  
      abstract
      void intoArray0(byte[] a, int offset, VectorMask<Byte> m);

@@ -3831,11 +3859,11 @@
          VectorSupport.storeMasked(
              vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
              a, arrayAddress(a, offset),
              this, m, a, offset,
              (arr, off, v, vm)
-             -> v.stOp(arr, off, vm,
+             -> v.stOp(arr, (int) off, vm,
                        (arr_, off_, i, e) -> arr_[off_ + i] = e));
      }
  
  
      abstract

@@ -3850,79 +3878,41 @@
          VectorSupport.storeMasked(
              vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
              a, booleanArrayAddress(a, offset),
              normalized, m, a, offset,
              (arr, off, v, vm)
-             -> v.stOp(arr, off, vm,
+             -> v.stOp(arr, (int) off, vm,
                        (arr_, off_, i, e) -> arr_[off_ + i] = (e & 1) != 0));
      }
  
-     abstract
-     void intoByteArray0(byte[] a, int offset);
-     @ForceInline
-     final
-     void intoByteArray0Template(byte[] a, int offset) {
-         ByteSpecies vsp = vspecies();
-         VectorSupport.store(
-             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
-             a, byteArrayAddress(a, offset),
-             this, a, offset,
-             (arr, off, v) -> {
-                 ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
-                 v.stOp(wb, off,
-                         (tb_, o, i, e) -> tb_.put(o + i * 1, e));
-             });
-     }
- 
-     abstract
-     void intoByteArray0(byte[] a, int offset, VectorMask<Byte> m);
-     @ForceInline
-     final
-     <M extends VectorMask<Byte>>
-     void intoByteArray0Template(Class<M> maskClass, byte[] a, int offset, M m) {
-         ByteSpecies vsp = vspecies();
-         m.check(vsp);
-         VectorSupport.storeMasked(
-             vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
-             a, byteArrayAddress(a, offset),
-             this, m, a, offset,
-             (arr, off, v, vm) -> {
-                 ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
-                 v.stOp(wb, off, vm,
-                         (tb_, o, i, e) -> tb_.put(o + i * 1, e));
-             });
-     }
- 
      @ForceInline
      final
-     void intoByteBuffer0(ByteBuffer bb, int offset) {
+     void intoMemorySegment0(MemorySegment ms, long offset) {
          ByteSpecies vsp = vspecies();
-         ScopedMemoryAccess.storeIntoByteBuffer(
+         ScopedMemoryAccess.storeIntoMemorySegment(
                  vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
-                 this, bb, offset,
-                 (buf, off, v) -> {
-                     ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
-                     v.stOp(wb, off,
-                             (wb_, o, i, e) -> wb_.put(o + i * 1, e));
+                 this,
+                 (MemorySegmentProxy) ms, offset,
+                 (msp, off, v) -> {
+                     v.stLongOp((MemorySegment) msp, off, ByteVector::memorySegmentSet);
                  });
      }
  
      abstract
-     void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Byte> m);
+     void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Byte> m);
      @ForceInline
      final
      <M extends VectorMask<Byte>>
-     void intoByteBuffer0Template(Class<M> maskClass, ByteBuffer bb, int offset, M m) {
+     void intoMemorySegment0Template(Class<M> maskClass, MemorySegment ms, long offset, M m) {
          ByteSpecies vsp = vspecies();
          m.check(vsp);
-         ScopedMemoryAccess.storeIntoByteBufferMasked(
+         ScopedMemoryAccess.storeIntoMemorySegmentMasked(
                  vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
-                 this, m, bb, offset,
-                 (buf, off, v, vm) -> {
-                     ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
-                     v.stOp(wb, off, vm,
-                             (wb_, o, i, e) -> wb_.put(o + i * 1, e));
+                 this, m,
+                 (MemorySegmentProxy) ms, offset,
+                 (msp, off, v, vm) -> {
+                     v.stLongOp((MemorySegment) msp, off, vm, ByteVector::memorySegmentSet);
                  });
      }
  
  
      // End of low-level memory operations.

@@ -3935,10 +3925,20 @@
                                  int limit) {
          ((AbstractMask<Byte>)m)
              .checkIndexByLane(offset, limit, vsp.iota(), scale);
      }
  
+     private static
+     void checkMaskFromIndexSize(long offset,
+                                 ByteSpecies vsp,
+                                 VectorMask<Byte> m,
+                                 int scale,
+                                 long limit) {
+         ((AbstractMask<Byte>)m)
+             .checkIndexByLane(offset, limit, vsp.iota(), scale);
+     }
+ 
      @ForceInline
      private void conditionalStoreNYI(int offset,
                                       ByteSpecies vsp,
                                       VectorMask<Byte> m,
                                       int scale,

@@ -4254,10 +4254,25 @@
                                        VectorMask<Byte> m,
                                        FLdOp<M> f) {
              return dummyVector().ldOp(memory, offset, m, f);
          }
  
+         /*package-private*/
+         @ForceInline
+         ByteVector ldLongOp(MemorySegment memory, long offset,
+                                       FLdLongOp f) {
+             return dummyVector().ldLongOp(memory, offset, f);
+         }
+ 
+         /*package-private*/
+         @ForceInline
+         ByteVector ldLongOp(MemorySegment memory, long offset,
+                                       VectorMask<Byte> m,
+                                       FLdLongOp f) {
+             return dummyVector().ldLongOp(memory, offset, m, f);
+         }
+ 
          /*package-private*/
          @ForceInline
          <M> void stOp(M memory, int offset, FStOp<M> f) {
              dummyVector().stOp(memory, offset, f);
          }

@@ -4268,10 +4283,24 @@
                        AbstractMask<Byte> m,
                        FStOp<M> f) {
              dummyVector().stOp(memory, offset, m, f);
          }
  
+         /*package-private*/
+         @ForceInline
+         void stLongOp(MemorySegment memory, long offset, FStLongOp f) {
+             dummyVector().stLongOp(memory, offset, f);
+         }
+ 
+         /*package-private*/
+         @ForceInline
+         void stLongOp(MemorySegment memory, long offset,
+                       AbstractMask<Byte> m,
+                       FStLongOp f) {
+             dummyVector().stLongOp(memory, offset, m, f);
+         }
+ 
          // N.B. Make sure these constant vectors and
          // masks load up correctly into registers.
          //
          // Also, see if we can avoid all that switching.
          // Could we cache both vectors and both masks in