
src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ByteVector.java

*** 22,18 ***
   * or visit www.oracle.com if you need additional information or have any
   * questions.
   */
  package jdk.incubator.vector;
  
- import java.nio.ByteBuffer;
  import java.nio.ByteOrder;
- import java.nio.ReadOnlyBufferException;
  import java.util.Arrays;
  import java.util.Objects;
  import java.util.function.Function;
- import java.util.function.UnaryOperator;
  
  import jdk.internal.misc.ScopedMemoryAccess;
  import jdk.internal.misc.Unsafe;
  import jdk.internal.vm.annotation.ForceInline;
  import jdk.internal.vm.vector.VectorSupport;
  
--- 22,18 ---
   * or visit www.oracle.com if you need additional information or have any
   * questions.
   */
  package jdk.incubator.vector;
  
  import java.nio.ByteOrder;
  import java.util.Arrays;
  import java.util.Objects;
  import java.util.function.Function;
  
+ import jdk.incubator.foreign.MemorySegment;
+ import jdk.incubator.foreign.ValueLayout;
+ import jdk.internal.access.foreign.MemorySegmentProxy;
  import jdk.internal.misc.ScopedMemoryAccess;
  import jdk.internal.misc.Unsafe;
  import jdk.internal.vm.annotation.ForceInline;
  import jdk.internal.vm.vector.VectorSupport;
  

*** 55,10 ***
--- 55,12 ---
          super(vec);
      }
  
      static final int FORBID_OPCODE_KIND = VO_ONLYFP;
  
+     static final ValueLayout.OfByte ELEMENT_LAYOUT = ValueLayout.JAVA_BYTE.withBitAlignment(8);
+ 
      @ForceInline
      static int opCode(Operator op) {
          return VectorOperators.opCode(op, VO_OPCODE_VALID, FORBID_OPCODE_KIND);
      }
      @ForceInline

*** 349,10 ***
--- 351,49 ---
              }
          }
          return vectorFactory(res);
      }
  
+     /*package-private*/
+     interface FLdLongOp {
+         byte apply(MemorySegment memory, long offset, int i);
+     }
+ 
+     /*package-private*/
+     @ForceInline
+     final
+     ByteVector ldLongOp(MemorySegment memory, long offset,
+                                   FLdLongOp f) {
+         // no vec = vec(): a load does not read the receiver's lanes
+         byte[] res = new byte[length()];
+         for (int i = 0; i < res.length; i++) {
+             res[i] = f.apply(memory, offset, i);
+         }
+         return vectorFactory(res);
+     }
+ 
+     /*package-private*/
+     @ForceInline
+     final
+     ByteVector ldLongOp(MemorySegment memory, long offset,
+                                   VectorMask<Byte> m,
+                                   FLdLongOp f) {
+         // no vec = vec(): lanes with the mask unset are left at zero
+         byte[] res = new byte[length()];
+         boolean[] mbits = ((AbstractMask<Byte>)m).getBits();
+         for (int i = 0; i < res.length; i++) {
+             if (mbits[i]) {
+                 res[i] = f.apply(memory, offset, i);
+             }
+         }
+         return vectorFactory(res);
+     }
+ 
+     static byte memorySegmentGet(MemorySegment ms, long o, int i) {
+         return ms.get(ELEMENT_LAYOUT, o + i * 1L);
+     }
+ 
      interface FStOp<M> {
          void apply(M memory, int offset, int i, byte a);
      }
  
      /*package-private*/

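The ldLongOp overloads above are the scalar fallback for segment loads: they assemble the lanes one at a time through an FLdLongOp callback, and memorySegmentGet is the callback actually passed in, reading lane i at byte offset offset + i * 1L with ELEMENT_LAYOUT. A minimal sketch of that per-lane addressing outside the class, assuming the usual jdk.incubator.vector and jdk.incubator.foreign imports and that MemorySegment.ofArray is available in this incubator API:

    // Sketch only: what ldLongOp(ms, offset, ByteVector::memorySegmentGet)
    // computes lane by lane in the fallback path.
    MemorySegment ms = MemorySegment.ofArray(new byte[] {1, 2, 3, 4, 5, 6, 7, 8});
    long offset = 2;
    byte[] lanes = new byte[4];                  // stand-in for species.length() lanes
    for (int i = 0; i < lanes.length; i++) {
        // same addressing as memorySegmentGet: offset + i * 1L, byte-aligned layout
        lanes[i] = ms.get(ValueLayout.JAVA_BYTE.withBitAlignment(8), offset + i * 1L);
    }
    // lanes now holds {3, 4, 5, 6}
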
*** 379,10 ***
--- 420,44 ---
                  f.apply(memory, offset, i, vec[i]);
              }
          }
      }
  
+     interface FStLongOp {
+         void apply(MemorySegment memory, long offset, int i, byte a);
+     }
+ 
+     /*package-private*/
+     @ForceInline
+     final
+     void stLongOp(MemorySegment memory, long offset,
+                   FStLongOp f) {
+         byte[] vec = vec();
+         for (int i = 0; i < vec.length; i++) {
+             f.apply(memory, offset, i, vec[i]);
+         }
+     }
+ 
+     /*package-private*/
+     @ForceInline
+     final
+     void stLongOp(MemorySegment memory, long offset,
+                   VectorMask<Byte> m,
+                   FStLongOp f) {
+         byte[] vec = vec();
+         boolean[] mbits = ((AbstractMask<Byte>)m).getBits();
+         for (int i = 0; i < vec.length; i++) {
+             if (mbits[i]) {
+                 f.apply(memory, offset, i, vec[i]);
+             }
+         }
+     }
+ 
+     static void memorySegmentSet(MemorySegment ms, long o, int i, byte e) {
+         ms.set(ELEMENT_LAYOUT, o + i * 1L, e);
+     }
+ 
      // Binary test
  
      /*package-private*/
      interface FBinTest {
          boolean apply(int cond, int i, byte a, byte b);

*** 429,10 ***
--- 504,40 ---
      @ForceInline
      static byte fromBits(long bits) {
          return ((byte)bits);
      }
  
+     static ByteVector expandHelper(Vector<Byte> v, VectorMask<Byte> m) {
+         VectorSpecies<Byte> vsp = m.vectorSpecies();
+         ByteVector r  = (ByteVector) vsp.zero();
+         ByteVector vi = (ByteVector) v;
+         if (m.allTrue()) {
+             return vi;
+         }
+         for (int i = 0, j = 0; i < vsp.length(); i++) {
+             if (m.laneIsSet(i)) {
+                 r = r.withLane(i, vi.lane(j++));
+             }
+         }
+         return r;
+     }
+ 
+     static ByteVector compressHelper(Vector<Byte> v, VectorMask<Byte> m) {
+         VectorSpecies<Byte> vsp = m.vectorSpecies();
+         ByteVector r  = (ByteVector) vsp.zero();
+         ByteVector vi = (ByteVector) v;
+         if (m.allTrue()) {
+             return vi;
+         }
+         for (int i = 0, j = 0; i < vsp.length(); i++) {
+             if (m.laneIsSet(i)) {
+                 r = r.withLane(j++, vi.lane(i));
+             }
+         }
+         return r;
+     }
+ 
      // Static factories (other than memory operations)
  
      // Note: A surprising behavior in javadoc
      // sometimes makes a lone /** {@inheritDoc} */
      // comment drop the method altogether,

*** 618,10 ***
--- 723,20 ---
          switch (opc_) {
              case VECTOR_OP_NEG: return (v0, m) ->
                      v0.uOp(m, (i, a) -> (byte) -a);
              case VECTOR_OP_ABS: return (v0, m) ->
                      v0.uOp(m, (i, a) -> (byte) Math.abs(a));
+             case VECTOR_OP_BIT_COUNT: return (v0, m) ->
+                     v0.uOp(m, (i, a) -> (byte) bitCount(a));
+             case VECTOR_OP_TZ_COUNT: return (v0, m) ->
+                     v0.uOp(m, (i, a) -> (byte) numberOfTrailingZeros(a));
+             case VECTOR_OP_LZ_COUNT: return (v0, m) ->
+                     v0.uOp(m, (i, a) -> (byte) numberOfLeadingZeros(a));
+             case VECTOR_OP_REVERSE: return (v0, m) ->
+                     v0.uOp(m, (i, a) -> reverse(a));
+             case VECTOR_OP_REVERSE_BYTES: return (v0, m) ->
+                     v0.uOp(m, (i, a) -> a);
              default: return null;
          }
      }
  
      // Binary lanewise support

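The new switch arms above supply the scalar fallback for the lanewise unary operators added in this change; note that REVERSE_BYTES degenerates to the identity for single-byte lanes. A brief usage sketch, assuming the operator constants BIT_COUNT, TRAILING_ZEROS_COUNT, LEADING_ZEROS_COUNT and REVERSE are the VectorOperators names introduced alongside this patch:

    // Sketch only: the new unary operators applied to a byte vector.
    VectorSpecies<Byte> SP = ByteVector.SPECIES_64;                     // 8 byte lanes
    ByteVector v = ByteVector.broadcast(SP, (byte) 0b0000_0110);        // 6 in every lane
    ByteVector pop = v.lanewise(VectorOperators.BIT_COUNT);             // 2 in every lane
    ByteVector tz  = v.lanewise(VectorOperators.TRAILING_ZEROS_COUNT);  // 1 in every lane
    ByteVector lz  = v.lanewise(VectorOperators.LEADING_ZEROS_COUNT);   // 5 in every lane
    ByteVector rev = v.lanewise(VectorOperators.REVERSE);               // 0b0110_0000 = 96
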
*** 1744,10 ***
--- 1859,29 ---
      public final
      ByteVector abs() {
          return lanewise(ABS);
      }
  
+     static int bitCount(byte a) {
+         return Integer.bitCount((int)a & 0xFF);
+     }
+     static int numberOfTrailingZeros(byte a) {
+         return a != 0 ? Integer.numberOfTrailingZeros(a) : 8;
+     }
+     static int numberOfLeadingZeros(byte a) {
+         return a >= 0 ? Integer.numberOfLeadingZeros(a) - 24 : 0;
+     }
+ 
+     static byte reverse(byte a) {
+         if (a == 0 || a == -1) return a;
+ 
+         byte b = rotateLeft(a, 4);
+         b = (byte) (((b & 0x55) << 1) | ((b & 0xAA) >>> 1));
+         b = (byte) (((b & 0x33) << 2) | ((b & 0xCC) >>> 2));
+         return b;
+     }
+ 
      // not (~)
      /**
       * Computes the bitwise logical complement ({@code ~})
       * of this vector.
       *

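The scalar helpers above reuse the Integer intrinsics but correct for the 8-bit lane width: bitCount masks with 0xFF before widening, numberOfTrailingZeros saturates at 8 for zero, and numberOfLeadingZeros subtracts the 24 extra high bits an int carries (a negative byte has its top bit set, so the count is 0). A small worked sketch of those corrections for one value:

    // Sketch only: the lane-width corrections made by the helpers above.
    byte a = (byte) 0x90;                            // 0b1001_0000, negative as a byte
    int pop = Integer.bitCount(a & 0xFF);            // 2 (on the sign-extended int it would be 26)
    int tz  = (a != 0) ? Integer.numberOfTrailingZeros(a) : 8;     // 4
    int lz  = (a >= 0) ? Integer.numberOfLeadingZeros(a) - 24 : 0; // 0, the top bit is set
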
*** 2370,10 ***
--- 2504,49 ---
                                       shuffleType, byte.class, length(),
                                       this, vsp,
                                       ByteVector::toShuffle0);
      }
  
+     /**
+      * {@inheritDoc} <!--workaround-->
+      * @since 19
+      */
+     @Override
+     public abstract
+     ByteVector compress(VectorMask<Byte> m);
+ 
+     /*package-private*/
+     @ForceInline
+     final
+     <M extends AbstractMask<Byte>>
+     ByteVector compressTemplate(Class<M> masktype, M m) {
+         m.check(masktype, this);
+         return (ByteVector) VectorSupport.comExpOp(VectorSupport.VECTOR_OP_COMPRESS, getClass(), masktype,
+                                                      byte.class, length(), this, m,
+                                                      (v1, m1) -> compressHelper(v1, m1));
+     }
+ 
+     /**
+      * {@inheritDoc} <!--workaround-->
+      * @since 19
+      */
+     @Override
+     public abstract
+     ByteVector expand(VectorMask<Byte> m);
+ 
+     /*package-private*/
+     @ForceInline
+     final
+     <M extends AbstractMask<Byte>>
+     ByteVector expandTemplate(Class<M> masktype, M m) {
+         m.check(masktype, this);
+         return (ByteVector) VectorSupport.comExpOp(VectorSupport.VECTOR_OP_EXPAND, getClass(), masktype,
+                                                      byte.class, length(), this, m,
+                                                      (v1, m1) -> expandHelper(v1, m1));
+     }
+ 
+ 
      /**
       * {@inheritDoc} <!--workaround-->
       */
      @Override
      public abstract

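compress(m) gathers the lanes selected by the mask into the lowest lanes of the result and zero-fills the rest, while expand(m) performs the inverse scatter; the templates above route through VectorSupport.comExpOp with compressHelper/expandHelper as the scalar fallback. A small usage sketch, with an 8-lane species and values chosen purely for illustration (VectorMask.fromValues is the standard Vector API mask factory):

    // Sketch only: compress/expand lane movement on an 8-lane byte vector.
    VectorSpecies<Byte> SP = ByteVector.SPECIES_64;
    ByteVector v = ByteVector.fromArray(SP, new byte[] {10, 11, 12, 13, 14, 15, 16, 17}, 0);
    VectorMask<Byte> m = VectorMask.fromValues(SP, false, true, false, true,
                                                   false, true, false, true);
    ByteVector c = v.compress(m);   // {11, 13, 15, 17, 0, 0, 0, 0}
    ByteVector e = v.expand(m);     // {0, 10, 0, 11, 0, 12, 0, 13}
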
*** 2782,94 ***
              res[i] = (double) a[i];
          }
          return res;
      }
  
-     /**
-      * Loads a vector from a byte array starting at an offset.
-      * Bytes are composed into primitive lane elements according
-      * to the specified byte order.
-      * The vector is arranged into lanes according to
-      * <a href="Vector.html#lane-order">memory ordering</a>.
-      * <p>
-      * This method behaves as if it returns the result of calling
-      * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
-      * fromByteBuffer()} as follows:
-      * <pre>{@code
-      * var bb = ByteBuffer.wrap(a);
-      * var m = species.maskAll(true);
-      * return fromByteBuffer(species, bb, offset, bo, m);
-      * }</pre>
-      *
-      * @param species species of desired vector
-      * @param a the byte array
-      * @param offset the offset into the array
-      * @param bo the intended byte order
-      * @return a vector loaded from a byte array
-      * @throws IndexOutOfBoundsException
-      *         if {@code offset+N*ESIZE < 0}
-      *         or {@code offset+(N+1)*ESIZE > a.length}
-      *         for any lane {@code N} in the vector
-      */
-     @ForceInline
-     public static
-     ByteVector fromByteArray(VectorSpecies<Byte> species,
-                                        byte[] a, int offset,
-                                        ByteOrder bo) {
-         offset = checkFromIndexSize(offset, species.vectorByteSize(), a.length);
-         ByteSpecies vsp = (ByteSpecies) species;
-         return vsp.dummyVector().fromByteArray0(a, offset).maybeSwap(bo);
-     }
- 
-     /**
-      * Loads a vector from a byte array starting at an offset
-      * and using a mask.
-      * Lanes where the mask is unset are filled with the default
-      * value of {@code byte} (zero).
-      * Bytes are composed into primitive lane elements according
-      * to the specified byte order.
-      * The vector is arranged into lanes according to
-      * <a href="Vector.html#lane-order">memory ordering</a>.
-      * <p>
-      * This method behaves as if it returns the result of calling
-      * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
-      * fromByteBuffer()} as follows:
-      * <pre>{@code
-      * var bb = ByteBuffer.wrap(a);
-      * return fromByteBuffer(species, bb, offset, bo, m);
-      * }</pre>
-      *
-      * @param species species of desired vector
-      * @param a the byte array
-      * @param offset the offset into the array
-      * @param bo the intended byte order
-      * @param m the mask controlling lane selection
-      * @return a vector loaded from a byte array
-      * @throws IndexOutOfBoundsException
-      *         if {@code offset+N*ESIZE < 0}
-      *         or {@code offset+(N+1)*ESIZE > a.length}
-      *         for any lane {@code N} in the vector
-      *         where the mask is set
-      */
-     @ForceInline
-     public static
-     ByteVector fromByteArray(VectorSpecies<Byte> species,
-                                        byte[] a, int offset,
-                                        ByteOrder bo,
-                                        VectorMask<Byte> m) {
-         ByteSpecies vsp = (ByteSpecies) species;
-         if (offset >= 0 && offset <= (a.length - species.vectorByteSize())) {
-             return vsp.dummyVector().fromByteArray0(a, offset, m).maybeSwap(bo);
-         }
- 
-         // FIXME: optimize
-         checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
-         ByteBuffer wb = wrapper(a, bo);
-         return vsp.ldOp(wb, offset, (AbstractMask<Byte>)m,
-                    (wb_, o, i)  -> wb_.get(o + i * 1));
-     }
- 
      /**
       * Loads a vector from an array of type {@code byte[]}
       * starting at an offset.
       * For each vector lane, where {@code N} is the vector lane index, the
       * array element at index {@code offset + N} is placed into the
--- 2955,10 ---

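The fromByteArray overloads removed in this hunk have no direct byte-array replacement; the equivalent load now goes through a heap memory segment wrapping the array. A migration sketch, assuming MemorySegment.ofArray is available in this incubator API:

    // Sketch only: segment-based replacement for the removed
    // fromByteArray(species, a, offset, bo) overload.
    byte[] a = new byte[64];
    VectorSpecies<Byte> SP = ByteVector.SPECIES_128;
    // before: ByteVector v = ByteVector.fromByteArray(SP, a, 0, ByteOrder.LITTLE_ENDIAN);
    ByteVector v = ByteVector.fromMemorySegment(SP, MemorySegment.ofArray(a), 0L,
                                                ByteOrder.LITTLE_ENDIAN);
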
*** 3172,99 ***
          ByteSpecies vsp = (ByteSpecies) species;
          return vsp.vOp(m, n -> (byte) (a[offset + indexMap[mapOffset + n]] ? 1 : 0));
      }
  
      /**
!      * Loads a vector from a {@linkplain ByteBuffer byte buffer}
!      * starting at an offset into the byte buffer.
       * Bytes are composed into primitive lane elements according
       * to the specified byte order.
       * The vector is arranged into lanes according to
       * <a href="Vector.html#lane-order">memory ordering</a>.
       * <p>
       * This method behaves as if it returns the result of calling
!      * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
!      * fromByteBuffer()} as follows:
       * <pre>{@code
       * var m = species.maskAll(true);
!      * return fromByteBuffer(species, bb, offset, bo, m);
       * }</pre>
       *
       * @param species species of desired vector
!      * @param bb the byte buffer
!      * @param offset the offset into the byte buffer
       * @param bo the intended byte order
!      * @return a vector loaded from a byte buffer
       * @throws IndexOutOfBoundsException
       *         if {@code offset+N*1 < 0}
!      *         or {@code offset+N*1 >= bb.limit()}
       *         for any lane {@code N} in the vector
       */
      @ForceInline
      public static
!     ByteVector fromByteBuffer(VectorSpecies<Byte> species,
!                                         ByteBuffer bb, int offset,
!                                         ByteOrder bo) {
!         offset = checkFromIndexSize(offset, species.vectorByteSize(), bb.limit());
          ByteSpecies vsp = (ByteSpecies) species;
!         return vsp.dummyVector().fromByteBuffer0(bb, offset).maybeSwap(bo);
      }
  
      /**
!      * Loads a vector from a {@linkplain ByteBuffer byte buffer}
!      * starting at an offset into the byte buffer
       * and using a mask.
       * Lanes where the mask is unset are filled with the default
       * value of {@code byte} (zero).
       * Bytes are composed into primitive lane elements according
       * to the specified byte order.
       * The vector is arranged into lanes according to
       * <a href="Vector.html#lane-order">memory ordering</a>.
       * <p>
       * The following pseudocode illustrates the behavior:
       * <pre>{@code
!      * ByteBuffer eb = bb.duplicate()
-      *     .position(offset);
       * byte[] ar = new byte[species.length()];
       * for (int n = 0; n < ar.length; n++) {
       *     if (m.laneIsSet(n)) {
!      *         ar[n] = eb.get(n);
       *     }
       * }
       * ByteVector r = ByteVector.fromArray(species, ar, 0);
       * }</pre>
       * @implNote
       * The byte order argument is ignored.
       *
       * @param species species of desired vector
!      * @param bb the byte buffer
!      * @param offset the offset into the byte buffer
       * @param bo the intended byte order
       * @param m the mask controlling lane selection
!      * @return a vector loaded from a byte buffer
       * @throws IndexOutOfBoundsException
       *         if {@code offset+N*1 < 0}
!      *         or {@code offset+N*1 >= bb.limit()}
       *         for any lane {@code N} in the vector
       *         where the mask is set
       */
      @ForceInline
      public static
!     ByteVector fromByteBuffer(VectorSpecies<Byte> species,
!                                         ByteBuffer bb, int offset,
!                                         ByteOrder bo,
!                                         VectorMask<Byte> m) {
          ByteSpecies vsp = (ByteSpecies) species;
!         if (offset >= 0 && offset <= (bb.limit() - species.vectorByteSize())) {
!             return vsp.dummyVector().fromByteBuffer0(bb, offset, m).maybeSwap(bo);
          }
  
          // FIXME: optimize
!         checkMaskFromIndexSize(offset, vsp, m, 1, bb.limit());
!         ByteBuffer wb = wrapper(bb, bo);
-         return vsp.ldOp(wb, offset, (AbstractMask<Byte>)m,
-                    (wb_, o, i)  -> wb_.get(o + i * 1));
      }
  
      // Memory store operations
  
      /**
--- 3261,106 ---
          ByteSpecies vsp = (ByteSpecies) species;
          return vsp.vOp(m, n -> (byte) (a[offset + indexMap[mapOffset + n]] ? 1 : 0));
      }
  
      /**
!      * Loads a vector from a {@linkplain MemorySegment memory segment}
!      * starting at an offset into the memory segment.
       * Bytes are composed into primitive lane elements according
       * to the specified byte order.
       * The vector is arranged into lanes according to
       * <a href="Vector.html#lane-order">memory ordering</a>.
       * <p>
       * This method behaves as if it returns the result of calling
!      * {@link #fromMemorySegment(VectorSpecies,MemorySegment,long,ByteOrder,VectorMask)
!      * fromMemorySegment()} as follows:
       * <pre>{@code
       * var m = species.maskAll(true);
!      * return fromMemorySegment(species, ms, offset, bo, m);
       * }</pre>
       *
       * @param species species of desired vector
!      * @param ms the memory segment
!      * @param offset the offset into the memory segment
       * @param bo the intended byte order
!      * @return a vector loaded from the memory segment
       * @throws IndexOutOfBoundsException
       *         if {@code offset+N*1 < 0}
!      *         or {@code offset+N*1 >= ms.byteSize()}
       *         for any lane {@code N} in the vector
+      * @throws IllegalArgumentException if the memory segment is a heap segment that is
+      *         not backed by a {@code byte[]} array.
+      * @throws IllegalStateException if the memory segment's session is not alive,
+      *         or if access occurs from a thread other than the thread owning the session.
+      * @since 19
       */
      @ForceInline
      public static
!     ByteVector fromMemorySegment(VectorSpecies<Byte> species,
!                                            MemorySegment ms, long offset,
!                                            ByteOrder bo) {
!         offset = checkFromIndexSize(offset, species.vectorByteSize(), ms.byteSize());
          ByteSpecies vsp = (ByteSpecies) species;
!         return vsp.dummyVector().fromMemorySegment0(ms, offset).maybeSwap(bo);
      }
  
      /**
!      * Loads a vector from a {@linkplain MemorySegment memory segment}
!      * starting at an offset into the memory segment
       * and using a mask.
       * Lanes where the mask is unset are filled with the default
       * value of {@code byte} (zero).
       * Bytes are composed into primitive lane elements according
       * to the specified byte order.
       * The vector is arranged into lanes according to
       * <a href="Vector.html#lane-order">memory ordering</a>.
       * <p>
       * The following pseudocode illustrates the behavior:
       * <pre>{@code
!      * var slice = ms.asSlice(offset);
       * byte[] ar = new byte[species.length()];
       * for (int n = 0; n < ar.length; n++) {
       *     if (m.laneIsSet(n)) {
!      *         ar[n] = slice.getAtIndex(ValueLayout.JAVA_BYTE.withBitAlignment(8), n);
       *     }
       * }
       * ByteVector r = ByteVector.fromArray(species, ar, 0);
       * }</pre>
       * @implNote
       * The byte order argument is ignored.
       *
       * @param species species of desired vector
!      * @param ms the memory segment
!      * @param offset the offset into the memory segment
       * @param bo the intended byte order
       * @param m the mask controlling lane selection
!      * @return a vector loaded from the memory segment
       * @throws IndexOutOfBoundsException
       *         if {@code offset+N*1 < 0}
!      *         or {@code offset+N*1 >= ms.byteSize()}
       *         for any lane {@code N} in the vector
       *         where the mask is set
+      * @throws IllegalArgumentException if the memory segment is a heap segment that is
+      *         not backed by a {@code byte[]} array.
+      * @throws IllegalStateException if the memory segment's session is not alive,
+      *         or if access occurs from a thread other than the thread owning the session.
+      * @since 19
       */
      @ForceInline
      public static
!     ByteVector fromMemorySegment(VectorSpecies<Byte> species,
!                                            MemorySegment ms, long offset,
!                                            ByteOrder bo,
!                                            VectorMask<Byte> m) {
          ByteSpecies vsp = (ByteSpecies) species;
!         if (offset >= 0 && offset <= (ms.byteSize() - species.vectorByteSize())) {
!             return vsp.dummyVector().fromMemorySegment0(ms, offset, m).maybeSwap(bo);
          }
  
          // FIXME: optimize
!         checkMaskFromIndexSize(offset, vsp, m, 1, ms.byteSize());
!         return vsp.ldLongOp(ms, offset, m, ByteVector::memorySegmentGet);
      }
  
      // Memory store operations
  
      /**

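The masked overload above falls back to the bounds-checked scalar path whenever a full vector would run past the end of the segment, which makes it the natural way to load a partial tail. A sketch, assuming VectorSpecies.indexInRange behaves as in the released Vector API (lane N is set only while offset + N stays below the limit):

    // Sketch only: masked tail load; lanes past the end of the segment stay zero.
    VectorSpecies<Byte> SP = ByteVector.SPECIES_128;            // 16 byte lanes
    MemorySegment ms = MemorySegment.ofArray(new byte[20]);
    long offset = 10;                                           // only 10 bytes remain
    VectorMask<Byte> m = SP.indexInRange((int) offset, (int) ms.byteSize());
    ByteVector tail = ByteVector.fromMemorySegment(SP, ms, offset,
                                                   ByteOrder.nativeOrder(), m);
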
*** 3290,11 ***
              vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
              a, arrayAddress(a, offset),
              this,
              a, offset,
              (arr, off, v)
!             -> v.stOp(arr, off,
                        (arr_, off_, i, e) -> arr_[off_ + i] = e));
      }
  
      /**
       * Stores this vector into an array of type {@code byte[]}
--- 3386,11 ---
              vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
              a, arrayAddress(a, offset),
              this,
              a, offset,
              (arr, off, v)
!             -> v.stOp(arr, (int) off,
                        (arr_, off_, i, e) -> arr_[off_ + i] = e));
      }
  
      /**
       * Stores this vector into an array of type {@code byte[]}

*** 3441,11 ***
              vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
              a, booleanArrayAddress(a, offset),
              normalized,
              a, offset,
              (arr, off, v)
!             -> v.stOp(arr, off,
                        (arr_, off_, i, e) -> arr_[off_ + i] = (e & 1) != 0));
      }
  
      /**
       * Stores this vector into an array of type {@code boolean[]}
--- 3537,11 ---
              vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
              a, booleanArrayAddress(a, offset),
              normalized,
              a, offset,
              (arr, off, v)
!             -> v.stOp(arr, (int) off,
                        (arr_, off_, i, e) -> arr_[off_ + i] = (e & 1) != 0));
      }
  
      /**
       * Stores this vector into an array of type {@code boolean[]}

*** 3579,71 ***
               });
      }
  
      /**
       * {@inheritDoc} <!--workaround-->
       */
      @Override
      @ForceInline
      public final
!     void intoByteArray(byte[] a, int offset,
!                        ByteOrder bo) {
!         offset = checkFromIndexSize(offset, byteSize(), a.length);
!         maybeSwap(bo).intoByteArray0(a, offset);
-     }
- 
-     /**
-      * {@inheritDoc} <!--workaround-->
-      */
-     @Override
-     @ForceInline
-     public final
-     void intoByteArray(byte[] a, int offset,
-                        ByteOrder bo,
-                        VectorMask<Byte> m) {
-         if (m.allTrue()) {
-             intoByteArray(a, offset, bo);
-         } else {
-             ByteSpecies vsp = vspecies();
-             checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
-             maybeSwap(bo).intoByteArray0(a, offset, m);
          }
-     }
  
!     /**
!      * {@inheritDoc} <!--workaround-->
-      */
-     @Override
-     @ForceInline
-     public final
-     void intoByteBuffer(ByteBuffer bb, int offset,
-                         ByteOrder bo) {
-         if (ScopedMemoryAccess.isReadOnly(bb)) {
-             throw new ReadOnlyBufferException();
-         }
-         offset = checkFromIndexSize(offset, byteSize(), bb.limit());
-         maybeSwap(bo).intoByteBuffer0(bb, offset);
      }
  
      /**
       * {@inheritDoc} <!--workaround-->
       */
      @Override
      @ForceInline
      public final
!     void intoByteBuffer(ByteBuffer bb, int offset,
!                         ByteOrder bo,
!                         VectorMask<Byte> m) {
          if (m.allTrue()) {
!             intoByteBuffer(bb, offset, bo);
          } else {
!             if (bb.isReadOnly()) {
!                 throw new ReadOnlyBufferException();
              }
              ByteSpecies vsp = vspecies();
!             checkMaskFromIndexSize(offset, vsp, m, 1, bb.limit());
!             maybeSwap(bo).intoByteBuffer0(bb, offset, m);
          }
      }
  
      // ================================================
  
--- 3675,44 ---
               });
      }
  
      /**
       * {@inheritDoc} <!--workaround-->
+      * @since 19
       */
      @Override
      @ForceInline
      public final
!     void intoMemorySegment(MemorySegment ms, long offset,
!                            ByteOrder bo) {
!         if (ms.isReadOnly()) {
!             throw new UnsupportedOperationException("Attempt to write a read-only segment");
          }
  
!         offset = checkFromIndexSize(offset, byteSize(), ms.byteSize());
!         maybeSwap(bo).intoMemorySegment0(ms, offset);
      }
  
      /**
       * {@inheritDoc} <!--workaround-->
+      * @since 19
       */
      @Override
      @ForceInline
      public final
!     void intoMemorySegment(MemorySegment ms, long offset,
!                            ByteOrder bo,
!                            VectorMask<Byte> m) {
          if (m.allTrue()) {
!             intoMemorySegment(ms, offset, bo);
          } else {
!             if (ms.isReadOnly()) {
!                 throw new UnsupportedOperationException("Attempt to write a read-only segment");
              }
              ByteSpecies vsp = vspecies();
!             checkMaskFromIndexSize(offset, vsp, m, 1, ms.byteSize());
!             maybeSwap(bo).intoMemorySegment0(ms, offset, m);
          }
      }
  
      // ================================================
  

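intoMemorySegment mirrors the load path: it rejects read-only segments up front (byte swapping is a no-op for single-byte lanes) and then delegates to intoMemorySegment0, with the masked form bounds-checking only the enabled lanes. A round-trip sketch over a writable heap segment, again assuming MemorySegment.ofArray and asReadOnly are available in this incubator API:

    // Sketch only: load from one offset of a writable heap segment, store to another.
    VectorSpecies<Byte> SP = ByteVector.SPECIES_64;             // 8 byte lanes
    MemorySegment ms = MemorySegment.ofArray(new byte[32]);
    ByteVector v = ByteVector.fromMemorySegment(SP, ms, 0L, ByteOrder.nativeOrder());
    v.add((byte) 1).intoMemorySegment(ms, 16L, ByteOrder.nativeOrder());
    // storing into ms.asReadOnly() instead would throw UnsupportedOperationException
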
*** 3673,11 ***
          ByteSpecies vsp = vspecies();
          return VectorSupport.load(
              vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
              a, arrayAddress(a, offset),
              a, offset, vsp,
!             (arr, off, s) -> s.ldOp(arr, off,
                                      (arr_, off_, i) -> arr_[off_ + i]));
      }
  
      /*package-private*/
      abstract
--- 3742,11 ---
          ByteSpecies vsp = vspecies();
          return VectorSupport.load(
              vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
              a, arrayAddress(a, offset),
              a, offset, vsp,
!             (arr, off, s) -> s.ldOp(arr, (int) off,
                                      (arr_, off_, i) -> arr_[off_ + i]));
      }
  
      /*package-private*/
      abstract

*** 3690,11 ***
          ByteSpecies vsp = vspecies();
          return VectorSupport.loadMasked(
              vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
              a, arrayAddress(a, offset), m,
              a, offset, vsp,
!             (arr, off, s, vm) -> s.ldOp(arr, off, vm,
                                          (arr_, off_, i) -> arr_[off_ + i]));
      }
  
  
  
--- 3759,11 ---
          ByteSpecies vsp = vspecies();
          return VectorSupport.loadMasked(
              vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
              a, arrayAddress(a, offset), m,
              a, offset, vsp,
!             (arr, off, s, vm) -> s.ldOp(arr, (int) off, vm,
                                          (arr_, off_, i) -> arr_[off_ + i]));
      }
  
  
  

*** 3707,11 ***
          ByteSpecies vsp = vspecies();
          return VectorSupport.load(
              vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
              a, booleanArrayAddress(a, offset),
              a, offset, vsp,
!             (arr, off, s) -> s.ldOp(arr, off,
                                      (arr_, off_, i) -> (byte) (arr_[off_ + i] ? 1 : 0)));
      }
  
      /*package-private*/
      abstract
--- 3776,11 ---
          ByteSpecies vsp = vspecies();
          return VectorSupport.load(
              vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
              a, booleanArrayAddress(a, offset),
              a, offset, vsp,
!             (arr, off, s) -> s.ldOp(arr, (int) off,
                                      (arr_, off_, i) -> (byte) (arr_[off_ + i] ? 1 : 0)));
      }
  
      /*package-private*/
      abstract

*** 3724,82 ***
          ByteSpecies vsp = vspecies();
          return VectorSupport.loadMasked(
              vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
              a, booleanArrayAddress(a, offset), m,
              a, offset, vsp,
!             (arr, off, s, vm) -> s.ldOp(arr, off, vm,
                                          (arr_, off_, i) -> (byte) (arr_[off_ + i] ? 1 : 0)));
      }
  
-     @Override
      abstract
!     ByteVector fromByteArray0(byte[] a, int offset);
      @ForceInline
      final
!     ByteVector fromByteArray0Template(byte[] a, int offset) {
          ByteSpecies vsp = vspecies();
!         return VectorSupport.load(
-             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
-             a, byteArrayAddress(a, offset),
-             a, offset, vsp,
-             (arr, off, s) -> {
-                 ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
-                 return s.ldOp(wb, off,
-                         (wb_, o, i) -> wb_.get(o + i * 1));
-             });
-     }
- 
-     abstract
-     ByteVector fromByteArray0(byte[] a, int offset, VectorMask<Byte> m);
-     @ForceInline
-     final
-     <M extends VectorMask<Byte>>
-     ByteVector fromByteArray0Template(Class<M> maskClass, byte[] a, int offset, M m) {
-         ByteSpecies vsp = vspecies();
-         m.check(vsp);
-         return VectorSupport.loadMasked(
-             vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
-             a, byteArrayAddress(a, offset), m,
-             a, offset, vsp,
-             (arr, off, s, vm) -> {
-                 ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
-                 return s.ldOp(wb, off, vm,
-                         (wb_, o, i) -> wb_.get(o + i * 1));
-             });
-     }
- 
-     abstract
-     ByteVector fromByteBuffer0(ByteBuffer bb, int offset);
-     @ForceInline
-     final
-     ByteVector fromByteBuffer0Template(ByteBuffer bb, int offset) {
-         ByteSpecies vsp = vspecies();
-         return ScopedMemoryAccess.loadFromByteBuffer(
                  vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
!                 bb, offset, vsp,
!                 (buf, off, s) -> {
!                     ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
-                     return s.ldOp(wb, off,
-                             (wb_, o, i) -> wb_.get(o + i * 1));
                  });
      }
  
      abstract
!     ByteVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Byte> m);
      @ForceInline
      final
      <M extends VectorMask<Byte>>
!     ByteVector fromByteBuffer0Template(Class<M> maskClass, ByteBuffer bb, int offset, M m) {
          ByteSpecies vsp = vspecies();
          m.check(vsp);
!         return ScopedMemoryAccess.loadFromByteBufferMasked(
                  vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
!                 bb, offset, m, vsp,
!                 (buf, off, s, vm) -> {
!                     ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
-                     return s.ldOp(wb, off, vm,
-                             (wb_, o, i) -> wb_.get(o + i * 1));
                  });
      }
  
      // Unchecked storing operations in native byte order.
      // Caller is responsible for applying index checks, masking, and
--- 3793,41 ---
          ByteSpecies vsp = vspecies();
          return VectorSupport.loadMasked(
              vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
              a, booleanArrayAddress(a, offset), m,
              a, offset, vsp,
!             (arr, off, s, vm) -> s.ldOp(arr, (int) off, vm,
                                          (arr_, off_, i) -> (byte) (arr_[off_ + i] ? 1 : 0)));
      }
  
      abstract
!     ByteVector fromMemorySegment0(MemorySegment ms, long offset);
      @ForceInline
      final
!     ByteVector fromMemorySegment0Template(MemorySegment ms, long offset) {
          ByteSpecies vsp = vspecies();
!         return ScopedMemoryAccess.loadFromMemorySegment(
                  vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
!                 (MemorySegmentProxy) ms, offset, vsp,
!                 (msp, off, s) -> {
!                     return s.ldLongOp((MemorySegment) msp, off, ByteVector::memorySegmentGet);
                  });
      }
  
      abstract
!     ByteVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Byte> m);
      @ForceInline
      final
      <M extends VectorMask<Byte>>
!     ByteVector fromMemorySegment0Template(Class<M> maskClass, MemorySegment ms, long offset, M m) {
          ByteSpecies vsp = vspecies();
          m.check(vsp);
!         return ScopedMemoryAccess.loadFromMemorySegmentMasked(
                  vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
!                 (MemorySegmentProxy) ms, offset, m, vsp,
!                 (msp, off, s, vm) -> {
!                     return s.ldLongOp((MemorySegment) msp, off, vm, ByteVector::memorySegmentGet);
                  });
      }
  
      // Unchecked storing operations in native byte order.
      // Caller is responsible for applying index checks, masking, and

*** 3814,11 ***
          VectorSupport.store(
              vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
              a, arrayAddress(a, offset),
              this, a, offset,
              (arr, off, v)
!             -> v.stOp(arr, off,
                        (arr_, off_, i, e) -> arr_[off_+i] = e));
      }
  
      abstract
      void intoArray0(byte[] a, int offset, VectorMask<Byte> m);
--- 3842,11 ---
          VectorSupport.store(
              vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
              a, arrayAddress(a, offset),
              this, a, offset,
              (arr, off, v)
!             -> v.stOp(arr, (int) off,
                        (arr_, off_, i, e) -> arr_[off_+i] = e));
      }
  
      abstract
      void intoArray0(byte[] a, int offset, VectorMask<Byte> m);

*** 3831,11 ***
          VectorSupport.storeMasked(
              vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
              a, arrayAddress(a, offset),
              this, m, a, offset,
              (arr, off, v, vm)
!             -> v.stOp(arr, off, vm,
                        (arr_, off_, i, e) -> arr_[off_ + i] = e));
      }
  
  
      abstract
--- 3859,11 ---
          VectorSupport.storeMasked(
              vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
              a, arrayAddress(a, offset),
              this, m, a, offset,
              (arr, off, v, vm)
!             -> v.stOp(arr, (int) off, vm,
                        (arr_, off_, i, e) -> arr_[off_ + i] = e));
      }
  
  
      abstract

*** 3850,79 ***
          VectorSupport.storeMasked(
              vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
              a, booleanArrayAddress(a, offset),
              normalized, m, a, offset,
              (arr, off, v, vm)
!             -> v.stOp(arr, off, vm,
                        (arr_, off_, i, e) -> arr_[off_ + i] = (e & 1) != 0));
      }
  
-     abstract
-     void intoByteArray0(byte[] a, int offset);
-     @ForceInline
-     final
-     void intoByteArray0Template(byte[] a, int offset) {
-         ByteSpecies vsp = vspecies();
-         VectorSupport.store(
-             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
-             a, byteArrayAddress(a, offset),
-             this, a, offset,
-             (arr, off, v) -> {
-                 ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
-                 v.stOp(wb, off,
-                         (tb_, o, i, e) -> tb_.put(o + i * 1, e));
-             });
-     }
- 
-     abstract
-     void intoByteArray0(byte[] a, int offset, VectorMask<Byte> m);
-     @ForceInline
-     final
-     <M extends VectorMask<Byte>>
-     void intoByteArray0Template(Class<M> maskClass, byte[] a, int offset, M m) {
-         ByteSpecies vsp = vspecies();
-         m.check(vsp);
-         VectorSupport.storeMasked(
-             vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
-             a, byteArrayAddress(a, offset),
-             this, m, a, offset,
-             (arr, off, v, vm) -> {
-                 ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
-                 v.stOp(wb, off, vm,
-                         (tb_, o, i, e) -> tb_.put(o + i * 1, e));
-             });
-     }
- 
      @ForceInline
      final
!     void intoByteBuffer0(ByteBuffer bb, int offset) {
          ByteSpecies vsp = vspecies();
!         ScopedMemoryAccess.storeIntoByteBuffer(
                  vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
!                 this, bb, offset,
!                 (buf, off, v) -> {
!                     ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
!                     v.stOp(wb, off,
-                             (wb_, o, i, e) -> wb_.put(o + i * 1, e));
                  });
      }
  
      abstract
!     void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Byte> m);
      @ForceInline
      final
      <M extends VectorMask<Byte>>
!     void intoByteBuffer0Template(Class<M> maskClass, ByteBuffer bb, int offset, M m) {
          ByteSpecies vsp = vspecies();
          m.check(vsp);
!         ScopedMemoryAccess.storeIntoByteBufferMasked(
                  vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
!                 this, m, bb, offset,
!                 (buf, off, v, vm) -> {
!                     ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
!                     v.stOp(wb, off, vm,
-                             (wb_, o, i, e) -> wb_.put(o + i * 1, e));
                  });
      }
  
  
      // End of low-level memory operations.
--- 3878,41 ---
          VectorSupport.storeMasked(
              vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
              a, booleanArrayAddress(a, offset),
              normalized, m, a, offset,
              (arr, off, v, vm)
!             -> v.stOp(arr, (int) off, vm,
                        (arr_, off_, i, e) -> arr_[off_ + i] = (e & 1) != 0));
      }
  
      @ForceInline
      final
!     void intoMemorySegment0(MemorySegment ms, long offset) {
          ByteSpecies vsp = vspecies();
!         ScopedMemoryAccess.storeIntoMemorySegment(
                  vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
!                 this,
!                 (MemorySegmentProxy) ms, offset,
!                 (msp, off, v) -> {
!                     v.stLongOp((MemorySegment) msp, off, ByteVector::memorySegmentSet);
                  });
      }
  
      abstract
!     void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Byte> m);
      @ForceInline
      final
      <M extends VectorMask<Byte>>
!     void intoMemorySegment0Template(Class<M> maskClass, MemorySegment ms, long offset, M m) {
          ByteSpecies vsp = vspecies();
          m.check(vsp);
!         ScopedMemoryAccess.storeIntoMemorySegmentMasked(
                  vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
!                 this, m,
!                 (MemorySegmentProxy) ms, offset,
!                 (msp, off, v, vm) -> {
!                     v.stLongOp((MemorySegment) msp, off, vm, ByteVector::memorySegmentSet);
                  });
      }
  
  
      // End of low-level memory operations.

*** 3935,10 ***
--- 3925,20 ---
                                  int limit) {
          ((AbstractMask<Byte>)m)
              .checkIndexByLane(offset, limit, vsp.iota(), scale);
      }
  
+     private static
+     void checkMaskFromIndexSize(long offset,
+                                 ByteSpecies vsp,
+                                 VectorMask<Byte> m,
+                                 int scale,
+                                 long limit) {
+         ((AbstractMask<Byte>)m)
+             .checkIndexByLane(offset, limit, vsp.iota(), scale);
+     }
+ 
      @ForceInline
      private void conditionalStoreNYI(int offset,
                                       ByteSpecies vsp,
                                       VectorMask<Byte> m,
                                       int scale,

*** 4254,10 ***
--- 4254,25 ---
                                        VectorMask<Byte> m,
                                        FLdOp<M> f) {
              return dummyVector().ldOp(memory, offset, m, f);
          }
  
+         /*package-private*/
+         @ForceInline
+         ByteVector ldLongOp(MemorySegment memory, long offset,
+                                       FLdLongOp f) {
+             return dummyVector().ldLongOp(memory, offset, f);
+         }
+ 
+         /*package-private*/
+         @ForceInline
+         ByteVector ldLongOp(MemorySegment memory, long offset,
+                                       VectorMask<Byte> m,
+                                       FLdLongOp f) {
+             return dummyVector().ldLongOp(memory, offset, m, f);
+         }
+ 
          /*package-private*/
          @ForceInline
          <M> void stOp(M memory, int offset, FStOp<M> f) {
              dummyVector().stOp(memory, offset, f);
          }

*** 4268,10 ***
--- 4283,24 ---
                        AbstractMask<Byte> m,
                        FStOp<M> f) {
              dummyVector().stOp(memory, offset, m, f);
          }
  
+         /*package-private*/
+         @ForceInline
+         void stLongOp(MemorySegment memory, long offset, FStLongOp f) {
+             dummyVector().stLongOp(memory, offset, f);
+         }
+ 
+         /*package-private*/
+         @ForceInline
+         void stLongOp(MemorySegment memory, long offset,
+                       AbstractMask<Byte> m,
+                       FStLongOp f) {
+             dummyVector().stLongOp(memory, offset, m, f);
+         }
+ 
          // N.B. Make sure these constant vectors and
          // masks load up correctly into registers.
          //
          // Also, see if we can avoid all that switching.
          // Could we cache both vectors and both masks in