src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ByteVector.java

  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 package jdk.incubator.vector;
  26 
  27 import java.nio.ByteBuffer;
  28 import java.nio.ByteOrder;
  29 import java.nio.ReadOnlyBufferException;
  30 import java.util.Arrays;
  31 import java.util.Objects;
  32 import java.util.function.BinaryOperator;
  33 import java.util.function.Function;
  34 import java.util.function.UnaryOperator;
  35 
  36 import jdk.internal.misc.ScopedMemoryAccess;
  37 import jdk.internal.misc.Unsafe;
  38 import jdk.internal.vm.annotation.ForceInline;
  39 import jdk.internal.vm.vector.VectorSupport;
  40 
  41 import static jdk.internal.vm.vector.VectorSupport.*;
  42 import static jdk.incubator.vector.VectorIntrinsics.*;
  43 
  44 import static jdk.incubator.vector.VectorOperators.*;
  45 
  46 // -- This file was mechanically generated: Do not edit! -- //
  47 
  48 /**
  49  * A specialized {@link Vector} representing an ordered immutable sequence of
  50  * {@code byte} values.
  51  */
  52 @SuppressWarnings("cast")  // warning: redundant cast

 156     ByteVector uOp(FUnOp f);
 157     @ForceInline
 158     final
 159     ByteVector uOpTemplate(FUnOp f) {
 160         byte[] vec = vec();
 161         byte[] res = new byte[length()];
 162         for (int i = 0; i < res.length; i++) {
 163             res[i] = f.apply(i, vec[i]);
 164         }
 165         return vectorFactory(res);
 166     }
 167 
 168     /*package-private*/
 169     abstract
 170     ByteVector uOp(VectorMask<Byte> m,
 171                              FUnOp f);
 172     @ForceInline
 173     final
 174     ByteVector uOpTemplate(VectorMask<Byte> m,
 175                                      FUnOp f) {
 176         byte[] vec = vec();
 177         byte[] res = new byte[length()];
 178         boolean[] mbits = ((AbstractMask<Byte>)m).getBits();
 179         for (int i = 0; i < res.length; i++) {
 180             res[i] = mbits[i] ? f.apply(i, vec[i]) : vec[i];
 181         }
 182         return vectorFactory(res);
 183     }
 184 
 185     // Binary operator
 186 
 187     /*package-private*/
 188     interface FBinOp {
 189         byte apply(int i, byte a, byte b);
 190     }
 191 
 192     /*package-private*/
 193     abstract
 194     ByteVector bOp(Vector<Byte> o,
 195                              FBinOp f);
 196     @ForceInline
 197     final
 198     ByteVector bOpTemplate(Vector<Byte> o,
 199                                      FBinOp f) {
 200         byte[] res = new byte[length()];
 201         byte[] vec1 = this.vec();
 202         byte[] vec2 = ((ByteVector)o).vec();
 203         for (int i = 0; i < res.length; i++) {
 204             res[i] = f.apply(i, vec1[i], vec2[i]);
 205         }
 206         return vectorFactory(res);
 207     }
 208 
 209     /*package-private*/
 210     abstract
 211     ByteVector bOp(Vector<Byte> o,
 212                              VectorMask<Byte> m,
 213                              FBinOp f);
 214     @ForceInline
 215     final
 216     ByteVector bOpTemplate(Vector<Byte> o,
 217                                      VectorMask<Byte> m,
 218                                      FBinOp f) {
 219         byte[] res = new byte[length()];
 220         byte[] vec1 = this.vec();
 221         byte[] vec2 = ((ByteVector)o).vec();
 222         boolean[] mbits = ((AbstractMask<Byte>)m).getBits();
 223         for (int i = 0; i < res.length; i++) {
 224             res[i] = mbits[i] ? f.apply(i, vec1[i], vec2[i]) : vec1[i];
 225         }
 226         return vectorFactory(res);
 227     }
 228 
 229     // Ternary operator
 230 
 231     /*package-private*/
 232     interface FTriOp {
 233         byte apply(int i, byte a, byte b, byte c);
 234     }
 235 
 236     /*package-private*/
 237     abstract
 238     ByteVector tOp(Vector<Byte> o1,
 239                              Vector<Byte> o2,
 240                              FTriOp f);
 241     @ForceInline
 242     final
 243     ByteVector tOpTemplate(Vector<Byte> o1,
 244                                      Vector<Byte> o2,
 245                                      FTriOp f) {
 246         byte[] res = new byte[length()];
 247         byte[] vec1 = this.vec();
 248         byte[] vec2 = ((ByteVector)o1).vec();
 249         byte[] vec3 = ((ByteVector)o2).vec();
 250         for (int i = 0; i < res.length; i++) {
 251             res[i] = f.apply(i, vec1[i], vec2[i], vec3[i]);
 252         }
 253         return vectorFactory(res);
 254     }
 255 
 256     /*package-private*/
 257     abstract
 258     ByteVector tOp(Vector<Byte> o1,
 259                              Vector<Byte> o2,
 260                              VectorMask<Byte> m,
 261                              FTriOp f);
 262     @ForceInline
 263     final
 264     ByteVector tOpTemplate(Vector<Byte> o1,
 265                                      Vector<Byte> o2,
 266                                      VectorMask<Byte> m,
 267                                      FTriOp f) {
 268         byte[] res = new byte[length()];
 269         byte[] vec1 = this.vec();
 270         byte[] vec2 = ((ByteVector)o1).vec();
 271         byte[] vec3 = ((ByteVector)o2).vec();
 272         boolean[] mbits = ((AbstractMask<Byte>)m).getBits();
 273         for (int i = 0; i < res.length; i++) {
 274             res[i] = mbits[i] ? f.apply(i, vec1[i], vec2[i], vec3[i]) : vec1[i];
 275         }
 276         return vectorFactory(res);
 277     }
 278 
 279     // Reduction operator
 280 
 281     /*package-private*/
 282     abstract
 283     byte rOp(byte v, FBinOp f);
 284     @ForceInline
 285     final
 286     byte rOpTemplate(byte v, FBinOp f) {
 287         byte[] vec = vec();
 288         for (int i = 0; i < vec.length; i++) {
 289             v = f.apply(i, v, vec[i]);
 290         }
 291         return v;
 292     }
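    // Editorial sketch (not part of the generated file): rOpTemplate is a
    // plain left fold over the lanes.  A hypothetical usage summing all
    // lanes, with the ADD identity as the starting value:
    //
    //   byte sum = v.rOp((byte) 0, (i, acc, e) -> (byte) (acc + e));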
 293 
 294     // Memory reference
 295 
 296     /*package-private*/
 297     interface FLdOp<M> {
 298         byte apply(M memory, int offset, int i);
 299     }
 300 
 301     /*package-private*/
 302     @ForceInline
 303     final

 532     final ByteVector broadcastTemplate(long e) {
 533         return vspecies().broadcast(e);
 534     }
 535 
 536     // Unary lanewise support
 537 
 538     /**
 539      * {@inheritDoc} <!--workaround-->
 540      */
 541     public abstract
 542     ByteVector lanewise(VectorOperators.Unary op);
 543 
 544     @ForceInline
 545     final
 546     ByteVector lanewiseTemplate(VectorOperators.Unary op) {
 547         if (opKind(op, VO_SPECIAL)) {
 548             if (op == ZOMO) {
 549                 return blend(broadcast(-1), compare(NE, 0));
 550             }
 551             if (op == NOT) {
 552                 return broadcast(-1).lanewiseTemplate(XOR, this);
 553             } else if (op == NEG) {
 554                 // FIXME: Support this in the JIT.
 555                 return broadcast(0).lanewiseTemplate(SUB, this);
 556             }
 557         }
 558         int opc = opCode(op);
 559         return VectorSupport.unaryOp(
 560             opc, getClass(), byte.class, length(),
 561             this,
 562             UN_IMPL.find(op, opc, (opc_) -> {
 563               switch (opc_) {
 564                 case VECTOR_OP_NEG: return v0 ->
 565                         v0.uOp((i, a) -> (byte) -a);
 566                 case VECTOR_OP_ABS: return v0 ->
 567                         v0.uOp((i, a) -> (byte) Math.abs(a));
 568                 default: return null;
 569               }}));
 570     }
 571     private static final
 572     ImplCache<Unary,UnaryOperator<ByteVector>> UN_IMPL
 573         = new ImplCache<>(Unary.class, ByteVector.class);
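    // Editorial sketch (not part of the generated file): the scalar
    // identities behind the NOT and NEG rewrites in lanewiseTemplate
    // above.  Both reduce a unary op to a binary op the JIT already
    // intrinsifies (the helper names here are hypothetical).
    private static byte notViaXor(byte a) {
        return (byte) (-1 ^ a);   // NOT rewritten as broadcast(-1) XOR this
    }
    private static byte negViaSub(byte a) {
        return (byte) (0 - a);    // NEG rewritten as broadcast(0) SUB this
    }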
 574 
 575     /**
 576      * {@inheritDoc} <!--workaround-->
 577      */
 578     @ForceInline
 579     public final
 580     ByteVector lanewise(VectorOperators.Unary op,
 581                                   VectorMask<Byte> m) {
 582         return blend(lanewise(op), m);
 583     }
 584 
 585     // Binary lanewise support
 586 
 587     /**
 588      * {@inheritDoc} <!--workaround-->
 589      * @see #lanewise(VectorOperators.Binary,byte)
 590      * @see #lanewise(VectorOperators.Binary,byte,VectorMask)
 591      */
 592     @Override
 593     public abstract
 594     ByteVector lanewise(VectorOperators.Binary op,
 595                                   Vector<Byte> v);
 596     @ForceInline
 597     final
 598     ByteVector lanewiseTemplate(VectorOperators.Binary op,
 599                                           Vector<Byte> v) {
 600         ByteVector that = (ByteVector) v;
 601         that.check(this);

 602         if (opKind(op, VO_SPECIAL  | VO_SHIFT)) {
 603             if (op == FIRST_NONZERO) {
 604                 // FIXME: Support this in the JIT.
 605                 VectorMask<Byte> thisNZ
 606                     = this.viewAsIntegralLanes().compare(NE, (byte) 0);
 607                 that = that.blend((byte) 0, thisNZ.cast(vspecies()));
 608                 op = OR_UNCHECKED;
 609             }
 610             if (opKind(op, VO_SHIFT)) {
 611                 // As per shift specification for Java, mask the shift count.
 612                 // This allows the JIT to ignore some ISA details.
 613                 that = that.lanewise(AND, SHIFT_MASK);
 614             }
 615             if (op == AND_NOT) {
 616                 // FIXME: Support this in the JIT.
 617                 that = that.lanewise(NOT);
 618                 op = AND;
 619             } else if (op == DIV) {
 620                 VectorMask<Byte> eqz = that.eq((byte)0);
 621                 if (eqz.anyTrue()) {
 622                     throw that.divZeroException();
 623                 }
 624             }
 625         }

 626         int opc = opCode(op);
 627         return VectorSupport.binaryOp(
 628             opc, getClass(), byte.class, length(),
 629             this, that,
 630             BIN_IMPL.find(op, opc, (opc_) -> {
 631               switch (opc_) {
 632                 case VECTOR_OP_ADD: return (v0, v1) ->
 633                         v0.bOp(v1, (i, a, b) -> (byte)(a + b));
 634                 case VECTOR_OP_SUB: return (v0, v1) ->
 635                         v0.bOp(v1, (i, a, b) -> (byte)(a - b));
 636                 case VECTOR_OP_MUL: return (v0, v1) ->
 637                         v0.bOp(v1, (i, a, b) -> (byte)(a * b));
 638                 case VECTOR_OP_DIV: return (v0, v1) ->
 639                         v0.bOp(v1, (i, a, b) -> (byte)(a / b));
 640                 case VECTOR_OP_MAX: return (v0, v1) ->
 641                         v0.bOp(v1, (i, a, b) -> (byte)Math.max(a, b));
 642                 case VECTOR_OP_MIN: return (v0, v1) ->
 643                         v0.bOp(v1, (i, a, b) -> (byte)Math.min(a, b));
 644                 case VECTOR_OP_AND: return (v0, v1) ->
 645                         v0.bOp(v1, (i, a, b) -> (byte)(a & b));
 646                 case VECTOR_OP_OR: return (v0, v1) ->
 647                         v0.bOp(v1, (i, a, b) -> (byte)(a | b));
 648                 case VECTOR_OP_XOR: return (v0, v1) ->
 649                         v0.bOp(v1, (i, a, b) -> (byte)(a ^ b));
 650                 case VECTOR_OP_LSHIFT: return (v0, v1) ->
 651                         v0.bOp(v1, (i, a, n) -> (byte)(a << n));
 652                 case VECTOR_OP_RSHIFT: return (v0, v1) ->
 653                         v0.bOp(v1, (i, a, n) -> (byte)(a >> n));
 654                 case VECTOR_OP_URSHIFT: return (v0, v1) ->
 655                         v0.bOp(v1, (i, a, n) -> (byte)((a & LSHR_SETUP_MASK) >>> n));
 656                 case VECTOR_OP_LROTATE: return (v0, v1) ->
 657                         v0.bOp(v1, (i, a, n) -> rotateLeft(a, (int)n));
 658                 case VECTOR_OP_RROTATE: return (v0, v1) ->
 659                         v0.bOp(v1, (i, a, n) -> rotateRight(a, (int)n));
 660                 default: return null;
 661                 }}));
 662     }
 663     private static final
 664     ImplCache<Binary,BinaryOperator<ByteVector>> BIN_IMPL
 665         = new ImplCache<>(Binary.class, ByteVector.class);
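    // Editorial sketch (not part of the generated file): scalar forms of
    // two rewrites performed by lanewiseTemplate above (helper names are
    // hypothetical).  AND_NOT(a, b) becomes AND with a negated second
    // operand; FIRST_NONZERO(a, b) keeps a unless it is zero.
    private static byte andNotScalar(byte a, byte b) {
        return (byte) (a & ~b);   // AND_NOT rewritten as a AND (NOT b)
    }
    private static byte firstNonzeroScalar(byte a, byte b) {
        return a != 0 ? a : b;    // blend b to zero where a != 0, then OR
    }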
 666 
 667     /**
 668      * {@inheritDoc} <!--workaround-->
 669      * @see #lanewise(VectorOperators.Binary,byte,VectorMask)
 670      */
 671     @ForceInline
 672     public final
 673     ByteVector lanewise(VectorOperators.Binary op,
 674                                   Vector<Byte> v,
 675                                   VectorMask<Byte> m) {
 676         ByteVector that = (ByteVector) v;
 677         if (op == DIV) {
 678             VectorMask<Byte> eqz = that.eq((byte)0);
 679             if (eqz.and(m).anyTrue()) {
 680                 throw that.divZeroException();
 681             }
 682             // suppress div/0 exceptions in unset lanes
 683             that = that.lanewise(NOT, eqz);
 684             return blend(lanewise(DIV, that), m);
 685         }
 686         return blend(lanewise(op, v), m);
 687     }

 688     // FIXME: Maybe all of the public final methods in this file (the
 689     // simple ones that just call lanewise) should be pushed down to
 690     // the X-VectorBits template.  They can't optimize properly at
 691     // this level, and must rely on inlining.  Does it work?
 692     // (If it works, of course keep the code here.)
 693 
 694     /**
 695      * Combines the lane values of this vector
 696      * with the value of a broadcast scalar.
 697      *
 698      * This is a lane-wise binary operation which applies
 699      * the selected operation to each lane.
 700      * The return value will be equal to this expression:
 701      * {@code this.lanewise(op, this.broadcast(e))}.
 702      *
 703      * @param op the operation used to process lane values
 704      * @param e the input scalar
 705      * @return the result of applying the operation lane-wise
 706      *         to the two input vectors
 707      * @throws UnsupportedOperationException if this vector does

 730      * This is a masked lane-wise binary operation which applies
 731      * the selected operation to each lane.
 732      * The return value will be equal to this expression:
 733      * {@code this.lanewise(op, this.broadcast(e), m)}.
 734      *
 735      * @param op the operation used to process lane values
 736      * @param e the input scalar
 737      * @param m the mask controlling lane selection
 738      * @return the result of applying the operation lane-wise
 739      *         to the input vector and the scalar
 740      * @throws UnsupportedOperationException if this vector does
 741      *         not support the requested operation
 742      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
 743      * @see #lanewise(VectorOperators.Binary,byte)
 744      */
 745     @ForceInline
 746     public final
 747     ByteVector lanewise(VectorOperators.Binary op,
 748                                   byte e,
 749                                   VectorMask<Byte> m) {
 750         return blend(lanewise(op, e), m);
 751     }
 752 
 753     /**
 754      * {@inheritDoc} <!--workaround-->
 755      * @apiNote
 756      * When working with vector subtypes like {@code ByteVector},
 757      * {@linkplain #lanewise(VectorOperators.Binary,byte)
 758      * the more strongly typed method}
 759      * is typically selected.  It can be explicitly selected
 760      * using a cast: {@code v.lanewise(op,(byte)e)}.
 761      * The two expressions will produce numerically identical results.
 762      */
 763     @ForceInline
 764     public final
 765     ByteVector lanewise(VectorOperators.Binary op,
 766                                   long e) {
 767         byte e1 = (byte) e;
 768         if ((long)e1 != e
 769             // allow shift ops to clip down their int parameters
 770             && !(opKind(op, VO_SHIFT) && (int)e1 == e)
 771             ) {
 772             vspecies().checkValue(e);  // for exception
 773         }
 774         return lanewise(op, e1);
 775     }
 776 
 777     /**
 778      * {@inheritDoc} <!--workaround-->
 779      * @apiNote
 780      * When working with vector subtypes like {@code ByteVector},
 781      * {@linkplain #lanewise(VectorOperators.Binary,byte,VectorMask)
 782      * the more strongly typed method}
 783      * is typically selected.  It can be explicitly selected
 784      * using a cast: {@code v.lanewise(op,(byte)e,m)}.
 785      * The two expressions will produce numerically identical results.
 786      */
 787     @ForceInline
 788     public final
 789     ByteVector lanewise(VectorOperators.Binary op,
 790                                   long e, VectorMask<Byte> m) {
 791         return blend(lanewise(op, e), m);
 792     }
 793 
 794     /*package-private*/
 795     abstract ByteVector
 796     lanewiseShift(VectorOperators.Binary op, int e);
 797 
 798     /*package-private*/
 799     @ForceInline
 800     final ByteVector
 801     lanewiseShiftTemplate(VectorOperators.Binary op, int e) {
 802         // Special handling for these.  FIXME: Refactor?
 803         assert(opKind(op, VO_SHIFT));
 804         // As per shift specification for Java, mask the shift count.
 805         e &= SHIFT_MASK;
 806         int opc = opCode(op);
 807         return VectorSupport.broadcastInt(
 808             opc, getClass(), byte.class, length(),
 809             this, e,
 810             BIN_INT_IMPL.find(op, opc, (opc_) -> {
 811               switch (opc_) {
 812                 case VECTOR_OP_LSHIFT: return (v, n) ->
 813                         v.uOp((i, a) -> (byte)(a << n));
 814                 case VECTOR_OP_RSHIFT: return (v, n) ->
 815                         v.uOp((i, a) -> (byte)(a >> n));
 816                 case VECTOR_OP_URSHIFT: return (v, n) ->
 817                         v.uOp((i, a) -> (byte)((a & LSHR_SETUP_MASK) >>> n));
 818                 case VECTOR_OP_LROTATE: return (v, n) ->
 819                         v.uOp((i, a) -> rotateLeft(a, (int)n));
 820                 case VECTOR_OP_RROTATE: return (v, n) ->
 821                         v.uOp((i, a) -> rotateRight(a, (int)n));
 822                 default: return null;
 823                 }}));
 824     }
 825     private static final
 826     ImplCache<Binary,VectorBroadcastIntOp<ByteVector>> BIN_INT_IMPL
 827         = new ImplCache<>(Binary.class, ByteVector.class);
 828 
 829     // As per shift specification for Java, mask the shift count.
 830     // We mask 0x3F (long), 0x1F (int), 0x0F (short), 0x07 (byte).
 831     // The latter two maskings go beyond the JLS, but seem reasonable
 832     // since our lane types are first-class types, not just dressed
 833     // up ints.
 834     private static final int SHIFT_MASK = (Byte.SIZE - 1);
 835     // Also simulate >>> on sub-word variables with a mask.
 836     private static final int LSHR_SETUP_MASK = ((1 << Byte.SIZE) - 1);
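    // Editorial sketch (not part of the generated file): how the two
    // masks above combine for a logical right shift on a single byte
    // (hypothetical helper).  SHIFT_MASK clips the count to 0..7, and
    // LSHR_SETUP_MASK (0xFF) zero-extends the value so the sign bits of
    // the int promotion do not leak into the shifted result.
    private static byte logicalShiftRightScalar(byte a, int n) {
        n &= SHIFT_MASK;                              // count modulo Byte.SIZE
        return (byte) ((a & LSHR_SETUP_MASK) >>> n);  // zero-extend, then shift
    }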
 837 
 838     // Ternary lanewise support
 839 
 840     // Ternary operators come in eight variations:
 841     //   lanewise(op, [broadcast(e1)|v1], [broadcast(e2)|v2])
 842     //   lanewise(op, [broadcast(e1)|v1], [broadcast(e2)|v2], mask)
 843 
 844     // It is annoying to support all of these variations of masking
 845     // and broadcast, but it would be more surprising not to continue
 846     // the obvious pattern started by unary and binary.
 847 
 848    /**

 861                                                   Vector<Byte> v2);
 862     @ForceInline
 863     final
 864     ByteVector lanewiseTemplate(VectorOperators.Ternary op,
 865                                           Vector<Byte> v1,
 866                                           Vector<Byte> v2) {
 867         ByteVector that = (ByteVector) v1;
 868         ByteVector tother = (ByteVector) v2;
 869         // It's a word: https://www.dictionary.com/browse/tother
 870         // See also Chapter 11 of Dickens, Our Mutual Friend:
 871         // "Totherest Governor," replied Mr Riderhood...
 872         that.check(this);
 873         tother.check(this);
 874         if (op == BITWISE_BLEND) {
 875             // FIXME: Support this in the JIT.
 876             that = this.lanewise(XOR, that).lanewise(AND, tother);
 877             return this.lanewise(XOR, that);
 878         }
 879         int opc = opCode(op);
 880         return VectorSupport.ternaryOp(
 881             opc, getClass(), byte.class, length(),
 882             this, that, tother,
 883             TERN_IMPL.find(op, opc, (opc_) -> {
 884               switch (opc_) {
 885                 default: return null;
 886                 }}));
 887     }
 888     private static final
 889     ImplCache<Ternary,TernaryOperation<ByteVector>> TERN_IMPL
 890         = new ImplCache<>(Ternary.class, ByteVector.class);
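    // Editorial sketch (not part of the generated file): the scalar
    // identity behind the BITWISE_BLEND expansion above (hypothetical
    // helper name).  Bit-for-bit, a ^ ((a ^ b) & c) yields b's bit
    // where c has a 1 and a's bit where c has a 0.
    private static byte bitwiseBlendScalar(byte a, byte b, byte c) {
        return (byte) (a ^ ((a ^ b) & c));
    }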
 891 
 892     /**
 893      * {@inheritDoc} <!--workaround-->
 894      * @see #lanewise(VectorOperators.Ternary,byte,byte,VectorMask)
 895      * @see #lanewise(VectorOperators.Ternary,Vector,byte,VectorMask)
 896      * @see #lanewise(VectorOperators.Ternary,byte,Vector,VectorMask)
 897      */
 898     @ForceInline
 899     public final
 900     ByteVector lanewise(VectorOperators.Ternary op,
 901                                   Vector<Byte> v1,
 902                                   Vector<Byte> v2,
 903                                   VectorMask<Byte> m) {
 904         return blend(lanewise(op, v1, v2), m);
 905     }
 906 
 907     /**
 908      * Combines the lane values of this vector
 909      * with the values of two broadcast scalars.
 910      *
 911      * This is a lane-wise ternary operation which applies
 912      * the selected operation to each lane.
 913      * The return value will be equal to this expression:
 914      * {@code this.lanewise(op, this.broadcast(e1), this.broadcast(e2))}.
 915      *
 916      * @param op the operation used to combine lane values
 917      * @param e1 the first input scalar
 918      * @param e2 the second input scalar
 919      * @return the result of applying the operation lane-wise
 920      *         to the input vector and the scalars
 921      * @throws UnsupportedOperationException if this vector does
 922      *         not support the requested operation
 923      * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
 924      * @see #lanewise(VectorOperators.Ternary,byte,byte,VectorMask)

 941      * The return value will be equal to this expression:
 942      * {@code this.lanewise(op, this.broadcast(e1), this.broadcast(e2), m)}.
 943      *
 944      * @param op the operation used to combine lane values
 945      * @param e1 the first input scalar
 946      * @param e2 the second input scalar
 947      * @param m the mask controlling lane selection
 948      * @return the result of applying the operation lane-wise
 949      *         to the input vector and the scalars
 950      * @throws UnsupportedOperationException if this vector does
 951      *         not support the requested operation
 952      * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
 953      * @see #lanewise(VectorOperators.Ternary,byte,byte)
 954      */
 955     @ForceInline
 956     public final
 957     ByteVector lanewise(VectorOperators.Ternary op, //(op,e1,e2,m)
 958                                   byte e1,
 959                                   byte e2,
 960                                   VectorMask<Byte> m) {
 961         return blend(lanewise(op, e1, e2), m);
 962     }
 963 
 964     /**
 965      * Combines the lane values of this vector
 966      * with the values of another vector and a broadcast scalar.
 967      *
 968      * This is a lane-wise ternary operation which applies
 969      * the selected operation to each lane.
 970      * The return value will be equal to this expression:
 971      * {@code this.lanewise(op, v1, this.broadcast(e2))}.
 972      *
 973      * @param op the operation used to combine lane values
 974      * @param v1 the other input vector
 975      * @param e2 the input scalar
 976      * @return the result of applying the operation lane-wise
 977      *         to the input vectors and the scalar
 978      * @throws UnsupportedOperationException if this vector does
 979      *         not support the requested operation
 980      * @see #lanewise(VectorOperators.Ternary,byte,byte)
 981      * @see #lanewise(VectorOperators.Ternary,Vector,byte,VectorMask)

 999      * {@code this.lanewise(op, v1, this.broadcast(e2), m)}.
1000      *
1001      * @param op the operation used to combine lane values
1002      * @param v1 the other input vector
1003      * @param e2 the input scalar
1004      * @param m the mask controlling lane selection
1005      * @return the result of applying the operation lane-wise
1006      *         to the input vectors and the scalar
1007      * @throws UnsupportedOperationException if this vector does
1008      *         not support the requested operation
1009      * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
1010      * @see #lanewise(VectorOperators.Ternary,byte,byte,VectorMask)
1011      * @see #lanewise(VectorOperators.Ternary,Vector,byte)
1012      */
1013     @ForceInline
1014     public final
1015     ByteVector lanewise(VectorOperators.Ternary op, //(op,v1,e2,m)
1016                                   Vector<Byte> v1,
1017                                   byte e2,
1018                                   VectorMask<Byte> m) {
1019         return blend(lanewise(op, v1, e2), m);
1020     }
1021 
1022     /**
1023      * Combines the lane values of this vector
1024      * with the values of another vector and a broadcast scalar.
1025      *
1026      * This is a lane-wise ternary operation which applies
1027      * the selected operation to each lane.
1028      * The return value will be equal to this expression:
1029      * {@code this.lanewise(op, this.broadcast(e1), v2)}.
1030      *
1031      * @param op the operation used to combine lane values
1032      * @param e1 the input scalar
1033      * @param v2 the other input vector
1034      * @return the result of applying the operation lane-wise
1035      *         to the input vectors and the scalar
1036      * @throws UnsupportedOperationException if this vector does
1037      *         not support the requested operation
1038      * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
1039      * @see #lanewise(VectorOperators.Ternary,byte,Vector,VectorMask)

1056      * The return value will be equal to this expression:
1057      * {@code this.lanewise(op, this.broadcast(e1), v2, m)}.
1058      *
1059      * @param op the operation used to combine lane values
1060      * @param e1 the input scalar
1061      * @param v2 the other input vector
1062      * @param m the mask controlling lane selection
1063      * @return the result of applying the operation lane-wise
1064      *         to the input vectors and the scalar
1065      * @throws UnsupportedOperationException if this vector does
1066      *         not support the requested operation
1067      * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
1068      * @see #lanewise(VectorOperators.Ternary,byte,Vector)
1069      */
1070     @ForceInline
1071     public final
1072     ByteVector lanewise(VectorOperators.Ternary op, //(op,e1,v2,m)
1073                                   byte e1,
1074                                   Vector<Byte> v2,
1075                                   VectorMask<Byte> m) {
1076         return blend(lanewise(op, e1, v2), m);
1077     }
1078 
1079     // (Thus endeth the Great and Mighty Ternary Ogdoad.)
1080     // https://en.wikipedia.org/wiki/Ogdoad
1081 
1082     /// FULL-SERVICE BINARY METHODS: ADD, SUB, MUL, DIV
1083     //
1084     // These include masked and non-masked versions.
1085     // This subclass adds broadcast (masked or not).
1086 
1087     /**
1088      * {@inheritDoc} <!--workaround-->
1089      * @see #add(byte)
1090      */
1091     @Override
1092     @ForceInline
1093     public final ByteVector add(Vector<Byte> v) {
1094         return lanewise(ADD, v);
1095     }
1096 

1728     @Override
1729     @ForceInline
1730     public final
1731     VectorMask<Byte> test(VectorOperators.Test op,
1732                                   VectorMask<Byte> m) {
1733         return test(op).and(m);
1734     }
1735 
1736     /**
1737      * {@inheritDoc} <!--workaround-->
1738      */
1739     @Override
1740     public abstract
1741     VectorMask<Byte> compare(VectorOperators.Comparison op, Vector<Byte> v);
1742 
1743     /*package-private*/
1744     @ForceInline
1745     final
1746     <M extends VectorMask<Byte>>
1747     M compareTemplate(Class<M> maskType, Comparison op, Vector<Byte> v) {
1748         Objects.requireNonNull(v);
1749         ByteSpecies vsp = vspecies();
1750         ByteVector that = (ByteVector) v;
1751         that.check(this);
1752         int opc = opCode(op);
1753         return VectorSupport.compare(
1754             opc, getClass(), maskType, byte.class, length(),
1755             this, that,
1756             (cond, v0, v1) -> {
1757                 AbstractMask<Byte> m
1758                     = v0.bTest(cond, v1, (cond_, i, a, b)
1759                                -> compareWithOp(cond, a, b));
1760                 @SuppressWarnings("unchecked")
1761                 M m2 = (M) m;
1762                 return m2;
1763             });
1764     }
1765 
1766     @ForceInline
1767     private static boolean compareWithOp(int cond, byte a, byte b) {
1768         return switch (cond) {
1769             case BT_eq -> a == b;
1770             case BT_ne -> a != b;
1771             case BT_lt -> a < b;
1772             case BT_le -> a <= b;
1773             case BT_gt -> a > b;
1774             case BT_ge -> a >= b;
1775             case BT_ult -> Byte.compareUnsigned(a, b) < 0;
1776             case BT_ule -> Byte.compareUnsigned(a, b) <= 0;
1777             case BT_ugt -> Byte.compareUnsigned(a, b) > 0;
1778             case BT_uge -> Byte.compareUnsigned(a, b) >= 0;
1779             default -> throw new AssertionError();
1780         };
1781     }
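    // Editorial sketch (not part of the generated file): the unsigned
    // branches above treat the byte's bit pattern as a value in 0..255,
    // so BT_ult is equivalent to this hypothetical helper:
    private static boolean unsignedLessThanScalar(byte a, byte b) {
        return (a & 0xFF) < (b & 0xFF);  // same as Byte.compareUnsigned(a, b) < 0
    }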
1782 
1783     /**
1784      * {@inheritDoc} <!--workaround-->
1785      */
1786     @Override
1787     @ForceInline
1788     public final
1789     VectorMask<Byte> compare(VectorOperators.Comparison op,
1790                                   Vector<Byte> v,
1791                                   VectorMask<Byte> m) {
1792         return compare(op, v).and(m);
1793     }
1794 
1795     /**
1796      * Tests this vector by comparing it with an input scalar,
1797      * according to the given comparison operation.
1798      *
1799      * This is a lane-wise binary test operation which applies
1800      * the comparison operation to each lane.
1801      * <p>
1802      * The result is the same as
1803      * {@code compare(op, broadcast(species(), e))}.
1804      * That is, the scalar may be regarded as broadcast to
1805      * a vector of the same species, and then compared
1806      * against the original vector, using the selected
1807      * comparison operation.
1808      *
1809      * @param op the operation used to compare lane values
1810      * @param e the input scalar
1811      * @return the mask result of testing lane-wise if this vector
1812      *         compares to the input, according to the selected
1813      *         comparison operator
1814      * @see ByteVector#compare(VectorOperators.Comparison,Vector)

1833      *
1834      * This is a masked lane-wise binary test operation which applies
1835      * to each pair of corresponding lane values.
1836      *
1837      * The returned result is equal to the expression
 1838      * {@code compare(op,e).and(m)}.
1839      *
1840      * @param op the operation used to compare lane values
1841      * @param e the input scalar
1842      * @param m the mask controlling lane selection
1843      * @return the mask result of testing lane-wise if this vector
1844      *         compares to the input, according to the selected
1845      *         comparison operator,
1846      *         and only in the lanes selected by the mask
1847      * @see ByteVector#compare(VectorOperators.Comparison,Vector,VectorMask)
1848      */
1849     @ForceInline
1850     public final VectorMask<Byte> compare(VectorOperators.Comparison op,
1851                                                byte e,
1852                                                VectorMask<Byte> m) {
1853         return compare(op, e).and(m);
1854     }
1855 
1856     /**
1857      * {@inheritDoc} <!--workaround-->
1858      */
1859     @Override
1860     public abstract
1861     VectorMask<Byte> compare(Comparison op, long e);
1862 
1863     /*package-private*/
1864     @ForceInline
1865     final
1866     <M extends VectorMask<Byte>>
1867     M compareTemplate(Class<M> maskType, Comparison op, long e) {
1868         return compareTemplate(maskType, op, broadcast(e));
1869     }
1870 
1871     /**
1872      * {@inheritDoc} <!--workaround-->
1873      */

2084     wrongPartForSlice(int part) {
2085         String msg = String.format("bad part number %d for slice operation",
2086                                    part);
2087         return new ArrayIndexOutOfBoundsException(msg);
2088     }
2089 
2090     /**
2091      * {@inheritDoc} <!--workaround-->
2092      */
2093     @Override
2094     public abstract
2095     ByteVector rearrange(VectorShuffle<Byte> m);
2096 
2097     /*package-private*/
2098     @ForceInline
2099     final
2100     <S extends VectorShuffle<Byte>>
2101     ByteVector rearrangeTemplate(Class<S> shuffletype, S shuffle) {
2102         shuffle.checkIndexes();
2103         return VectorSupport.rearrangeOp(
2104             getClass(), shuffletype, byte.class, length(),
2105             this, shuffle,
2106             (v1, s_) -> v1.uOp((i, a) -> {
2107                 int ei = s_.laneSource(i);
2108                 return v1.lane(ei);
2109             }));
2110     }
2111 
2112     /**
2113      * {@inheritDoc} <!--workaround-->
2114      */
2115     @Override
2116     public abstract
2117     ByteVector rearrange(VectorShuffle<Byte> s,
2118                                    VectorMask<Byte> m);
2119 
2120     /*package-private*/
2121     @ForceInline
2122     final
2123     <S extends VectorShuffle<Byte>>
2124     ByteVector rearrangeTemplate(Class<S> shuffletype,
2125                                            S shuffle,
2126                                            VectorMask<Byte> m) {
2127         ByteVector unmasked =
2128             VectorSupport.rearrangeOp(
2129                 getClass(), shuffletype, byte.class, length(),
2130                 this, shuffle,
2131                 (v1, s_) -> v1.uOp((i, a) -> {
2132                     int ei = s_.laneSource(i);
2133                     return ei < 0 ? 0 : v1.lane(ei);
2134                 }));
2135         VectorMask<Byte> valid = shuffle.laneIsValid();
2136         if (m.andNot(valid).anyTrue()) {
2137             shuffle.checkIndexes();
2138             throw new AssertionError();
2139         }
2140         return broadcast((byte)0).blend(unmasked, m);
2141     }
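    // Editorial note (not part of the generated file): the unmasked
    // rearrange above tolerates invalid source indexes (they produce
    // zero lanes), and the andNot(valid) test afterwards raises the
    // usual checkIndexes() exception only when the mask actually
    // selects such a lane.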
2142 
2143     /**
2144      * {@inheritDoc} <!--workaround-->
2145      */
2146     @Override
2147     public abstract
2148     ByteVector rearrange(VectorShuffle<Byte> s,
2149                                    Vector<Byte> v);
2150 
2151     /*package-private*/
2152     @ForceInline
2153     final
2154     <S extends VectorShuffle<Byte>>
2155     ByteVector rearrangeTemplate(Class<S> shuffletype,
2156                                            S shuffle,
2157                                            ByteVector v) {
2158         VectorMask<Byte> valid = shuffle.laneIsValid();
2159         @SuppressWarnings("unchecked")
2160         S ws = (S) shuffle.wrapIndexes();
2161         ByteVector r0 =
2162             VectorSupport.rearrangeOp(
2163                 getClass(), shuffletype, byte.class, length(),
2164                 this, ws,
2165                 (v0, s_) -> v0.uOp((i, a) -> {
2166                     int ei = s_.laneSource(i);
2167                     return v0.lane(ei);
2168                 }));
2169         ByteVector r1 =
2170             VectorSupport.rearrangeOp(
2171                 getClass(), shuffletype, byte.class, length(),
2172                 v, ws,
2173                 (v1, s_) -> v1.uOp((i, a) -> {
2174                     int ei = s_.laneSource(i);
2175                     return v1.lane(ei);
2176                 }));
2177         return r1.blend(r0, valid);
2178     }
2179 
2180     @ForceInline
2181     private final
2182     VectorShuffle<Byte> toShuffle0(ByteSpecies dsp) {
2183         byte[] a = toArray();
2184         int[] sa = new int[a.length];
2185         for (int i = 0; i < a.length; i++) {
2186             sa[i] = (int) a[i];
2187         }
2188         return VectorShuffle.fromArray(dsp, sa, 0);
2189     }
2190 
2191     /*package-private*/
2192     @ForceInline
2193     final

2416      * <li>
2417      * All other reduction operations are fully commutative and
2418      * associative.  The implementation can choose any order of
2419      * processing, yet it will always produce the same result.
2420      * </ul>
2421      *
2422      * @param op the operation used to combine lane values
2423      * @param m the mask controlling lane selection
2424      * @return the reduced result accumulated from the selected lane values
2425      * @throws UnsupportedOperationException if this vector does
2426      *         not support the requested operation
2427      * @see #reduceLanes(VectorOperators.Associative)
2428      */
2429     public abstract byte reduceLanes(VectorOperators.Associative op,
2430                                        VectorMask<Byte> m);
2431 
2432     /*package-private*/
2433     @ForceInline
2434     final
2435     byte reduceLanesTemplate(VectorOperators.Associative op,
2436                                VectorMask<Byte> m) {
2437         ByteVector v = reduceIdentityVector(op).blend(this, m);
2438         return v.reduceLanesTemplate(op);
2439     }
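    // Editorial sketch (not part of the generated file): the masked
    // reduction above splices the operation's identity value into unset
    // lanes and then reduces all lanes.  A scalar analogue for ADD,
    // whose identity is 0 (hypothetical helper):
    private static byte maskedSumSketch(byte[] lanes, boolean[] mask) {
        byte v = 0;                        // identity for ADD
        for (int i = 0; i < lanes.length; i++) {
            v += mask[i] ? lanes[i] : 0;   // unset lanes contribute the identity
        }
        return v;
    }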
2440 
2441     /*package-private*/
2442     @ForceInline
2443     final
2444     byte reduceLanesTemplate(VectorOperators.Associative op) {
2445         if (op == FIRST_NONZERO) {
 2446     // FIXME:  The JIT should handle this, and other scan ops also.
2447             VectorMask<Byte> thisNZ
2448                 = this.viewAsIntegralLanes().compare(NE, (byte) 0);
2449             return this.lane(thisNZ.firstTrue());
2450         }
2451         int opc = opCode(op);
2452         return fromBits(VectorSupport.reductionCoerced(
2453             opc, getClass(), byte.class, length(),
2454             this,
2455             REDUCE_IMPL.find(op, opc, (opc_) -> {
2456               switch (opc_) {
2457               case VECTOR_OP_ADD: return v ->
2458                       toBits(v.rOp((byte)0, (i, a, b) -> (byte)(a + b)));
2459               case VECTOR_OP_MUL: return v ->
2460                       toBits(v.rOp((byte)1, (i, a, b) -> (byte)(a * b)));
2461               case VECTOR_OP_MIN: return v ->
2462                       toBits(v.rOp(MAX_OR_INF, (i, a, b) -> (byte) Math.min(a, b)));
2463               case VECTOR_OP_MAX: return v ->
2464                       toBits(v.rOp(MIN_OR_INF, (i, a, b) -> (byte) Math.max(a, b)));
2465               case VECTOR_OP_AND: return v ->
2466                       toBits(v.rOp((byte)-1, (i, a, b) -> (byte)(a & b)));
2467               case VECTOR_OP_OR: return v ->
2468                       toBits(v.rOp((byte)0, (i, a, b) -> (byte)(a | b)));
2469               case VECTOR_OP_XOR: return v ->
2470                       toBits(v.rOp((byte)0, (i, a, b) -> (byte)(a ^ b)));
2471               default: return null;
2472               }})));
2473     }
2474     private static final
2475     ImplCache<Associative,Function<ByteVector,Long>> REDUCE_IMPL
2476         = new ImplCache<>(Associative.class, ByteVector.class);
2477 
2478     private
2479     @ForceInline
2480     ByteVector reduceIdentityVector(VectorOperators.Associative op) {
2481         int opc = opCode(op);
2482         UnaryOperator<ByteVector> fn
2483             = REDUCE_ID_IMPL.find(op, opc, (opc_) -> {
2484                 switch (opc_) {
2485                 case VECTOR_OP_ADD:
2486                 case VECTOR_OP_OR:
2487                 case VECTOR_OP_XOR:
2488                     return v -> v.broadcast(0);
2489                 case VECTOR_OP_MUL:
2490                     return v -> v.broadcast(1);
2491                 case VECTOR_OP_AND:
2492                     return v -> v.broadcast(-1);
2493                 case VECTOR_OP_MIN:
2494                     return v -> v.broadcast(MAX_OR_INF);
2495                 case VECTOR_OP_MAX:
2496                     return v -> v.broadcast(MIN_OR_INF);

2682      * @param species species of desired vector
2683      * @param a the byte array
2684      * @param offset the offset into the array
2685      * @param bo the intended byte order
2686      * @param m the mask controlling lane selection
2687      * @return a vector loaded from a byte array
2688      * @throws IndexOutOfBoundsException
2689      *         if {@code offset+N*ESIZE < 0}
2690      *         or {@code offset+(N+1)*ESIZE > a.length}
2691      *         for any lane {@code N} in the vector
2692      *         where the mask is set
2693      */
2694     @ForceInline
2695     public static
2696     ByteVector fromByteArray(VectorSpecies<Byte> species,
2697                                        byte[] a, int offset,
2698                                        ByteOrder bo,
2699                                        VectorMask<Byte> m) {
2700         ByteSpecies vsp = (ByteSpecies) species;
2701         if (offset >= 0 && offset <= (a.length - species.vectorByteSize())) {
2702             ByteVector zero = vsp.zero();
2703             ByteVector v = zero.fromByteArray0(a, offset);
2704             return zero.blend(v.maybeSwap(bo), m);
2705         }
2706 
2707         // FIXME: optimize
2708         checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
2709         ByteBuffer wb = wrapper(a, bo);
2710         return vsp.ldOp(wb, offset, (AbstractMask<Byte>)m,
2711                    (wb_, o, i)  -> wb_.get(o + i * 1));
2712     }
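    // Editorial note (not part of the generated file): the bounds test
    // above is a fast path.  When the whole vector fits in the array,
    // the load is performed unmasked and the mask is applied afterwards
    // by blend(); only offsets that could fault fall back to the
    // checked, lane-by-lane ldOp path.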
2713 
2714     /**
2715      * Loads a vector from an array of type {@code byte[]}
2716      * starting at an offset.
2717      * For each vector lane, where {@code N} is the vector lane index, the
2718      * array element at index {@code offset + N} is placed into the
2719      * resulting vector at lane index {@code N}.
2720      *
2721      * @param species species of desired vector
2722      * @param a the array
2723      * @param offset the offset into the array
2724      * @return the vector loaded from an array

2746      * {@code N}, otherwise the default element value is placed into the
2747      * resulting vector at lane index {@code N}.
2748      *
2749      * @param species species of desired vector
2750      * @param a the array
2751      * @param offset the offset into the array
2752      * @param m the mask controlling lane selection
2753      * @return the vector loaded from an array
2754      * @throws IndexOutOfBoundsException
2755      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
2756      *         for any lane {@code N} in the vector
2757      *         where the mask is set
2758      */
2759     @ForceInline
2760     public static
2761     ByteVector fromArray(VectorSpecies<Byte> species,
2762                                    byte[] a, int offset,
2763                                    VectorMask<Byte> m) {
2764         ByteSpecies vsp = (ByteSpecies) species;
2765         if (offset >= 0 && offset <= (a.length - species.length())) {
2766             ByteVector zero = vsp.zero();
2767             return zero.blend(zero.fromArray0(a, offset), m);
2768         }
2769 
2770         // FIXME: optimize
2771         checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
2772         return vsp.vOp(m, i -> a[offset + i]);
2773     }
2774 
2775     /**
2776      * Gathers a new vector composed of elements from an array of type
2777      * {@code byte[]},
2778      * using indexes obtained by adding a fixed {@code offset} to a
2779      * series of secondary offsets from an <em>index map</em>.
2780      * The index map is a contiguous sequence of {@code VLENGTH}
2781      * elements in a second array of {@code int}s, starting at a given
2782      * {@code mapOffset}.
2783      * <p>
2784      * For each vector lane, where {@code N} is the vector lane index,
2785      * the lane is loaded from the array
2786      * element {@code a[f(N)]}, where {@code f(N)} is the
2787      * index mapping expression

2904      * expression {@code (byte) (b ? 1 : 0)}, where {@code b} is the {@code boolean} value.
2905      *
2906      * @param species species of desired vector
2907      * @param a the array
2908      * @param offset the offset into the array
2909      * @param m the mask controlling lane selection
2910      * @return the vector loaded from an array
2911      * @throws IndexOutOfBoundsException
2912      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
2913      *         for any lane {@code N} in the vector
2914      *         where the mask is set
2915      */
2916     @ForceInline
2917     public static
2918     ByteVector fromBooleanArray(VectorSpecies<Byte> species,
2919                                           boolean[] a, int offset,
2920                                           VectorMask<Byte> m) {
2921         ByteSpecies vsp = (ByteSpecies) species;
2922         if (offset >= 0 && offset <= (a.length - species.length())) {
2923             ByteVector zero = vsp.zero();
2924             return zero.blend(zero.fromBooleanArray0(a, offset), m);
2925         }
2926 
2927         // FIXME: optimize
2928         checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
2929         return vsp.vOp(m, i -> (byte) (a[offset + i] ? 1 : 0));
2930     }
2931 
2932     /**
2933      * Gathers a new vector composed of elements from an array of type
2934      * {@code boolean[]},
2935      * using indexes obtained by adding a fixed {@code offset} to a
2936      * series of secondary offsets from an <em>index map</em>.
2937      * The index map is a contiguous sequence of {@code VLENGTH}
2938      * elements in a second array of {@code int}s, starting at a given
2939      * {@code mapOffset}.
2940      * <p>
2941      * For each vector lane, where {@code N} is the vector lane index,
2942      * the lane is loaded from the expression
2943      * {@code (byte) (a[f(N)] ? 1 : 0)}, where {@code f(N)} is the
2944      * index mapping expression

3082      * @param species species of desired vector
3083      * @param bb the byte buffer
3084      * @param offset the offset into the byte buffer
3085      * @param bo the intended byte order
3086      * @param m the mask controlling lane selection
3087      * @return a vector loaded from a byte buffer
3088      * @throws IndexOutOfBoundsException
3089      *         if {@code offset+N*1 < 0}
3090      *         or {@code offset+N*1 >= bb.limit()}
3091      *         for any lane {@code N} in the vector
3092      *         where the mask is set
3093      */
3094     @ForceInline
3095     public static
3096     ByteVector fromByteBuffer(VectorSpecies<Byte> species,
3097                                         ByteBuffer bb, int offset,
3098                                         ByteOrder bo,
3099                                         VectorMask<Byte> m) {
3100         ByteSpecies vsp = (ByteSpecies) species;
3101         if (offset >= 0 && offset <= (bb.limit() - species.vectorByteSize())) {
3102             ByteVector zero = vsp.zero();
3103             ByteVector v = zero.fromByteBuffer0(bb, offset);
3104             return zero.blend(v.maybeSwap(bo), m);
3105         }
3106 
3107         // FIXME: optimize
3108         checkMaskFromIndexSize(offset, vsp, m, 1, bb.limit());
3109         ByteBuffer wb = wrapper(bb, bo);
3110         return vsp.ldOp(wb, offset, (AbstractMask<Byte>)m,
3111                    (wb_, o, i)  -> wb_.get(o + i * 1));
3112     }
3113 
3114     // Memory store operations
3115 
3116     /**
3117      * Stores this vector into an array of type {@code byte[]}
3118      * starting at an offset.
3119      * <p>
3120      * For each vector lane, where {@code N} is the vector lane index,
3121      * the lane element at index {@code N} is stored into the array
3122      * element {@code a[offset+N]}.
3123      *
3124      * @param a the array, of type {@code byte[]}

3156      * Lanes where the mask is unset are not stored and do not need
3157      * to correspond to legitimate elements of {@code a}.
3158      * That is, unset lanes may correspond to array indexes less than
3159      * zero or beyond the end of the array.
3160      *
3161      * @param a the array, of type {@code byte[]}
3162      * @param offset the offset into the array
3163      * @param m the mask controlling lane storage
3164      * @throws IndexOutOfBoundsException
3165      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
3166      *         for any lane {@code N} in the vector
3167      *         where the mask is set
3168      */
3169     @ForceInline
3170     public final
3171     void intoArray(byte[] a, int offset,
3172                    VectorMask<Byte> m) {
3173         if (m.allTrue()) {
3174             intoArray(a, offset);
3175         } else {
3176             // FIXME: optimize
3177             ByteSpecies vsp = vspecies();
3178             checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
3179             stOp(a, offset, m, (arr, off, i, v) -> arr[off+i] = v);
3180         }
3181     }
3182 
3183     /**
3184      * Scatters this vector into an array of type {@code byte[]}
3185      * using indexes obtained by adding a fixed {@code offset} to a
3186      * series of secondary offsets from an <em>index map</em>.
3187      * The index map is a contiguous sequence of {@code VLENGTH}
3188      * elements in a second array of {@code int}s, starting at a given
3189      * {@code mapOffset}.
3190      * <p>
3191      * For each vector lane, where {@code N} is the vector lane index,
3192      * the lane element at index {@code N} is stored into the array
3193      * element {@code a[f(N)]}, where {@code f(N)} is the
3194      * index mapping expression
 3195      * {@code offset + indexMap[mapOffset + N]}.
3196      *
3197      * @param a the array
3198      * @param offset an offset to combine with the index map offsets
3199      * @param indexMap the index map

3312      * Lanes where the mask is unset are not stored and do not need
3313      * to correspond to legitimate elements of {@code a}.
3314      * That is, unset lanes may correspond to array indexes less than
3315      * zero or beyond the end of the array.
3316      *
3317      * @param a the array
3318      * @param offset the offset into the array
3319      * @param m the mask controlling lane storage
3320      * @throws IndexOutOfBoundsException
3321      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
3322      *         for any lane {@code N} in the vector
3323      *         where the mask is set
3324      */
3325     @ForceInline
3326     public final
3327     void intoBooleanArray(boolean[] a, int offset,
3328                           VectorMask<Byte> m) {
3329         if (m.allTrue()) {
3330             intoBooleanArray(a, offset);
3331         } else {
3332             // FIXME: optimize
3333             ByteSpecies vsp = vspecies();
3334             checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
3335             stOp(a, offset, m, (arr, off, i, e) -> arr[off+i] = (e & 1) != 0);
3336         }
3337     }
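    // Editorial note (not part of the generated file): the (e & 1) != 0
    // conversion above is the inverse of the (b ? 1 : 0) encoding used
    // on load: only the low bit of each byte decides the boolean, so
    // for example (byte) 2 stores false.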
3338 
3339     /**
3340      * Scatters this vector into an array of type {@code boolean[]}
3341      * using indexes obtained by adding a fixed {@code offset} to a
3342      * series of secondary offsets from an <em>index map</em>.
3343      * The index map is a contiguous sequence of {@code VLENGTH}
3344      * elements in a second array of {@code int}s, starting at a given
3345      * {@code mapOffset}.
3346      * <p>
3347      * For each vector lane, where {@code N} is the vector lane index,
3348      * the lane element at index {@code N}
3349      * is first converted to a {@code boolean} value and then
3350      * stored into the array
3351      * element {@code a[f(N)]}, where {@code f(N)} is the
3352      * index mapping expression
 3353      * {@code offset + indexMap[mapOffset + N]}.
3354      * <p>
3355      * A {@code byte} value is converted to a {@code boolean} value by applying the

3434     @ForceInline
3435     public final
3436     void intoByteArray(byte[] a, int offset,
3437                        ByteOrder bo) {
3438         offset = checkFromIndexSize(offset, byteSize(), a.length);
3439         maybeSwap(bo).intoByteArray0(a, offset);
3440     }
3441 
3442     /**
3443      * {@inheritDoc} <!--workaround-->
3444      */
3445     @Override
3446     @ForceInline
3447     public final
3448     void intoByteArray(byte[] a, int offset,
3449                        ByteOrder bo,
3450                        VectorMask<Byte> m) {
3451         if (m.allTrue()) {
3452             intoByteArray(a, offset, bo);
3453         } else {
3454             // FIXME: optimize
3455             ByteSpecies vsp = vspecies();
3456             checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
3457             ByteBuffer wb = wrapper(a, bo);
3458             this.stOp(wb, offset, m,
3459                     (wb_, o, i, e) -> wb_.put(o + i * 1, e));
3460         }
3461     }
3462 
3463     /**
3464      * {@inheritDoc} <!--workaround-->
3465      */
3466     @Override
3467     @ForceInline
3468     public final
3469     void intoByteBuffer(ByteBuffer bb, int offset,
3470                         ByteOrder bo) {
3471         if (bb.isReadOnly()) {
3472             throw new ReadOnlyBufferException();
3473         }
3474         offset = checkFromIndexSize(offset, byteSize(), bb.limit());
3475         maybeSwap(bo).intoByteBuffer0(bb, offset);
3476     }
3477 
3478     /**
3479      * {@inheritDoc} <!--workaround-->
3480      */
3481     @Override
3482     @ForceInline
3483     public final
3484     void intoByteBuffer(ByteBuffer bb, int offset,
3485                         ByteOrder bo,
3486                         VectorMask<Byte> m) {
3487         if (m.allTrue()) {
3488             intoByteBuffer(bb, offset, bo);
3489         } else {
3490             // FIXME: optimize
3491             if (bb.isReadOnly()) {
3492                 throw new ReadOnlyBufferException();
3493             }
3494             ByteSpecies vsp = vspecies();
3495             checkMaskFromIndexSize(offset, vsp, m, 1, bb.limit());
3496             ByteBuffer wb = wrapper(bb, bo);
3497             this.stOp(wb, offset, m,
3498                     (wb_, o, i, e) -> wb_.put(o + i * 1, e));
3499         }
3500     }
3501 
3502     // ================================================
3503 
3504     // Low-level memory operations.
3505     //
3506     // Note that all of these operations *must* inline into a context
3507     // where the exact species of the involved vector is a
3508     // compile-time constant.  Otherwise, the intrinsic generation
3509     // will fail and performance will suffer.
3510     //
3511     // In many cases this is achieved by re-deriving a version of the
3512     // method in each concrete subclass (per species).  The re-derived
3513     // method simply calls one of these generic methods, with exact
3514     // parameters for the controlling metadata, which is either a
3515     // typed vector or constant species instance.
3516 
3517     // Unchecked loading operations in native byte order.
3518     // Caller is responsible for applying index checks, masking, and
3519     // byte swapping.
3520 
3521     /*package-private*/
3522     abstract
3523     ByteVector fromArray0(byte[] a, int offset);
3524     @ForceInline
3525     final
3526     ByteVector fromArray0Template(byte[] a, int offset) {
3527         ByteSpecies vsp = vspecies();
3528         return VectorSupport.load(
3529             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3530             a, arrayAddress(a, offset),
3531             a, offset, vsp,
3532             (arr, off, s) -> s.ldOp(arr, off,
3533                                     (arr_, off_, i) -> arr_[off_ + i]));
3534     }
3535 
3536 
3537     /*package-private*/
3538     abstract
3539     ByteVector fromBooleanArray0(boolean[] a, int offset);
3540     @ForceInline
3541     final
3542     ByteVector fromBooleanArray0Template(boolean[] a, int offset) {
3543         ByteSpecies vsp = vspecies();
3544         return VectorSupport.load(
3545             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3546             a, booleanArrayAddress(a, offset),
3547             a, offset, vsp,
3548             (arr, off, s) -> s.ldOp(arr, off,
3549                                     (arr_, off_, i) -> (byte) (arr_[off_ + i] ? 1 : 0)));
3550     }
3551 
3552     @Override
3553     abstract
3554     ByteVector fromByteArray0(byte[] a, int offset);
3555     @ForceInline
3556     final
3557     ByteVector fromByteArray0Template(byte[] a, int offset) {
3558         ByteSpecies vsp = vspecies();
3559         return VectorSupport.load(
3560             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3561             a, byteArrayAddress(a, offset),
3562             a, offset, vsp,
3563             (arr, off, s) -> {
3564                 ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
3565                 return s.ldOp(wb, off,
3566                         (wb_, o, i) -> wb_.get(o + i * 1));
3567             });
3568     }
3569 
3570     abstract
3571     ByteVector fromByteBuffer0(ByteBuffer bb, int offset);
3572     @ForceInline
3573     final
3574     ByteVector fromByteBuffer0Template(ByteBuffer bb, int offset) {
3575         ByteSpecies vsp = vspecies();
3576         return ScopedMemoryAccess.loadFromByteBuffer(
3577                 vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3578                 bb, offset, vsp,
3579                 (buf, off, s) -> {
3580                     ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
3581                     return s.ldOp(wb, off,
3582                             (wb_, o, i) -> wb_.get(o + i * 1));
3583                 });
3584     }
3585 
3586     // Unchecked storing operations in native byte order.
3587     // Caller is responsible for applying index checks, masking, and
3588     // byte swapping.
3589 
3590     abstract
3591     void intoArray0(byte[] a, int offset);
3592     @ForceInline
3593     final
3594     void intoArray0Template(byte[] a, int offset) {
3595         ByteSpecies vsp = vspecies();
3596         VectorSupport.store(
3597             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3598             a, arrayAddress(a, offset),
3599             this, a, offset,
3600             (arr, off, v)
3601             -> v.stOp(arr, off,
3602                       (arr_, off_, i, e) -> arr_[off_+i] = e));
3603     }
3604 
3605     abstract
3606     void intoByteArray0(byte[] a, int offset);
3607     @ForceInline
3608     final
3609     void intoByteArray0Template(byte[] a, int offset) {
3610         ByteSpecies vsp = vspecies();
3611         VectorSupport.store(
3612             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3613             a, byteArrayAddress(a, offset),
3614             this, a, offset,
3615             (arr, off, v) -> {
3616                 ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
3617                 v.stOp(wb, off,
3618                         (wb_, o, i, e) -> wb_.put(o + i * 1, e));
3619             });
3620     }
3621 
3622     @ForceInline
3623     final
3624     void intoByteBuffer0(ByteBuffer bb, int offset) {
3625         ByteSpecies vsp = vspecies();
3626         ScopedMemoryAccess.storeIntoByteBuffer(
3627                 vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3628                 this, bb, offset,
3629                 (buf, off, v) -> {
3630                     ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
3631                     v.stOp(wb, off,
3632                             (wb_, o, i, e) -> wb_.put(o + i * 1, e));
3633                 });
3634     }
3635 
3636     // End of low-level memory operations.
3637 
3638     private static
3639     void checkMaskFromIndexSize(int offset,
3640                                 ByteSpecies vsp,
3641                                 VectorMask<Byte> m,
3642                                 int scale,
3643                                 int limit) {
3644         ((AbstractMask<Byte>)m)
3645             .checkIndexByLane(offset, limit, vsp.iota(), scale);
3646     }
3647 
3648     @ForceInline
3649     private void conditionalStoreNYI(int offset,
3650                                      ByteSpecies vsp,
3651                                      VectorMask<Byte> m,
3652                                      int scale,
3653                                      int limit) {
3654         if (offset < 0 || offset + vsp.laneCount() * scale > limit) {
3655             String msg =

3942             byte[] res = new byte[laneCount()];
3943             boolean[] mbits = ((AbstractMask<Byte>)m).getBits();
3944             for (int i = 0; i < res.length; i++) {
3945                 if (mbits[i]) {
3946                     res[i] = f.apply(i);
3947                 }
3948             }
3949             return dummyVector().vectorFactory(res);
3950         }
3951 
3952         /*package-private*/
3953         @ForceInline
3954         <M> ByteVector ldOp(M memory, int offset,
3955                                       FLdOp<M> f) {
3956             return dummyVector().ldOp(memory, offset, f);
3957         }
3958 
3959         /*package-private*/
3960         @ForceInline
3961         <M> ByteVector ldOp(M memory, int offset,
3962                                       AbstractMask<Byte> m,
3963                                       FLdOp<M> f) {
3964             return dummyVector().ldOp(memory, offset, m, f);
3965         }
3966 
3967         /*package-private*/
3968         @ForceInline
3969         <M> void stOp(M memory, int offset, FStOp<M> f) {
3970             dummyVector().stOp(memory, offset, f);
3971         }
3972 
3973         /*package-private*/
3974         @ForceInline
3975         <M> void stOp(M memory, int offset,
3976                       AbstractMask<Byte> m,
3977                       FStOp<M> f) {
3978             dummyVector().stOp(memory, offset, m, f);
3979         }
3980 
3981         // N.B. Make sure these constant vectors and
3982         // masks load up correctly into registers.

  25 package jdk.incubator.vector;
  26 
  27 import java.nio.ByteBuffer;
  28 import java.nio.ByteOrder;
  29 import java.nio.ReadOnlyBufferException;
  30 import java.util.Arrays;
  31 import java.util.Objects;

  32 import java.util.function.Function;
  33 import java.util.function.UnaryOperator;
  34 
  35 import jdk.internal.misc.ScopedMemoryAccess;
  36 import jdk.internal.misc.Unsafe;
  37 import jdk.internal.vm.annotation.ForceInline;
  38 import jdk.internal.vm.vector.VectorSupport;
  39 
  40 import static jdk.internal.vm.vector.VectorSupport.*;
  41 import static jdk.incubator.vector.VectorIntrinsics.*;
  42 
  43 import static jdk.incubator.vector.VectorOperators.*;
  44 
  45 // -- This file was mechanically generated: Do not edit! -- //
  46 
  47 /**
  48  * A specialized {@link Vector} representing an ordered immutable sequence of
  49  * {@code byte} values.
  50  */
  51 @SuppressWarnings("cast")  // warning: redundant cast

 155     ByteVector uOp(FUnOp f);
 156     @ForceInline
 157     final
 158     ByteVector uOpTemplate(FUnOp f) {
 159         byte[] vec = vec();
 160         byte[] res = new byte[length()];
 161         for (int i = 0; i < res.length; i++) {
 162             res[i] = f.apply(i, vec[i]);
 163         }
 164         return vectorFactory(res);
 165     }
 166 
 167     /*package-private*/
 168     abstract
 169     ByteVector uOp(VectorMask<Byte> m,
 170                              FUnOp f);
 171     @ForceInline
 172     final
 173     ByteVector uOpTemplate(VectorMask<Byte> m,
 174                                      FUnOp f) {
 175         if (m == null) {
 176             return uOpTemplate(f);
 177         }
 178         byte[] vec = vec();
 179         byte[] res = new byte[length()];
 180         boolean[] mbits = ((AbstractMask<Byte>)m).getBits();
 181         for (int i = 0; i < res.length; i++) {
 182             res[i] = mbits[i] ? f.apply(i, vec[i]) : vec[i];
 183         }
 184         return vectorFactory(res);
 185     }
 186 
 187     // Binary operator
 188 
 189     /*package-private*/
 190     interface FBinOp {
 191         byte apply(int i, byte a, byte b);
 192     }
 193 
 194     /*package-private*/
 195     abstract
 196     ByteVector bOp(Vector<Byte> o,
 197                              FBinOp f);

 201                                      FBinOp f) {
 202         byte[] res = new byte[length()];
 203         byte[] vec1 = this.vec();
 204         byte[] vec2 = ((ByteVector)o).vec();
 205         for (int i = 0; i < res.length; i++) {
 206             res[i] = f.apply(i, vec1[i], vec2[i]);
 207         }
 208         return vectorFactory(res);
 209     }
 210 
 211     /*package-private*/
 212     abstract
 213     ByteVector bOp(Vector<Byte> o,
 214                              VectorMask<Byte> m,
 215                              FBinOp f);
 216     @ForceInline
 217     final
 218     ByteVector bOpTemplate(Vector<Byte> o,
 219                                      VectorMask<Byte> m,
 220                                      FBinOp f) {
 221         if (m == null) {
 222             return bOpTemplate(o, f);
 223         }
 224         byte[] res = new byte[length()];
 225         byte[] vec1 = this.vec();
 226         byte[] vec2 = ((ByteVector)o).vec();
 227         boolean[] mbits = ((AbstractMask<Byte>)m).getBits();
 228         for (int i = 0; i < res.length; i++) {
 229             res[i] = mbits[i] ? f.apply(i, vec1[i], vec2[i]) : vec1[i];
 230         }
 231         return vectorFactory(res);
 232     }
 233 
 234     // Ternary operator
 235 
 236     /*package-private*/
 237     interface FTriOp {
 238         byte apply(int i, byte a, byte b, byte c);
 239     }
 240 
 241     /*package-private*/
 242     abstract
 243     ByteVector tOp(Vector<Byte> o1,

 253         byte[] vec2 = ((ByteVector)o1).vec();
 254         byte[] vec3 = ((ByteVector)o2).vec();
 255         for (int i = 0; i < res.length; i++) {
 256             res[i] = f.apply(i, vec1[i], vec2[i], vec3[i]);
 257         }
 258         return vectorFactory(res);
 259     }
 260 
 261     /*package-private*/
 262     abstract
 263     ByteVector tOp(Vector<Byte> o1,
 264                              Vector<Byte> o2,
 265                              VectorMask<Byte> m,
 266                              FTriOp f);
 267     @ForceInline
 268     final
 269     ByteVector tOpTemplate(Vector<Byte> o1,
 270                                      Vector<Byte> o2,
 271                                      VectorMask<Byte> m,
 272                                      FTriOp f) {
 273         if (m == null) {
 274             return tOpTemplate(o1, o2, f);
 275         }
 276         byte[] res = new byte[length()];
 277         byte[] vec1 = this.vec();
 278         byte[] vec2 = ((ByteVector)o1).vec();
 279         byte[] vec3 = ((ByteVector)o2).vec();
 280         boolean[] mbits = ((AbstractMask<Byte>)m).getBits();
 281         for (int i = 0; i < res.length; i++) {
 282             res[i] = mbits[i] ? f.apply(i, vec1[i], vec2[i], vec3[i]) : vec1[i];
 283         }
 284         return vectorFactory(res);
 285     }
 286 
 287     // Reduction operator
 288 
 289     /*package-private*/
 290     abstract
 291     byte rOp(byte v, VectorMask<Byte> m, FBinOp f);
 292 
 293     @ForceInline
 294     final
 295     byte rOpTemplate(byte v, VectorMask<Byte> m, FBinOp f) {
 296         if (m == null) {
 297             return rOpTemplate(v, f);
 298         }
 299         byte[] vec = vec();
 300         boolean[] mbits = ((AbstractMask<Byte>)m).getBits();
 301         for (int i = 0; i < vec.length; i++) {
 302             v = mbits[i] ? f.apply(i, v, vec[i]) : v;
 303         }
 304         return v;
 305     }
 306 
 307     @ForceInline
 308     final
 309     byte rOpTemplate(byte v, FBinOp f) {
 310         byte[] vec = vec();
 311         for (int i = 0; i < vec.length; i++) {
 312             v = f.apply(i, v, vec[i]);
 313         }
 314         return v;
 315     }
 316 
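         // Calling convention for the masked templates above (sketch): a
         // null mask means "no mask", so e.g. uOpTemplate(null, f) behaves
         // exactly like uOpTemplate(f).  This lets one intrinsic entry
         // point serve both the masked and unmasked lanewise paths.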
 317     // Memory reference
 318 
 319     /*package-private*/
 320     interface FLdOp<M> {
 321         byte apply(M memory, int offset, int i);
 322     }
 323 
 324     /*package-private*/
 325     @ForceInline
 326     final

 555     final ByteVector broadcastTemplate(long e) {
 556         return vspecies().broadcast(e);
 557     }
 558 
 559     // Unary lanewise support
 560 
 561     /**
 562      * {@inheritDoc} <!--workaround-->
 563      */
 564     public abstract
 565     ByteVector lanewise(VectorOperators.Unary op);
 566 
 567     @ForceInline
 568     final
 569     ByteVector lanewiseTemplate(VectorOperators.Unary op) {
 570         if (opKind(op, VO_SPECIAL)) {
 571             if (op == ZOMO) {
 572                 return blend(broadcast(-1), compare(NE, 0));
 573             }
 574             if (op == NOT) {
 575                 return broadcast(-1).lanewise(XOR, this);
 576             } else if (op == NEG) {
 577                 // FIXME: Support this in the JIT.
 578                 return broadcast(0).lanewise(SUB, this);
 579             }
 580         }
 581         int opc = opCode(op);
 582         return VectorSupport.unaryOp(
 583             opc, getClass(), null, byte.class, length(),
 584             this, null,
 585             UN_IMPL.find(op, opc, ByteVector::unaryOperations));
 586     }
 587 
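         // Worked check of the rewrites above, for a single lane value a:
         //   NOT:  -1 ^ a == ~a   (XOR with all-ones flips every bit)
         //   NEG:   0 - a == -a
         // so both special cases reduce to operations the JIT already
         // supports directly.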
 588     /**
 589      * {@inheritDoc} <!--workaround-->
 590      */
 591     @Override
 592     public abstract
 593     ByteVector lanewise(VectorOperators.Unary op,
 594                                   VectorMask<Byte> m);
 595     @ForceInline
 596     final
 597     ByteVector lanewiseTemplate(VectorOperators.Unary op,
 598                                           Class<? extends VectorMask<Byte>> maskClass,
 599                                           VectorMask<Byte> m) {
 600         m.check(maskClass, this);
 601         if (opKind(op, VO_SPECIAL)) {
 602             if (op == ZOMO) {
 603                 return blend(broadcast(-1), compare(NE, 0, m));
 604             }
 605             if (op == NOT) {
 606                 return lanewise(XOR, broadcast(-1), m);
 607             } else if (op == NEG) {
 608                 return lanewise(NOT, m).lanewise(ADD, broadcast(1), m);
 609             }
 610         }
 611         int opc = opCode(op);
 612         return VectorSupport.unaryOp(
 613             opc, getClass(), maskClass, byte.class, length(),
 614             this, m,
 615             UN_IMPL.find(op, opc, ByteVector::unaryOperations));
 616     }
 617 
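         // The masked NEG rewrite above relies on the two's-complement
         // identity -a == ~a + 1; both steps apply only in lanes selected
         // by m, so unselected lanes pass through unchanged.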
 618     private static final
 619     ImplCache<Unary, UnaryOperation<ByteVector, VectorMask<Byte>>>
 620         UN_IMPL = new ImplCache<>(Unary.class, ByteVector.class);
 621 
 622     private static UnaryOperation<ByteVector, VectorMask<Byte>> unaryOperations(int opc_) {
 623         switch (opc_) {
 624             case VECTOR_OP_NEG: return (v0, m) ->
 625                     v0.uOp(m, (i, a) -> (byte) -a);
 626             case VECTOR_OP_ABS: return (v0, m) ->
 627                     v0.uOp(m, (i, a) -> (byte) Math.abs(a));
 628             default: return null;
 629         }
 630     }
 631 
 632     // Binary lanewise support
 633 
 634     /**
 635      * {@inheritDoc} <!--workaround-->
 636      * @see #lanewise(VectorOperators.Binary,byte)
 637      * @see #lanewise(VectorOperators.Binary,byte,VectorMask)
 638      */
 639     @Override
 640     public abstract
 641     ByteVector lanewise(VectorOperators.Binary op,
 642                                   Vector<Byte> v);
 643     @ForceInline
 644     final
 645     ByteVector lanewiseTemplate(VectorOperators.Binary op,
 646                                           Vector<Byte> v) {
 647         ByteVector that = (ByteVector) v;
 648         that.check(this);
 649 
 650         if (opKind(op, VO_SPECIAL  | VO_SHIFT)) {
 651             if (op == FIRST_NONZERO) {
 652                 // FIXME: Support this in the JIT.
 653                 VectorMask<Byte> thisNZ
 654                     = this.viewAsIntegralLanes().compare(NE, (byte) 0);
 655                 that = that.blend((byte) 0, thisNZ.cast(vspecies()));
 656                 op = OR_UNCHECKED;
 657             }
 658             if (opKind(op, VO_SHIFT)) {
 659                 // As per shift specification for Java, mask the shift count.
 660                 // This allows the JIT to ignore some ISA details.
 661                 that = that.lanewise(AND, SHIFT_MASK);
 662             }
 663             if (op == AND_NOT) {
 664                 // FIXME: Support this in the JIT.
 665                 that = that.lanewise(NOT);
 666                 op = AND;
 667             } else if (op == DIV) {
 668                 VectorMask<Byte> eqz = that.eq((byte) 0);
 669                 if (eqz.anyTrue()) {
 670                     throw that.divZeroException();
 671                 }
 672             }
 673         }
 674 
 675         int opc = opCode(op);
 676         return VectorSupport.binaryOp(
 677             opc, getClass(), null, byte.class, length(),
 678             this, that, null,
 679             BIN_IMPL.find(op, opc, ByteVector::binaryOperations));
 680     }
 681 
 682     /**
 683      * {@inheritDoc} <!--workaround-->
 684      * @see #lanewise(VectorOperators.Binary,byte,VectorMask)
 685      */
 686     @Override
 687     public abstract
 688     ByteVector lanewise(VectorOperators.Binary op,
 689                                   Vector<Byte> v,
 690                                   VectorMask<Byte> m);
 691     @ForceInline
 692     final
 693     ByteVector lanewiseTemplate(VectorOperators.Binary op,
 694                                           Class<? extends VectorMask<Byte>> maskClass,
 695                                           Vector<Byte> v, VectorMask<Byte> m) {
 696         ByteVector that = (ByteVector) v;
 697         that.check(this);
 698         m.check(maskClass, this);
 699 
 700         if (opKind(op, VO_SPECIAL  | VO_SHIFT)) {
 701             if (op == FIRST_NONZERO) {
 702                 // FIXME: Support this in the JIT.
 703                 VectorMask<Byte> thisNZ
 704                     = this.viewAsIntegralLanes().compare(NE, (byte) 0);
 705                 that = that.blend((byte) 0, thisNZ.cast(vspecies()));
 706                 op = OR_UNCHECKED;
 707             }
 708             if (opKind(op, VO_SHIFT)) {
 709                 // As per shift specification for Java, mask the shift count.
 710                 // This allows the JIT to ignore some ISA details.
 711                 that = that.lanewise(AND, SHIFT_MASK);
 712             }
 713             if (op == AND_NOT) {
 714                 // FIXME: Support this in the JIT.
 715                 that = that.lanewise(NOT);
 716                 op = AND;
 717             } else if (op == DIV) {
 718                 VectorMask<Byte> eqz = that.eq((byte)0);
 719                 if (eqz.and(m).anyTrue()) {
 720                     throw that.divZeroException();
 721                 }
 722                 // suppress div/0 exceptions in unset lanes
 723                 that = that.lanewise(NOT, eqz);
 724             }
 725         }
 726 
 727         int opc = opCode(op);
 728         return VectorSupport.binaryOp(
 729             opc, getClass(), maskClass, byte.class, length(),
 730             this, that, m,
 731             BIN_IMPL.find(op, opc, ByteVector::binaryOperations));
 732     }
 733 
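         // Note on the masked DIV case above: a scalar fallback divides in
         // every lane, so a zero divisor in an *unset* lane could still
         // trap.  The NOT rewrite turns each such zero into -1 (nonzero)
         // in exactly those lanes, whose results the mask discards anyway.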
 734     private static final
 735     ImplCache<Binary, BinaryOperation<ByteVector, VectorMask<Byte>>>
 736         BIN_IMPL = new ImplCache<>(Binary.class, ByteVector.class);
 737 
 738     private static BinaryOperation<ByteVector, VectorMask<Byte>> binaryOperations(int opc_) {
 739         switch (opc_) {
 740             case VECTOR_OP_ADD: return (v0, v1, vm) ->
 741                     v0.bOp(v1, vm, (i, a, b) -> (byte)(a + b));
 742             case VECTOR_OP_SUB: return (v0, v1, vm) ->
 743                     v0.bOp(v1, vm, (i, a, b) -> (byte)(a - b));
 744             case VECTOR_OP_MUL: return (v0, v1, vm) ->
 745                     v0.bOp(v1, vm, (i, a, b) -> (byte)(a * b));
 746             case VECTOR_OP_DIV: return (v0, v1, vm) ->
 747                     v0.bOp(v1, vm, (i, a, b) -> (byte)(a / b));
 748             case VECTOR_OP_MAX: return (v0, v1, vm) ->
 749                     v0.bOp(v1, vm, (i, a, b) -> (byte)Math.max(a, b));
 750             case VECTOR_OP_MIN: return (v0, v1, vm) ->
 751                     v0.bOp(v1, vm, (i, a, b) -> (byte)Math.min(a, b));
 752             case VECTOR_OP_AND: return (v0, v1, vm) ->
 753                     v0.bOp(v1, vm, (i, a, b) -> (byte)(a & b));
 754             case VECTOR_OP_OR: return (v0, v1, vm) ->
 755                     v0.bOp(v1, vm, (i, a, b) -> (byte)(a | b));
 756             case VECTOR_OP_XOR: return (v0, v1, vm) ->
 757                     v0.bOp(v1, vm, (i, a, b) -> (byte)(a ^ b));
 758             case VECTOR_OP_LSHIFT: return (v0, v1, vm) ->
 759                     v0.bOp(v1, vm, (i, a, n) -> (byte)(a << n));
 760             case VECTOR_OP_RSHIFT: return (v0, v1, vm) ->
 761                     v0.bOp(v1, vm, (i, a, n) -> (byte)(a >> n));
 762             case VECTOR_OP_URSHIFT: return (v0, v1, vm) ->
 763                     v0.bOp(v1, vm, (i, a, n) -> (byte)((a & LSHR_SETUP_MASK) >>> n));
 764             case VECTOR_OP_LROTATE: return (v0, v1, vm) ->
 765                     v0.bOp(v1, vm, (i, a, n) -> rotateLeft(a, (int)n));
 766             case VECTOR_OP_RROTATE: return (v0, v1, vm) ->
 767                     v0.bOp(v1, vm, (i, a, n) -> rotateRight(a, (int)n));
 768             default: return null;
 769         }
 770     }
 771 
 772     // FIXME: Maybe all of the public final methods in this file (the
 773     // simple ones that just call lanewise) should be pushed down to
 774     // the X-VectorBits template.  They can't optimize properly at
 775     // this level, and must rely on inlining.  Does it work?
 776     // (If it works, of course keep the code here.)
 777 
 778     /**
 779      * Combines the lane values of this vector
 780      * with the value of a broadcast scalar.
 781      *
 782      * This is a lane-wise binary operation which applies
 783      * the selected operation to each lane.
 784      * The return value will be equal to this expression:
 785      * {@code this.lanewise(op, this.broadcast(e))}.
 786      *
 787      * @param op the operation used to process lane values
 788      * @param e the input scalar
 789      * @return the result of applying the operation lane-wise
 790      *         to the two input vectors
 791      * @throws UnsupportedOperationException if this vector does

 814      * This is a masked lane-wise binary operation which applies
 815      * the selected operation to each lane.
 816      * The return value will be equal to this expression:
 817      * {@code this.lanewise(op, this.broadcast(e), m)}.
 818      *
 819      * @param op the operation used to process lane values
 820      * @param e the input scalar
 821      * @param m the mask controlling lane selection
 822      * @return the result of applying the operation lane-wise
 823      *         to the input vector and the scalar
 824      * @throws UnsupportedOperationException if this vector does
 825      *         not support the requested operation
 826      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
 827      * @see #lanewise(VectorOperators.Binary,byte)
 828      */
 829     @ForceInline
 830     public final
 831     ByteVector lanewise(VectorOperators.Binary op,
 832                                   byte e,
 833                                   VectorMask<Byte> m) {
 834         if (opKind(op, VO_SHIFT) && (byte)(int)e == e) {
 835             return lanewiseShift(op, (int) e, m);
 836         }
 837         if (op == AND_NOT) {
 838             op = AND; e = (byte) ~e;
 839         }
 840         return lanewise(op, broadcast(e), m);
 841     }
 842 
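         // Illustrative use of the masked scalar form above (helper name
         // is an assumption): clamp selected lanes to at most 10, leaving
         // unselected lanes unchanged.
         static ByteVector clampMasked(ByteVector v, VectorMask<Byte> m) {
             return v.lanewise(MIN, (byte) 10, m);
         }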
 843     /**
 844      * {@inheritDoc} <!--workaround-->
 845      * @apiNote
 846      * When working with vector subtypes like {@code ByteVector},
 847      * {@linkplain #lanewise(VectorOperators.Binary,byte)
 848      * the more strongly typed method}
 849      * is typically selected.  It can be explicitly selected
 850      * using a cast: {@code v.lanewise(op,(byte)e)}.
 851      * The two expressions will produce numerically identical results.
 852      */
 853     @ForceInline
 854     public final
 855     ByteVector lanewise(VectorOperators.Binary op,
 856                                   long e) {
 857         byte e1 = (byte) e;
 858         if ((long)e1 != e
 859             // allow shift ops to clip down their int parameters
 860             && !(opKind(op, VO_SHIFT) && (int)e1 == e)) {
 861             vspecies().checkValue(e);  // for exception
 862         }
 863         return lanewise(op, e1);
 864     }
 865 
 866     /**
 867      * {@inheritDoc} <!--workaround-->
 868      * @apiNote
 869      * When working with vector subtypes like {@code ByteVector},
 870      * {@linkplain #lanewise(VectorOperators.Binary,byte,VectorMask)
 871      * the more strongly typed method}
 872      * is typically selected.  It can be explicitly selected
 873      * using a cast: {@code v.lanewise(op,(byte)e,m)}.
 874      * The two expressions will produce numerically identical results.
 875      */
 876     @ForceInline
 877     public final
 878     ByteVector lanewise(VectorOperators.Binary op,
 879                                   long e, VectorMask<Byte> m) {
 880         byte e1 = (byte) e;
 881         if ((long)e1 != e
 882             // allow shift ops to clip down their int parameters
 883             && !(opKind(op, VO_SHIFT) && (int)e1 == e)) {
 884             vspecies().checkValue(e);  // for exception
 885         }
 886         return lanewise(op, e1, m);
 887     }
 888 
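         // Sketch of the long-overload contract above:
         //     v.lanewise(ADD, 1L, m);      // ok: 1 fits exactly in a byte
         //     v.lanewise(ADD, 1000L, m);   // throws IllegalArgumentException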
 889     /*package-private*/
 890     abstract ByteVector
 891     lanewiseShift(VectorOperators.Binary op, int e);
 892 
 893     /*package-private*/
 894     @ForceInline
 895     final ByteVector
 896     lanewiseShiftTemplate(VectorOperators.Binary op, int e) {
 897         // Special handling for these.  FIXME: Refactor?
 898         assert(opKind(op, VO_SHIFT));
 899         // As per shift specification for Java, mask the shift count.
 900         e &= SHIFT_MASK;
 901         int opc = opCode(op);
 902         return VectorSupport.broadcastInt(
 903             opc, getClass(), null, byte.class, length(),
 904             this, e, null,
 905             BIN_INT_IMPL.find(op, opc, ByteVector::broadcastIntOperations));
 906     }
 907 
 908     /*package-private*/
 909     abstract ByteVector
 910     lanewiseShift(VectorOperators.Binary op, int e, VectorMask<Byte> m);
 911 
 912     /*package-private*/
 913     @ForceInline
 914     final ByteVector
 915     lanewiseShiftTemplate(VectorOperators.Binary op,
 916                           Class<? extends VectorMask<Byte>> maskClass,
 917                           int e, VectorMask<Byte> m) {
 918         m.check(maskClass, this);
 919         assert(opKind(op, VO_SHIFT));
 920         // As per shift specification for Java, mask the shift count.
 921         e &= SHIFT_MASK;
 922         int opc = opCode(op);
 923         return VectorSupport.broadcastInt(
 924             opc, getClass(), maskClass, byte.class, length(),
 925             this, e, m,
 926             BIN_INT_IMPL.find(op, opc, ByteVector::broadcastIntOperations));
 927     }
 928 
 929     private static final
 930     ImplCache<Binary,VectorBroadcastIntOp<ByteVector, VectorMask<Byte>>> BIN_INT_IMPL
 931         = new ImplCache<>(Binary.class, ByteVector.class);
 932 
 933     private static VectorBroadcastIntOp<ByteVector, VectorMask<Byte>> broadcastIntOperations(int opc_) {
 934         switch (opc_) {
 935             case VECTOR_OP_LSHIFT: return (v, n, m) ->
 936                     v.uOp(m, (i, a) -> (byte)(a << n));
 937             case VECTOR_OP_RSHIFT: return (v, n, m) ->
 938                     v.uOp(m, (i, a) -> (byte)(a >> n));
 939             case VECTOR_OP_URSHIFT: return (v, n, m) ->
 940                     v.uOp(m, (i, a) -> (byte)((a & LSHR_SETUP_MASK) >>> n));
 941             case VECTOR_OP_LROTATE: return (v, n, m) ->
 942                     v.uOp(m, (i, a) -> rotateLeft(a, (int)n));
 943             case VECTOR_OP_RROTATE: return (v, n, m) ->
 944                     v.uOp(m, (i, a) -> rotateRight(a, (int)n));
 945             default: return null;
 946         }
 947     }
 948 
 949     // As per shift specification for Java, mask the shift count.
 950     // We mask 0x3F (long), 0x1F (int), 0x0F (short), 0x07 (byte).
 951     // The latter two maskings go beyond the JLS, but seem reasonable
 952     // since our lane types are first-class types, not just dressed
 953     // up ints.
 954     private static final int SHIFT_MASK = (Byte.SIZE - 1);
 955     // Also simulate >>> on sub-word variables with a mask.
 956     private static final int LSHR_SETUP_MASK = ((1 << Byte.SIZE) - 1);
 957 
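         // Worked example of LSHR_SETUP_MASK: for a = (byte) -2 (bit
         // pattern 1111_1110), a plain (a >>> 1) first promotes a to the
         // int -2 and yields 0x7FFFFFFF, while ((a & 0xFF) >>> 1) == 0x7F,
         // the value a true byte-wide unsigned shift should produce.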
 958     // Ternary lanewise support
 959 
 960     // Ternary operators come in eight variations:
 961     //   lanewise(op, [broadcast(e1)|v1], [broadcast(e2)|v2])
 962     //   lanewise(op, [broadcast(e1)|v1], [broadcast(e2)|v2], mask)
 963 
 964     // It is annoying to support all of these variations of masking
 965     // and broadcast, but it would be more surprising not to continue
 966     // the obvious pattern started by unary and binary.
 967 
 968    /**

 981                                                   Vector<Byte> v2);
 982     @ForceInline
 983     final
 984     ByteVector lanewiseTemplate(VectorOperators.Ternary op,
 985                                           Vector<Byte> v1,
 986                                           Vector<Byte> v2) {
 987         ByteVector that = (ByteVector) v1;
 988         ByteVector tother = (ByteVector) v2;
 989         // It's a word: https://www.dictionary.com/browse/tother
 990         // See also Chapter 11 of Dickens, Our Mutual Friend:
 991         // "Totherest Governor," replied Mr Riderhood...
 992         that.check(this);
 993         tother.check(this);
 994         if (op == BITWISE_BLEND) {
 995             // FIXME: Support this in the JIT.
 996             that = this.lanewise(XOR, that).lanewise(AND, tother);
 997             return this.lanewise(XOR, that);
 998         }
 999         int opc = opCode(op);
1000         return VectorSupport.ternaryOp(
1001             opc, getClass(), null, byte.class, length(),
1002             this, that, tother, null,
1003             TERN_IMPL.find(op, opc, ByteVector::ternaryOperations));
1004     }
1005 
1006     /**
1007      * {@inheritDoc} <!--workaround-->
1008      * @see #lanewise(VectorOperators.Ternary,byte,byte,VectorMask)
1009      * @see #lanewise(VectorOperators.Ternary,Vector,byte,VectorMask)
1010      * @see #lanewise(VectorOperators.Ternary,byte,Vector,VectorMask)
1011      */
1012     @Override
1013     public abstract
1014     ByteVector lanewise(VectorOperators.Ternary op,
1015                                   Vector<Byte> v1,
1016                                   Vector<Byte> v2,
1017                                   VectorMask<Byte> m);
1018     @ForceInline
1019     final
1020     ByteVector lanewiseTemplate(VectorOperators.Ternary op,
1021                                           Class<? extends VectorMask<Byte>> maskClass,
1022                                           Vector<Byte> v1,
1023                                           Vector<Byte> v2,
1024                                           VectorMask<Byte> m) {
1025         ByteVector that = (ByteVector) v1;
1026         ByteVector tother = (ByteVector) v2;
1027         // It's a word: https://www.dictionary.com/browse/tother
1028         // See also Chapter 11 of Dickens, Our Mutual Friend:
1029         // "Totherest Governor," replied Mr Riderhood...
1030         that.check(this);
1031         tother.check(this);
1032         m.check(maskClass, this);
1033 
1034         if (op == BITWISE_BLEND) {
1035             // FIXME: Support this in the JIT.
1036             that = this.lanewise(XOR, that).lanewise(AND, tother);
1037             return this.lanewise(XOR, that, m);
1038         }
1039         int opc = opCode(op);
1040         return VectorSupport.ternaryOp(
1041             opc, getClass(), maskClass, byte.class, length(),
1042             this, that, tother, m,
1043             TERN_IMPL.find(op, opc, ByteVector::ternaryOperations));
1044     }
1045 
1046     private static final
1047     ImplCache<Ternary, TernaryOperation<ByteVector, VectorMask<Byte>>>
1048         TERN_IMPL = new ImplCache<>(Ternary.class, ByteVector.class);
1049 
1050     private static TernaryOperation<ByteVector, VectorMask<Byte>> ternaryOperations(int opc_) {
1051         switch (opc_) {
1052             default: return null;
1053         }
1054     }
1055 
1056     /**
1057      * Combines the lane values of this vector
1058      * with the values of two broadcast scalars.
1059      *
1060      * This is a lane-wise ternary operation which applies
1061      * the selected operation to each lane.
1062      * The return value will be equal to this expression:
1063      * {@code this.lanewise(op, this.broadcast(e1), this.broadcast(e2))}.
1064      *
1065      * @param op the operation used to combine lane values
1066      * @param e1 the first input scalar
1067      * @param e2 the second input scalar
1068      * @return the result of applying the operation lane-wise
1069      *         to the input vector and the scalars
1070      * @throws UnsupportedOperationException if this vector does
1071      *         not support the requested operation
1072      * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
1073      * @see #lanewise(VectorOperators.Ternary,byte,byte,VectorMask)

1090      * The return value will be equal to this expression:
1091      * {@code this.lanewise(op, this.broadcast(e1), this.broadcast(e2), m)}.
1092      *
1093      * @param op the operation used to combine lane values
1094      * @param e1 the first input scalar
1095      * @param e2 the second input scalar
1096      * @param m the mask controlling lane selection
1097      * @return the result of applying the operation lane-wise
1098      *         to the input vector and the scalars
1099      * @throws UnsupportedOperationException if this vector does
1100      *         not support the requested operation
1101      * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
1102      * @see #lanewise(VectorOperators.Ternary,byte,byte)
1103      */
1104     @ForceInline
1105     public final
1106     ByteVector lanewise(VectorOperators.Ternary op, //(op,e1,e2,m)
1107                                   byte e1,
1108                                   byte e2,
1109                                   VectorMask<Byte> m) {
1110         return lanewise(op, broadcast(e1), broadcast(e2), m);
1111     }
1112 
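         // Illustrative use of the scalar ternary form above (helper name
         // is an assumption).  BITWISE_BLEND computes (a & ~c) | (b & c)
         // per lane, so this call keeps the low nibble of v and takes the
         // high nibble from 0xAA, in the lanes selected by m.
         static ByteVector mixNibbles(ByteVector v, VectorMask<Byte> m) {
             return v.lanewise(BITWISE_BLEND, (byte) 0xAA, (byte) 0xF0, m);
         }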
1113     /**
1114      * Combines the lane values of this vector
1115      * with the values of another vector and a broadcast scalar.
1116      *
1117      * This is a lane-wise ternary operation which applies
1118      * the selected operation to each lane.
1119      * The return value will be equal to this expression:
1120      * {@code this.lanewise(op, v1, this.broadcast(e2))}.
1121      *
1122      * @param op the operation used to combine lane values
1123      * @param v1 the other input vector
1124      * @param e2 the input scalar
1125      * @return the result of applying the operation lane-wise
1126      *         to the input vectors and the scalar
1127      * @throws UnsupportedOperationException if this vector does
1128      *         not support the requested operation
1129      * @see #lanewise(VectorOperators.Ternary,byte,byte)
1130      * @see #lanewise(VectorOperators.Ternary,Vector,byte,VectorMask)

1148      * {@code this.lanewise(op, v1, this.broadcast(e2), m)}.
1149      *
1150      * @param op the operation used to combine lane values
1151      * @param v1 the other input vector
1152      * @param e2 the input scalar
1153      * @param m the mask controlling lane selection
1154      * @return the result of applying the operation lane-wise
1155      *         to the input vectors and the scalar
1156      * @throws UnsupportedOperationException if this vector does
1157      *         not support the requested operation
1158      * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
1159      * @see #lanewise(VectorOperators.Ternary,byte,byte,VectorMask)
1160      * @see #lanewise(VectorOperators.Ternary,Vector,byte)
1161      */
1162     @ForceInline
1163     public final
1164     ByteVector lanewise(VectorOperators.Ternary op, //(op,v1,e2,m)
1165                                   Vector<Byte> v1,
1166                                   byte e2,
1167                                   VectorMask<Byte> m) {
1168         return lanewise(op, v1, broadcast(e2), m);
1169     }
1170 
1171     /**
1172      * Combines the lane values of this vector
1173      * with the values of another vector and a broadcast scalar.
1174      *
1175      * This is a lane-wise ternary operation which applies
1176      * the selected operation to each lane.
1177      * The return value will be equal to this expression:
1178      * {@code this.lanewise(op, this.broadcast(e1), v2)}.
1179      *
1180      * @param op the operation used to combine lane values
1181      * @param e1 the input scalar
1182      * @param v2 the other input vector
1183      * @return the result of applying the operation lane-wise
1184      *         to the input vectors and the scalar
1185      * @throws UnsupportedOperationException if this vector does
1186      *         not support the requested operation
1187      * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
1188      * @see #lanewise(VectorOperators.Ternary,byte,Vector,VectorMask)

1205      * The return value will be equal to this expression:
1206      * {@code this.lanewise(op, this.broadcast(e1), v2, m)}.
1207      *
1208      * @param op the operation used to combine lane values
1209      * @param e1 the input scalar
1210      * @param v2 the other input vector
1211      * @param m the mask controlling lane selection
1212      * @return the result of applying the operation lane-wise
1213      *         to the input vectors and the scalar
1214      * @throws UnsupportedOperationException if this vector does
1215      *         not support the requested operation
1216      * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
1217      * @see #lanewise(VectorOperators.Ternary,byte,Vector)
1218      */
1219     @ForceInline
1220     public final
1221     ByteVector lanewise(VectorOperators.Ternary op, //(op,e1,v2,m)
1222                                   byte e1,
1223                                   Vector<Byte> v2,
1224                                   VectorMask<Byte> m) {
1225         return lanewise(op, broadcast(e1), v2, m);
1226     }
1227 
1228     // (Thus endeth the Great and Mighty Ternary Ogdoad.)
1229     // https://en.wikipedia.org/wiki/Ogdoad
1230 
1231     /// FULL-SERVICE BINARY METHODS: ADD, SUB, MUL, DIV
1232     //
1233     // These include masked and non-masked versions.
1234     // This subclass adds broadcast (masked or not).
1235 
1236     /**
1237      * {@inheritDoc} <!--workaround-->
1238      * @see #add(byte)
1239      */
1240     @Override
1241     @ForceInline
1242     public final ByteVector add(Vector<Byte> v) {
1243         return lanewise(ADD, v);
1244     }
1245 
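         // Equivalent formulations (sketch): v.add(w) is shorthand for
         // v.lanewise(ADD, w), and the broadcast form v.add((byte) 1)
         // likewise expands to v.lanewise(ADD, v.broadcast((byte) 1)).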

1877     @Override
1878     @ForceInline
1879     public final
1880     VectorMask<Byte> test(VectorOperators.Test op,
1881                                   VectorMask<Byte> m) {
1882         return test(op).and(m);
1883     }
1884 
1885     /**
1886      * {@inheritDoc} <!--workaround-->
1887      */
1888     @Override
1889     public abstract
1890     VectorMask<Byte> compare(VectorOperators.Comparison op, Vector<Byte> v);
1891 
1892     /*package-private*/
1893     @ForceInline
1894     final
1895     <M extends VectorMask<Byte>>
1896     M compareTemplate(Class<M> maskType, Comparison op, Vector<Byte> v) {
1897         ByteVector that = (ByteVector) v;
1898         that.check(this);
1899         int opc = opCode(op);
1900         return VectorSupport.compare(
1901             opc, getClass(), maskType, byte.class, length(),
1902             this, that, null,
1903             (cond, v0, v1, m1) -> {
1904                 AbstractMask<Byte> m
1905                     = v0.bTest(cond, v1, (cond_, i, a, b)
1906                                -> compareWithOp(cond, a, b));
1907                 @SuppressWarnings("unchecked")
1908                 M m2 = (M) m;
1909                 return m2;
1910             });
1911     }
1912 
1913     /*package-private*/
1914     @ForceInline
1915     final
1916     <M extends VectorMask<Byte>>
1917     M compareTemplate(Class<M> maskType, Comparison op, Vector<Byte> v, M m) {
1918         ByteVector that = (ByteVector) v;
1919         that.check(this);
1920         m.check(maskType, this);
1921         int opc = opCode(op);
1922         return VectorSupport.compare(
1923             opc, getClass(), maskType, byte.class, length(),
1924             this, that, m,
1925             (cond, v0, v1, m1) -> {
1926                 AbstractMask<Byte> cmpM
1927                     = v0.bTest(cond, v1, (cond_, i, a, b)
1928                                -> compareWithOp(cond, a, b));
1929                 @SuppressWarnings("unchecked")
1930                 M m2 = (M) cmpM.and(m1);
1931                 return m2;
1932             });
1933     }
1934 
1935     @ForceInline
1936     private static boolean compareWithOp(int cond, byte a, byte b) {
1937         return switch (cond) {
1938             case BT_eq -> a == b;
1939             case BT_ne -> a != b;
1940             case BT_lt -> a < b;
1941             case BT_le -> a <= b;
1942             case BT_gt -> a > b;
1943             case BT_ge -> a >= b;
1944             case BT_ult -> Byte.compareUnsigned(a, b) < 0;
1945             case BT_ule -> Byte.compareUnsigned(a, b) <= 0;
1946             case BT_ugt -> Byte.compareUnsigned(a, b) > 0;
1947             case BT_uge -> Byte.compareUnsigned(a, b) >= 0;
1948             default -> throw new AssertionError();
1949         };
1950     }
1951 
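         // Worked example of the unsigned branches above: for a = (byte) -1
         // (bit pattern 0xFF) and b = (byte) 1, the signed test a < b is
         // true, but Byte.compareUnsigned treats a as 255, so BT_ult is
         // false while BT_ugt is true.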
1952     /**
1953      * Tests this vector by comparing it with an input scalar,
1954      * according to the given comparison operation.
1955      *
1956      * This is a lane-wise binary test operation which applies
1957      * the comparison operation to each lane.
1958      * <p>
1959      * The result is the same as
1960      * {@code compare(op, broadcast(species(), e))}.
1961      * That is, the scalar may be regarded as broadcast to
1962      * a vector of the same species, and then compared
1963      * against the original vector, using the selected
1964      * comparison operation.
1965      *
1966      * @param op the operation used to compare lane values
1967      * @param e the input scalar
1968      * @return the mask result of testing lane-wise if this vector
1969      *         compares to the input, according to the selected
1970      *         comparison operator
1971      * @see ByteVector#compare(VectorOperators.Comparison,Vector)

1990      *
1991      * This is a masked lane-wise binary test operation which applies
1992      * to each pair of corresponding lane values.
1993      *
1994      * The returned result is equal to the expression
1995      * {@code compare(op,s).and(m)}.
1996      *
1997      * @param op the operation used to compare lane values
1998      * @param e the input scalar
1999      * @param m the mask controlling lane selection
2000      * @return the mask result of testing lane-wise if this vector
2001      *         compares to the input, according to the selected
2002      *         comparison operator,
2003      *         and only in the lanes selected by the mask
2004      * @see ByteVector#compare(VectorOperators.Comparison,Vector,VectorMask)
2005      */
2006     @ForceInline
2007     public final VectorMask<Byte> compare(VectorOperators.Comparison op,
2008                                                byte e,
2009                                                VectorMask<Byte> m) {
2010         return compare(op, broadcast(e), m);
2011     }
2012 
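         // Illustrative use (helper name is an assumption): select lanes
         // whose unsigned value exceeds 127, i.e. lanes that are negative
         // as signed bytes, restricted to an input mask.
         static VectorMask<Byte> highBitSet(ByteVector v, VectorMask<Byte> m) {
             return v.compare(UGT, (byte) 127, m);
         }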
2013     /**
2014      * {@inheritDoc} <!--workaround-->
2015      */
2016     @Override
2017     public abstract
2018     VectorMask<Byte> compare(Comparison op, long e);
2019 
2020     /*package-private*/
2021     @ForceInline
2022     final
2023     <M extends VectorMask<Byte>>
2024     M compareTemplate(Class<M> maskType, Comparison op, long e) {
2025         return compareTemplate(maskType, op, broadcast(e));
2026     }
2027 
2028     /**
2029      * {@inheritDoc} <!--workaround-->
2030      */

2241     wrongPartForSlice(int part) {
2242         String msg = String.format("bad part number %d for slice operation",
2243                                    part);
2244         return new ArrayIndexOutOfBoundsException(msg);
2245     }
2246 
2247     /**
2248      * {@inheritDoc} <!--workaround-->
2249      */
2250     @Override
2251     public abstract
2252     ByteVector rearrange(VectorShuffle<Byte> m);
2253 
2254     /*package-private*/
2255     @ForceInline
2256     final
2257     <S extends VectorShuffle<Byte>>
2258     ByteVector rearrangeTemplate(Class<S> shuffletype, S shuffle) {
2259         shuffle.checkIndexes();
2260         return VectorSupport.rearrangeOp(
2261             getClass(), shuffletype, null, byte.class, length(),
2262             this, shuffle, null,
2263             (v1, s_, m_) -> v1.uOp((i, a) -> {
2264                 int ei = s_.laneSource(i);
2265                 return v1.lane(ei);
2266             }));
2267     }
2268 
2269     /**
2270      * {@inheritDoc} <!--workaround-->
2271      */
2272     @Override
2273     public abstract
2274     ByteVector rearrange(VectorShuffle<Byte> s,
2275                                    VectorMask<Byte> m);
2276 
2277     /*package-private*/
2278     @ForceInline
2279     final
2280     <S extends VectorShuffle<Byte>, M extends VectorMask<Byte>>
2281     ByteVector rearrangeTemplate(Class<S> shuffletype,
2282                                            Class<M> masktype,
2283                                            S shuffle,
2284                                            M m) {
2285 
2286         m.check(masktype, this);
2287         VectorMask<Byte> valid = shuffle.laneIsValid();
2288         if (m.andNot(valid).anyTrue()) {
2289             shuffle.checkIndexes();
2290             throw new AssertionError();
2291         }
2292         return VectorSupport.rearrangeOp(
2293                    getClass(), shuffletype, masktype, byte.class, length(),
2294                    this, shuffle, m,
2295                    (v1, s_, m_) -> v1.uOp((i, a) -> {
2296                         int ei = s_.laneSource(i);
2297                         return ei < 0  || !m_.laneIsSet(i) ? 0 : v1.lane(ei);
2298                    }));
2299     }
2300 
2301     /**
2302      * {@inheritDoc} <!--workaround-->
2303      */
2304     @Override
2305     public abstract
2306     ByteVector rearrange(VectorShuffle<Byte> s,
2307                                    Vector<Byte> v);
2308 
2309     /*package-private*/
2310     @ForceInline
2311     final
2312     <S extends VectorShuffle<Byte>>
2313     ByteVector rearrangeTemplate(Class<S> shuffletype,
2314                                            S shuffle,
2315                                            ByteVector v) {
2316         VectorMask<Byte> valid = shuffle.laneIsValid();
2317         @SuppressWarnings("unchecked")
2318         S ws = (S) shuffle.wrapIndexes();
2319         ByteVector r0 =
2320             VectorSupport.rearrangeOp(
2321                 getClass(), shuffletype, null, byte.class, length(),
2322                 this, ws, null,
2323                 (v0, s_, m_) -> v0.uOp((i, a) -> {
2324                     int ei = s_.laneSource(i);
2325                     return v0.lane(ei);
2326                 }));
2327         ByteVector r1 =
2328             VectorSupport.rearrangeOp(
2329                 getClass(), shuffletype, null, byte.class, length(),
2330                 v, ws, null,
2331                 (v1, s_, m_) -> v1.uOp((i, a) -> {
2332                     int ei = s_.laneSource(i);
2333                     return v1.lane(ei);
2334                 }));
2335         return r1.blend(r0, valid);
2336     }
2337 
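         // Sketch (helper name is an assumption): reversing the lanes of a
         // vector with a shuffle built from an index mapping, applied by
         // the unmasked rearrange above.
         static ByteVector reverse(ByteVector v) {
             VectorShuffle<Byte> rev =
                 VectorShuffle.fromOp(v.species(), i -> v.length() - 1 - i);
             return v.rearrange(rev);
         }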
2338     @ForceInline
2339     private final
2340     VectorShuffle<Byte> toShuffle0(ByteSpecies dsp) {
2341         byte[] a = toArray();
2342         int[] sa = new int[a.length];
2343         for (int i = 0; i < a.length; i++) {
2344             sa[i] = (int) a[i];
2345         }
2346         return VectorShuffle.fromArray(dsp, sa, 0);
2347     }
2348 
2349     /*package-private*/
2350     @ForceInline
2351     final

2574      * <li>
2575      * All other reduction operations are fully commutative and
2576      * associative.  The implementation can choose any order of
2577      * processing, yet it will always produce the same result.
2578      * </ul>
2579      *
2580      * @param op the operation used to combine lane values
2581      * @param m the mask controlling lane selection
2582      * @return the reduced result accumulated from the selected lane values
2583      * @throws UnsupportedOperationException if this vector does
2584      *         not support the requested operation
2585      * @see #reduceLanes(VectorOperators.Associative)
2586      */
2587     public abstract byte reduceLanes(VectorOperators.Associative op,
2588                                        VectorMask<Byte> m);
2589 
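         // Illustrative use of the masked reduction (helper name is an
         // assumption): sum only the selected lanes; unselected lanes
         // contribute the ADD identity, zero.
         static byte maskedSum(ByteVector v, VectorMask<Byte> m) {
             return v.reduceLanes(ADD, m);
         }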
2590     /*package-private*/
2591     @ForceInline
2592     final
2593     byte reduceLanesTemplate(VectorOperators.Associative op,
2594                                Class<? extends VectorMask<Byte>> maskClass,
2595                                VectorMask<Byte> m) {
2596         m.check(maskClass, this);
2597         if (op == FIRST_NONZERO) {
2598             ByteVector v = reduceIdentityVector(op).blend(this, m);
2599             return v.reduceLanesTemplate(op);
2600         }
2601         int opc = opCode(op);
2602         return fromBits(VectorSupport.reductionCoerced(
2603             opc, getClass(), maskClass, byte.class, length(),
2604             this, m,
2605             REDUCE_IMPL.find(op, opc, ByteVector::reductionOperations)));
2606     }
2607 
2608     /*package-private*/
2609     @ForceInline
2610     final
2611     byte reduceLanesTemplate(VectorOperators.Associative op) {
2612         if (op == FIRST_NONZERO) {
2613             // FIXME:  The JIT should handle this, and other scan ops also.
2614             VectorMask<Byte> thisNZ
2615                 = this.viewAsIntegralLanes().compare(NE, (byte) 0);
2616             return this.lane(thisNZ.firstTrue());
2617         }
2618         int opc = opCode(op);
2619         return fromBits(VectorSupport.reductionCoerced(
2620             opc, getClass(), null, byte.class, length(),
2621             this, null,
2622             REDUCE_IMPL.find(op, opc, ByteVector::reductionOperations)));
2623     }
2624 
2625     private static final
2626     ImplCache<Associative, ReductionOperation<ByteVector, VectorMask<Byte>>>
2627         REDUCE_IMPL = new ImplCache<>(Associative.class, ByteVector.class);
2628 
2629     private static ReductionOperation<ByteVector, VectorMask<Byte>> reductionOperations(int opc_) {
2630         switch (opc_) {
2631             case VECTOR_OP_ADD: return (v, m) ->
2632                     toBits(v.rOp((byte)0, m, (i, a, b) -> (byte)(a + b)));
2633             case VECTOR_OP_MUL: return (v, m) ->
2634                     toBits(v.rOp((byte)1, m, (i, a, b) -> (byte)(a * b)));
2635             case VECTOR_OP_MIN: return (v, m) ->
2636                     toBits(v.rOp(MAX_OR_INF, m, (i, a, b) -> (byte) Math.min(a, b)));
2637             case VECTOR_OP_MAX: return (v, m) ->
2638                     toBits(v.rOp(MIN_OR_INF, m, (i, a, b) -> (byte) Math.max(a, b)));
2639             case VECTOR_OP_AND: return (v, m) ->
2640                     toBits(v.rOp((byte)-1, m, (i, a, b) -> (byte)(a & b)));
2641             case VECTOR_OP_OR: return (v, m) ->
2642                     toBits(v.rOp((byte)0, m, (i, a, b) -> (byte)(a | b)));
2643             case VECTOR_OP_XOR: return (v, m) ->
2644                     toBits(v.rOp((byte)0, m, (i, a, b) -> (byte)(a ^ b)));
2645             default: return null;
2646         }
2647     }
2648 
2649     private
2650     @ForceInline
2651     ByteVector reduceIdentityVector(VectorOperators.Associative op) {
2652         int opc = opCode(op);
2653         UnaryOperator<ByteVector> fn
2654             = REDUCE_ID_IMPL.find(op, opc, (opc_) -> {
2655                 switch (opc_) {
2656                 case VECTOR_OP_ADD:
2657                 case VECTOR_OP_OR:
2658                 case VECTOR_OP_XOR:
2659                     return v -> v.broadcast(0);
2660                 case VECTOR_OP_MUL:
2661                     return v -> v.broadcast(1);
2662                 case VECTOR_OP_AND:
2663                     return v -> v.broadcast(-1);
2664                 case VECTOR_OP_MIN:
2665                     return v -> v.broadcast(MAX_OR_INF);
2666                 case VECTOR_OP_MAX:
2667                     return v -> v.broadcast(MIN_OR_INF);

2853      * @param species species of desired vector
2854      * @param a the byte array
2855      * @param offset the offset into the array
2856      * @param bo the intended byte order
2857      * @param m the mask controlling lane selection
2858      * @return a vector loaded from a byte array
2859      * @throws IndexOutOfBoundsException
2860      *         if {@code offset+N*ESIZE < 0}
2861      *         or {@code offset+(N+1)*ESIZE > a.length}
2862      *         for any lane {@code N} in the vector
2863      *         where the mask is set
2864      */
2865     @ForceInline
2866     public static
2867     ByteVector fromByteArray(VectorSpecies<Byte> species,
2868                                        byte[] a, int offset,
2869                                        ByteOrder bo,
2870                                        VectorMask<Byte> m) {
2871         ByteSpecies vsp = (ByteSpecies) species;
2872         if (offset >= 0 && offset <= (a.length - species.vectorByteSize())) {
2873             return vsp.dummyVector().fromByteArray0(a, offset, m).maybeSwap(bo);
2874         }
2875 
2876         // FIXME: optimize
2877         checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
2878         ByteBuffer wb = wrapper(a, bo);
2879         return vsp.ldOp(wb, offset, (AbstractMask<Byte>)m,
2880                    (wb_, o, i)  -> wb_.get(o + i * 1));
2881     }
2882 
2883     /**
2884      * Loads a vector from an array of type {@code byte[]}
2885      * starting at an offset.
2886      * For each vector lane, where {@code N} is the vector lane index, the
2887      * array element at index {@code offset + N} is placed into the
2888      * resulting vector at lane index {@code N}.
2889      *
2890      * @param species species of desired vector
2891      * @param a the array
2892      * @param offset the offset into the array
2893      * @return the vector loaded from an array

2915      * {@code N}, otherwise the default element value is placed into the
2916      * resulting vector at lane index {@code N}.
2917      *
2918      * @param species species of desired vector
2919      * @param a the array
2920      * @param offset the offset into the array
2921      * @param m the mask controlling lane selection
2922      * @return the vector loaded from an array
2923      * @throws IndexOutOfBoundsException
2924      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
2925      *         for any lane {@code N} in the vector
2926      *         where the mask is set
2927      */
2928     @ForceInline
2929     public static
2930     ByteVector fromArray(VectorSpecies<Byte> species,
2931                                    byte[] a, int offset,
2932                                    VectorMask<Byte> m) {
2933         ByteSpecies vsp = (ByteSpecies) species;
2934         if (offset >= 0 && offset <= (a.length - species.length())) {
2935             return vsp.dummyVector().fromArray0(a, offset, m);
2936         }
2937 
2938         // FIXME: optimize
2939         checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
2940         return vsp.vOp(m, i -> a[offset + i]);
2941     }
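    // Editor's usage sketch: the masked load enables the usual
    // tail-handling pattern with no scalar cleanup loop ('a' is an assumed
    // byte[]):
    //
    //   VectorSpecies<Byte> sp = ByteVector.SPECIES_PREFERRED;
    //   for (int i = 0; i < a.length; i += sp.length()) {
    //       VectorMask<Byte> m = sp.indexInRange(i, a.length);
    //       ByteVector v = ByteVector.fromArray(sp, a, i, m);
    //       // ... operate on v ...
    //   }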
2942 
2943     /**
2944      * Gathers a new vector composed of elements from an array of type
2945      * {@code byte[]},
2946      * using indexes obtained by adding a fixed {@code offset} to a
2947      * series of secondary offsets from an <em>index map</em>.
2948      * The index map is a contiguous sequence of {@code VLENGTH}
2949      * elements in a second array of {@code int}s, starting at a given
2950      * {@code mapOffset}.
2951      * <p>
2952      * For each vector lane, where {@code N} is the vector lane index,
2953      * the lane is loaded from the array
2954      * element {@code a[f(N)]}, where {@code f(N)} is the
2955      * index mapping expression

3072      * expression {@code (byte) (b ? 1 : 0)}, where {@code b} is the {@code boolean} value.
3073      *
3074      * @param species species of desired vector
3075      * @param a the array
3076      * @param offset the offset into the array
3077      * @param m the mask controlling lane selection
3078      * @return the vector loaded from an array
3079      * @throws IndexOutOfBoundsException
3080      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
3081      *         for any lane {@code N} in the vector
3082      *         where the mask is set
3083      */
3084     @ForceInline
3085     public static
3086     ByteVector fromBooleanArray(VectorSpecies<Byte> species,
3087                                           boolean[] a, int offset,
3088                                           VectorMask<Byte> m) {
3089         ByteSpecies vsp = (ByteSpecies) species;
3090         if (offset >= 0 && offset <= (a.length - species.length())) {
3092             return vsp.dummyVector().fromBooleanArray0(a, offset, m);
3093         }
3094 
3095         // FIXME: optimize
3096         checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
3097         return vsp.vOp(m, i -> (byte) (a[offset + i] ? 1 : 0));
3098     }
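    // Editor's usage sketch ('flags' is an assumed boolean[]): set lanes
    // load as 0 or 1 per the (byte)(b ? 1 : 0) conversion; unset lanes are
    // zero.
    //
    //   VectorSpecies<Byte> sp = ByteVector.SPECIES_64;
    //   VectorMask<Byte> m = sp.indexInRange(0, flags.length);
    //   ByteVector bits = ByteVector.fromBooleanArray(sp, flags, 0, m);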
3099 
3100     /**
3101      * Gathers a new vector composed of elements from an array of type
3102      * {@code boolean[]},
3103      * using indexes obtained by adding a fixed {@code offset} to a
3104      * series of secondary offsets from an <em>index map</em>.
3105      * The index map is a contiguous sequence of {@code VLENGTH}
3106      * elements in a second array of {@code int}s, starting at a given
3107      * {@code mapOffset}.
3108      * <p>
3109      * For each vector lane, where {@code N} is the vector lane index,
3110      * the lane is loaded from the expression
3111      * {@code (byte) (a[f(N)] ? 1 : 0)}, where {@code f(N)} is the
3112      * index mapping expression

3250      * @param species species of desired vector
3251      * @param bb the byte buffer
3252      * @param offset the offset into the byte buffer
3253      * @param bo the intended byte order
3254      * @param m the mask controlling lane selection
3255      * @return a vector loaded from a byte buffer
3256      * @throws IndexOutOfBoundsException
3257      *         if {@code offset+N*1 < 0}
3258      *         or {@code offset+N*1 >= bb.limit()}
3259      *         for any lane {@code N} in the vector
3260      *         where the mask is set
3261      */
3262     @ForceInline
3263     public static
3264     ByteVector fromByteBuffer(VectorSpecies<Byte> species,
3265                                         ByteBuffer bb, int offset,
3266                                         ByteOrder bo,
3267                                         VectorMask<Byte> m) {
3268         ByteSpecies vsp = (ByteSpecies) species;
3269         if (offset >= 0 && offset <= (bb.limit() - species.vectorByteSize())) {
3270             return vsp.dummyVector().fromByteBuffer0(bb, offset, m).maybeSwap(bo);
3271         }
3272 
3273         // FIXME: optimize
3274         checkMaskFromIndexSize(offset, vsp, m, 1, bb.limit());
3275         ByteBuffer wb = wrapper(bb, bo);
3276         return vsp.ldOp(wb, offset, (AbstractMask<Byte>)m,
3277                    (wb_, o, i)  -> wb_.get(o + i * 1));
3278     }
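    // Editor's usage sketch ('bb' is an assumed heap or direct ByteBuffer):
    //
    //   VectorSpecies<Byte> sp = ByteVector.SPECIES_PREFERRED;
    //   VectorMask<Byte> m = sp.indexInRange(0, bb.limit());
    //   ByteVector v = ByteVector.fromByteBuffer(sp, bb, 0,
    //                                            ByteOrder.nativeOrder(), m);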
3279 
3280     // Memory store operations
3281 
3282     /**
3283      * Stores this vector into an array of type {@code byte[]}
3284      * starting at an offset.
3285      * <p>
3286      * For each vector lane, where {@code N} is the vector lane index,
3287      * the lane element at index {@code N} is stored into the array
3288      * element {@code a[offset+N]}.
3289      *
3290      * @param a the array, of type {@code byte[]}

3322      * Lanes where the mask is unset are not stored and do not need
3323      * to correspond to legitimate elements of {@code a}.
3324      * That is, unset lanes may correspond to array indexes less than
3325      * zero or beyond the end of the array.
3326      *
3327      * @param a the array, of type {@code byte[]}
3328      * @param offset the offset into the array
3329      * @param m the mask controlling lane storage
3330      * @throws IndexOutOfBoundsException
3331      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
3332      *         for any lane {@code N} in the vector
3333      *         where the mask is set
3334      */
3335     @ForceInline
3336     public final
3337     void intoArray(byte[] a, int offset,
3338                    VectorMask<Byte> m) {
3339         if (m.allTrue()) {
3340             intoArray(a, offset);
3341         } else {
3342             ByteSpecies vsp = vspecies();
3343             checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
3344             intoArray0(a, offset, m);
3345         }
3346     }
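    // Editor's usage sketch: a masked store writes only the selected lanes,
    // pairing with the masked-load tail pattern ('sp', 'a', 'i' assumed):
    //
    //   VectorMask<Byte> m = sp.indexInRange(i, a.length);
    //   ByteVector v = ByteVector.fromArray(sp, a, i, m);
    //   v.lanewise(VectorOperators.NOT, m).intoArray(a, i, m);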
3347 
3348     /**
3349      * Scatters this vector into an array of type {@code byte[]}
3350      * using indexes obtained by adding a fixed {@code offset} to a
3351      * series of secondary offsets from an <em>index map</em>.
3352      * The index map is a contiguous sequence of {@code VLENGTH}
3353      * elements in a second array of {@code int}s, starting at a given
3354      * {@code mapOffset}.
3355      * <p>
3356      * For each vector lane, where {@code N} is the vector lane index,
3357      * the lane element at index {@code N} is stored into the array
3358      * element {@code a[f(N)]}, where {@code f(N)} is the
3359      * index mapping expression
3360      * {@code offset + indexMap[mapOffset + N]}.
3361      *
3362      * @param a the array
3363      * @param offset an offset to combine with the index map offsets
3364      * @param indexMap the index map

3477      * Lanes where the mask is unset are not stored and do not need
3478      * to correspond to legitimate elements of {@code a}.
3479      * That is, unset lanes may correspond to array indexes less than
3480      * zero or beyond the end of the array.
3481      *
3482      * @param a the array
3483      * @param offset the offset into the array
3484      * @param m the mask controlling lane storage
3485      * @throws IndexOutOfBoundsException
3486      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
3487      *         for any lane {@code N} in the vector
3488      *         where the mask is set
3489      */
3490     @ForceInline
3491     public final
3492     void intoBooleanArray(boolean[] a, int offset,
3493                           VectorMask<Byte> m) {
3494         if (m.allTrue()) {
3495             intoBooleanArray(a, offset);
3496         } else {
3497             ByteSpecies vsp = vspecies();
3498             checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
3499             intoBooleanArray0(a, offset, m);
3500         }
3501     }
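    // Editor's usage sketch ('out' is an assumed boolean[], 'sp' and 'v'
    // assumed): each selected byte lane e is stored as the boolean
    // (e & 1) != 0.
    //
    //   VectorMask<Byte> m = sp.indexInRange(0, out.length);
    //   v.intoBooleanArray(out, 0, m);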
3502 
3503     /**
3504      * Scatters this vector into an array of type {@code boolean[]}
3505      * using indexes obtained by adding a fixed {@code offset} to a
3506      * series of secondary offsets from an <em>index map</em>.
3507      * The index map is a contiguous sequence of {@code VLENGTH}
3508      * elements in a second array of {@code int}s, starting at a given
3509      * {@code mapOffset}.
3510      * <p>
3511      * For each vector lane, where {@code N} is the vector lane index,
3512      * the lane element at index {@code N}
3513      * is first converted to a {@code boolean} value and then
3514      * stored into the array
3515      * element {@code a[f(N)]}, where {@code f(N)} is the
3516      * index mapping expression
3517      * {@code offset + indexMap[mapOffset + N]}.
3518      * <p>
3519      * A {@code byte} value is converted to a {@code boolean} value by applying the

3598     @ForceInline
3599     public final
3600     void intoByteArray(byte[] a, int offset,
3601                        ByteOrder bo) {
3602         offset = checkFromIndexSize(offset, byteSize(), a.length);
3603         maybeSwap(bo).intoByteArray0(a, offset);
3604     }
3605 
3606     /**
3607      * {@inheritDoc} <!--workaround-->
3608      */
3609     @Override
3610     @ForceInline
3611     public final
3612     void intoByteArray(byte[] a, int offset,
3613                        ByteOrder bo,
3614                        VectorMask<Byte> m) {
3615         if (m.allTrue()) {
3616             intoByteArray(a, offset, bo);
3617         } else {
3618             ByteSpecies vsp = vspecies();
3619             checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
3620             maybeSwap(bo).intoByteArray0(a, offset, m);
3621         }
3622     }
3623 
3624     /**
3625      * {@inheritDoc} <!--workaround-->
3626      */
3627     @Override
3628     @ForceInline
3629     public final
3630     void intoByteBuffer(ByteBuffer bb, int offset,
3631                         ByteOrder bo) {
3632         if (ScopedMemoryAccess.isReadOnly(bb)) {
3633             throw new ReadOnlyBufferException();
3634         }
3635         offset = checkFromIndexSize(offset, byteSize(), bb.limit());
3636         maybeSwap(bo).intoByteBuffer0(bb, offset);
3637     }
3638 
3639     /**
3640      * {@inheritDoc} <!--workaround-->
3641      */
3642     @Override
3643     @ForceInline
3644     public final
3645     void intoByteBuffer(ByteBuffer bb, int offset,
3646                         ByteOrder bo,
3647                         VectorMask<Byte> m) {
3648         if (m.allTrue()) {
3649             intoByteBuffer(bb, offset, bo);
3650         } else {
3651             if (bb.isReadOnly()) {
3652                 throw new ReadOnlyBufferException();
3653             }
3654             ByteSpecies vsp = vspecies();
3655             checkMaskFromIndexSize(offset, vsp, m, 1, bb.limit());
3656             maybeSwap(bo).intoByteBuffer0(bb, offset, m);
3657         }
3658     }
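    // Editor's behavior sketch: even a masked store rejects a read-only
    // buffer before writing any lane ('v' and 'm' assumed):
    //
    //   ByteBuffer ro = ByteBuffer.allocate(64).asReadOnlyBuffer();
    //   try {
    //       v.intoByteBuffer(ro, 0, ByteOrder.nativeOrder(), m);
    //   } catch (ReadOnlyBufferException e) {
    //       // expected; 'ro' is untouched
    //   }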
3659 
3660     // ================================================
3661 
3662     // Low-level memory operations.
3663     //
3664     // Note that all of these operations *must* inline into a context
3665     // where the exact species of the involved vector is a
3666     // compile-time constant.  Otherwise, the intrinsic generation
3667     // will fail and performance will suffer.
3668     //
3669     // In many cases this is achieved by re-deriving a version of the
3670     // method in each concrete subclass (per species).  The re-derived
3671     // method simply calls one of these generic methods, with exact
3672     // parameters for the controlling metadata, which is either a
3673     // typed vector or constant species instance.
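    // Editor's sketch of the re-derivation described above; a concrete
    // species class such as Byte128Vector overrides each method to pin the
    // species to a compile-time constant:
    //
    //   @ForceInline
    //   @Override
    //   final ByteVector fromArray0(byte[] a, int offset) {
    //       return super.fromArray0Template(a, offset);  // specialize
    //   }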
3674 
3675     // Unchecked loading operations in native byte order.
3676     // Caller is responsible for applying index checks, masking, and
3677     // byte swapping.
3678 
3679     /*package-private*/
3680     abstract
3681     ByteVector fromArray0(byte[] a, int offset);
3682     @ForceInline
3683     final
3684     ByteVector fromArray0Template(byte[] a, int offset) {
3685         ByteSpecies vsp = vspecies();
3686         return VectorSupport.load(
3687             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3688             a, arrayAddress(a, offset),
3689             a, offset, vsp,
3690             (arr, off, s) -> s.ldOp(arr, off,
3691                                     (arr_, off_, i) -> arr_[off_ + i]));
3692     }
3693 
3694     /*package-private*/
3695     abstract
3696     ByteVector fromArray0(byte[] a, int offset, VectorMask<Byte> m);
3697     @ForceInline
3698     final
3699     <M extends VectorMask<Byte>>
3700     ByteVector fromArray0Template(Class<M> maskClass, byte[] a, int offset, M m) {
3701         m.check(species());
3702         ByteSpecies vsp = vspecies();
3703         return VectorSupport.loadMasked(
3704             vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
3705             a, arrayAddress(a, offset), m,
3706             a, offset, vsp,
3707             (arr, off, s, vm) -> s.ldOp(arr, off, vm,
3708                                         (arr_, off_, i) -> arr_[off_ + i]));
3709     }
3710 
3711 
3712 
3713     /*package-private*/
3714     abstract
3715     ByteVector fromBooleanArray0(boolean[] a, int offset);
3716     @ForceInline
3717     final
3718     ByteVector fromBooleanArray0Template(boolean[] a, int offset) {
3719         ByteSpecies vsp = vspecies();
3720         return VectorSupport.load(
3721             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3722             a, booleanArrayAddress(a, offset),
3723             a, offset, vsp,
3724             (arr, off, s) -> s.ldOp(arr, off,
3725                                     (arr_, off_, i) -> (byte) (arr_[off_ + i] ? 1 : 0)));
3726     }
3727 
3728     /*package-private*/
3729     abstract
3730     ByteVector fromBooleanArray0(boolean[] a, int offset, VectorMask<Byte> m);
3731     @ForceInline
3732     final
3733     <M extends VectorMask<Byte>>
3734     ByteVector fromBooleanArray0Template(Class<M> maskClass, boolean[] a, int offset, M m) {
3735         m.check(species());
3736         ByteSpecies vsp = vspecies();
3737         return VectorSupport.loadMasked(
3738             vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
3739             a, booleanArrayAddress(a, offset), m,
3740             a, offset, vsp,
3741             (arr, off, s, vm) -> s.ldOp(arr, off, vm,
3742                                         (arr_, off_, i) -> (byte) (arr_[off_ + i] ? 1 : 0)));
3743     }
3744 
3745     @Override
3746     abstract
3747     ByteVector fromByteArray0(byte[] a, int offset);
3748     @ForceInline
3749     final
3750     ByteVector fromByteArray0Template(byte[] a, int offset) {
3751         ByteSpecies vsp = vspecies();
3752         return VectorSupport.load(
3753             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3754             a, byteArrayAddress(a, offset),
3755             a, offset, vsp,
3756             (arr, off, s) -> {
3757                 ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
3758                 return s.ldOp(wb, off,
3759                         (wb_, o, i) -> wb_.get(o + i * 1));
3760             });
3761     }
3762 
3763     abstract
3764     ByteVector fromByteArray0(byte[] a, int offset, VectorMask<Byte> m);
3765     @ForceInline
3766     final
3767     <M extends VectorMask<Byte>>
3768     ByteVector fromByteArray0Template(Class<M> maskClass, byte[] a, int offset, M m) {
3769         ByteSpecies vsp = vspecies();
3770         m.check(vsp);
3771         return VectorSupport.loadMasked(
3772             vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
3773             a, byteArrayAddress(a, offset), m,
3774             a, offset, vsp,
3775             (arr, off, s, vm) -> {
3776                 ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
3777                 return s.ldOp(wb, off, vm,
3778                         (wb_, o, i) -> wb_.get(o + i * 1));
3779             });
3780     }
3781 
3782     abstract
3783     ByteVector fromByteBuffer0(ByteBuffer bb, int offset);
3784     @ForceInline
3785     final
3786     ByteVector fromByteBuffer0Template(ByteBuffer bb, int offset) {
3787         ByteSpecies vsp = vspecies();
3788         return ScopedMemoryAccess.loadFromByteBuffer(
3789                 vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3790                 bb, offset, vsp,
3791                 (buf, off, s) -> {
3792                     ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
3793                     return s.ldOp(wb, off,
3794                             (wb_, o, i) -> wb_.get(o + i * 1));
3795                 });
3796     }
3797 
3798     abstract
3799     ByteVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Byte> m);
3800     @ForceInline
3801     final
3802     <M extends VectorMask<Byte>>
3803     ByteVector fromByteBuffer0Template(Class<M> maskClass, ByteBuffer bb, int offset, M m) {
3804         ByteSpecies vsp = vspecies();
3805         m.check(vsp);
3806         return ScopedMemoryAccess.loadFromByteBufferMasked(
3807                 vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
3808                 bb, offset, m, vsp,
3809                 (buf, off, s, vm) -> {
3810                     ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
3811                     return s.ldOp(wb, off, vm,
3812                             (wb_, o, i) -> wb_.get(o + i * 1));
3813                 });
3814     }
3815 
3816     // Unchecked storing operations in native byte order.
3817     // Caller is responsible for applying index checks, masking, and
3818     // byte swapping.
3819 
3820     abstract
3821     void intoArray0(byte[] a, int offset);
3822     @ForceInline
3823     final
3824     void intoArray0Template(byte[] a, int offset) {
3825         ByteSpecies vsp = vspecies();
3826         VectorSupport.store(
3827             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3828             a, arrayAddress(a, offset),
3829             this, a, offset,
3830             (arr, off, v)
3831             -> v.stOp(arr, off,
3832                       (arr_, off_, i, e) -> arr_[off_+i] = e));
3833     }
3834 
3835     abstract
3836     void intoArray0(byte[] a, int offset, VectorMask<Byte> m);
3837     @ForceInline
3838     final
3839     <M extends VectorMask<Byte>>
3840     void intoArray0Template(Class<M> maskClass, byte[] a, int offset, M m) {
3841         m.check(species());
3842         ByteSpecies vsp = vspecies();
3843         VectorSupport.storeMasked(
3844             vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
3845             a, arrayAddress(a, offset),
3846             this, m, a, offset,
3847             (arr, off, v, vm)
3848             -> v.stOp(arr, off, vm,
3849                       (arr_, off_, i, e) -> arr_[off_ + i] = e));
3850     }
3851 
3852 
3853     abstract
3854     void intoBooleanArray0(boolean[] a, int offset, VectorMask<Byte> m);
3855     @ForceInline
3856     final
3857     <M extends VectorMask<Byte>>
3858     void intoBooleanArray0Template(Class<M> maskClass, boolean[] a, int offset, M m) {
3859         m.check(species());
3860         ByteSpecies vsp = vspecies();
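        // Normalize every lane to 0 or 1 so the stored boolean image is
        // canonical; the lambda below tests (e & 1) != 0 to match.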
3861         ByteVector normalized = this.and((byte) 1);
3862         VectorSupport.storeMasked(
3863             vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
3864             a, booleanArrayAddress(a, offset),
3865             normalized, m, a, offset,
3866             (arr, off, v, vm)
3867             -> v.stOp(arr, off, vm,
3868                       (arr_, off_, i, e) -> arr_[off_ + i] = (e & 1) != 0));
3869     }
3870 
3871     abstract
3872     void intoByteArray0(byte[] a, int offset);
3873     @ForceInline
3874     final
3875     void intoByteArray0Template(byte[] a, int offset) {
3876         ByteSpecies vsp = vspecies();
3877         VectorSupport.store(
3878             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3879             a, byteArrayAddress(a, offset),
3880             this, a, offset,
3881             (arr, off, v) -> {
3882                 ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
3883                 v.stOp(wb, off,
3884                         (tb_, o, i, e) -> tb_.put(o + i * 1, e));
3885             });
3886     }
3887 
3888     abstract
3889     void intoByteArray0(byte[] a, int offset, VectorMask<Byte> m);
3890     @ForceInline
3891     final
3892     <M extends VectorMask<Byte>>
3893     void intoByteArray0Template(Class<M> maskClass, byte[] a, int offset, M m) {
3894         ByteSpecies vsp = vspecies();
3895         m.check(vsp);
3896         VectorSupport.storeMasked(
3897             vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
3898             a, byteArrayAddress(a, offset),
3899             this, m, a, offset,
3900             (arr, off, v, vm) -> {
3901                 ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
3902                 v.stOp(wb, off, vm,
3903                         (tb_, o, i, e) -> tb_.put(o + i * 1, e));
3904             });
3905     }
3906 
3907     @ForceInline
3908     final
3909     void intoByteBuffer0(ByteBuffer bb, int offset) {
3910         ByteSpecies vsp = vspecies();
3911         ScopedMemoryAccess.storeIntoByteBuffer(
3912                 vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3913                 this, bb, offset,
3914                 (buf, off, v) -> {
3915                     ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
3916                     v.stOp(wb, off,
3917                             (wb_, o, i, e) -> wb_.put(o + i * 1, e));
3918                 });
3919     }
3920 
3921     abstract
3922     void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Byte> m);
3923     @ForceInline
3924     final
3925     <M extends VectorMask<Byte>>
3926     void intoByteBuffer0Template(Class<M> maskClass, ByteBuffer bb, int offset, M m) {
3927         ByteSpecies vsp = vspecies();
3928         m.check(vsp);
3929         ScopedMemoryAccess.storeIntoByteBufferMasked(
3930                 vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
3931                 this, m, bb, offset,
3932                 (buf, off, v, vm) -> {
3933                     ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
3934                     v.stOp(wb, off, vm,
3935                             (wb_, o, i, e) -> wb_.put(o + i * 1, e));
3936                 });
3937     }
3938 
3939 
3940     // End of low-level memory operations.
3941 
3942     private static
3943     void checkMaskFromIndexSize(int offset,
3944                                 ByteSpecies vsp,
3945                                 VectorMask<Byte> m,
3946                                 int scale,
3947                                 int limit) {
3948         ((AbstractMask<Byte>)m)
3949             .checkIndexByLane(offset, limit, vsp.iota(), scale);
3950     }
3951 
3952     @ForceInline
3953     private void conditionalStoreNYI(int offset,
3954                                      ByteSpecies vsp,
3955                                      VectorMask<Byte> m,
3956                                      int scale,
3957                                      int limit) {
3958         if (offset < 0 || offset + vsp.laneCount() * scale > limit) {
3959             String msg =

4246             byte[] res = new byte[laneCount()];
4247             boolean[] mbits = ((AbstractMask<Byte>)m).getBits();
4248             for (int i = 0; i < res.length; i++) {
4249                 if (mbits[i]) {
4250                     res[i] = f.apply(i);
4251                 }
4252             }
4253             return dummyVector().vectorFactory(res);
4254         }
4255 
4256         /*package-private*/
4257         @ForceInline
4258         <M> ByteVector ldOp(M memory, int offset,
4259                                       FLdOp<M> f) {
4260             return dummyVector().ldOp(memory, offset, f);
4261         }
4262 
4263         /*package-private*/
4264         @ForceInline
4265         <M> ByteVector ldOp(M memory, int offset,
4266                                       VectorMask<Byte> m,
4267                                       FLdOp<M> f) {
4268             return dummyVector().ldOp(memory, offset, m, f);
4269         }
4270 
4271         /*package-private*/
4272         @ForceInline
4273         <M> void stOp(M memory, int offset, FStOp<M> f) {
4274             dummyVector().stOp(memory, offset, f);
4275         }
4276 
4277         /*package-private*/
4278         @ForceInline
4279         <M> void stOp(M memory, int offset,
4280                       AbstractMask<Byte> m,
4281                       FStOp<M> f) {
4282             dummyVector().stOp(memory, offset, m, f);
4283         }
4284 
4285         // N.B. Make sure these constant vectors and
4286         // masks load up correctly into registers.