1 /*
   2  * Copyright (c) 2017, 2022, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 package jdk.incubator.vector;
  26 
  27 import java.nio.ByteBuffer;
  28 import java.nio.ByteOrder;
  29 import java.nio.ReadOnlyBufferException;
  30 import java.util.Arrays;
  31 import java.util.Objects;
  32 import java.util.function.Function;
  33 import java.util.function.UnaryOperator;
  34 
  35 import jdk.internal.misc.ScopedMemoryAccess;
  36 import jdk.internal.misc.Unsafe;
  37 import jdk.internal.vm.annotation.ForceInline;
  38 import jdk.internal.vm.vector.VectorSupport;
  39 
  40 import static jdk.internal.vm.vector.VectorSupport.*;
  41 import static jdk.incubator.vector.VectorIntrinsics.*;
  42 
  43 import static jdk.incubator.vector.VectorOperators.*;
  44 
  45 // -- This file was mechanically generated: Do not edit! -- //
  46 
  47 /**
  48  * A specialized {@link Vector} representing an ordered immutable sequence of
  49  * {@code byte} values.
  50  */
  51 @SuppressWarnings("cast")  // warning: redundant cast
  52 public abstract class ByteVector extends AbstractVector<Byte> {
  53 
    /**
     * Constructs a vector directly backed by the given lane array.
     * Callers pass a freshly allocated array; per the factory contract
     * elsewhere in this file, it is an error if the array is aliased.
     */
    ByteVector(byte[] vec) {
        super(vec);
    }
  57 
    // Byte lanes are integral, so floating-point-only opcodes are rejected.
    static final int FORBID_OPCODE_KIND = VO_ONLYFP;

    // Maps an operator token to its numeric opcode, rejecting FP-only ops.
    @ForceInline
    static int opCode(Operator op) {
        return VectorOperators.opCode(op, VO_OPCODE_VALID, FORBID_OPCODE_KIND);
    }
    // Same, but the caller may demand additional kind bits (e.g. VO_SHIFT).
    @ForceInline
    static int opCode(Operator op, int requireKind) {
        requireKind |= VO_OPCODE_VALID;
        return VectorOperators.opCode(op, requireKind, FORBID_OPCODE_KIND);
    }
    // Tests whether the operator carries the given kind bit(s).
    @ForceInline
    static boolean opKind(Operator op, int bit) {
        return VectorOperators.opKind(op, bit);
    }
  73 
  74     // Virtualized factories and operators,
  75     // coded with portable definitions.
  76     // These are all @ForceInline in case
  77     // they need to be used performantly.
  78     // The various shape-specific subclasses
  79     // also specialize them by wrapping
  80     // them in a call like this:
  81     //    return (Byte128Vector)
  82     //       super.bOp((Byte128Vector) o);
  83     // The purpose of that is to forcibly inline
  84     // the generic definition from this file
  85     // into a sharply type- and size-specific
  86     // wrapper in the subclass file, so that
  87     // the JIT can specialize the code.
  88     // The code is only inlined and expanded
  89     // if it gets hot.  Think of it as a cheap
  90     // and lazy version of C++ templates.
  91 
    // Virtualized getter

    /*package-private*/
    // Returns the backing lane array (not a copy; callers must not mutate it).
    abstract byte[] vec();

    // Virtualized constructors

    /**
     * Build a vector directly using my own constructor.
     * It is an error if the array is aliased elsewhere.
     */
    /*package-private*/
    abstract ByteVector vectorFactory(byte[] vec);
 105 
 106     /**
 107      * Build a mask directly using my species.
 108      * It is an error if the array is aliased elsewhere.
 109      */
 110     /*package-private*/
 111     @ForceInline
 112     final
 113     AbstractMask<Byte> maskFactory(boolean[] bits) {
 114         return vspecies().maskFactory(bits);
 115     }
 116 
    // Constant loader (takes dummy as vector arg)
    // Lane generator: maps a lane index to a byte lane value.
    interface FVOp {
        byte apply(int i);
    }
 121 
 122     /*package-private*/
 123     @ForceInline
 124     final
 125     ByteVector vOp(FVOp f) {
 126         byte[] res = new byte[length()];
 127         for (int i = 0; i < res.length; i++) {
 128             res[i] = f.apply(i);
 129         }
 130         return vectorFactory(res);
 131     }
 132 
 133     @ForceInline
 134     final
 135     ByteVector vOp(VectorMask<Byte> m, FVOp f) {
 136         byte[] res = new byte[length()];
 137         boolean[] mbits = ((AbstractMask<Byte>)m).getBits();
 138         for (int i = 0; i < res.length; i++) {
 139             if (mbits[i]) {
 140                 res[i] = f.apply(i);
 141             }
 142         }
 143         return vectorFactory(res);
 144     }
 145 
    // Unary operator

    /*package-private*/
    // Lane-wise unary function: (lane index, lane value) -> new lane value.
    interface FUnOp {
        byte apply(int i, byte a);
    }

    /*package-private*/
    // Applies f to every lane; specialized (and force-inlined) by subclasses.
    abstract
    ByteVector uOp(FUnOp f);
 156     @ForceInline
 157     final
 158     ByteVector uOpTemplate(FUnOp f) {
 159         byte[] vec = vec();
 160         byte[] res = new byte[length()];
 161         for (int i = 0; i < res.length; i++) {
 162             res[i] = f.apply(i, vec[i]);
 163         }
 164         return vectorFactory(res);
 165     }
 166 
    /*package-private*/
    // Masked variant: lanes where the mask is unset keep their original value.
    abstract
    ByteVector uOp(VectorMask<Byte> m,
                             FUnOp f);
 171     @ForceInline
 172     final
 173     ByteVector uOpTemplate(VectorMask<Byte> m,
 174                                      FUnOp f) {
 175         if (m == null) {
 176             return uOpTemplate(f);
 177         }
 178         byte[] vec = vec();
 179         byte[] res = new byte[length()];
 180         boolean[] mbits = ((AbstractMask<Byte>)m).getBits();
 181         for (int i = 0; i < res.length; i++) {
 182             res[i] = mbits[i] ? f.apply(i, vec[i]) : vec[i];
 183         }
 184         return vectorFactory(res);
 185     }
 186 
    // Binary operator

    /*package-private*/
    // Lane-wise binary function: (lane index, a-lane, b-lane) -> result lane.
    interface FBinOp {
        byte apply(int i, byte a, byte b);
    }

    /*package-private*/
    // Applies f across corresponding lanes of this vector and o.
    abstract
    ByteVector bOp(Vector<Byte> o,
                             FBinOp f);
 198     @ForceInline
 199     final
 200     ByteVector bOpTemplate(Vector<Byte> o,
 201                                      FBinOp f) {
 202         byte[] res = new byte[length()];
 203         byte[] vec1 = this.vec();
 204         byte[] vec2 = ((ByteVector)o).vec();
 205         for (int i = 0; i < res.length; i++) {
 206             res[i] = f.apply(i, vec1[i], vec2[i]);
 207         }
 208         return vectorFactory(res);
 209     }
 210 
    /*package-private*/
    // Masked variant: unset lanes keep this vector's original lane value.
    abstract
    ByteVector bOp(Vector<Byte> o,
                             VectorMask<Byte> m,
                             FBinOp f);
 216     @ForceInline
 217     final
 218     ByteVector bOpTemplate(Vector<Byte> o,
 219                                      VectorMask<Byte> m,
 220                                      FBinOp f) {
 221         if (m == null) {
 222             return bOpTemplate(o, f);
 223         }
 224         byte[] res = new byte[length()];
 225         byte[] vec1 = this.vec();
 226         byte[] vec2 = ((ByteVector)o).vec();
 227         boolean[] mbits = ((AbstractMask<Byte>)m).getBits();
 228         for (int i = 0; i < res.length; i++) {
 229             res[i] = mbits[i] ? f.apply(i, vec1[i], vec2[i]) : vec1[i];
 230         }
 231         return vectorFactory(res);
 232     }
 233 
    // Ternary operator

    /*package-private*/
    // Lane-wise ternary function: (lane index, a, b, c) -> result lane.
    interface FTriOp {
        byte apply(int i, byte a, byte b, byte c);
    }

    /*package-private*/
    // Applies f across corresponding lanes of this vector, o1 and o2.
    abstract
    ByteVector tOp(Vector<Byte> o1,
                             Vector<Byte> o2,
                             FTriOp f);
 246     @ForceInline
 247     final
 248     ByteVector tOpTemplate(Vector<Byte> o1,
 249                                      Vector<Byte> o2,
 250                                      FTriOp f) {
 251         byte[] res = new byte[length()];
 252         byte[] vec1 = this.vec();
 253         byte[] vec2 = ((ByteVector)o1).vec();
 254         byte[] vec3 = ((ByteVector)o2).vec();
 255         for (int i = 0; i < res.length; i++) {
 256             res[i] = f.apply(i, vec1[i], vec2[i], vec3[i]);
 257         }
 258         return vectorFactory(res);
 259     }
 260 
    /*package-private*/
    // Masked variant: unset lanes keep this vector's original lane value.
    abstract
    ByteVector tOp(Vector<Byte> o1,
                             Vector<Byte> o2,
                             VectorMask<Byte> m,
                             FTriOp f);
 267     @ForceInline
 268     final
 269     ByteVector tOpTemplate(Vector<Byte> o1,
 270                                      Vector<Byte> o2,
 271                                      VectorMask<Byte> m,
 272                                      FTriOp f) {
 273         if (m == null) {
 274             return tOpTemplate(o1, o2, f);
 275         }
 276         byte[] res = new byte[length()];
 277         byte[] vec1 = this.vec();
 278         byte[] vec2 = ((ByteVector)o1).vec();
 279         byte[] vec3 = ((ByteVector)o2).vec();
 280         boolean[] mbits = ((AbstractMask<Byte>)m).getBits();
 281         for (int i = 0; i < res.length; i++) {
 282             res[i] = mbits[i] ? f.apply(i, vec1[i], vec2[i], vec3[i]) : vec1[i];
 283         }
 284         return vectorFactory(res);
 285     }
 286 
    // Reduction operator

    /*package-private*/
    // Folds the lanes into a scalar, seeded with v; a null mask means all lanes.
    abstract
    byte rOp(byte v, VectorMask<Byte> m, FBinOp f);
 292 
 293     @ForceInline
 294     final
 295     byte rOpTemplate(byte v, VectorMask<Byte> m, FBinOp f) {
 296         if (m == null) {
 297             return rOpTemplate(v, f);
 298         }
 299         byte[] vec = vec();
 300         boolean[] mbits = ((AbstractMask<Byte>)m).getBits();
 301         for (int i = 0; i < vec.length; i++) {
 302             v = mbits[i] ? f.apply(i, v, vec[i]) : v;
 303         }
 304         return v;
 305     }
 306 
 307     @ForceInline
 308     final
 309     byte rOpTemplate(byte v, FBinOp f) {
 310         byte[] vec = vec();
 311         for (int i = 0; i < vec.length; i++) {
 312             v = f.apply(i, v, vec[i]);
 313         }
 314         return v;
 315     }
 316 
    // Memory reference

    /*package-private*/
    // Loader: reads the byte for lane i from the memory abstraction M
    // at the given base offset.
    interface FLdOp<M> {
        byte apply(M memory, int offset, int i);
    }
 323 
 324     /*package-private*/
 325     @ForceInline
 326     final
 327     <M> ByteVector ldOp(M memory, int offset,
 328                                   FLdOp<M> f) {
 329         //dummy; no vec = vec();
 330         byte[] res = new byte[length()];
 331         for (int i = 0; i < res.length; i++) {
 332             res[i] = f.apply(memory, offset, i);
 333         }
 334         return vectorFactory(res);
 335     }
 336 
 337     /*package-private*/
 338     @ForceInline
 339     final
 340     <M> ByteVector ldOp(M memory, int offset,
 341                                   VectorMask<Byte> m,
 342                                   FLdOp<M> f) {
 343         //byte[] vec = vec();
 344         byte[] res = new byte[length()];
 345         boolean[] mbits = ((AbstractMask<Byte>)m).getBits();
 346         for (int i = 0; i < res.length; i++) {
 347             if (mbits[i]) {
 348                 res[i] = f.apply(memory, offset, i);
 349             }
 350         }
 351         return vectorFactory(res);
 352     }
 353 
    // Storer: writes lane value a for lane i into the memory abstraction M
    // at the given base offset.
    interface FStOp<M> {
        void apply(M memory, int offset, int i, byte a);
    }
 357 
 358     /*package-private*/
 359     @ForceInline
 360     final
 361     <M> void stOp(M memory, int offset,
 362                   FStOp<M> f) {
 363         byte[] vec = vec();
 364         for (int i = 0; i < vec.length; i++) {
 365             f.apply(memory, offset, i, vec[i]);
 366         }
 367     }
 368 
 369     /*package-private*/
 370     @ForceInline
 371     final
 372     <M> void stOp(M memory, int offset,
 373                   VectorMask<Byte> m,
 374                   FStOp<M> f) {
 375         byte[] vec = vec();
 376         boolean[] mbits = ((AbstractMask<Byte>)m).getBits();
 377         for (int i = 0; i < vec.length; i++) {
 378             if (mbits[i]) {
 379                 f.apply(memory, offset, i, vec[i]);
 380             }
 381         }
 382     }
 383 
    // Binary test

    /*package-private*/
    // Lane-wise comparison: cond selects the relation being tested.
    interface FBinTest {
        boolean apply(int cond, int i, byte a, byte b);
    }
 390 
 391     /*package-private*/
 392     @ForceInline
 393     final
 394     AbstractMask<Byte> bTest(int cond,
 395                                   Vector<Byte> o,
 396                                   FBinTest f) {
 397         byte[] vec1 = vec();
 398         byte[] vec2 = ((ByteVector)o).vec();
 399         boolean[] bits = new boolean[length()];
 400         for (int i = 0; i < length(); i++){
 401             bits[i] = f.apply(cond, i, vec1[i], vec2[i]);
 402         }
 403         return maskFactory(bits);
 404     }
 405 
 406     /*package-private*/
 407     @ForceInline
 408     static byte rotateLeft(byte a, int n) {
 409         return (byte)(((((byte)a) & Byte.toUnsignedInt((byte)-1)) << (n & Byte.SIZE-1)) | ((((byte)a) & Byte.toUnsignedInt((byte)-1)) >>> (Byte.SIZE - (n & Byte.SIZE-1))));
 410     }
 411 
 412     /*package-private*/
 413     @ForceInline
 414     static byte rotateRight(byte a, int n) {
 415         return (byte)(((((byte)a) & Byte.toUnsignedInt((byte)-1)) >>> (n & Byte.SIZE-1)) | ((((byte)a) & Byte.toUnsignedInt((byte)-1)) << (Byte.SIZE - (n & Byte.SIZE-1))));
 416     }
 417 
    /*package-private*/
    @Override
    abstract ByteSpecies vspecies();

    /*package-private*/
    // Widens a byte lane to its 64-bit bit pattern (sign-extended).
    @ForceInline
    static long toBits(byte e) {
        return  e;
    }

    /*package-private*/
    // Narrows a 64-bit bit pattern back to a byte lane (truncating cast).
    @ForceInline
    static byte fromBits(long bits) {
        return ((byte)bits);
    }
 433 
 434     // Static factories (other than memory operations)
 435 
 436     // Note: A surprising behavior in javadoc
 437     // sometimes makes a lone /** {@inheritDoc} */
 438     // comment drop the method altogether,
 439     // apparently if the method mentions an
 440     // parameter or return type of Vector<Byte>
 441     // instead of Vector<E> as originally specified.
 442     // Adding an empty HTML fragment appears to
 443     // nudge javadoc into providing the desired
 444     // inherited documentation.  We use the HTML
 445     // comment <!--workaround--> for this.
 446 
 447     /**
 448      * Returns a vector of the given species
 449      * where all lane elements are set to
 450      * zero, the default primitive value.
 451      *
 452      * @param species species of the desired zero vector
 453      * @return a zero vector
 454      */
 455     @ForceInline
 456     public static ByteVector zero(VectorSpecies<Byte> species) {
 457         ByteSpecies vsp = (ByteSpecies) species;
 458         return VectorSupport.fromBitsCoerced(vsp.vectorType(), byte.class, species.length(),
 459                                 0, MODE_BROADCAST, vsp,
 460                                 ((bits_, s_) -> s_.rvOp(i -> bits_)));
 461     }
 462 
 463     /**
 464      * Returns a vector of the same species as this one
 465      * where all lane elements are set to
 466      * the primitive value {@code e}.
 467      *
 468      * The contents of the current vector are discarded;
 469      * only the species is relevant to this operation.
 470      *
 471      * <p> This method returns the value of this expression:
 472      * {@code ByteVector.broadcast(this.species(), e)}.
 473      *
 474      * @apiNote
 475      * Unlike the similar method named {@code broadcast()}
 476      * in the supertype {@code Vector}, this method does not
 477      * need to validate its argument, and cannot throw
 478      * {@code IllegalArgumentException}.  This method is
 479      * therefore preferable to the supertype method.
 480      *
 481      * @param e the value to broadcast
 482      * @return a vector where all lane elements are set to
 483      *         the primitive value {@code e}
 484      * @see #broadcast(VectorSpecies,long)
 485      * @see Vector#broadcast(long)
 486      * @see VectorSpecies#broadcast(long)
 487      */
 488     public abstract ByteVector broadcast(byte e);
 489 
 490     /**
 491      * Returns a vector of the given species
 492      * where all lane elements are set to
 493      * the primitive value {@code e}.
 494      *
 495      * @param species species of the desired vector
 496      * @param e the value to broadcast
 497      * @return a vector where all lane elements are set to
 498      *         the primitive value {@code e}
 499      * @see #broadcast(long)
 500      * @see Vector#broadcast(long)
 501      * @see VectorSpecies#broadcast(long)
 502      */
 503     @ForceInline
 504     public static ByteVector broadcast(VectorSpecies<Byte> species, byte e) {
 505         ByteSpecies vsp = (ByteSpecies) species;
 506         return vsp.broadcast(e);
 507     }
 508 
 509     /*package-private*/
 510     @ForceInline
 511     final ByteVector broadcastTemplate(byte e) {
 512         ByteSpecies vsp = vspecies();
 513         return vsp.broadcast(e);
 514     }
 515 
 516     /**
 517      * {@inheritDoc} <!--workaround-->
 518      * @apiNote
 519      * When working with vector subtypes like {@code ByteVector},
 520      * {@linkplain #broadcast(byte) the more strongly typed method}
 521      * is typically selected.  It can be explicitly selected
 522      * using a cast: {@code v.broadcast((byte)e)}.
 523      * The two expressions will produce numerically identical results.
 524      */
 525     @Override
 526     public abstract ByteVector broadcast(long e);
 527 
 528     /**
 529      * Returns a vector of the given species
 530      * where all lane elements are set to
 531      * the primitive value {@code e}.
 532      *
 533      * The {@code long} value must be accurately representable
 534      * by the {@code ETYPE} of the vector species, so that
 535      * {@code e==(long)(ETYPE)e}.
 536      *
 537      * @param species species of the desired vector
 538      * @param e the value to broadcast
 539      * @return a vector where all lane elements are set to
 540      *         the primitive value {@code e}
 541      * @throws IllegalArgumentException
 542      *         if the given {@code long} value cannot
 543      *         be represented by the vector's {@code ETYPE}
 544      * @see #broadcast(VectorSpecies,byte)
 545      * @see VectorSpecies#checkValue(long)
 546      */
 547     @ForceInline
 548     public static ByteVector broadcast(VectorSpecies<Byte> species, long e) {
 549         ByteSpecies vsp = (ByteSpecies) species;
 550         return vsp.broadcast(e);
 551     }
 552 
 553     /*package-private*/
 554     @ForceInline
 555     final ByteVector broadcastTemplate(long e) {
 556         return vspecies().broadcast(e);
 557     }
 558 
    // Unary lanewise support

    // Specialized by subclasses, which call lanewiseTemplate for inlining.
    /**
     * {@inheritDoc} <!--workaround-->
     */
    public abstract
    ByteVector lanewise(VectorOperators.Unary op);
 566 
    @ForceInline
    final
    ByteVector lanewiseTemplate(VectorOperators.Unary op) {
        // "Special" ops have no direct opcode and are rewritten into
        // equivalent operations before reaching the intrinsic.
        if (opKind(op, VO_SPECIAL)) {
            if (op == ZOMO) {
                // ZOMO: zero lanes stay zero, nonzero lanes become all-ones.
                return blend(broadcast(-1), compare(NE, 0));
            }
            if (op == NOT) {
                // Bitwise NOT implemented as x ^ -1.
                return broadcast(-1).lanewise(XOR, this);
            }
        }
        int opc = opCode(op);
        // JIT intrinsic; the cached lambda is the portable fallback.
        return VectorSupport.unaryOp(
            opc, getClass(), null, byte.class, length(),
            this, null,
            UN_IMPL.find(op, opc, ByteVector::unaryOperations));
    }
 584 
 585     /**
 586      * {@inheritDoc} <!--workaround-->
 587      */
 588     @Override
 589     public abstract
 590     ByteVector lanewise(VectorOperators.Unary op,
 591                                   VectorMask<Byte> m);
    @ForceInline
    final
    ByteVector lanewiseTemplate(VectorOperators.Unary op,
                                          Class<? extends VectorMask<Byte>> maskClass,
                                          VectorMask<Byte> m) {
        // Verify the mask's concrete class and species match this vector.
        m.check(maskClass, this);
        // "Special" ops are rewritten before reaching the intrinsic,
        // with the mask threaded through the rewritten operations.
        if (opKind(op, VO_SPECIAL)) {
            if (op == ZOMO) {
                // ZOMO: zero lanes stay zero, nonzero lanes become all-ones.
                return blend(broadcast(-1), compare(NE, 0, m));
            }
            if (op == NOT) {
                // Bitwise NOT implemented as x ^ -1, masked.
                return lanewise(XOR, broadcast(-1), m);
            }
        }
        int opc = opCode(op);
        // Masked JIT intrinsic; the cached lambda is the portable fallback.
        return VectorSupport.unaryOp(
            opc, getClass(), maskClass, byte.class, length(),
            this, m,
            UN_IMPL.find(op, opc, ByteVector::unaryOperations));
    }
 612 
    // Cache of portable fallback lambdas, one per unary opcode,
    // resolved lazily by ImplCache.find.
    private static final
    ImplCache<Unary, UnaryOperation<ByteVector, VectorMask<Byte>>>
        UN_IMPL = new ImplCache<>(Unary.class, ByteVector.class);
 616 
 617     private static UnaryOperation<ByteVector, VectorMask<Byte>> unaryOperations(int opc_) {
 618         switch (opc_) {
 619             case VECTOR_OP_NEG: return (v0, m) ->
 620                     v0.uOp(m, (i, a) -> (byte) -a);
 621             case VECTOR_OP_ABS: return (v0, m) ->
 622                     v0.uOp(m, (i, a) -> (byte) Math.abs(a));
 623             default: return null;
 624         }
 625     }
 626 
    // Binary lanewise support

    // Specialized by subclasses, which call lanewiseTemplate for inlining.
    /**
     * {@inheritDoc} <!--workaround-->
     * @see #lanewise(VectorOperators.Binary,byte)
     * @see #lanewise(VectorOperators.Binary,byte,VectorMask)
     */
    @Override
    public abstract
    ByteVector lanewise(VectorOperators.Binary op,
                                  Vector<Byte> v);
    @ForceInline
    final
    ByteVector lanewiseTemplate(VectorOperators.Binary op,
                                          Vector<Byte> v) {
        ByteVector that = (ByteVector) v;
        // Verify the operand has the same species as this vector.
        that.check(this);

        // Ops without a direct opcode are rewritten before the intrinsic.
        if (opKind(op, VO_SPECIAL  | VO_SHIFT)) {
            if (op == FIRST_NONZERO) {
                // FIXME: Support this in the JIT.
                // Rewrite as OR after zeroing `that` wherever `this` is
                // nonzero, so `this` wins in those lanes.
                VectorMask<Byte> thisNZ
                    = this.viewAsIntegralLanes().compare(NE, (byte) 0);
                that = that.blend((byte) 0, thisNZ.cast(vspecies()));
                op = OR_UNCHECKED;
            }
            if (opKind(op, VO_SHIFT)) {
                // As per shift specification for Java, mask the shift count.
                // This allows the JIT to ignore some ISA details.
                that = that.lanewise(AND, SHIFT_MASK);
            }
            if (op == AND_NOT) {
                // FIXME: Support this in the JIT.
                // Rewrite a & ~b as a plain AND with pre-inverted operand.
                that = that.lanewise(NOT);
                op = AND;
            } else if (op == DIV) {
                // Integral division traps on zero; check eagerly so the
                // intrinsic never sees a zero divisor.
                VectorMask<Byte> eqz = that.eq((byte) 0);
                if (eqz.anyTrue()) {
                    throw that.divZeroException();
                }
            }
        }

        int opc = opCode(op);
        // JIT intrinsic; the cached lambda is the portable fallback.
        return VectorSupport.binaryOp(
            opc, getClass(), null, byte.class, length(),
            this, that, null,
            BIN_IMPL.find(op, opc, ByteVector::binaryOperations));
    }
 676 
 677     /**
 678      * {@inheritDoc} <!--workaround-->
 679      * @see #lanewise(VectorOperators.Binary,byte,VectorMask)
 680      */
 681     @Override
 682     public abstract
 683     ByteVector lanewise(VectorOperators.Binary op,
 684                                   Vector<Byte> v,
 685                                   VectorMask<Byte> m);
    @ForceInline
    final
    ByteVector lanewiseTemplate(VectorOperators.Binary op,
                                          Class<? extends VectorMask<Byte>> maskClass,
                                          Vector<Byte> v, VectorMask<Byte> m) {
        ByteVector that = (ByteVector) v;
        // Verify operand species and the mask's concrete class.
        that.check(this);
        m.check(maskClass, this);

        // Ops without a direct opcode are rewritten before the intrinsic.
        if (opKind(op, VO_SPECIAL  | VO_SHIFT)) {
            if (op == FIRST_NONZERO) {
                // FIXME: Support this in the JIT.
                // Rewrite as OR after zeroing `that` wherever `this` is
                // nonzero, so `this` wins in those lanes.
                VectorMask<Byte> thisNZ
                    = this.viewAsIntegralLanes().compare(NE, (byte) 0);
                that = that.blend((byte) 0, thisNZ.cast(vspecies()));
                op = OR_UNCHECKED;
            }
            if (opKind(op, VO_SHIFT)) {
                // As per shift specification for Java, mask the shift count.
                // This allows the JIT to ignore some ISA details.
                that = that.lanewise(AND, SHIFT_MASK);
            }
            if (op == AND_NOT) {
                // FIXME: Support this in the JIT.
                // Rewrite a & ~b as a plain AND with pre-inverted operand.
                that = that.lanewise(NOT);
                op = AND;
            } else if (op == DIV) {
                // Only enabled lanes may trap on division by zero.
                VectorMask<Byte> eqz = that.eq((byte)0);
                if (eqz.and(m).anyTrue()) {
                    throw that.divZeroException();
                }
                // suppress div/0 exceptions in unset lanes
                that = that.lanewise(NOT, eqz);
            }
        }

        int opc = opCode(op);
        // Masked JIT intrinsic; the cached lambda is the portable fallback.
        return VectorSupport.binaryOp(
            opc, getClass(), maskClass, byte.class, length(),
            this, that, m,
            BIN_IMPL.find(op, opc, ByteVector::binaryOperations));
    }
 728 
    // Cache of portable fallback lambdas, one per binary opcode,
    // resolved lazily by ImplCache.find.
    private static final
    ImplCache<Binary, BinaryOperation<ByteVector, VectorMask<Byte>>>
        BIN_IMPL = new ImplCache<>(Binary.class, ByteVector.class);
 732 
    // Portable fallback implementations for binary opcodes, used when the
    // JIT intrinsic is not applied.  Shift counts arrive pre-masked by the
    // caller (see lanewiseTemplate's VO_SHIFT handling).
    private static BinaryOperation<ByteVector, VectorMask<Byte>> binaryOperations(int opc_) {
        switch (opc_) {
            case VECTOR_OP_ADD: return (v0, v1, vm) ->
                    v0.bOp(v1, vm, (i, a, b) -> (byte)(a + b));
            case VECTOR_OP_SUB: return (v0, v1, vm) ->
                    v0.bOp(v1, vm, (i, a, b) -> (byte)(a - b));
            case VECTOR_OP_MUL: return (v0, v1, vm) ->
                    v0.bOp(v1, vm, (i, a, b) -> (byte)(a * b));
            case VECTOR_OP_DIV: return (v0, v1, vm) ->
                    v0.bOp(v1, vm, (i, a, b) -> (byte)(a / b));
            case VECTOR_OP_MAX: return (v0, v1, vm) ->
                    v0.bOp(v1, vm, (i, a, b) -> (byte)Math.max(a, b));
            case VECTOR_OP_MIN: return (v0, v1, vm) ->
                    v0.bOp(v1, vm, (i, a, b) -> (byte)Math.min(a, b));
            case VECTOR_OP_AND: return (v0, v1, vm) ->
                    v0.bOp(v1, vm, (i, a, b) -> (byte)(a & b));
            case VECTOR_OP_OR: return (v0, v1, vm) ->
                    v0.bOp(v1, vm, (i, a, b) -> (byte)(a | b));
            case VECTOR_OP_XOR: return (v0, v1, vm) ->
                    v0.bOp(v1, vm, (i, a, b) -> (byte)(a ^ b));
            case VECTOR_OP_LSHIFT: return (v0, v1, vm) ->
                    v0.bOp(v1, vm, (i, a, n) -> (byte)(a << n));
            case VECTOR_OP_RSHIFT: return (v0, v1, vm) ->
                    v0.bOp(v1, vm, (i, a, n) -> (byte)(a >> n));
            // logical right shift zero-extends the lane first (LSHR_SETUP_MASK)
            case VECTOR_OP_URSHIFT: return (v0, v1, vm) ->
                    v0.bOp(v1, vm, (i, a, n) -> (byte)((a & LSHR_SETUP_MASK) >>> n));
            case VECTOR_OP_LROTATE: return (v0, v1, vm) ->
                    v0.bOp(v1, vm, (i, a, n) -> rotateLeft(a, (int)n));
            case VECTOR_OP_RROTATE: return (v0, v1, vm) ->
                    v0.bOp(v1, vm, (i, a, n) -> rotateRight(a, (int)n));
            default: return null;
        }
    }
 766 
 767     // FIXME: Maybe all of the public final methods in this file (the
 768     // simple ones that just call lanewise) should be pushed down to
 769     // the X-VectorBits template.  They can't optimize properly at
 770     // this level, and must rely on inlining.  Does it work?
 771     // (If it works, of course keep the code here.)
 772 
 773     /**
 774      * Combines the lane values of this vector
 775      * with the value of a broadcast scalar.
 776      *
 777      * This is a lane-wise binary operation which applies
 778      * the selected operation to each lane.
 779      * The return value will be equal to this expression:
 780      * {@code this.lanewise(op, this.broadcast(e))}.
 781      *
 782      * @param op the operation used to process lane values
 783      * @param e the input scalar
 784      * @return the result of applying the operation lane-wise
 785      *         to the two input vectors
 786      * @throws UnsupportedOperationException if this vector does
 787      *         not support the requested operation
 788      * @see #lanewise(VectorOperators.Binary,Vector)
 789      * @see #lanewise(VectorOperators.Binary,byte,VectorMask)
 790      */
 791     @ForceInline
 792     public final
 793     ByteVector lanewise(VectorOperators.Binary op,
 794                                   byte e) {
 795         if (opKind(op, VO_SHIFT) && (byte)(int)e == e) {
 796             return lanewiseShift(op, (int) e);
 797         }
 798         if (op == AND_NOT) {
 799             op = AND; e = (byte) ~e;
 800         }
 801         return lanewise(op, broadcast(e));
 802     }
 803 
 804     /**
 805      * Combines the lane values of this vector
 806      * with the value of a broadcast scalar,
 807      * with selection of lane elements controlled by a mask.
 808      *
 809      * This is a masked lane-wise binary operation which applies
 810      * the selected operation to each lane.
 811      * The return value will be equal to this expression:
 812      * {@code this.lanewise(op, this.broadcast(e), m)}.
 813      *
 814      * @param op the operation used to process lane values
 815      * @param e the input scalar
 816      * @param m the mask controlling lane selection
 817      * @return the result of applying the operation lane-wise
 818      *         to the input vector and the scalar
 819      * @throws UnsupportedOperationException if this vector does
 820      *         not support the requested operation
 821      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
 822      * @see #lanewise(VectorOperators.Binary,byte)
 823      */
 824     @ForceInline
 825     public final
 826     ByteVector lanewise(VectorOperators.Binary op,
 827                                   byte e,
 828                                   VectorMask<Byte> m) {
 829         if (opKind(op, VO_SHIFT) && (byte)(int)e == e) {
 830             return lanewiseShift(op, (int) e, m);
 831         }
 832         if (op == AND_NOT) {
 833             op = AND; e = (byte) ~e;
 834         }
 835         return lanewise(op, broadcast(e), m);
 836     }
 837 
 838     /**
 839      * {@inheritDoc} <!--workaround-->
 840      * @apiNote
 841      * When working with vector subtypes like {@code ByteVector},
 842      * {@linkplain #lanewise(VectorOperators.Binary,byte)
 843      * the more strongly typed method}
 844      * is typically selected.  It can be explicitly selected
 845      * using a cast: {@code v.lanewise(op,(byte)e)}.
 846      * The two expressions will produce numerically identical results.
 847      */
 848     @ForceInline
 849     public final
 850     ByteVector lanewise(VectorOperators.Binary op,
 851                                   long e) {
 852         byte e1 = (byte) e;
 853         if ((long)e1 != e
 854             // allow shift ops to clip down their int parameters
 855             && !(opKind(op, VO_SHIFT) && (int)e1 == e)) {
 856             vspecies().checkValue(e);  // for exception
 857         }
 858         return lanewise(op, e1);
 859     }
 860 
 861     /**
 862      * {@inheritDoc} <!--workaround-->
 863      * @apiNote
 864      * When working with vector subtypes like {@code ByteVector},
 865      * {@linkplain #lanewise(VectorOperators.Binary,byte,VectorMask)
 866      * the more strongly typed method}
 867      * is typically selected.  It can be explicitly selected
 868      * using a cast: {@code v.lanewise(op,(byte)e,m)}.
 869      * The two expressions will produce numerically identical results.
 870      */
 871     @ForceInline
 872     public final
 873     ByteVector lanewise(VectorOperators.Binary op,
 874                                   long e, VectorMask<Byte> m) {
 875         byte e1 = (byte) e;
 876         if ((long)e1 != e
 877             // allow shift ops to clip down their int parameters
 878             && !(opKind(op, VO_SHIFT) && (int)e1 == e)) {
 879             vspecies().checkValue(e);  // for exception
 880         }
 881         return lanewise(op, e1, m);
 882     }
 883 
    // Scalar-shift entry point: the shift count is carried as an int,
    // not as a byte lane value; concrete subclasses bind this to the
    // template below.
    /*package-private*/
    abstract ByteVector
    lanewiseShift(VectorOperators.Binary op, int e);

    /*package-private*/
    @ForceInline
    final ByteVector
    lanewiseShiftTemplate(VectorOperators.Binary op, int e) {
        // Special handling for these.  FIXME: Refactor?
        assert(opKind(op, VO_SHIFT));
        // As per shift specification for Java, mask the shift count.
        e &= SHIFT_MASK;
        int opc = opCode(op);
        // Unmasked form: both the mask class and the mask argument are
        // null.  The last argument supplies the scalar fallback used
        // when the intrinsic is not available.
        return VectorSupport.broadcastInt(
            opc, getClass(), null, byte.class, length(),
            this, e, null,
            BIN_INT_IMPL.find(op, opc, ByteVector::broadcastIntOperations));
    }
 902 
    // Masked variant of the scalar-shift entry point.
    /*package-private*/
    abstract ByteVector
    lanewiseShift(VectorOperators.Binary op, int e, VectorMask<Byte> m);

    /*package-private*/
    @ForceInline
    final ByteVector
    lanewiseShiftTemplate(VectorOperators.Binary op,
                          Class<? extends VectorMask<Byte>> maskClass,
                          int e, VectorMask<Byte> m) {
        // Verify the mask is of the expected species and class.
        m.check(maskClass, this);
        assert(opKind(op, VO_SHIFT));
        // As per shift specification for Java, mask the shift count.
        e &= SHIFT_MASK;
        int opc = opCode(op);
        return VectorSupport.broadcastInt(
            opc, getClass(), maskClass, byte.class, length(),
            this, e, m,
            BIN_INT_IMPL.find(op, opc, ByteVector::broadcastIntOperations));
    }
 923 
    // Cache of scalar fallback implementations for broadcast-int
    // (shift/rotate) operations, keyed by operator token.
    private static final
    ImplCache<Binary,VectorBroadcastIntOp<ByteVector, VectorMask<Byte>>> BIN_INT_IMPL
        = new ImplCache<>(Binary.class, ByteVector.class);

    // Lane-by-lane fallbacks for the shift and rotate operators, used
    // when VectorSupport.broadcastInt is not intrinsified.
    private static VectorBroadcastIntOp<ByteVector, VectorMask<Byte>> broadcastIntOperations(int opc_) {
        switch (opc_) {
            case VECTOR_OP_LSHIFT: return (v, n, m) ->
                    v.uOp(m, (i, a) -> (byte)(a << n));
            case VECTOR_OP_RSHIFT: return (v, n, m) ->
                    // signed (arithmetic) right shift
                    v.uOp(m, (i, a) -> (byte)(a >> n));
            case VECTOR_OP_URSHIFT: return (v, n, m) ->
                    // mask first so the unsigned shift sees only the low 8 bits
                    v.uOp(m, (i, a) -> (byte)((a & LSHR_SETUP_MASK) >>> n));
            case VECTOR_OP_LROTATE: return (v, n, m) ->
                    v.uOp(m, (i, a) -> rotateLeft(a, (int)n));
            case VECTOR_OP_RROTATE: return (v, n, m) ->
                    v.uOp(m, (i, a) -> rotateRight(a, (int)n));
            default: return null;  // no scalar fallback for this opcode
        }
    }

    // As per shift specification for Java, mask the shift count.
    // We mask 0X3F (long), 0X1F (int), 0x0F (short), 0x7 (byte).
    // The latter two maskings go beyond the JLS, but seem reasonable
    // since our lane types are first-class types, not just dressed
    // up ints.
    private static final int SHIFT_MASK = (Byte.SIZE - 1);
    // Also simulate >>> on sub-word variables with a mask.
    private static final int LSHR_SETUP_MASK = ((1 << Byte.SIZE) - 1);
 952 
 953     // Ternary lanewise support
 954 
 955     // Ternary operators come in eight variations:
 956     //   lanewise(op, [broadcast(e1)|v1], [broadcast(e2)|v2])
 957     //   lanewise(op, [broadcast(e1)|v1], [broadcast(e2)|v2], mask)
 958 
 959     // It is annoying to support all of these variations of masking
 960     // and broadcast, but it would be more surprising not to continue
 961     // the obvious pattern started by unary and binary.
 962 
 963    /**
 964      * {@inheritDoc} <!--workaround-->
 965      * @see #lanewise(VectorOperators.Ternary,byte,byte,VectorMask)
 966      * @see #lanewise(VectorOperators.Ternary,Vector,byte,VectorMask)
 967      * @see #lanewise(VectorOperators.Ternary,byte,Vector,VectorMask)
 968      * @see #lanewise(VectorOperators.Ternary,byte,byte)
 969      * @see #lanewise(VectorOperators.Ternary,Vector,byte)
 970      * @see #lanewise(VectorOperators.Ternary,byte,Vector)
 971      */
 972     @Override
 973     public abstract
 974     ByteVector lanewise(VectorOperators.Ternary op,
 975                                                   Vector<Byte> v1,
 976                                                   Vector<Byte> v2);
 977     @ForceInline
 978     final
 979     ByteVector lanewiseTemplate(VectorOperators.Ternary op,
 980                                           Vector<Byte> v1,
 981                                           Vector<Byte> v2) {
 982         ByteVector that = (ByteVector) v1;
 983         ByteVector tother = (ByteVector) v2;
 984         // It's a word: https://www.dictionary.com/browse/tother
 985         // See also Chapter 11 of Dickens, Our Mutual Friend:
 986         // "Totherest Governor," replied Mr Riderhood...
 987         that.check(this);
 988         tother.check(this);
 989         if (op == BITWISE_BLEND) {
 990             // FIXME: Support this in the JIT.
 991             that = this.lanewise(XOR, that).lanewise(AND, tother);
 992             return this.lanewise(XOR, that);
 993         }
 994         int opc = opCode(op);
 995         return VectorSupport.ternaryOp(
 996             opc, getClass(), null, byte.class, length(),
 997             this, that, tother, null,
 998             TERN_IMPL.find(op, opc, ByteVector::ternaryOperations));
 999     }
1000 
1001     /**
1002      * {@inheritDoc} <!--workaround-->
1003      * @see #lanewise(VectorOperators.Ternary,byte,byte,VectorMask)
1004      * @see #lanewise(VectorOperators.Ternary,Vector,byte,VectorMask)
1005      * @see #lanewise(VectorOperators.Ternary,byte,Vector,VectorMask)
1006      */
1007     @Override
1008     public abstract
1009     ByteVector lanewise(VectorOperators.Ternary op,
1010                                   Vector<Byte> v1,
1011                                   Vector<Byte> v2,
1012                                   VectorMask<Byte> m);
1013     @ForceInline
1014     final
1015     ByteVector lanewiseTemplate(VectorOperators.Ternary op,
1016                                           Class<? extends VectorMask<Byte>> maskClass,
1017                                           Vector<Byte> v1,
1018                                           Vector<Byte> v2,
1019                                           VectorMask<Byte> m) {
1020         ByteVector that = (ByteVector) v1;
1021         ByteVector tother = (ByteVector) v2;
1022         // It's a word: https://www.dictionary.com/browse/tother
1023         // See also Chapter 11 of Dickens, Our Mutual Friend:
1024         // "Totherest Governor," replied Mr Riderhood...
1025         that.check(this);
1026         tother.check(this);
1027         m.check(maskClass, this);
1028 
1029         if (op == BITWISE_BLEND) {
1030             // FIXME: Support this in the JIT.
1031             that = this.lanewise(XOR, that).lanewise(AND, tother);
1032             return this.lanewise(XOR, that, m);
1033         }
1034         int opc = opCode(op);
1035         return VectorSupport.ternaryOp(
1036             opc, getClass(), maskClass, byte.class, length(),
1037             this, that, tother, m,
1038             TERN_IMPL.find(op, opc, ByteVector::ternaryOperations));
1039     }
1040 
    // Cache of scalar fallback implementations for ternary operations.
    private static final
    ImplCache<Ternary, TernaryOperation<ByteVector, VectorMask<Byte>>>
        TERN_IMPL = new ImplCache<>(Ternary.class, ByteVector.class);

    private static TernaryOperation<ByteVector, VectorMask<Byte>> ternaryOperations(int opc_) {
        switch (opc_) {
            // No scalar fallbacks are defined for byte ternary operators;
            // BITWISE_BLEND is rewritten into XOR/AND before reaching here.
            default: return null;
        }
    }
1050 
1051     /**
1052      * Combines the lane values of this vector
1053      * with the values of two broadcast scalars.
1054      *
1055      * This is a lane-wise ternary operation which applies
1056      * the selected operation to each lane.
1057      * The return value will be equal to this expression:
1058      * {@code this.lanewise(op, this.broadcast(e1), this.broadcast(e2))}.
1059      *
1060      * @param op the operation used to combine lane values
1061      * @param e1 the first input scalar
1062      * @param e2 the second input scalar
1063      * @return the result of applying the operation lane-wise
1064      *         to the input vector and the scalars
1065      * @throws UnsupportedOperationException if this vector does
1066      *         not support the requested operation
1067      * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
1068      * @see #lanewise(VectorOperators.Ternary,byte,byte,VectorMask)
1069      */
1070     @ForceInline
1071     public final
1072     ByteVector lanewise(VectorOperators.Ternary op, //(op,e1,e2)
1073                                   byte e1,
1074                                   byte e2) {
1075         return lanewise(op, broadcast(e1), broadcast(e2));
1076     }
1077 
1078     /**
1079      * Combines the lane values of this vector
1080      * with the values of two broadcast scalars,
1081      * with selection of lane elements controlled by a mask.
1082      *
1083      * This is a masked lane-wise ternary operation which applies
1084      * the selected operation to each lane.
1085      * The return value will be equal to this expression:
1086      * {@code this.lanewise(op, this.broadcast(e1), this.broadcast(e2), m)}.
1087      *
1088      * @param op the operation used to combine lane values
1089      * @param e1 the first input scalar
1090      * @param e2 the second input scalar
1091      * @param m the mask controlling lane selection
1092      * @return the result of applying the operation lane-wise
1093      *         to the input vector and the scalars
1094      * @throws UnsupportedOperationException if this vector does
1095      *         not support the requested operation
1096      * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
1097      * @see #lanewise(VectorOperators.Ternary,byte,byte)
1098      */
1099     @ForceInline
1100     public final
1101     ByteVector lanewise(VectorOperators.Ternary op, //(op,e1,e2,m)
1102                                   byte e1,
1103                                   byte e2,
1104                                   VectorMask<Byte> m) {
1105         return lanewise(op, broadcast(e1), broadcast(e2), m);
1106     }
1107 
1108     /**
1109      * Combines the lane values of this vector
1110      * with the values of another vector and a broadcast scalar.
1111      *
1112      * This is a lane-wise ternary operation which applies
1113      * the selected operation to each lane.
1114      * The return value will be equal to this expression:
1115      * {@code this.lanewise(op, v1, this.broadcast(e2))}.
1116      *
1117      * @param op the operation used to combine lane values
1118      * @param v1 the other input vector
1119      * @param e2 the input scalar
1120      * @return the result of applying the operation lane-wise
1121      *         to the input vectors and the scalar
1122      * @throws UnsupportedOperationException if this vector does
1123      *         not support the requested operation
1124      * @see #lanewise(VectorOperators.Ternary,byte,byte)
1125      * @see #lanewise(VectorOperators.Ternary,Vector,byte,VectorMask)
1126      */
1127     @ForceInline
1128     public final
1129     ByteVector lanewise(VectorOperators.Ternary op, //(op,v1,e2)
1130                                   Vector<Byte> v1,
1131                                   byte e2) {
1132         return lanewise(op, v1, broadcast(e2));
1133     }
1134 
1135     /**
1136      * Combines the lane values of this vector
1137      * with the values of another vector and a broadcast scalar,
1138      * with selection of lane elements controlled by a mask.
1139      *
1140      * This is a masked lane-wise ternary operation which applies
1141      * the selected operation to each lane.
1142      * The return value will be equal to this expression:
1143      * {@code this.lanewise(op, v1, this.broadcast(e2), m)}.
1144      *
1145      * @param op the operation used to combine lane values
1146      * @param v1 the other input vector
1147      * @param e2 the input scalar
1148      * @param m the mask controlling lane selection
1149      * @return the result of applying the operation lane-wise
1150      *         to the input vectors and the scalar
1151      * @throws UnsupportedOperationException if this vector does
1152      *         not support the requested operation
1153      * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
1154      * @see #lanewise(VectorOperators.Ternary,byte,byte,VectorMask)
1155      * @see #lanewise(VectorOperators.Ternary,Vector,byte)
1156      */
1157     @ForceInline
1158     public final
1159     ByteVector lanewise(VectorOperators.Ternary op, //(op,v1,e2,m)
1160                                   Vector<Byte> v1,
1161                                   byte e2,
1162                                   VectorMask<Byte> m) {
1163         return lanewise(op, v1, broadcast(e2), m);
1164     }
1165 
1166     /**
1167      * Combines the lane values of this vector
1168      * with the values of another vector and a broadcast scalar.
1169      *
1170      * This is a lane-wise ternary operation which applies
1171      * the selected operation to each lane.
1172      * The return value will be equal to this expression:
1173      * {@code this.lanewise(op, this.broadcast(e1), v2)}.
1174      *
1175      * @param op the operation used to combine lane values
1176      * @param e1 the input scalar
1177      * @param v2 the other input vector
1178      * @return the result of applying the operation lane-wise
1179      *         to the input vectors and the scalar
1180      * @throws UnsupportedOperationException if this vector does
1181      *         not support the requested operation
1182      * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
1183      * @see #lanewise(VectorOperators.Ternary,byte,Vector,VectorMask)
1184      */
1185     @ForceInline
1186     public final
1187     ByteVector lanewise(VectorOperators.Ternary op, //(op,e1,v2)
1188                                   byte e1,
1189                                   Vector<Byte> v2) {
1190         return lanewise(op, broadcast(e1), v2);
1191     }
1192 
1193     /**
1194      * Combines the lane values of this vector
1195      * with the values of another vector and a broadcast scalar,
1196      * with selection of lane elements controlled by a mask.
1197      *
1198      * This is a masked lane-wise ternary operation which applies
1199      * the selected operation to each lane.
1200      * The return value will be equal to this expression:
1201      * {@code this.lanewise(op, this.broadcast(e1), v2, m)}.
1202      *
1203      * @param op the operation used to combine lane values
1204      * @param e1 the input scalar
1205      * @param v2 the other input vector
1206      * @param m the mask controlling lane selection
1207      * @return the result of applying the operation lane-wise
1208      *         to the input vectors and the scalar
1209      * @throws UnsupportedOperationException if this vector does
1210      *         not support the requested operation
1211      * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
1212      * @see #lanewise(VectorOperators.Ternary,byte,Vector)
1213      */
1214     @ForceInline
1215     public final
1216     ByteVector lanewise(VectorOperators.Ternary op, //(op,e1,v2,m)
1217                                   byte e1,
1218                                   Vector<Byte> v2,
1219                                   VectorMask<Byte> m) {
1220         return lanewise(op, broadcast(e1), v2, m);
1221     }
1222 
1223     // (Thus endeth the Great and Mighty Ternary Ogdoad.)
1224     // https://en.wikipedia.org/wiki/Ogdoad
1225 
1226     /// FULL-SERVICE BINARY METHODS: ADD, SUB, MUL, DIV
1227     //
1228     // These include masked and non-masked versions.
1229     // This subclass adds broadcast (masked or not).
1230 
1231     /**
1232      * {@inheritDoc} <!--workaround-->
1233      * @see #add(byte)
1234      */
1235     @Override
1236     @ForceInline
1237     public final ByteVector add(Vector<Byte> v) {
1238         return lanewise(ADD, v);
1239     }
1240 
1241     /**
1242      * Adds this vector to the broadcast of an input scalar.
1243      *
1244      * This is a lane-wise binary operation which applies
1245      * the primitive addition operation ({@code +}) to each lane.
1246      *
1247      * This method is also equivalent to the expression
1248      * {@link #lanewise(VectorOperators.Binary,byte)
1249      *    lanewise}{@code (}{@link VectorOperators#ADD
1250      *    ADD}{@code , e)}.
1251      *
1252      * @param e the input scalar
1253      * @return the result of adding each lane of this vector to the scalar
1254      * @see #add(Vector)
1255      * @see #broadcast(byte)
1256      * @see #add(byte,VectorMask)
1257      * @see VectorOperators#ADD
1258      * @see #lanewise(VectorOperators.Binary,Vector)
1259      * @see #lanewise(VectorOperators.Binary,byte)
1260      */
1261     @ForceInline
1262     public final
1263     ByteVector add(byte e) {
1264         return lanewise(ADD, e);
1265     }
1266 
1267     /**
1268      * {@inheritDoc} <!--workaround-->
1269      * @see #add(byte,VectorMask)
1270      */
1271     @Override
1272     @ForceInline
1273     public final ByteVector add(Vector<Byte> v,
1274                                           VectorMask<Byte> m) {
1275         return lanewise(ADD, v, m);
1276     }
1277 
1278     /**
1279      * Adds this vector to the broadcast of an input scalar,
1280      * selecting lane elements controlled by a mask.
1281      *
1282      * This is a masked lane-wise binary operation which applies
1283      * the primitive addition operation ({@code +}) to each lane.
1284      *
1285      * This method is also equivalent to the expression
1286      * {@link #lanewise(VectorOperators.Binary,byte,VectorMask)
1287      *    lanewise}{@code (}{@link VectorOperators#ADD
1288      *    ADD}{@code , s, m)}.
1289      *
1290      * @param e the input scalar
1291      * @param m the mask controlling lane selection
1292      * @return the result of adding each lane of this vector to the scalar
1293      * @see #add(Vector,VectorMask)
1294      * @see #broadcast(byte)
1295      * @see #add(byte)
1296      * @see VectorOperators#ADD
1297      * @see #lanewise(VectorOperators.Binary,Vector)
1298      * @see #lanewise(VectorOperators.Binary,byte)
1299      */
1300     @ForceInline
1301     public final ByteVector add(byte e,
1302                                           VectorMask<Byte> m) {
1303         return lanewise(ADD, e, m);
1304     }
1305 
1306     /**
1307      * {@inheritDoc} <!--workaround-->
1308      * @see #sub(byte)
1309      */
1310     @Override
1311     @ForceInline
1312     public final ByteVector sub(Vector<Byte> v) {
1313         return lanewise(SUB, v);
1314     }
1315 
1316     /**
1317      * Subtracts an input scalar from this vector.
1318      *
1319      * This is a masked lane-wise binary operation which applies
1320      * the primitive subtraction operation ({@code -}) to each lane.
1321      *
1322      * This method is also equivalent to the expression
1323      * {@link #lanewise(VectorOperators.Binary,byte)
1324      *    lanewise}{@code (}{@link VectorOperators#SUB
1325      *    SUB}{@code , e)}.
1326      *
1327      * @param e the input scalar
1328      * @return the result of subtracting the scalar from each lane of this vector
1329      * @see #sub(Vector)
1330      * @see #broadcast(byte)
1331      * @see #sub(byte,VectorMask)
1332      * @see VectorOperators#SUB
1333      * @see #lanewise(VectorOperators.Binary,Vector)
1334      * @see #lanewise(VectorOperators.Binary,byte)
1335      */
1336     @ForceInline
1337     public final ByteVector sub(byte e) {
1338         return lanewise(SUB, e);
1339     }
1340 
1341     /**
1342      * {@inheritDoc} <!--workaround-->
1343      * @see #sub(byte,VectorMask)
1344      */
1345     @Override
1346     @ForceInline
1347     public final ByteVector sub(Vector<Byte> v,
1348                                           VectorMask<Byte> m) {
1349         return lanewise(SUB, v, m);
1350     }
1351 
1352     /**
1353      * Subtracts an input scalar from this vector
1354      * under the control of a mask.
1355      *
1356      * This is a masked lane-wise binary operation which applies
1357      * the primitive subtraction operation ({@code -}) to each lane.
1358      *
1359      * This method is also equivalent to the expression
1360      * {@link #lanewise(VectorOperators.Binary,byte,VectorMask)
1361      *    lanewise}{@code (}{@link VectorOperators#SUB
1362      *    SUB}{@code , s, m)}.
1363      *
1364      * @param e the input scalar
1365      * @param m the mask controlling lane selection
1366      * @return the result of subtracting the scalar from each lane of this vector
1367      * @see #sub(Vector,VectorMask)
1368      * @see #broadcast(byte)
1369      * @see #sub(byte)
1370      * @see VectorOperators#SUB
1371      * @see #lanewise(VectorOperators.Binary,Vector)
1372      * @see #lanewise(VectorOperators.Binary,byte)
1373      */
1374     @ForceInline
1375     public final ByteVector sub(byte e,
1376                                           VectorMask<Byte> m) {
1377         return lanewise(SUB, e, m);
1378     }
1379 
1380     /**
1381      * {@inheritDoc} <!--workaround-->
1382      * @see #mul(byte)
1383      */
1384     @Override
1385     @ForceInline
1386     public final ByteVector mul(Vector<Byte> v) {
1387         return lanewise(MUL, v);
1388     }
1389 
1390     /**
1391      * Multiplies this vector by the broadcast of an input scalar.
1392      *
1393      * This is a lane-wise binary operation which applies
1394      * the primitive multiplication operation ({@code *}) to each lane.
1395      *
1396      * This method is also equivalent to the expression
1397      * {@link #lanewise(VectorOperators.Binary,byte)
1398      *    lanewise}{@code (}{@link VectorOperators#MUL
1399      *    MUL}{@code , e)}.
1400      *
1401      * @param e the input scalar
1402      * @return the result of multiplying this vector by the given scalar
1403      * @see #mul(Vector)
1404      * @see #broadcast(byte)
1405      * @see #mul(byte,VectorMask)
1406      * @see VectorOperators#MUL
1407      * @see #lanewise(VectorOperators.Binary,Vector)
1408      * @see #lanewise(VectorOperators.Binary,byte)
1409      */
1410     @ForceInline
1411     public final ByteVector mul(byte e) {
1412         return lanewise(MUL, e);
1413     }
1414 
1415     /**
1416      * {@inheritDoc} <!--workaround-->
1417      * @see #mul(byte,VectorMask)
1418      */
1419     @Override
1420     @ForceInline
1421     public final ByteVector mul(Vector<Byte> v,
1422                                           VectorMask<Byte> m) {
1423         return lanewise(MUL, v, m);
1424     }
1425 
1426     /**
1427      * Multiplies this vector by the broadcast of an input scalar,
1428      * selecting lane elements controlled by a mask.
1429      *
1430      * This is a masked lane-wise binary operation which applies
1431      * the primitive multiplication operation ({@code *}) to each lane.
1432      *
1433      * This method is also equivalent to the expression
1434      * {@link #lanewise(VectorOperators.Binary,byte,VectorMask)
1435      *    lanewise}{@code (}{@link VectorOperators#MUL
1436      *    MUL}{@code , s, m)}.
1437      *
1438      * @param e the input scalar
1439      * @param m the mask controlling lane selection
1440      * @return the result of muling each lane of this vector to the scalar
1441      * @see #mul(Vector,VectorMask)
1442      * @see #broadcast(byte)
1443      * @see #mul(byte)
1444      * @see VectorOperators#MUL
1445      * @see #lanewise(VectorOperators.Binary,Vector)
1446      * @see #lanewise(VectorOperators.Binary,byte)
1447      */
1448     @ForceInline
1449     public final ByteVector mul(byte e,
1450                                           VectorMask<Byte> m) {
1451         return lanewise(MUL, e, m);
1452     }
1453 
1454     /**
1455      * {@inheritDoc} <!--workaround-->
1456      * @apiNote If there is a zero divisor, {@code
1457      * ArithmeticException} will be thrown.
1458      */
1459     @Override
1460     @ForceInline
1461     public final ByteVector div(Vector<Byte> v) {
1462         return lanewise(DIV, v);
1463     }
1464 
1465     /**
1466      * Divides this vector by the broadcast of an input scalar.
1467      *
1468      * This is a lane-wise binary operation which applies
1469      * the primitive division operation ({@code /}) to each lane.
1470      *
1471      * This method is also equivalent to the expression
1472      * {@link #lanewise(VectorOperators.Binary,byte)
1473      *    lanewise}{@code (}{@link VectorOperators#DIV
1474      *    DIV}{@code , e)}.
1475      *
1476      * @apiNote If there is a zero divisor, {@code
1477      * ArithmeticException} will be thrown.
1478      *
1479      * @param e the input scalar
1480      * @return the result of dividing each lane of this vector by the scalar
1481      * @see #div(Vector)
1482      * @see #broadcast(byte)
1483      * @see #div(byte,VectorMask)
1484      * @see VectorOperators#DIV
1485      * @see #lanewise(VectorOperators.Binary,Vector)
1486      * @see #lanewise(VectorOperators.Binary,byte)
1487      */
1488     @ForceInline
1489     public final ByteVector div(byte e) {
1490         return lanewise(DIV, e);
1491     }
1492 
1493     /**
1494      * {@inheritDoc} <!--workaround-->
1495      * @see #div(byte,VectorMask)
1496      * @apiNote If there is a zero divisor, {@code
1497      * ArithmeticException} will be thrown.
1498      */
1499     @Override
1500     @ForceInline
1501     public final ByteVector div(Vector<Byte> v,
1502                                           VectorMask<Byte> m) {
1503         return lanewise(DIV, v, m);
1504     }
1505 
1506     /**
1507      * Divides this vector by the broadcast of an input scalar,
1508      * selecting lane elements controlled by a mask.
1509      *
1510      * This is a masked lane-wise binary operation which applies
1511      * the primitive division operation ({@code /}) to each lane.
1512      *
1513      * This method is also equivalent to the expression
1514      * {@link #lanewise(VectorOperators.Binary,byte,VectorMask)
1515      *    lanewise}{@code (}{@link VectorOperators#DIV
1516      *    DIV}{@code , s, m)}.
1517      *
1518      * @apiNote If there is a zero divisor, {@code
1519      * ArithmeticException} will be thrown.
1520      *
1521      * @param e the input scalar
1522      * @param m the mask controlling lane selection
1523      * @return the result of dividing each lane of this vector by the scalar
1524      * @see #div(Vector,VectorMask)
1525      * @see #broadcast(byte)
1526      * @see #div(byte)
1527      * @see VectorOperators#DIV
1528      * @see #lanewise(VectorOperators.Binary,Vector)
1529      * @see #lanewise(VectorOperators.Binary,byte)
1530      */
1531     @ForceInline
1532     public final ByteVector div(byte e,
1533                                           VectorMask<Byte> m) {
1534         return lanewise(DIV, e, m);
1535     }
1536 
1537     /// END OF FULL-SERVICE BINARY METHODS
1538 
1539     /// SECOND-TIER BINARY METHODS
1540     //
1541     // There are no masked versions.
1542 
1543     /**
1544      * {@inheritDoc} <!--workaround-->
1545      */
1546     @Override
1547     @ForceInline
1548     public final ByteVector min(Vector<Byte> v) {
1549         return lanewise(MIN, v);
1550     }
1551 
1552     // FIXME:  "broadcast of an input scalar" is really wordy.  Reduce?
1553     /**
1554      * Computes the smaller of this vector and the broadcast of an input scalar.
1555      *
1556      * This is a lane-wise binary operation which applies the
1557      * operation {@code Math.min()} to each pair of
1558      * corresponding lane values.
1559      *
1560      * This method is also equivalent to the expression
1561      * {@link #lanewise(VectorOperators.Binary,byte)
1562      *    lanewise}{@code (}{@link VectorOperators#MIN
1563      *    MIN}{@code , e)}.
1564      *
1565      * @param e the input scalar
1566      * @return the result of multiplying this vector by the given scalar
1567      * @see #min(Vector)
1568      * @see #broadcast(byte)
1569      * @see VectorOperators#MIN
1570      * @see #lanewise(VectorOperators.Binary,byte,VectorMask)
1571      */
1572     @ForceInline
1573     public final ByteVector min(byte e) {
1574         return lanewise(MIN, e);
1575     }
1576 
1577     /**
1578      * {@inheritDoc} <!--workaround-->
1579      */
1580     @Override
1581     @ForceInline
1582     public final ByteVector max(Vector<Byte> v) {
1583         return lanewise(MAX, v);
1584     }
1585 
1586     /**
1587      * Computes the larger of this vector and the broadcast of an input scalar.
1588      *
1589      * This is a lane-wise binary operation which applies the
1590      * operation {@code Math.max()} to each pair of
1591      * corresponding lane values.
1592      *
1593      * This method is also equivalent to the expression
1594      * {@link #lanewise(VectorOperators.Binary,byte)
1595      *    lanewise}{@code (}{@link VectorOperators#MAX
1596      *    MAX}{@code , e)}.
1597      *
1598      * @param e the input scalar
1599      * @return the result of multiplying this vector by the given scalar
1600      * @see #max(Vector)
1601      * @see #broadcast(byte)
1602      * @see VectorOperators#MAX
1603      * @see #lanewise(VectorOperators.Binary,byte,VectorMask)
1604      */
1605     @ForceInline
1606     public final ByteVector max(byte e) {
1607         return lanewise(MAX, e);
1608     }
1609 
    // common bitwise operators: and, or, not (with scalar versions)
    /**
     * Computes the bitwise logical conjunction ({@code &})
     * of this vector and a second input vector.
     *
     * This is a lane-wise binary operation which applies
     * the primitive bitwise "and" operation ({@code &})
     * to each pair of corresponding lane values.
     *
     * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Binary,Vector)
     *    lanewise}{@code (}{@link VectorOperators#AND
     *    AND}{@code , v)}.
     *
     * <p>
     * This is not a full-service named operation like
     * {@link #add(Vector) add}.  A masked version of
     * this operation is not directly available
     * but may be obtained via the masked version of
     * {@code lanewise}.
     *
     * @param v a second input vector
     * @return the bitwise {@code &} of this vector and the second input vector
     * @see #and(byte)
     * @see #or(Vector)
     * @see #not()
     * @see VectorOperators#AND
     * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
     */
    @ForceInline
    public final ByteVector and(Vector<Byte> v) {
        return lanewise(AND, v);
    }
1643 
1644     /**
1645      * Computes the bitwise logical conjunction ({@code &})
1646      * of this vector and a scalar.
1647      *
1648      * This is a lane-wise binary operation which applies the
1649      * the primitive bitwise "and" operation ({@code &})
1650      * to each pair of corresponding lane values.
1651      *
1652      * This method is also equivalent to the expression
1653      * {@link #lanewise(VectorOperators.Binary,Vector)
1654      *    lanewise}{@code (}{@link VectorOperators#AND
1655      *    AND}{@code , e)}.
1656      *
1657      * @param e an input scalar
1658      * @return the bitwise {@code &} of this vector and scalar
1659      * @see #and(Vector)
1660      * @see VectorOperators#AND
1661      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
1662      */
1663     @ForceInline
1664     public final ByteVector and(byte e) {
1665         return lanewise(AND, e);
1666     }
1667 
1668     /**
1669      * Computes the bitwise logical disjunction ({@code |})
1670      * of this vector and a second input vector.
1671      *
1672      * This is a lane-wise binary operation which applies the
1673      * the primitive bitwise "or" operation ({@code |})
1674      * to each pair of corresponding lane values.
1675      *
1676      * This method is also equivalent to the expression
1677      * {@link #lanewise(VectorOperators.Binary,Vector)
1678      *    lanewise}{@code (}{@link VectorOperators#OR
1679      *    AND}{@code , v)}.
1680      *
1681      * <p>
1682      * This is not a full-service named operation like
1683      * {@link #add(Vector) add}.  A masked version of
1684      * this operation is not directly available
1685      * but may be obtained via the masked version of
1686      * {@code lanewise}.
1687      *
1688      * @param v a second input vector
1689      * @return the bitwise {@code |} of this vector and the second input vector
1690      * @see #or(byte)
1691      * @see #and(Vector)
1692      * @see #not()
1693      * @see VectorOperators#OR
1694      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
1695      */
1696     @ForceInline
1697     public final ByteVector or(Vector<Byte> v) {
1698         return lanewise(OR, v);
1699     }
1700 
1701     /**
1702      * Computes the bitwise logical disjunction ({@code |})
1703      * of this vector and a scalar.
1704      *
1705      * This is a lane-wise binary operation which applies the
1706      * the primitive bitwise "or" operation ({@code |})
1707      * to each pair of corresponding lane values.
1708      *
1709      * This method is also equivalent to the expression
1710      * {@link #lanewise(VectorOperators.Binary,Vector)
1711      *    lanewise}{@code (}{@link VectorOperators#OR
1712      *    OR}{@code , e)}.
1713      *
1714      * @param e an input scalar
1715      * @return the bitwise {@code |} of this vector and scalar
1716      * @see #or(Vector)
1717      * @see VectorOperators#OR
1718      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
1719      */
1720     @ForceInline
1721     public final ByteVector or(byte e) {
1722         return lanewise(OR, e);
1723     }
1724 
1725 
1726 
1727     /// UNARY METHODS
1728 
1729     /**
1730      * {@inheritDoc} <!--workaround-->
1731      */
1732     @Override
1733     @ForceInline
1734     public final
1735     ByteVector neg() {
1736         return lanewise(NEG);
1737     }
1738 
1739     /**
1740      * {@inheritDoc} <!--workaround-->
1741      */
1742     @Override
1743     @ForceInline
1744     public final
1745     ByteVector abs() {
1746         return lanewise(ABS);
1747     }
1748 
    // not (~)
    /**
     * Computes the bitwise logical complement ({@code ~})
     * of this vector.
     *
     * This is a lane-wise unary operation which applies
     * the primitive bitwise "not" operation ({@code ~})
     * to each lane value.
     *
     * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Unary)
     *    lanewise}{@code (}{@link VectorOperators#NOT
     *    NOT}{@code )}.
     *
     * <p>
     * This is not a full-service named operation like
     * {@link #add(Vector) add}.  A masked version of
     * this operation is not directly available
     * but may be obtained via the masked version of
     * {@code lanewise}.
     *
     * @return the bitwise complement {@code ~} of this vector
     * @see #and(Vector)
     * @see VectorOperators#NOT
     * @see #lanewise(VectorOperators.Unary,VectorMask)
     */
    @ForceInline
    public final ByteVector not() {
        return lanewise(NOT);
    }
1779 
1780 
1781     /// COMPARISONS
1782 
1783     /**
1784      * {@inheritDoc} <!--workaround-->
1785      */
1786     @Override
1787     @ForceInline
1788     public final
1789     VectorMask<Byte> eq(Vector<Byte> v) {
1790         return compare(EQ, v);
1791     }
1792 
1793     /**
1794      * Tests if this vector is equal to an input scalar.
1795      *
1796      * This is a lane-wise binary test operation which applies
1797      * the primitive equals operation ({@code ==}) to each lane.
1798      * The result is the same as {@code compare(VectorOperators.Comparison.EQ, e)}.
1799      *
1800      * @param e the input scalar
1801      * @return the result mask of testing if this vector
1802      *         is equal to {@code e}
1803      * @see #compare(VectorOperators.Comparison,byte)
1804      */
1805     @ForceInline
1806     public final
1807     VectorMask<Byte> eq(byte e) {
1808         return compare(EQ, e);
1809     }
1810 
1811     /**
1812      * {@inheritDoc} <!--workaround-->
1813      */
1814     @Override
1815     @ForceInline
1816     public final
1817     VectorMask<Byte> lt(Vector<Byte> v) {
1818         return compare(LT, v);
1819     }
1820 
1821     /**
1822      * Tests if this vector is less than an input scalar.
1823      *
1824      * This is a lane-wise binary test operation which applies
1825      * the primitive less than operation ({@code <}) to each lane.
1826      * The result is the same as {@code compare(VectorOperators.LT, e)}.
1827      *
1828      * @param e the input scalar
1829      * @return the mask result of testing if this vector
1830      *         is less than the input scalar
1831      * @see #compare(VectorOperators.Comparison,byte)
1832      */
1833     @ForceInline
1834     public final
1835     VectorMask<Byte> lt(byte e) {
1836         return compare(LT, e);
1837     }
1838 
1839     /**
1840      * {@inheritDoc} <!--workaround-->
1841      */
1842     @Override
1843     public abstract
1844     VectorMask<Byte> test(VectorOperators.Test op);
1845 
1846     /*package-private*/
1847     @ForceInline
1848     final
1849     <M extends VectorMask<Byte>>
1850     M testTemplate(Class<M> maskType, Test op) {
1851         ByteSpecies vsp = vspecies();
1852         if (opKind(op, VO_SPECIAL)) {
1853             VectorMask<Byte> m;
1854             if (op == IS_DEFAULT) {
1855                 m = compare(EQ, (byte) 0);
1856             } else if (op == IS_NEGATIVE) {
1857                 m = compare(LT, (byte) 0);
1858             }
1859             else {
1860                 throw new AssertionError(op);
1861             }
1862             return maskType.cast(m);
1863         }
1864         int opc = opCode(op);
1865         throw new AssertionError(op);
1866     }
1867 
1868     /**
1869      * {@inheritDoc} <!--workaround-->
1870      */
1871     @Override
1872     public abstract
1873     VectorMask<Byte> test(VectorOperators.Test op,
1874                                   VectorMask<Byte> m);
1875 
1876     /*package-private*/
1877     @ForceInline
1878     final
1879     <M extends VectorMask<Byte>>
1880     M testTemplate(Class<M> maskType, Test op, M mask) {
1881         ByteSpecies vsp = vspecies();
1882         mask.check(maskType, this);
1883         if (opKind(op, VO_SPECIAL)) {
1884             VectorMask<Byte> m = mask;
1885             if (op == IS_DEFAULT) {
1886                 m = compare(EQ, (byte) 0, m);
1887             } else if (op == IS_NEGATIVE) {
1888                 m = compare(LT, (byte) 0, m);
1889             }
1890             else {
1891                 throw new AssertionError(op);
1892             }
1893             return maskType.cast(m);
1894         }
1895         int opc = opCode(op);
1896         throw new AssertionError(op);
1897     }
1898 
1899     /**
1900      * {@inheritDoc} <!--workaround-->
1901      */
1902     @Override
1903     public abstract
1904     VectorMask<Byte> compare(VectorOperators.Comparison op, Vector<Byte> v);
1905 
1906     /*package-private*/
1907     @ForceInline
1908     final
1909     <M extends VectorMask<Byte>>
1910     M compareTemplate(Class<M> maskType, Comparison op, Vector<Byte> v) {
1911         ByteVector that = (ByteVector) v;
1912         that.check(this);
1913         int opc = opCode(op);
1914         return VectorSupport.compare(
1915             opc, getClass(), maskType, byte.class, length(),
1916             this, that, null,
1917             (cond, v0, v1, m1) -> {
1918                 AbstractMask<Byte> m
1919                     = v0.bTest(cond, v1, (cond_, i, a, b)
1920                                -> compareWithOp(cond, a, b));
1921                 @SuppressWarnings("unchecked")
1922                 M m2 = (M) m;
1923                 return m2;
1924             });
1925     }
1926 
1927     /*package-private*/
1928     @ForceInline
1929     final
1930     <M extends VectorMask<Byte>>
1931     M compareTemplate(Class<M> maskType, Comparison op, Vector<Byte> v, M m) {
1932         ByteVector that = (ByteVector) v;
1933         that.check(this);
1934         m.check(maskType, this);
1935         int opc = opCode(op);
1936         return VectorSupport.compare(
1937             opc, getClass(), maskType, byte.class, length(),
1938             this, that, m,
1939             (cond, v0, v1, m1) -> {
1940                 AbstractMask<Byte> cmpM
1941                     = v0.bTest(cond, v1, (cond_, i, a, b)
1942                                -> compareWithOp(cond, a, b));
1943                 @SuppressWarnings("unchecked")
1944                 M m2 = (M) cmpM.and(m1);
1945                 return m2;
1946             });
1947     }
1948 
1949     @ForceInline
1950     private static boolean compareWithOp(int cond, byte a, byte b) {
1951         return switch (cond) {
1952             case BT_eq -> a == b;
1953             case BT_ne -> a != b;
1954             case BT_lt -> a < b;
1955             case BT_le -> a <= b;
1956             case BT_gt -> a > b;
1957             case BT_ge -> a >= b;
1958             case BT_ult -> Byte.compareUnsigned(a, b) < 0;
1959             case BT_ule -> Byte.compareUnsigned(a, b) <= 0;
1960             case BT_ugt -> Byte.compareUnsigned(a, b) > 0;
1961             case BT_uge -> Byte.compareUnsigned(a, b) >= 0;
1962             default -> throw new AssertionError();
1963         };
1964     }
1965 
1966     /**
1967      * Tests this vector by comparing it with an input scalar,
1968      * according to the given comparison operation.
1969      *
1970      * This is a lane-wise binary test operation which applies
1971      * the comparison operation to each lane.
1972      * <p>
1973      * The result is the same as
1974      * {@code compare(op, broadcast(species(), e))}.
1975      * That is, the scalar may be regarded as broadcast to
1976      * a vector of the same species, and then compared
1977      * against the original vector, using the selected
1978      * comparison operation.
1979      *
1980      * @param op the operation used to compare lane values
1981      * @param e the input scalar
1982      * @return the mask result of testing lane-wise if this vector
1983      *         compares to the input, according to the selected
1984      *         comparison operator
1985      * @see ByteVector#compare(VectorOperators.Comparison,Vector)
1986      * @see #eq(byte)
1987      * @see #lt(byte)
1988      */
1989     public abstract
1990     VectorMask<Byte> compare(Comparison op, byte e);
1991 
1992     /*package-private*/
1993     @ForceInline
1994     final
1995     <M extends VectorMask<Byte>>
1996     M compareTemplate(Class<M> maskType, Comparison op, byte e) {
1997         return compareTemplate(maskType, op, broadcast(e));
1998     }
1999 
2000     /**
2001      * Tests this vector by comparing it with an input scalar,
2002      * according to the given comparison operation,
2003      * in lanes selected by a mask.
2004      *
2005      * This is a masked lane-wise binary test operation which applies
2006      * to each pair of corresponding lane values.
2007      *
2008      * The returned result is equal to the expression
2009      * {@code compare(op,s).and(m)}.
2010      *
2011      * @param op the operation used to compare lane values
2012      * @param e the input scalar
2013      * @param m the mask controlling lane selection
2014      * @return the mask result of testing lane-wise if this vector
2015      *         compares to the input, according to the selected
2016      *         comparison operator,
2017      *         and only in the lanes selected by the mask
2018      * @see ByteVector#compare(VectorOperators.Comparison,Vector,VectorMask)
2019      */
2020     @ForceInline
2021     public final VectorMask<Byte> compare(VectorOperators.Comparison op,
2022                                                byte e,
2023                                                VectorMask<Byte> m) {
2024         return compare(op, broadcast(e), m);
2025     }
2026 
2027     /**
2028      * {@inheritDoc} <!--workaround-->
2029      */
2030     @Override
2031     public abstract
2032     VectorMask<Byte> compare(Comparison op, long e);
2033 
2034     /*package-private*/
2035     @ForceInline
2036     final
2037     <M extends VectorMask<Byte>>
2038     M compareTemplate(Class<M> maskType, Comparison op, long e) {
2039         return compareTemplate(maskType, op, broadcast(e));
2040     }
2041 
2042     /**
2043      * {@inheritDoc} <!--workaround-->
2044      */
2045     @Override
2046     @ForceInline
2047     public final
2048     VectorMask<Byte> compare(Comparison op, long e, VectorMask<Byte> m) {
2049         return compare(op, broadcast(e), m);
2050     }
2051 
2052 
2053 
2054     /**
2055      * {@inheritDoc} <!--workaround-->
2056      */
2057     @Override public abstract
2058     ByteVector blend(Vector<Byte> v, VectorMask<Byte> m);
2059 
2060     /*package-private*/
2061     @ForceInline
2062     final
2063     <M extends VectorMask<Byte>>
2064     ByteVector
2065     blendTemplate(Class<M> maskType, ByteVector v, M m) {
2066         v.check(this);
2067         return VectorSupport.blend(
2068             getClass(), maskType, byte.class, length(),
2069             this, v, m,
2070             (v0, v1, m_) -> v0.bOp(v1, m_, (i, a, b) -> b));
2071     }
2072 
2073     /**
2074      * {@inheritDoc} <!--workaround-->
2075      */
2076     @Override public abstract ByteVector addIndex(int scale);
2077 
2078     /*package-private*/
2079     @ForceInline
2080     final ByteVector addIndexTemplate(int scale) {
2081         ByteSpecies vsp = vspecies();
2082         // make sure VLENGTH*scale doesn't overflow:
2083         vsp.checkScale(scale);
2084         return VectorSupport.indexVector(
2085             getClass(), byte.class, length(),
2086             this, scale, vsp,
2087             (v, scale_, s)
2088             -> {
2089                 // If the platform doesn't support an INDEX
2090                 // instruction directly, load IOTA from memory
2091                 // and multiply.
2092                 ByteVector iota = s.iota();
2093                 byte sc = (byte) scale_;
2094                 return v.add(sc == 1 ? iota : iota.mul(sc));
2095             });
2096     }
2097 
2098     /**
2099      * Replaces selected lanes of this vector with
2100      * a scalar value
2101      * under the control of a mask.
2102      *
2103      * This is a masked lane-wise binary operation which
2104      * selects each lane value from one or the other input.
2105      *
2106      * The returned result is equal to the expression
2107      * {@code blend(broadcast(e),m)}.
2108      *
2109      * @param e the input scalar, containing the replacement lane value
2110      * @param m the mask controlling lane selection of the scalar
2111      * @return the result of blending the lane elements of this vector with
2112      *         the scalar value
2113      */
2114     @ForceInline
2115     public final ByteVector blend(byte e,
2116                                             VectorMask<Byte> m) {
2117         return blend(broadcast(e), m);
2118     }
2119 
2120     /**
2121      * Replaces selected lanes of this vector with
2122      * a scalar value
2123      * under the control of a mask.
2124      *
2125      * This is a masked lane-wise binary operation which
2126      * selects each lane value from one or the other input.
2127      *
2128      * The returned result is equal to the expression
2129      * {@code blend(broadcast(e),m)}.
2130      *
2131      * @param e the input scalar, containing the replacement lane value
2132      * @param m the mask controlling lane selection of the scalar
2133      * @return the result of blending the lane elements of this vector with
2134      *         the scalar value
2135      */
2136     @ForceInline
2137     public final ByteVector blend(long e,
2138                                             VectorMask<Byte> m) {
2139         return blend(broadcast(e), m);
2140     }
2141 
2142     /**
2143      * {@inheritDoc} <!--workaround-->
2144      */
2145     @Override
2146     public abstract
2147     ByteVector slice(int origin, Vector<Byte> v1);
2148 
2149     /*package-private*/
2150     final
2151     @ForceInline
2152     ByteVector sliceTemplate(int origin, Vector<Byte> v1) {
2153         ByteVector that = (ByteVector) v1;
2154         that.check(this);
2155         Objects.checkIndex(origin, length() + 1);
2156         VectorShuffle<Byte> iota = iotaShuffle();
2157         VectorMask<Byte> blendMask = iota.toVector().compare(VectorOperators.LT, (broadcast((byte)(length() - origin))));
2158         iota = iotaShuffle(origin, 1, true);
2159         return that.rearrange(iota).blend(this.rearrange(iota), blendMask);
2160     }
2161 
2162     /**
2163      * {@inheritDoc} <!--workaround-->
2164      */
2165     @Override
2166     @ForceInline
2167     public final
2168     ByteVector slice(int origin,
2169                                Vector<Byte> w,
2170                                VectorMask<Byte> m) {
2171         return broadcast(0).blend(slice(origin, w), m);
2172     }
2173 
2174     /**
2175      * {@inheritDoc} <!--workaround-->
2176      */
2177     @Override
2178     public abstract
2179     ByteVector slice(int origin);
2180 
2181     /*package-private*/
2182     final
2183     @ForceInline
2184     ByteVector sliceTemplate(int origin) {
2185         Objects.checkIndex(origin, length() + 1);
2186         VectorShuffle<Byte> iota = iotaShuffle();
2187         VectorMask<Byte> blendMask = iota.toVector().compare(VectorOperators.LT, (broadcast((byte)(length() - origin))));
2188         iota = iotaShuffle(origin, 1, true);
2189         return vspecies().zero().blend(this.rearrange(iota), blendMask);
2190     }
2191 
2192     /**
2193      * {@inheritDoc} <!--workaround-->
2194      */
2195     @Override
2196     public abstract
2197     ByteVector unslice(int origin, Vector<Byte> w, int part);
2198 
2199     /*package-private*/
2200     final
2201     @ForceInline
2202     ByteVector
2203     unsliceTemplate(int origin, Vector<Byte> w, int part) {
2204         ByteVector that = (ByteVector) w;
2205         that.check(this);
2206         Objects.checkIndex(origin, length() + 1);
2207         VectorShuffle<Byte> iota = iotaShuffle();
2208         VectorMask<Byte> blendMask = iota.toVector().compare((part == 0) ? VectorOperators.GE : VectorOperators.LT,
2209                                                                   (broadcast((byte)(origin))));
2210         iota = iotaShuffle(-origin, 1, true);
2211         return that.blend(this.rearrange(iota), blendMask);
2212     }
2213 
2214     /*package-private*/
2215     final
2216     @ForceInline
2217     <M extends VectorMask<Byte>>
2218     ByteVector
2219     unsliceTemplate(Class<M> maskType, int origin, Vector<Byte> w, int part, M m) {
2220         ByteVector that = (ByteVector) w;
2221         that.check(this);
2222         ByteVector slice = that.sliceTemplate(origin, that);
2223         slice = slice.blendTemplate(maskType, this, m);
2224         return slice.unsliceTemplate(origin, w, part);
2225     }
2226 
2227     /**
2228      * {@inheritDoc} <!--workaround-->
2229      */
2230     @Override
2231     public abstract
2232     ByteVector unslice(int origin, Vector<Byte> w, int part, VectorMask<Byte> m);
2233 
2234     /**
2235      * {@inheritDoc} <!--workaround-->
2236      */
2237     @Override
2238     public abstract
2239     ByteVector unslice(int origin);
2240 
2241     /*package-private*/
2242     final
2243     @ForceInline
2244     ByteVector
2245     unsliceTemplate(int origin) {
2246         Objects.checkIndex(origin, length() + 1);
2247         VectorShuffle<Byte> iota = iotaShuffle();
2248         VectorMask<Byte> blendMask = iota.toVector().compare(VectorOperators.GE,
2249                                                                   (broadcast((byte)(origin))));
2250         iota = iotaShuffle(-origin, 1, true);
2251         return vspecies().zero().blend(this.rearrange(iota), blendMask);
2252     }
2253 
2254     private ArrayIndexOutOfBoundsException
2255     wrongPartForSlice(int part) {
2256         String msg = String.format("bad part number %d for slice operation",
2257                                    part);
2258         return new ArrayIndexOutOfBoundsException(msg);
2259     }
2260 
2261     /**
2262      * {@inheritDoc} <!--workaround-->
2263      */
2264     @Override
2265     public abstract
2266     ByteVector rearrange(VectorShuffle<Byte> m);
2267 
2268     /*package-private*/
2269     @ForceInline
2270     final
2271     <S extends VectorShuffle<Byte>>
2272     ByteVector rearrangeTemplate(Class<S> shuffletype, S shuffle) {
2273         shuffle.checkIndexes();
2274         return VectorSupport.rearrangeOp(
2275             getClass(), shuffletype, null, byte.class, length(),
2276             this, shuffle, null,
2277             (v1, s_, m_) -> v1.uOp((i, a) -> {
2278                 int ei = s_.laneSource(i);
2279                 return v1.lane(ei);
2280             }));
2281     }
2282 
2283     /**
2284      * {@inheritDoc} <!--workaround-->
2285      */
2286     @Override
2287     public abstract
2288     ByteVector rearrange(VectorShuffle<Byte> s,
2289                                    VectorMask<Byte> m);
2290 
2291     /*package-private*/
2292     @ForceInline
2293     final
2294     <S extends VectorShuffle<Byte>, M extends VectorMask<Byte>>
2295     ByteVector rearrangeTemplate(Class<S> shuffletype,
2296                                            Class<M> masktype,
2297                                            S shuffle,
2298                                            M m) {
2299 
2300         m.check(masktype, this);
2301         VectorMask<Byte> valid = shuffle.laneIsValid();
2302         if (m.andNot(valid).anyTrue()) {
2303             shuffle.checkIndexes();
2304             throw new AssertionError();
2305         }
2306         return VectorSupport.rearrangeOp(
2307                    getClass(), shuffletype, masktype, byte.class, length(),
2308                    this, shuffle, m,
2309                    (v1, s_, m_) -> v1.uOp((i, a) -> {
2310                         int ei = s_.laneSource(i);
2311                         return ei < 0  || !m_.laneIsSet(i) ? 0 : v1.lane(ei);
2312                    }));
2313     }
2314 
2315     /**
2316      * {@inheritDoc} <!--workaround-->
2317      */
2318     @Override
2319     public abstract
2320     ByteVector rearrange(VectorShuffle<Byte> s,
2321                                    Vector<Byte> v);
2322 
2323     /*package-private*/
2324     @ForceInline
2325     final
2326     <S extends VectorShuffle<Byte>>
2327     ByteVector rearrangeTemplate(Class<S> shuffletype,
2328                                            S shuffle,
2329                                            ByteVector v) {
2330         VectorMask<Byte> valid = shuffle.laneIsValid();
2331         @SuppressWarnings("unchecked")
2332         S ws = (S) shuffle.wrapIndexes();
2333         ByteVector r0 =
2334             VectorSupport.rearrangeOp(
2335                 getClass(), shuffletype, null, byte.class, length(),
2336                 this, ws, null,
2337                 (v0, s_, m_) -> v0.uOp((i, a) -> {
2338                     int ei = s_.laneSource(i);
2339                     return v0.lane(ei);
2340                 }));
2341         ByteVector r1 =
2342             VectorSupport.rearrangeOp(
2343                 getClass(), shuffletype, null, byte.class, length(),
2344                 v, ws, null,
2345                 (v1, s_, m_) -> v1.uOp((i, a) -> {
2346                     int ei = s_.laneSource(i);
2347                     return v1.lane(ei);
2348                 }));
2349         return r1.blend(r0, valid);
2350     }
2351 
2352     @ForceInline
2353     private final
2354     VectorShuffle<Byte> toShuffle0(ByteSpecies dsp) {
2355         byte[] a = toArray();
2356         int[] sa = new int[a.length];
2357         for (int i = 0; i < a.length; i++) {
2358             sa[i] = (int) a[i];
2359         }
2360         return VectorShuffle.fromArray(dsp, sa, 0);
2361     }
2362 
    /*package-private*/
    // Shared implementation for subclasses' toShuffle(): converts this
    // vector into a shuffle via the CAST conversion intrinsic, with
    // toShuffle0 as the scalar fallback.
    @ForceInline
    final
    VectorShuffle<Byte> toShuffleTemplate(Class<?> shuffleType) {
        ByteSpecies vsp = vspecies();
        return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST,
                                     getClass(), byte.class, length(),
                                     shuffleType, byte.class, length(),
                                     this, vsp,
                                     ByteVector::toShuffle0);
    }
2374 
2375     /**
2376      * {@inheritDoc} <!--workaround-->
2377      */
2378     @Override
2379     public abstract
2380     ByteVector selectFrom(Vector<Byte> v);
2381 
2382     /*package-private*/
2383     @ForceInline
2384     final ByteVector selectFromTemplate(ByteVector v) {
2385         return v.rearrange(this.toShuffle());
2386     }
2387 
2388     /**
2389      * {@inheritDoc} <!--workaround-->
2390      */
2391     @Override
2392     public abstract
2393     ByteVector selectFrom(Vector<Byte> s, VectorMask<Byte> m);
2394 
2395     /*package-private*/
2396     @ForceInline
2397     final ByteVector selectFromTemplate(ByteVector v,
2398                                                   AbstractMask<Byte> m) {
2399         return v.rearrange(this.toShuffle(), m);
2400     }
2401 
2402     /// Ternary operations
2403 
2404     /**
2405      * Blends together the bits of two vectors under
2406      * the control of a third, which supplies mask bits.
2407      *
2408      * This is a lane-wise ternary operation which performs
2409      * a bitwise blending operation {@code (a&~c)|(b&c)}
2410      * to each lane.
2411      *
2412      * This method is also equivalent to the expression
2413      * {@link #lanewise(VectorOperators.Ternary,Vector,Vector)
2414      *    lanewise}{@code (}{@link VectorOperators#BITWISE_BLEND
2415      *    BITWISE_BLEND}{@code , bits, mask)}.
2416      *
2417      * @param bits input bits to blend into the current vector
2418      * @param mask a bitwise mask to enable blending of the input bits
2419      * @return the bitwise blend of the given bits into the current vector,
2420      *         under control of the bitwise mask
2421      * @see #bitwiseBlend(byte,byte)
2422      * @see #bitwiseBlend(byte,Vector)
2423      * @see #bitwiseBlend(Vector,byte)
2424      * @see VectorOperators#BITWISE_BLEND
2425      * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
2426      */
2427     @ForceInline
2428     public final
2429     ByteVector bitwiseBlend(Vector<Byte> bits, Vector<Byte> mask) {
2430         return lanewise(BITWISE_BLEND, bits, mask);
2431     }
2432 
2433     /**
2434      * Blends together the bits of a vector and a scalar under
2435      * the control of another scalar, which supplies mask bits.
2436      *
2437      * This is a lane-wise ternary operation which performs
2438      * a bitwise blending operation {@code (a&~c)|(b&c)}
2439      * to each lane.
2440      *
2441      * This method is also equivalent to the expression
2442      * {@link #lanewise(VectorOperators.Ternary,Vector,Vector)
2443      *    lanewise}{@code (}{@link VectorOperators#BITWISE_BLEND
2444      *    BITWISE_BLEND}{@code , bits, mask)}.
2445      *
2446      * @param bits input bits to blend into the current vector
2447      * @param mask a bitwise mask to enable blending of the input bits
2448      * @return the bitwise blend of the given bits into the current vector,
2449      *         under control of the bitwise mask
2450      * @see #bitwiseBlend(Vector,Vector)
2451      * @see VectorOperators#BITWISE_BLEND
2452      * @see #lanewise(VectorOperators.Ternary,byte,byte,VectorMask)
2453      */
2454     @ForceInline
2455     public final
2456     ByteVector bitwiseBlend(byte bits, byte mask) {
2457         return lanewise(BITWISE_BLEND, bits, mask);
2458     }
2459 
2460     /**
2461      * Blends together the bits of a vector and a scalar under
2462      * the control of another vector, which supplies mask bits.
2463      *
2464      * This is a lane-wise ternary operation which performs
2465      * a bitwise blending operation {@code (a&~c)|(b&c)}
2466      * to each lane.
2467      *
2468      * This method is also equivalent to the expression
2469      * {@link #lanewise(VectorOperators.Ternary,Vector,Vector)
2470      *    lanewise}{@code (}{@link VectorOperators#BITWISE_BLEND
2471      *    BITWISE_BLEND}{@code , bits, mask)}.
2472      *
2473      * @param bits input bits to blend into the current vector
2474      * @param mask a bitwise mask to enable blending of the input bits
2475      * @return the bitwise blend of the given bits into the current vector,
2476      *         under control of the bitwise mask
2477      * @see #bitwiseBlend(Vector,Vector)
2478      * @see VectorOperators#BITWISE_BLEND
2479      * @see #lanewise(VectorOperators.Ternary,byte,Vector,VectorMask)
2480      */
2481     @ForceInline
2482     public final
2483     ByteVector bitwiseBlend(byte bits, Vector<Byte> mask) {
2484         return lanewise(BITWISE_BLEND, bits, mask);
2485     }
2486 
2487     /**
2488      * Blends together the bits of two vectors under
2489      * the control of a scalar, which supplies mask bits.
2490      *
2491      * This is a lane-wise ternary operation which performs
2492      * a bitwise blending operation {@code (a&~c)|(b&c)}
2493      * to each lane.
2494      *
2495      * This method is also equivalent to the expression
2496      * {@link #lanewise(VectorOperators.Ternary,Vector,Vector)
2497      *    lanewise}{@code (}{@link VectorOperators#BITWISE_BLEND
2498      *    BITWISE_BLEND}{@code , bits, mask)}.
2499      *
2500      * @param bits input bits to blend into the current vector
2501      * @param mask a bitwise mask to enable blending of the input bits
2502      * @return the bitwise blend of the given bits into the current vector,
2503      *         under control of the bitwise mask
2504      * @see #bitwiseBlend(Vector,Vector)
2505      * @see VectorOperators#BITWISE_BLEND
2506      * @see #lanewise(VectorOperators.Ternary,Vector,byte,VectorMask)
2507      */
2508     @ForceInline
2509     public final
2510     ByteVector bitwiseBlend(Vector<Byte> bits, byte mask) {
2511         return lanewise(BITWISE_BLEND, bits, mask);
2512     }
2513 
2514 
2515     // Type specific horizontal reductions
2516 
2517     /**
2518      * Returns a value accumulated from all the lanes of this vector.
2519      *
2520      * This is an associative cross-lane reduction operation which
2521      * applies the specified operation to all the lane elements.
2522      * <p>
2523      * A few reduction operations do not support arbitrary reordering
2524      * of their operands, yet are included here because of their
2525      * usefulness.
2526      * <ul>
2527      * <li>
2528      * In the case of {@code FIRST_NONZERO}, the reduction returns
2529      * the value from the lowest-numbered non-zero lane.
2530      * <li>
2531      * All other reduction operations are fully commutative and
2532      * associative.  The implementation can choose any order of
2533      * processing, yet it will always produce the same result.
2534      * </ul>
2535      *
2536      * @param op the operation used to combine lane values
2537      * @return the accumulated result
2538      * @throws UnsupportedOperationException if this vector does
2539      *         not support the requested operation
2540      * @see #reduceLanes(VectorOperators.Associative,VectorMask)
2541      * @see #add(Vector)
2542      * @see #mul(Vector)
2543      * @see #min(Vector)
2544      * @see #max(Vector)
2545      * @see #and(Vector)
2546      * @see #or(Vector)
2547      * @see VectorOperators#XOR
2548      * @see VectorOperators#FIRST_NONZERO
2549      */
2550     public abstract byte reduceLanes(VectorOperators.Associative op);
2551 
2552     /**
2553      * Returns a value accumulated from selected lanes of this vector,
2554      * controlled by a mask.
2555      *
2556      * This is an associative cross-lane reduction operation which
2557      * applies the specified operation to the selected lane elements.
2558      * <p>
2559      * If no elements are selected, an operation-specific identity
2560      * value is returned.
2561      * <ul>
2562      * <li>
2563      * If the operation is
2564      *  {@code ADD}, {@code XOR}, {@code OR},
2565      * or {@code FIRST_NONZERO},
2566      * then the identity value is zero, the default {@code byte} value.
2567      * <li>
2568      * If the operation is {@code MUL},
2569      * then the identity value is one.
2570      * <li>
2571      * If the operation is {@code AND},
2572      * then the identity value is minus one (all bits set).
2573      * <li>
2574      * If the operation is {@code MAX},
2575      * then the identity value is {@code Byte.MIN_VALUE}.
2576      * <li>
2577      * If the operation is {@code MIN},
2578      * then the identity value is {@code Byte.MAX_VALUE}.
2579      * </ul>
2580      * <p>
2581      * A few reduction operations do not support arbitrary reordering
2582      * of their operands, yet are included here because of their
2583      * usefulness.
2584      * <ul>
2585      * <li>
2586      * In the case of {@code FIRST_NONZERO}, the reduction returns
2587      * the value from the lowest-numbered non-zero lane.
2588      * <li>
2589      * All other reduction operations are fully commutative and
2590      * associative.  The implementation can choose any order of
2591      * processing, yet it will always produce the same result.
2592      * </ul>
2593      *
2594      * @param op the operation used to combine lane values
2595      * @param m the mask controlling lane selection
2596      * @return the reduced result accumulated from the selected lane values
2597      * @throws UnsupportedOperationException if this vector does
2598      *         not support the requested operation
2599      * @see #reduceLanes(VectorOperators.Associative)
2600      */
2601     public abstract byte reduceLanes(VectorOperators.Associative op,
2602                                        VectorMask<Byte> m);
2603 
    /*package-private*/
    // Shared masked-reduction implementation used by concrete subclasses.
    @ForceInline
    final
    byte reduceLanesTemplate(VectorOperators.Associative op,
                               Class<? extends VectorMask<Byte>> maskClass,
                               VectorMask<Byte> m) {
        m.check(maskClass, this);
        if (op == FIRST_NONZERO) {
            // FIXME:  The JIT should handle this.
            // Zero out unselected lanes, then reuse the unmasked reduction.
            ByteVector v = broadcast((byte) 0).blend(this, m);
            return v.reduceLanesTemplate(op);
        }
        int opc = opCode(op);
        // Hand off to the JIT intrinsic; the cached lambda from
        // reductionOperations is the scalar fallback.
        return fromBits(VectorSupport.reductionCoerced(
            opc, getClass(), maskClass, byte.class, length(),
            this, m,
            REDUCE_IMPL.find(op, opc, ByteVector::reductionOperations)));
    }
2622 
    /*package-private*/
    // Shared unmasked-reduction implementation used by concrete subclasses.
    @ForceInline
    final
    byte reduceLanesTemplate(VectorOperators.Associative op) {
        if (op == FIRST_NONZERO) {
            // FIXME:  The JIT should handle this.
            // Return the first lane that compares non-equal to zero,
            // or zero when every lane is zero.
            VectorMask<Byte> thisNZ
                = this.viewAsIntegralLanes().compare(NE, (byte) 0);
            int ft = thisNZ.firstTrue();
            return ft < length() ? this.lane(ft) : (byte) 0;
        }
        int opc = opCode(op);
        // Hand off to the JIT intrinsic; the cached lambda from
        // reductionOperations is the scalar fallback.
        return fromBits(VectorSupport.reductionCoerced(
            opc, getClass(), null, byte.class, length(),
            this, null,
            REDUCE_IMPL.find(op, opc, ByteVector::reductionOperations)));
    }
2640 
    // Cache of scalar fallback reduction implementations, keyed by the
    // associative operator's opcode.
    private static final
    ImplCache<Associative, ReductionOperation<ByteVector, VectorMask<Byte>>>
        REDUCE_IMPL = new ImplCache<>(Associative.class, ByteVector.class);
2644 
2645     private static ReductionOperation<ByteVector, VectorMask<Byte>> reductionOperations(int opc_) {
2646         switch (opc_) {
2647             case VECTOR_OP_ADD: return (v, m) ->
2648                     toBits(v.rOp((byte)0, m, (i, a, b) -> (byte)(a + b)));
2649             case VECTOR_OP_MUL: return (v, m) ->
2650                     toBits(v.rOp((byte)1, m, (i, a, b) -> (byte)(a * b)));
2651             case VECTOR_OP_MIN: return (v, m) ->
2652                     toBits(v.rOp(MAX_OR_INF, m, (i, a, b) -> (byte) Math.min(a, b)));
2653             case VECTOR_OP_MAX: return (v, m) ->
2654                     toBits(v.rOp(MIN_OR_INF, m, (i, a, b) -> (byte) Math.max(a, b)));
2655             case VECTOR_OP_AND: return (v, m) ->
2656                     toBits(v.rOp((byte)-1, m, (i, a, b) -> (byte)(a & b)));
2657             case VECTOR_OP_OR: return (v, m) ->
2658                     toBits(v.rOp((byte)0, m, (i, a, b) -> (byte)(a | b)));
2659             case VECTOR_OP_XOR: return (v, m) ->
2660                     toBits(v.rOp((byte)0, m, (i, a, b) -> (byte)(a ^ b)));
2661             default: return null;
2662         }
2663     }
2664 
    // Reduction identity values: MIN_OR_INF is the identity for MAX and
    // MAX_OR_INF the identity for MIN (byte has no infinities, so the
    // extreme finite values stand in).
    private static final byte MIN_OR_INF = Byte.MIN_VALUE;
    private static final byte MAX_OR_INF = Byte.MAX_VALUE;
2667 
2668     public @Override abstract long reduceLanesToLong(VectorOperators.Associative op);
2669     public @Override abstract long reduceLanesToLong(VectorOperators.Associative op,
2670                                                      VectorMask<Byte> m);
2671 
2672     // Type specific accessors
2673 
2674     /**
2675      * Gets the lane element at lane index {@code i}
2676      *
2677      * @param i the lane index
2678      * @return the lane element at lane index {@code i}
2679      * @throws IllegalArgumentException if the index is is out of range
2680      * ({@code < 0 || >= length()})
2681      */
2682     public abstract byte lane(int i);
2683 
2684     /**
2685      * Replaces the lane element of this vector at lane index {@code i} with
2686      * value {@code e}.
2687      *
2688      * This is a cross-lane operation and behaves as if it returns the result
2689      * of blending this vector with an input vector that is the result of
2690      * broadcasting {@code e} and a mask that has only one lane set at lane
2691      * index {@code i}.
2692      *
2693      * @param i the lane index of the lane element to be replaced
2694      * @param e the value to be placed
2695      * @return the result of replacing the lane element of this vector at lane
2696      * index {@code i} with value {@code e}.
2697      * @throws IllegalArgumentException if the index is is out of range
2698      * ({@code < 0 || >= length()})
2699      */
2700     public abstract ByteVector withLane(int i, byte e);
2701 
2702     // Memory load operations
2703 
2704     /**
2705      * Returns an array of type {@code byte[]}
2706      * containing all the lane values.
2707      * The array length is the same as the vector length.
2708      * The array elements are stored in lane order.
2709      * <p>
2710      * This method behaves as if it stores
2711      * this vector into an allocated array
2712      * (using {@link #intoArray(byte[], int) intoArray})
2713      * and returns the array as follows:
2714      * <pre>{@code
2715      *   byte[] a = new byte[this.length()];
2716      *   this.intoArray(a, 0);
2717      *   return a;
2718      * }</pre>
2719      *
2720      * @return an array containing the lane values of this vector
2721      */
2722     @ForceInline
2723     @Override
2724     public final byte[] toArray() {
2725         byte[] a = new byte[vspecies().laneCount()];
2726         intoArray(a, 0);
2727         return a;
2728     }
2729 
2730     /** {@inheritDoc} <!--workaround-->
2731      * @implNote
2732      * When this method is used on used on vectors
2733      * of type {@code ByteVector},
2734      * there will be no loss of precision or range,
2735      * and so no {@code UnsupportedOperationException} will
2736      * be thrown.
2737      */
2738     @ForceInline
2739     @Override
2740     public final int[] toIntArray() {
2741         byte[] a = toArray();
2742         int[] res = new int[a.length];
2743         for (int i = 0; i < a.length; i++) {
2744             byte e = a[i];
2745             res[i] = (int) ByteSpecies.toIntegralChecked(e, true);
2746         }
2747         return res;
2748     }
2749 
2750     /** {@inheritDoc} <!--workaround-->
2751      * @implNote
2752      * When this method is used on used on vectors
2753      * of type {@code ByteVector},
2754      * there will be no loss of precision or range,
2755      * and so no {@code UnsupportedOperationException} will
2756      * be thrown.
2757      */
2758     @ForceInline
2759     @Override
2760     public final long[] toLongArray() {
2761         byte[] a = toArray();
2762         long[] res = new long[a.length];
2763         for (int i = 0; i < a.length; i++) {
2764             byte e = a[i];
2765             res[i] = ByteSpecies.toIntegralChecked(e, false);
2766         }
2767         return res;
2768     }
2769 
2770     /** {@inheritDoc} <!--workaround-->
2771      * @implNote
2772      * When this method is used on used on vectors
2773      * of type {@code ByteVector},
2774      * there will be no loss of precision.
2775      */
2776     @ForceInline
2777     @Override
2778     public final double[] toDoubleArray() {
2779         byte[] a = toArray();
2780         double[] res = new double[a.length];
2781         for (int i = 0; i < a.length; i++) {
2782             res[i] = (double) a[i];
2783         }
2784         return res;
2785     }
2786 
2787     /**
2788      * Loads a vector from a byte array starting at an offset.
2789      * Bytes are composed into primitive lane elements according
2790      * to the specified byte order.
2791      * The vector is arranged into lanes according to
2792      * <a href="Vector.html#lane-order">memory ordering</a>.
2793      * <p>
2794      * This method behaves as if it returns the result of calling
2795      * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
2796      * fromByteBuffer()} as follows:
2797      * <pre>{@code
2798      * var bb = ByteBuffer.wrap(a);
2799      * var m = species.maskAll(true);
2800      * return fromByteBuffer(species, bb, offset, bo, m);
2801      * }</pre>
2802      *
2803      * @param species species of desired vector
2804      * @param a the byte array
2805      * @param offset the offset into the array
2806      * @param bo the intended byte order
2807      * @return a vector loaded from a byte array
2808      * @throws IndexOutOfBoundsException
2809      *         if {@code offset+N*ESIZE < 0}
2810      *         or {@code offset+(N+1)*ESIZE > a.length}
2811      *         for any lane {@code N} in the vector
2812      */
2813     @ForceInline
2814     public static
2815     ByteVector fromByteArray(VectorSpecies<Byte> species,
2816                                        byte[] a, int offset,
2817                                        ByteOrder bo) {
2818         offset = checkFromIndexSize(offset, species.vectorByteSize(), a.length);
2819         ByteSpecies vsp = (ByteSpecies) species;
2820         return vsp.dummyVector().fromByteArray0(a, offset).maybeSwap(bo);
2821     }
2822 
2823     /**
2824      * Loads a vector from a byte array starting at an offset
2825      * and using a mask.
2826      * Lanes where the mask is unset are filled with the default
2827      * value of {@code byte} (zero).
2828      * Bytes are composed into primitive lane elements according
2829      * to the specified byte order.
2830      * The vector is arranged into lanes according to
2831      * <a href="Vector.html#lane-order">memory ordering</a>.
2832      * <p>
2833      * This method behaves as if it returns the result of calling
2834      * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
2835      * fromByteBuffer()} as follows:
2836      * <pre>{@code
2837      * var bb = ByteBuffer.wrap(a);
2838      * return fromByteBuffer(species, bb, offset, bo, m);
2839      * }</pre>
2840      *
2841      * @param species species of desired vector
2842      * @param a the byte array
2843      * @param offset the offset into the array
2844      * @param bo the intended byte order
2845      * @param m the mask controlling lane selection
2846      * @return a vector loaded from a byte array
2847      * @throws IndexOutOfBoundsException
2848      *         if {@code offset+N*ESIZE < 0}
2849      *         or {@code offset+(N+1)*ESIZE > a.length}
2850      *         for any lane {@code N} in the vector
2851      *         where the mask is set
2852      */
2853     @ForceInline
2854     public static
2855     ByteVector fromByteArray(VectorSpecies<Byte> species,
2856                                        byte[] a, int offset,
2857                                        ByteOrder bo,
2858                                        VectorMask<Byte> m) {
2859         ByteSpecies vsp = (ByteSpecies) species;
2860         if (offset >= 0 && offset <= (a.length - species.vectorByteSize())) {
2861             return vsp.dummyVector().fromByteArray0(a, offset, m).maybeSwap(bo);
2862         }
2863 
2864         // FIXME: optimize
2865         checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
2866         ByteBuffer wb = wrapper(a, bo);
2867         return vsp.ldOp(wb, offset, (AbstractMask<Byte>)m,
2868                    (wb_, o, i)  -> wb_.get(o + i * 1));
2869     }
2870 
2871     /**
2872      * Loads a vector from an array of type {@code byte[]}
2873      * starting at an offset.
2874      * For each vector lane, where {@code N} is the vector lane index, the
2875      * array element at index {@code offset + N} is placed into the
2876      * resulting vector at lane index {@code N}.
2877      *
2878      * @param species species of desired vector
2879      * @param a the array
2880      * @param offset the offset into the array
2881      * @return the vector loaded from an array
2882      * @throws IndexOutOfBoundsException
2883      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
2884      *         for any lane {@code N} in the vector
2885      */
2886     @ForceInline
2887     public static
2888     ByteVector fromArray(VectorSpecies<Byte> species,
2889                                    byte[] a, int offset) {
2890         offset = checkFromIndexSize(offset, species.length(), a.length);
2891         ByteSpecies vsp = (ByteSpecies) species;
2892         return vsp.dummyVector().fromArray0(a, offset);
2893     }
2894 
2895     /**
2896      * Loads a vector from an array of type {@code byte[]}
2897      * starting at an offset and using a mask.
2898      * Lanes where the mask is unset are filled with the default
2899      * value of {@code byte} (zero).
2900      * For each vector lane, where {@code N} is the vector lane index,
2901      * if the mask lane at index {@code N} is set then the array element at
2902      * index {@code offset + N} is placed into the resulting vector at lane index
2903      * {@code N}, otherwise the default element value is placed into the
2904      * resulting vector at lane index {@code N}.
2905      *
2906      * @param species species of desired vector
2907      * @param a the array
2908      * @param offset the offset into the array
2909      * @param m the mask controlling lane selection
2910      * @return the vector loaded from an array
2911      * @throws IndexOutOfBoundsException
2912      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
2913      *         for any lane {@code N} in the vector
2914      *         where the mask is set
2915      */
2916     @ForceInline
2917     public static
2918     ByteVector fromArray(VectorSpecies<Byte> species,
2919                                    byte[] a, int offset,
2920                                    VectorMask<Byte> m) {
2921         ByteSpecies vsp = (ByteSpecies) species;
2922         if (offset >= 0 && offset <= (a.length - species.length())) {
2923             return vsp.dummyVector().fromArray0(a, offset, m);
2924         }
2925 
2926         // FIXME: optimize
2927         checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
2928         return vsp.vOp(m, i -> a[offset + i]);
2929     }
2930 
2931     /**
2932      * Gathers a new vector composed of elements from an array of type
2933      * {@code byte[]},
2934      * using indexes obtained by adding a fixed {@code offset} to a
2935      * series of secondary offsets from an <em>index map</em>.
2936      * The index map is a contiguous sequence of {@code VLENGTH}
2937      * elements in a second array of {@code int}s, starting at a given
2938      * {@code mapOffset}.
2939      * <p>
2940      * For each vector lane, where {@code N} is the vector lane index,
2941      * the lane is loaded from the array
2942      * element {@code a[f(N)]}, where {@code f(N)} is the
2943      * index mapping expression
2944      * {@code offset + indexMap[mapOffset + N]]}.
2945      *
2946      * @param species species of desired vector
2947      * @param a the array
2948      * @param offset the offset into the array, may be negative if relative
2949      * indexes in the index map compensate to produce a value within the
2950      * array bounds
2951      * @param indexMap the index map
2952      * @param mapOffset the offset into the index map
2953      * @return the vector loaded from the indexed elements of the array
2954      * @throws IndexOutOfBoundsException
2955      *         if {@code mapOffset+N < 0}
2956      *         or if {@code mapOffset+N >= indexMap.length},
2957      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
2958      *         is an invalid index into {@code a},
2959      *         for any lane {@code N} in the vector
2960      * @see ByteVector#toIntArray()
2961      */
2962     @ForceInline
2963     public static
2964     ByteVector fromArray(VectorSpecies<Byte> species,
2965                                    byte[] a, int offset,
2966                                    int[] indexMap, int mapOffset) {
2967         ByteSpecies vsp = (ByteSpecies) species;
2968         return vsp.vOp(n -> a[offset + indexMap[mapOffset + n]]);
2969     }
2970 
2971     /**
2972      * Gathers a new vector composed of elements from an array of type
2973      * {@code byte[]},
2974      * under the control of a mask, and
2975      * using indexes obtained by adding a fixed {@code offset} to a
2976      * series of secondary offsets from an <em>index map</em>.
2977      * The index map is a contiguous sequence of {@code VLENGTH}
2978      * elements in a second array of {@code int}s, starting at a given
2979      * {@code mapOffset}.
2980      * <p>
2981      * For each vector lane, where {@code N} is the vector lane index,
2982      * if the lane is set in the mask,
2983      * the lane is loaded from the array
2984      * element {@code a[f(N)]}, where {@code f(N)} is the
2985      * index mapping expression
2986      * {@code offset + indexMap[mapOffset + N]]}.
2987      * Unset lanes in the resulting vector are set to zero.
2988      *
2989      * @param species species of desired vector
2990      * @param a the array
2991      * @param offset the offset into the array, may be negative if relative
2992      * indexes in the index map compensate to produce a value within the
2993      * array bounds
2994      * @param indexMap the index map
2995      * @param mapOffset the offset into the index map
2996      * @param m the mask controlling lane selection
2997      * @return the vector loaded from the indexed elements of the array
2998      * @throws IndexOutOfBoundsException
2999      *         if {@code mapOffset+N < 0}
3000      *         or if {@code mapOffset+N >= indexMap.length},
3001      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
3002      *         is an invalid index into {@code a},
3003      *         for any lane {@code N} in the vector
3004      *         where the mask is set
3005      * @see ByteVector#toIntArray()
3006      */
3007     @ForceInline
3008     public static
3009     ByteVector fromArray(VectorSpecies<Byte> species,
3010                                    byte[] a, int offset,
3011                                    int[] indexMap, int mapOffset,
3012                                    VectorMask<Byte> m) {
3013         ByteSpecies vsp = (ByteSpecies) species;
3014         return vsp.vOp(m, n -> a[offset + indexMap[mapOffset + n]]);
3015     }
3016 
3017 
3018     /**
3019      * Loads a vector from an array of type {@code boolean[]}
3020      * starting at an offset.
3021      * For each vector lane, where {@code N} is the vector lane index, the
3022      * array element at index {@code offset + N}
3023      * is first converted to a {@code byte} value and then
3024      * placed into the resulting vector at lane index {@code N}.
3025      * <p>
3026      * A {@code boolean} value is converted to a {@code byte} value by applying the
3027      * expression {@code (byte) (b ? 1 : 0)}, where {@code b} is the {@code boolean} value.
3028      *
3029      * @param species species of desired vector
3030      * @param a the array
3031      * @param offset the offset into the array
3032      * @return the vector loaded from an array
3033      * @throws IndexOutOfBoundsException
3034      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
3035      *         for any lane {@code N} in the vector
3036      */
3037     @ForceInline
3038     public static
3039     ByteVector fromBooleanArray(VectorSpecies<Byte> species,
3040                                           boolean[] a, int offset) {
3041         offset = checkFromIndexSize(offset, species.length(), a.length);
3042         ByteSpecies vsp = (ByteSpecies) species;
3043         return vsp.dummyVector().fromBooleanArray0(a, offset);
3044     }
3045 
3046     /**
3047      * Loads a vector from an array of type {@code boolean[]}
3048      * starting at an offset and using a mask.
3049      * Lanes where the mask is unset are filled with the default
3050      * value of {@code byte} (zero).
3051      * For each vector lane, where {@code N} is the vector lane index,
3052      * if the mask lane at index {@code N} is set then the array element at
3053      * index {@code offset + N}
3054      * is first converted to a {@code byte} value and then
3055      * placed into the resulting vector at lane index
3056      * {@code N}, otherwise the default element value is placed into the
3057      * resulting vector at lane index {@code N}.
3058      * <p>
3059      * A {@code boolean} value is converted to a {@code byte} value by applying the
3060      * expression {@code (byte) (b ? 1 : 0)}, where {@code b} is the {@code boolean} value.
3061      *
3062      * @param species species of desired vector
3063      * @param a the array
3064      * @param offset the offset into the array
3065      * @param m the mask controlling lane selection
3066      * @return the vector loaded from an array
3067      * @throws IndexOutOfBoundsException
3068      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
3069      *         for any lane {@code N} in the vector
3070      *         where the mask is set
3071      */
3072     @ForceInline
3073     public static
3074     ByteVector fromBooleanArray(VectorSpecies<Byte> species,
3075                                           boolean[] a, int offset,
3076                                           VectorMask<Byte> m) {
3077         ByteSpecies vsp = (ByteSpecies) species;
3078         if (offset >= 0 && offset <= (a.length - species.length())) {
3079             ByteVector zero = vsp.zero();
3080             return vsp.dummyVector().fromBooleanArray0(a, offset, m);
3081         }
3082 
3083         // FIXME: optimize
3084         checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
3085         return vsp.vOp(m, i -> (byte) (a[offset + i] ? 1 : 0));
3086     }
3087 
3088     /**
3089      * Gathers a new vector composed of elements from an array of type
3090      * {@code boolean[]},
3091      * using indexes obtained by adding a fixed {@code offset} to a
3092      * series of secondary offsets from an <em>index map</em>.
3093      * The index map is a contiguous sequence of {@code VLENGTH}
3094      * elements in a second array of {@code int}s, starting at a given
3095      * {@code mapOffset}.
3096      * <p>
3097      * For each vector lane, where {@code N} is the vector lane index,
3098      * the lane is loaded from the expression
3099      * {@code (byte) (a[f(N)] ? 1 : 0)}, where {@code f(N)} is the
3100      * index mapping expression
3101      * {@code offset + indexMap[mapOffset + N]]}.
3102      *
3103      * @param species species of desired vector
3104      * @param a the array
3105      * @param offset the offset into the array, may be negative if relative
3106      * indexes in the index map compensate to produce a value within the
3107      * array bounds
3108      * @param indexMap the index map
3109      * @param mapOffset the offset into the index map
3110      * @return the vector loaded from the indexed elements of the array
3111      * @throws IndexOutOfBoundsException
3112      *         if {@code mapOffset+N < 0}
3113      *         or if {@code mapOffset+N >= indexMap.length},
3114      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
3115      *         is an invalid index into {@code a},
3116      *         for any lane {@code N} in the vector
3117      * @see ByteVector#toIntArray()
3118      */
3119     @ForceInline
3120     public static
3121     ByteVector fromBooleanArray(VectorSpecies<Byte> species,
3122                                           boolean[] a, int offset,
3123                                           int[] indexMap, int mapOffset) {
3124         // FIXME: optimize
3125         ByteSpecies vsp = (ByteSpecies) species;
3126         return vsp.vOp(n -> (byte) (a[offset + indexMap[mapOffset + n]] ? 1 : 0));
3127     }
3128 
3129     /**
3130      * Gathers a new vector composed of elements from an array of type
3131      * {@code boolean[]},
3132      * under the control of a mask, and
3133      * using indexes obtained by adding a fixed {@code offset} to a
3134      * series of secondary offsets from an <em>index map</em>.
3135      * The index map is a contiguous sequence of {@code VLENGTH}
3136      * elements in a second array of {@code int}s, starting at a given
3137      * {@code mapOffset}.
3138      * <p>
3139      * For each vector lane, where {@code N} is the vector lane index,
3140      * if the lane is set in the mask,
3141      * the lane is loaded from the expression
3142      * {@code (byte) (a[f(N)] ? 1 : 0)}, where {@code f(N)} is the
3143      * index mapping expression
3144      * {@code offset + indexMap[mapOffset + N]]}.
3145      * Unset lanes in the resulting vector are set to zero.
3146      *
3147      * @param species species of desired vector
3148      * @param a the array
3149      * @param offset the offset into the array, may be negative if relative
3150      * indexes in the index map compensate to produce a value within the
3151      * array bounds
3152      * @param indexMap the index map
3153      * @param mapOffset the offset into the index map
3154      * @param m the mask controlling lane selection
3155      * @return the vector loaded from the indexed elements of the array
3156      * @throws IndexOutOfBoundsException
3157      *         if {@code mapOffset+N < 0}
3158      *         or if {@code mapOffset+N >= indexMap.length},
3159      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
3160      *         is an invalid index into {@code a},
3161      *         for any lane {@code N} in the vector
3162      *         where the mask is set
3163      * @see ByteVector#toIntArray()
3164      */
    @ForceInline
    public static
    ByteVector fromBooleanArray(VectorSpecies<Byte> species,
                                          boolean[] a, int offset,
                                          int[] indexMap, int mapOffset,
                                          VectorMask<Byte> m) {
        // FIXME: optimize
        // Scalar fallback: build the vector lane-by-lane, mapping each
        // boolean to 1 or 0.  Unset lanes are left at zero by vOp, per the
        // contract documented above; ordinary array indexing supplies the
        // required IndexOutOfBoundsException for set lanes.
        ByteSpecies vsp = (ByteSpecies) species;
        return vsp.vOp(m, n -> (byte) (a[offset + indexMap[mapOffset + n]] ? 1 : 0));
    }
3175 
3176     /**
3177      * Loads a vector from a {@linkplain ByteBuffer byte buffer}
3178      * starting at an offset into the byte buffer.
3179      * Bytes are composed into primitive lane elements according
3180      * to the specified byte order.
3181      * The vector is arranged into lanes according to
3182      * <a href="Vector.html#lane-order">memory ordering</a>.
3183      * <p>
3184      * This method behaves as if it returns the result of calling
3185      * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
3186      * fromByteBuffer()} as follows:
3187      * <pre>{@code
3188      * var m = species.maskAll(true);
3189      * return fromByteBuffer(species, bb, offset, bo, m);
3190      * }</pre>
3191      *
3192      * @param species species of desired vector
3193      * @param bb the byte buffer
3194      * @param offset the offset into the byte buffer
3195      * @param bo the intended byte order
3196      * @return a vector loaded from a byte buffer
3197      * @throws IndexOutOfBoundsException
3198      *         if {@code offset+N*1 < 0}
3199      *         or {@code offset+N*1 >= bb.limit()}
3200      *         for any lane {@code N} in the vector
3201      */
3202     @ForceInline
3203     public static
3204     ByteVector fromByteBuffer(VectorSpecies<Byte> species,
3205                                         ByteBuffer bb, int offset,
3206                                         ByteOrder bo) {
3207         offset = checkFromIndexSize(offset, species.vectorByteSize(), bb.limit());
3208         ByteSpecies vsp = (ByteSpecies) species;
3209         return vsp.dummyVector().fromByteBuffer0(bb, offset).maybeSwap(bo);
3210     }
3211 
3212     /**
3213      * Loads a vector from a {@linkplain ByteBuffer byte buffer}
3214      * starting at an offset into the byte buffer
3215      * and using a mask.
3216      * Lanes where the mask is unset are filled with the default
3217      * value of {@code byte} (zero).
3218      * Bytes are composed into primitive lane elements according
3219      * to the specified byte order.
3220      * The vector is arranged into lanes according to
3221      * <a href="Vector.html#lane-order">memory ordering</a>.
3222      * <p>
3223      * The following pseudocode illustrates the behavior:
3224      * <pre>{@code
3225      * ByteBuffer eb = bb.duplicate()
3226      *     .position(offset);
3227      * byte[] ar = new byte[species.length()];
3228      * for (int n = 0; n < ar.length; n++) {
3229      *     if (m.laneIsSet(n)) {
3230      *         ar[n] = eb.get(n);
3231      *     }
3232      * }
3233      * ByteVector r = ByteVector.fromArray(species, ar, 0);
3234      * }</pre>
3235      * @implNote
3236      * The byte order argument is ignored.
3237      *
3238      * @param species species of desired vector
3239      * @param bb the byte buffer
3240      * @param offset the offset into the byte buffer
3241      * @param bo the intended byte order
3242      * @param m the mask controlling lane selection
3243      * @return a vector loaded from a byte buffer
3244      * @throws IndexOutOfBoundsException
3245      *         if {@code offset+N*1 < 0}
3246      *         or {@code offset+N*1 >= bb.limit()}
3247      *         for any lane {@code N} in the vector
3248      *         where the mask is set
3249      */
3250     @ForceInline
3251     public static
3252     ByteVector fromByteBuffer(VectorSpecies<Byte> species,
3253                                         ByteBuffer bb, int offset,
3254                                         ByteOrder bo,
3255                                         VectorMask<Byte> m) {
3256         ByteSpecies vsp = (ByteSpecies) species;
3257         if (offset >= 0 && offset <= (bb.limit() - species.vectorByteSize())) {
3258             return vsp.dummyVector().fromByteBuffer0(bb, offset, m).maybeSwap(bo);
3259         }
3260 
3261         // FIXME: optimize
3262         checkMaskFromIndexSize(offset, vsp, m, 1, bb.limit());
3263         ByteBuffer wb = wrapper(bb, bo);
3264         return vsp.ldOp(wb, offset, (AbstractMask<Byte>)m,
3265                    (wb_, o, i)  -> wb_.get(o + i * 1));
3266     }
3267 
3268     // Memory store operations
3269 
3270     /**
3271      * Stores this vector into an array of type {@code byte[]}
3272      * starting at an offset.
3273      * <p>
3274      * For each vector lane, where {@code N} is the vector lane index,
3275      * the lane element at index {@code N} is stored into the array
3276      * element {@code a[offset+N]}.
3277      *
3278      * @param a the array, of type {@code byte[]}
3279      * @param offset the offset into the array
3280      * @throws IndexOutOfBoundsException
3281      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
3282      *         for any lane {@code N} in the vector
3283      */
3284     @ForceInline
3285     public final
3286     void intoArray(byte[] a, int offset) {
3287         offset = checkFromIndexSize(offset, length(), a.length);
3288         ByteSpecies vsp = vspecies();
3289         VectorSupport.store(
3290             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3291             a, arrayAddress(a, offset),
3292             this,
3293             a, offset,
3294             (arr, off, v)
3295             -> v.stOp(arr, off,
3296                       (arr_, off_, i, e) -> arr_[off_ + i] = e));
3297     }
3298 
3299     /**
3300      * Stores this vector into an array of type {@code byte[]}
3301      * starting at offset and using a mask.
3302      * <p>
3303      * For each vector lane, where {@code N} is the vector lane index,
3304      * the lane element at index {@code N} is stored into the array
3305      * element {@code a[offset+N]}.
3306      * If the mask lane at {@code N} is unset then the corresponding
3307      * array element {@code a[offset+N]} is left unchanged.
3308      * <p>
3309      * Array range checking is done for lanes where the mask is set.
3310      * Lanes where the mask is unset are not stored and do not need
3311      * to correspond to legitimate elements of {@code a}.
3312      * That is, unset lanes may correspond to array indexes less than
3313      * zero or beyond the end of the array.
3314      *
3315      * @param a the array, of type {@code byte[]}
3316      * @param offset the offset into the array
3317      * @param m the mask controlling lane storage
3318      * @throws IndexOutOfBoundsException
3319      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
3320      *         for any lane {@code N} in the vector
3321      *         where the mask is set
3322      */
3323     @ForceInline
3324     public final
3325     void intoArray(byte[] a, int offset,
3326                    VectorMask<Byte> m) {
3327         if (m.allTrue()) {
3328             intoArray(a, offset);
3329         } else {
3330             ByteSpecies vsp = vspecies();
3331             checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
3332             intoArray0(a, offset, m);
3333         }
3334     }
3335 
3336     /**
3337      * Scatters this vector into an array of type {@code byte[]}
3338      * using indexes obtained by adding a fixed {@code offset} to a
3339      * series of secondary offsets from an <em>index map</em>.
3340      * The index map is a contiguous sequence of {@code VLENGTH}
3341      * elements in a second array of {@code int}s, starting at a given
3342      * {@code mapOffset}.
3343      * <p>
3344      * For each vector lane, where {@code N} is the vector lane index,
3345      * the lane element at index {@code N} is stored into the array
3346      * element {@code a[f(N)]}, where {@code f(N)} is the
3347      * index mapping expression
3348      * {@code offset + indexMap[mapOffset + N]]}.
3349      *
3350      * @param a the array
3351      * @param offset an offset to combine with the index map offsets
3352      * @param indexMap the index map
3353      * @param mapOffset the offset into the index map
3354      * @throws IndexOutOfBoundsException
3355      *         if {@code mapOffset+N < 0}
3356      *         or if {@code mapOffset+N >= indexMap.length},
3357      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
3358      *         is an invalid index into {@code a},
3359      *         for any lane {@code N} in the vector
3360      * @see ByteVector#toIntArray()
3361      */
3362     @ForceInline
3363     public final
3364     void intoArray(byte[] a, int offset,
3365                    int[] indexMap, int mapOffset) {
3366         stOp(a, offset,
3367              (arr, off, i, e) -> {
3368                  int j = indexMap[mapOffset + i];
3369                  arr[off + j] = e;
3370              });
3371     }
3372 
3373     /**
3374      * Scatters this vector into an array of type {@code byte[]},
3375      * under the control of a mask, and
3376      * using indexes obtained by adding a fixed {@code offset} to a
3377      * series of secondary offsets from an <em>index map</em>.
3378      * The index map is a contiguous sequence of {@code VLENGTH}
3379      * elements in a second array of {@code int}s, starting at a given
3380      * {@code mapOffset}.
3381      * <p>
3382      * For each vector lane, where {@code N} is the vector lane index,
3383      * if the mask lane at index {@code N} is set then
3384      * the lane element at index {@code N} is stored into the array
3385      * element {@code a[f(N)]}, where {@code f(N)} is the
3386      * index mapping expression
3387      * {@code offset + indexMap[mapOffset + N]]}.
3388      *
3389      * @param a the array
3390      * @param offset an offset to combine with the index map offsets
3391      * @param indexMap the index map
3392      * @param mapOffset the offset into the index map
3393      * @param m the mask
3394      * @throws IndexOutOfBoundsException
3395      *         if {@code mapOffset+N < 0}
3396      *         or if {@code mapOffset+N >= indexMap.length},
3397      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
3398      *         is an invalid index into {@code a},
3399      *         for any lane {@code N} in the vector
3400      *         where the mask is set
3401      * @see ByteVector#toIntArray()
3402      */
3403     @ForceInline
3404     public final
3405     void intoArray(byte[] a, int offset,
3406                    int[] indexMap, int mapOffset,
3407                    VectorMask<Byte> m) {
3408         stOp(a, offset, m,
3409              (arr, off, i, e) -> {
3410                  int j = indexMap[mapOffset + i];
3411                  arr[off + j] = e;
3412              });
3413     }
3414 
3415 
3416     /**
3417      * Stores this vector into an array of type {@code boolean[]}
3418      * starting at an offset.
3419      * <p>
3420      * For each vector lane, where {@code N} is the vector lane index,
3421      * the lane element at index {@code N}
3422      * is first converted to a {@code boolean} value and then
3423      * stored into the array element {@code a[offset+N]}.
3424      * <p>
3425      * A {@code byte} value is converted to a {@code boolean} value by applying the
3426      * expression {@code (b & 1) != 0} where {@code b} is the byte value.
3427      *
3428      * @param a the array
3429      * @param offset the offset into the array
3430      * @throws IndexOutOfBoundsException
3431      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
3432      *         for any lane {@code N} in the vector
3433      */
3434     @ForceInline
3435     public final
3436     void intoBooleanArray(boolean[] a, int offset) {
3437         offset = checkFromIndexSize(offset, length(), a.length);
3438         ByteSpecies vsp = vspecies();
3439         ByteVector normalized = this.and((byte) 1);
3440         VectorSupport.store(
3441             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3442             a, booleanArrayAddress(a, offset),
3443             normalized,
3444             a, offset,
3445             (arr, off, v)
3446             -> v.stOp(arr, off,
3447                       (arr_, off_, i, e) -> arr_[off_ + i] = (e & 1) != 0));
3448     }
3449 
3450     /**
3451      * Stores this vector into an array of type {@code boolean[]}
3452      * starting at offset and using a mask.
3453      * <p>
3454      * For each vector lane, where {@code N} is the vector lane index,
3455      * the lane element at index {@code N}
3456      * is first converted to a {@code boolean} value and then
3457      * stored into the array element {@code a[offset+N]}.
3458      * If the mask lane at {@code N} is unset then the corresponding
3459      * array element {@code a[offset+N]} is left unchanged.
3460      * <p>
3461      * A {@code byte} value is converted to a {@code boolean} value by applying the
3462      * expression {@code (b & 1) != 0} where {@code b} is the byte value.
3463      * <p>
3464      * Array range checking is done for lanes where the mask is set.
3465      * Lanes where the mask is unset are not stored and do not need
3466      * to correspond to legitimate elements of {@code a}.
3467      * That is, unset lanes may correspond to array indexes less than
3468      * zero or beyond the end of the array.
3469      *
3470      * @param a the array
3471      * @param offset the offset into the array
3472      * @param m the mask controlling lane storage
3473      * @throws IndexOutOfBoundsException
3474      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
3475      *         for any lane {@code N} in the vector
3476      *         where the mask is set
3477      */
3478     @ForceInline
3479     public final
3480     void intoBooleanArray(boolean[] a, int offset,
3481                           VectorMask<Byte> m) {
3482         if (m.allTrue()) {
3483             intoBooleanArray(a, offset);
3484         } else {
3485             ByteSpecies vsp = vspecies();
3486             checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
3487             intoBooleanArray0(a, offset, m);
3488         }
3489     }
3490 
3491     /**
3492      * Scatters this vector into an array of type {@code boolean[]}
3493      * using indexes obtained by adding a fixed {@code offset} to a
3494      * series of secondary offsets from an <em>index map</em>.
3495      * The index map is a contiguous sequence of {@code VLENGTH}
3496      * elements in a second array of {@code int}s, starting at a given
3497      * {@code mapOffset}.
3498      * <p>
3499      * For each vector lane, where {@code N} is the vector lane index,
3500      * the lane element at index {@code N}
3501      * is first converted to a {@code boolean} value and then
3502      * stored into the array
3503      * element {@code a[f(N)]}, where {@code f(N)} is the
3504      * index mapping expression
3505      * {@code offset + indexMap[mapOffset + N]]}.
3506      * <p>
3507      * A {@code byte} value is converted to a {@code boolean} value by applying the
3508      * expression {@code (b & 1) != 0} where {@code b} is the byte value.
3509      *
3510      * @param a the array
3511      * @param offset an offset to combine with the index map offsets
3512      * @param indexMap the index map
3513      * @param mapOffset the offset into the index map
3514      * @throws IndexOutOfBoundsException
3515      *         if {@code mapOffset+N < 0}
3516      *         or if {@code mapOffset+N >= indexMap.length},
3517      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
3518      *         is an invalid index into {@code a},
3519      *         for any lane {@code N} in the vector
3520      * @see ByteVector#toIntArray()
3521      */
3522     @ForceInline
3523     public final
3524     void intoBooleanArray(boolean[] a, int offset,
3525                           int[] indexMap, int mapOffset) {
3526         // FIXME: optimize
3527         stOp(a, offset,
3528              (arr, off, i, e) -> {
3529                  int j = indexMap[mapOffset + i];
3530                  arr[off + j] = (e & 1) != 0;
3531              });
3532     }
3533 
3534     /**
3535      * Scatters this vector into an array of type {@code boolean[]},
3536      * under the control of a mask, and
3537      * using indexes obtained by adding a fixed {@code offset} to a
3538      * series of secondary offsets from an <em>index map</em>.
3539      * The index map is a contiguous sequence of {@code VLENGTH}
3540      * elements in a second array of {@code int}s, starting at a given
3541      * {@code mapOffset}.
3542      * <p>
3543      * For each vector lane, where {@code N} is the vector lane index,
3544      * if the mask lane at index {@code N} is set then
3545      * the lane element at index {@code N}
3546      * is first converted to a {@code boolean} value and then
3547      * stored into the array
3548      * element {@code a[f(N)]}, where {@code f(N)} is the
3549      * index mapping expression
3550      * {@code offset + indexMap[mapOffset + N]]}.
3551      * <p>
3552      * A {@code byte} value is converted to a {@code boolean} value by applying the
3553      * expression {@code (b & 1) != 0} where {@code b} is the byte value.
3554      *
3555      * @param a the array
3556      * @param offset an offset to combine with the index map offsets
3557      * @param indexMap the index map
3558      * @param mapOffset the offset into the index map
3559      * @param m the mask
3560      * @throws IndexOutOfBoundsException
3561      *         if {@code mapOffset+N < 0}
3562      *         or if {@code mapOffset+N >= indexMap.length},
3563      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
3564      *         is an invalid index into {@code a},
3565      *         for any lane {@code N} in the vector
3566      *         where the mask is set
3567      * @see ByteVector#toIntArray()
3568      */
3569     @ForceInline
3570     public final
3571     void intoBooleanArray(boolean[] a, int offset,
3572                           int[] indexMap, int mapOffset,
3573                           VectorMask<Byte> m) {
3574         // FIXME: optimize
3575         stOp(a, offset, m,
3576              (arr, off, i, e) -> {
3577                  int j = indexMap[mapOffset + i];
3578                  arr[off + j] = (e & 1) != 0;
3579              });
3580     }
3581 
3582     /**
3583      * {@inheritDoc} <!--workaround-->
3584      */
3585     @Override
3586     @ForceInline
3587     public final
3588     void intoByteArray(byte[] a, int offset,
3589                        ByteOrder bo) {
3590         offset = checkFromIndexSize(offset, byteSize(), a.length);
3591         maybeSwap(bo).intoByteArray0(a, offset);
3592     }
3593 
3594     /**
3595      * {@inheritDoc} <!--workaround-->
3596      */
3597     @Override
3598     @ForceInline
3599     public final
3600     void intoByteArray(byte[] a, int offset,
3601                        ByteOrder bo,
3602                        VectorMask<Byte> m) {
3603         if (m.allTrue()) {
3604             intoByteArray(a, offset, bo);
3605         } else {
3606             ByteSpecies vsp = vspecies();
3607             checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
3608             maybeSwap(bo).intoByteArray0(a, offset, m);
3609         }
3610     }
3611 
3612     /**
3613      * {@inheritDoc} <!--workaround-->
3614      */
3615     @Override
3616     @ForceInline
3617     public final
3618     void intoByteBuffer(ByteBuffer bb, int offset,
3619                         ByteOrder bo) {
3620         if (ScopedMemoryAccess.isReadOnly(bb)) {
3621             throw new ReadOnlyBufferException();
3622         }
3623         offset = checkFromIndexSize(offset, byteSize(), bb.limit());
3624         maybeSwap(bo).intoByteBuffer0(bb, offset);
3625     }
3626 
3627     /**
3628      * {@inheritDoc} <!--workaround-->
3629      */
3630     @Override
3631     @ForceInline
3632     public final
3633     void intoByteBuffer(ByteBuffer bb, int offset,
3634                         ByteOrder bo,
3635                         VectorMask<Byte> m) {
3636         if (m.allTrue()) {
3637             intoByteBuffer(bb, offset, bo);
3638         } else {
3639             if (bb.isReadOnly()) {
3640                 throw new ReadOnlyBufferException();
3641             }
3642             ByteSpecies vsp = vspecies();
3643             checkMaskFromIndexSize(offset, vsp, m, 1, bb.limit());
3644             maybeSwap(bo).intoByteBuffer0(bb, offset, m);
3645         }
3646     }
3647 
3648     // ================================================
3649 
3650     // Low-level memory operations.
3651     //
3652     // Note that all of these operations *must* inline into a context
3653     // where the exact species of the involved vector is a
3654     // compile-time constant.  Otherwise, the intrinsic generation
3655     // will fail and performance will suffer.
3656     //
3657     // In many cases this is achieved by re-deriving a version of the
3658     // method in each concrete subclass (per species).  The re-derived
3659     // method simply calls one of these generic methods, with exact
3660     // parameters for the controlling metadata, which is either a
3661     // typed vector or constant species instance.
3662 
3663     // Unchecked loading operations in native byte order.
3664     // Caller is responsible for applying index checks, masking, and
3665     // byte swapping.
3666 
    /*package-private*/
    abstract
    ByteVector fromArray0(byte[] a, int offset);
    // Unchecked load from byte[]; per the section comment above, the
    // caller has already applied index checks.  The trailing lambda is the
    // scalar fallback used when the intrinsic does not apply.
    @ForceInline
    final
    ByteVector fromArray0Template(byte[] a, int offset) {
        ByteSpecies vsp = vspecies();
        return VectorSupport.load(
            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
            a, arrayAddress(a, offset),
            a, offset, vsp,
            (arr, off, s) -> s.ldOp(arr, off,
                                    (arr_, off_, i) -> arr_[off_ + i]));
    }

    /*package-private*/
    abstract
    ByteVector fromArray0(byte[] a, int offset, VectorMask<Byte> m);
    // Masked variant of the above; unset lanes default to zero in ldOp.
    @ForceInline
    final
    <M extends VectorMask<Byte>>
    ByteVector fromArray0Template(Class<M> maskClass, byte[] a, int offset, M m) {
        m.check(species());
        ByteSpecies vsp = vspecies();
        return VectorSupport.loadMasked(
            vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
            a, arrayAddress(a, offset), m,
            a, offset, vsp,
            (arr, off, s, vm) -> s.ldOp(arr, off, vm,
                                        (arr_, off_, i) -> arr_[off_ + i]));
    }
3698 
3699 
3700 
    /*package-private*/
    abstract
    ByteVector fromBooleanArray0(boolean[] a, int offset);
    // Unchecked load from boolean[], converting each element to the byte
    // 1 or 0; caller has already applied index checks.
    @ForceInline
    final
    ByteVector fromBooleanArray0Template(boolean[] a, int offset) {
        ByteSpecies vsp = vspecies();
        return VectorSupport.load(
            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
            a, booleanArrayAddress(a, offset),
            a, offset, vsp,
            (arr, off, s) -> s.ldOp(arr, off,
                                    (arr_, off_, i) -> (byte) (arr_[off_ + i] ? 1 : 0)));
    }

    /*package-private*/
    abstract
    ByteVector fromBooleanArray0(boolean[] a, int offset, VectorMask<Byte> m);
    // Masked variant of the above.
    @ForceInline
    final
    <M extends VectorMask<Byte>>
    ByteVector fromBooleanArray0Template(Class<M> maskClass, boolean[] a, int offset, M m) {
        m.check(species());
        ByteSpecies vsp = vspecies();
        return VectorSupport.loadMasked(
            vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
            a, booleanArrayAddress(a, offset), m,
            a, offset, vsp,
            (arr, off, s, vm) -> s.ldOp(arr, off, vm,
                                        (arr_, off_, i) -> (byte) (arr_[off_ + i] ? 1 : 0)));
    }
3732 
    @Override
    abstract
    ByteVector fromByteArray0(byte[] a, int offset);
    // Unchecked re-interpreting load from a raw byte[] in native order;
    // the fallback reads through a ByteBuffer wrapper.
    @ForceInline
    final
    ByteVector fromByteArray0Template(byte[] a, int offset) {
        ByteSpecies vsp = vspecies();
        return VectorSupport.load(
            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
            a, byteArrayAddress(a, offset),
            a, offset, vsp,
            (arr, off, s) -> {
                ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
                return s.ldOp(wb, off,
                        (wb_, o, i) -> wb_.get(o + i * 1));
            });
    }

    abstract
    ByteVector fromByteArray0(byte[] a, int offset, VectorMask<Byte> m);
    // Masked variant of the above.
    @ForceInline
    final
    <M extends VectorMask<Byte>>
    ByteVector fromByteArray0Template(Class<M> maskClass, byte[] a, int offset, M m) {
        ByteSpecies vsp = vspecies();
        m.check(vsp);
        return VectorSupport.loadMasked(
            vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
            a, byteArrayAddress(a, offset), m,
            a, offset, vsp,
            (arr, off, s, vm) -> {
                ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
                return s.ldOp(wb, off, vm,
                        (wb_, o, i) -> wb_.get(o + i * 1));
            });
    }
3769 
    abstract
    ByteVector fromByteBuffer0(ByteBuffer bb, int offset);
    // Unchecked load from a ByteBuffer in native order; routed through
    // ScopedMemoryAccess so buffer-backed memory sessions are honored.
    @ForceInline
    final
    ByteVector fromByteBuffer0Template(ByteBuffer bb, int offset) {
        ByteSpecies vsp = vspecies();
        return ScopedMemoryAccess.loadFromByteBuffer(
                vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
                bb, offset, vsp,
                (buf, off, s) -> {
                    ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
                    return s.ldOp(wb, off,
                            (wb_, o, i) -> wb_.get(o + i * 1));
                });
    }

    abstract
    ByteVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Byte> m);
    // Masked variant of the above.
    @ForceInline
    final
    <M extends VectorMask<Byte>>
    ByteVector fromByteBuffer0Template(Class<M> maskClass, ByteBuffer bb, int offset, M m) {
        ByteSpecies vsp = vspecies();
        m.check(vsp);
        return ScopedMemoryAccess.loadFromByteBufferMasked(
                vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
                bb, offset, m, vsp,
                (buf, off, s, vm) -> {
                    ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
                    return s.ldOp(wb, off, vm,
                            (wb_, o, i) -> wb_.get(o + i * 1));
                });
    }
3803 
3804     // Unchecked storing operations in native byte order.
3805     // Caller is responsible for applying index checks, masking, and
3806     // byte swapping.
3807 
    abstract
    void intoArray0(byte[] a, int offset);
    // Unchecked store into byte[]; per the section comment above, the
    // caller has already applied index checks.
    @ForceInline
    final
    void intoArray0Template(byte[] a, int offset) {
        ByteSpecies vsp = vspecies();
        VectorSupport.store(
            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
            a, arrayAddress(a, offset),
            this, a, offset,
            (arr, off, v)
            -> v.stOp(arr, off,
                      (arr_, off_, i, e) -> arr_[off_+i] = e));
    }

    abstract
    void intoArray0(byte[] a, int offset, VectorMask<Byte> m);
    // Masked variant of the above; unset lanes are not written.
    @ForceInline
    final
    <M extends VectorMask<Byte>>
    void intoArray0Template(Class<M> maskClass, byte[] a, int offset, M m) {
        m.check(species());
        ByteSpecies vsp = vspecies();
        VectorSupport.storeMasked(
            vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
            a, arrayAddress(a, offset),
            this, m, a, offset,
            (arr, off, v, vm)
            -> v.stOp(arr, off, vm,
                      (arr_, off_, i, e) -> arr_[off_ + i] = e));
    }
3839 
3840 
    abstract
    void intoBooleanArray0(boolean[] a, int offset, VectorMask<Byte> m);
    // Unchecked masked store into boolean[]; lanes are normalized to 0/1
    // first so the stored value matches (e & 1) != 0.
    @ForceInline
    final
    <M extends VectorMask<Byte>>
    void intoBooleanArray0Template(Class<M> maskClass, boolean[] a, int offset, M m) {
        m.check(species());
        ByteSpecies vsp = vspecies();
        ByteVector normalized = this.and((byte) 1);
        VectorSupport.storeMasked(
            vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
            a, booleanArrayAddress(a, offset),
            normalized, m, a, offset,
            (arr, off, v, vm)
            -> v.stOp(arr, off, vm,
                      (arr_, off_, i, e) -> arr_[off_ + i] = (e & 1) != 0));
    }
3858 
    abstract
    void intoByteArray0(byte[] a, int offset);
    // Unchecked re-interpreting store into a raw byte[] in native order;
    // the fallback writes through a ByteBuffer wrapper.
    @ForceInline
    final
    void intoByteArray0Template(byte[] a, int offset) {
        ByteSpecies vsp = vspecies();
        VectorSupport.store(
            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
            a, byteArrayAddress(a, offset),
            this, a, offset,
            (arr, off, v) -> {
                ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
                v.stOp(wb, off,
                        (tb_, o, i, e) -> tb_.put(o + i * 1, e));
            });
    }

    abstract
    void intoByteArray0(byte[] a, int offset, VectorMask<Byte> m);
    // Masked variant of the above.
    @ForceInline
    final
    <M extends VectorMask<Byte>>
    void intoByteArray0Template(Class<M> maskClass, byte[] a, int offset, M m) {
        ByteSpecies vsp = vspecies();
        m.check(vsp);
        VectorSupport.storeMasked(
            vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
            a, byteArrayAddress(a, offset),
            this, m, a, offset,
            (arr, off, v, vm) -> {
                ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
                v.stOp(wb, off, vm,
                        (tb_, o, i, e) -> tb_.put(o + i * 1, e));
            });
    }
3894 
    // Unchecked store into a ByteBuffer in native order; routed through
    // ScopedMemoryAccess so buffer-backed memory sessions are honored.
    @ForceInline
    final
    void intoByteBuffer0(ByteBuffer bb, int offset) {
        ByteSpecies vsp = vspecies();
        ScopedMemoryAccess.storeIntoByteBuffer(
                vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
                this, bb, offset,
                (buf, off, v) -> {
                    ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
                    v.stOp(wb, off,
                            (wb_, o, i, e) -> wb_.put(o + i * 1, e));
                });
    }

    abstract
    void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Byte> m);
    // Masked variant of the above.
    @ForceInline
    final
    <M extends VectorMask<Byte>>
    void intoByteBuffer0Template(Class<M> maskClass, ByteBuffer bb, int offset, M m) {
        ByteSpecies vsp = vspecies();
        m.check(vsp);
        ScopedMemoryAccess.storeIntoByteBufferMasked(
                vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
                this, m, bb, offset,
                (buf, off, v, vm) -> {
                    ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
                    v.stOp(wb, off, vm,
                            (wb_, o, i, e) -> wb_.put(o + i * 1, e));
                });
    }
3926 
3927 
3928     // End of low-level memory operations.
3929 
3930     private static
3931     void checkMaskFromIndexSize(int offset,
3932                                 ByteSpecies vsp,
3933                                 VectorMask<Byte> m,
3934                                 int scale,
3935                                 int limit) {
3936         ((AbstractMask<Byte>)m)
3937             .checkIndexByLane(offset, limit, vsp.iota(), scale);
3938     }
3939 
3940     @ForceInline
3941     private void conditionalStoreNYI(int offset,
3942                                      ByteSpecies vsp,
3943                                      VectorMask<Byte> m,
3944                                      int scale,
3945                                      int limit) {
3946         if (offset < 0 || offset + vsp.laneCount() * scale > limit) {
3947             String msg =
3948                 String.format("unimplemented: store @%d in [0..%d), %s in %s",
3949                               offset, limit, m, vsp);
3950             throw new AssertionError(msg);
3951         }
3952     }
3953 
    /*package-private*/
    @Override
    @ForceInline
    final
    ByteVector maybeSwap(ByteOrder bo) {
        // Byte lanes are a single byte wide, so byte order cannot affect
        // their representation; swapping is always a no-op for this type.
        return this;
    }
3961 
    // log2 of the per-element scale of byte[] (so index << ARRAY_SHIFT
    // converts a lane index into a byte offset within the array body).
    static final int ARRAY_SHIFT =
        31 - Integer.numberOfLeadingZeros(Unsafe.ARRAY_BYTE_INDEX_SCALE);
    // Offset of element 0 of a byte[] from the start of the array object.
    static final long ARRAY_BASE =
        Unsafe.ARRAY_BYTE_BASE_OFFSET;

    // Returns the Unsafe-style offset of a[index] within the array object.
    @ForceInline
    static long arrayAddress(byte[] a, int index) {
        return ARRAY_BASE + (((long)index) << ARRAY_SHIFT);
    }
3971 
3972 
    // log2 of the per-element scale of boolean[].
    static final int ARRAY_BOOLEAN_SHIFT =
            31 - Integer.numberOfLeadingZeros(Unsafe.ARRAY_BOOLEAN_INDEX_SCALE);
    // Offset of element 0 of a boolean[] from the start of the array object.
    static final long ARRAY_BOOLEAN_BASE =
            Unsafe.ARRAY_BOOLEAN_BASE_OFFSET;

    // Returns the Unsafe-style offset of a[index] within the array object.
    @ForceInline
    static long booleanArrayAddress(boolean[] a, int index) {
        return ARRAY_BOOLEAN_BASE + (((long)index) << ARRAY_BOOLEAN_SHIFT);
    }
3982 
    // Returns the Unsafe-style offset of a[index] within the array object;
    // for byte[] the scale is exactly 1, so no shift is needed.
    @ForceInline
    static long byteArrayAddress(byte[] a, int index) {
        return Unsafe.ARRAY_BYTE_BASE_OFFSET + index;
    }
3987 
3988     // ================================================
3989 
3990     /// Reinterpreting view methods:
3991     //   lanewise reinterpret: viewAsXVector()
3992     //   keep shape, redraw lanes: reinterpretAsEs()
3993 
3994     /**
3995      * {@inheritDoc} <!--workaround-->
3996      */
3997     @ForceInline
3998     @Override
3999     public final ByteVector reinterpretAsBytes() {
4000         return this;
4001     }
4002 
4003     /**
4004      * {@inheritDoc} <!--workaround-->
4005      */
4006     @ForceInline
4007     @Override
4008     public final ByteVector viewAsIntegralLanes() {
4009         return this;
4010     }
4011 
4012     /**
4013      * {@inheritDoc} <!--workaround-->
4014      *
4015      * @implNote This method always throws
4016      * {@code UnsupportedOperationException}, because there is no floating
4017      * point type of the same size as {@code byte}.  The return type
4018      * of this method is arbitrarily designated as
4019      * {@code Vector<?>}.  Future versions of this API may change the return
4020      * type if additional floating point types become available.
4021      */
4022     @ForceInline
4023     @Override
4024     public final
4025     Vector<?>
4026     viewAsFloatingLanes() {
4027         LaneType flt = LaneType.BYTE.asFloating();
4028         // asFloating() will throw UnsupportedOperationException for the unsupported type byte
4029         throw new AssertionError("Cannot reach here");
4030     }
4031 
4032     // ================================================
4033 
4034     /// Object methods: toString, equals, hashCode
4035     //
4036     // Object methods are defined as if via Arrays.toString, etc.,
4037     // is applied to the array of elements.  Two equal vectors
4038     // are required to have equal species and equal lane values.
4039 
4040     /**
4041      * Returns a string representation of this vector, of the form
4042      * {@code "[0,1,2...]"}, reporting the lane values of this vector,
4043      * in lane order.
4044      *
4045      * The string is produced as if by a call to {@link
4046      * java.util.Arrays#toString(byte[]) Arrays.toString()},
4047      * as appropriate to the {@code byte} array returned by
4048      * {@link #toArray this.toArray()}.
4049      *
4050      * @return a string of the form {@code "[0,1,2...]"}
4051      * reporting the lane values of this vector
4052      */
4053     @Override
4054     @ForceInline
4055     public final
4056     String toString() {
4057         // now that toArray is strongly typed, we can define this
4058         return Arrays.toString(toArray());
4059     }
4060 
4061     /**
4062      * {@inheritDoc} <!--workaround-->
4063      */
4064     @Override
4065     @ForceInline
4066     public final
4067     boolean equals(Object obj) {
4068         if (obj instanceof Vector) {
4069             Vector<?> that = (Vector<?>) obj;
4070             if (this.species().equals(that.species())) {
4071                 return this.eq(that.check(this.species())).allTrue();
4072             }
4073         }
4074         return false;
4075     }
4076 
4077     /**
4078      * {@inheritDoc} <!--workaround-->
4079      */
4080     @Override
4081     @ForceInline
4082     public final
4083     int hashCode() {
4084         // now that toArray is strongly typed, we can define this
4085         return Objects.hash(species(), Arrays.hashCode(toArray()));
4086     }
4087 
4088     // ================================================
4089 
4090     // Species
4091 
4092     /**
4093      * Class representing {@link ByteVector}'s of the same {@link VectorShape VectorShape}.
4094      */
4095     /*package-private*/
4096     static final class ByteSpecies extends AbstractSpecies<Byte> {
4097         private ByteSpecies(VectorShape shape,
4098                 Class<? extends ByteVector> vectorType,
4099                 Class<? extends AbstractMask<Byte>> maskType,
4100                 Function<Object, ByteVector> vectorFactory) {
4101             super(shape, LaneType.of(byte.class),
4102                   vectorType, maskType,
4103                   vectorFactory);
4104             assert(this.elementSize() == Byte.SIZE);
4105         }
4106 
4107         // Specializing overrides:
4108 
4109         @Override
4110         @ForceInline
4111         public final Class<Byte> elementType() {
4112             return byte.class;
4113         }
4114 
4115         @Override
4116         @ForceInline
4117         final Class<Byte> genericElementType() {
4118             return Byte.class;
4119         }
4120 
4121         @SuppressWarnings("unchecked")
4122         @Override
4123         @ForceInline
4124         public final Class<? extends ByteVector> vectorType() {
4125             return (Class<? extends ByteVector>) vectorType;
4126         }
4127 
4128         @Override
4129         @ForceInline
4130         public final long checkValue(long e) {
4131             longToElementBits(e);  // only for exception
4132             return e;
4133         }
4134 
4135         /*package-private*/
4136         @Override
4137         @ForceInline
4138         final ByteVector broadcastBits(long bits) {
4139             return (ByteVector)
4140                 VectorSupport.fromBitsCoerced(
4141                     vectorType, byte.class, laneCount,
4142                     bits, MODE_BROADCAST, this,
4143                     (bits_, s_) -> s_.rvOp(i -> bits_));
4144         }
4145 
4146         /*package-private*/
4147         @ForceInline
4148         final ByteVector broadcast(byte e) {
4149             return broadcastBits(toBits(e));
4150         }
4151 
4152         @Override
4153         @ForceInline
4154         public final ByteVector broadcast(long e) {
4155             return broadcastBits(longToElementBits(e));
4156         }
4157 
4158         /*package-private*/
4159         final @Override
4160         @ForceInline
4161         long longToElementBits(long value) {
4162             // Do the conversion, and then test it for failure.
4163             byte e = (byte) value;
4164             if ((long) e != value) {
4165                 throw badElementBits(value, e);
4166             }
4167             return toBits(e);
4168         }
4169 
4170         /*package-private*/
4171         @ForceInline
4172         static long toIntegralChecked(byte e, boolean convertToInt) {
4173             long value = convertToInt ? (int) e : (long) e;
4174             if ((byte) value != e) {
4175                 throw badArrayBits(e, convertToInt, value);
4176             }
4177             return value;
4178         }
4179 
4180         /* this non-public one is for internal conversions */
4181         @Override
4182         @ForceInline
4183         final ByteVector fromIntValues(int[] values) {
4184             VectorIntrinsics.requireLength(values.length, laneCount);
4185             byte[] va = new byte[laneCount()];
4186             for (int i = 0; i < va.length; i++) {
4187                 int lv = values[i];
4188                 byte v = (byte) lv;
4189                 va[i] = v;
4190                 if ((int)v != lv) {
4191                     throw badElementBits(lv, v);
4192                 }
4193             }
4194             return dummyVector().fromArray0(va, 0);
4195         }
4196 
4197         // Virtual constructors
4198 
4199         @ForceInline
4200         @Override final
4201         public ByteVector fromArray(Object a, int offset) {
4202             // User entry point:  Be careful with inputs.
4203             return ByteVector
4204                 .fromArray(this, (byte[]) a, offset);
4205         }
4206 
4207         @ForceInline
4208         @Override final
4209         ByteVector dummyVector() {
4210             return (ByteVector) super.dummyVector();
4211         }
4212 
4213         /*package-private*/
4214         final @Override
4215         @ForceInline
4216         ByteVector rvOp(RVOp f) {
4217             byte[] res = new byte[laneCount()];
4218             for (int i = 0; i < res.length; i++) {
4219                 byte bits = (byte) f.apply(i);
4220                 res[i] = fromBits(bits);
4221             }
4222             return dummyVector().vectorFactory(res);
4223         }
4224 
4225         ByteVector vOp(FVOp f) {
4226             byte[] res = new byte[laneCount()];
4227             for (int i = 0; i < res.length; i++) {
4228                 res[i] = f.apply(i);
4229             }
4230             return dummyVector().vectorFactory(res);
4231         }
4232 
4233         ByteVector vOp(VectorMask<Byte> m, FVOp f) {
4234             byte[] res = new byte[laneCount()];
4235             boolean[] mbits = ((AbstractMask<Byte>)m).getBits();
4236             for (int i = 0; i < res.length; i++) {
4237                 if (mbits[i]) {
4238                     res[i] = f.apply(i);
4239                 }
4240             }
4241             return dummyVector().vectorFactory(res);
4242         }
4243 
4244         /*package-private*/
4245         @ForceInline
4246         <M> ByteVector ldOp(M memory, int offset,
4247                                       FLdOp<M> f) {
4248             return dummyVector().ldOp(memory, offset, f);
4249         }
4250 
4251         /*package-private*/
4252         @ForceInline
4253         <M> ByteVector ldOp(M memory, int offset,
4254                                       VectorMask<Byte> m,
4255                                       FLdOp<M> f) {
4256             return dummyVector().ldOp(memory, offset, m, f);
4257         }
4258 
4259         /*package-private*/
4260         @ForceInline
4261         <M> void stOp(M memory, int offset, FStOp<M> f) {
4262             dummyVector().stOp(memory, offset, f);
4263         }
4264 
4265         /*package-private*/
4266         @ForceInline
4267         <M> void stOp(M memory, int offset,
4268                       AbstractMask<Byte> m,
4269                       FStOp<M> f) {
4270             dummyVector().stOp(memory, offset, m, f);
4271         }
4272 
4273         // N.B. Make sure these constant vectors and
4274         // masks load up correctly into registers.
4275         //
4276         // Also, see if we can avoid all that switching.
4277         // Could we cache both vectors and both masks in
4278         // this species object?
4279 
4280         // Zero and iota vector access
4281         @Override
4282         @ForceInline
4283         public final ByteVector zero() {
4284             if ((Class<?>) vectorType() == ByteMaxVector.class)
4285                 return ByteMaxVector.ZERO;
4286             switch (vectorBitSize()) {
4287                 case 64: return Byte64Vector.ZERO;
4288                 case 128: return Byte128Vector.ZERO;
4289                 case 256: return Byte256Vector.ZERO;
4290                 case 512: return Byte512Vector.ZERO;
4291             }
4292             throw new AssertionError();
4293         }
4294 
4295         @Override
4296         @ForceInline
4297         public final ByteVector iota() {
4298             if ((Class<?>) vectorType() == ByteMaxVector.class)
4299                 return ByteMaxVector.IOTA;
4300             switch (vectorBitSize()) {
4301                 case 64: return Byte64Vector.IOTA;
4302                 case 128: return Byte128Vector.IOTA;
4303                 case 256: return Byte256Vector.IOTA;
4304                 case 512: return Byte512Vector.IOTA;
4305             }
4306             throw new AssertionError();
4307         }
4308 
4309         // Mask access
4310         @Override
4311         @ForceInline
4312         public final VectorMask<Byte> maskAll(boolean bit) {
4313             if ((Class<?>) vectorType() == ByteMaxVector.class)
4314                 return ByteMaxVector.ByteMaxMask.maskAll(bit);
4315             switch (vectorBitSize()) {
4316                 case 64: return Byte64Vector.Byte64Mask.maskAll(bit);
4317                 case 128: return Byte128Vector.Byte128Mask.maskAll(bit);
4318                 case 256: return Byte256Vector.Byte256Mask.maskAll(bit);
4319                 case 512: return Byte512Vector.Byte512Mask.maskAll(bit);
4320             }
4321             throw new AssertionError();
4322         }
4323     }
4324 
4325     /**
4326      * Finds a species for an element type of {@code byte} and shape.
4327      *
4328      * @param s the shape
4329      * @return a species for an element type of {@code byte} and shape
4330      * @throws IllegalArgumentException if no such species exists for the shape
4331      */
4332     static ByteSpecies species(VectorShape s) {
4333         Objects.requireNonNull(s);
4334         switch (s.switchKey) {
4335             case VectorShape.SK_64_BIT: return (ByteSpecies) SPECIES_64;
4336             case VectorShape.SK_128_BIT: return (ByteSpecies) SPECIES_128;
4337             case VectorShape.SK_256_BIT: return (ByteSpecies) SPECIES_256;
4338             case VectorShape.SK_512_BIT: return (ByteSpecies) SPECIES_512;
4339             case VectorShape.SK_Max_BIT: return (ByteSpecies) SPECIES_MAX;
4340             default: throw new IllegalArgumentException("Bad shape: " + s);
4341         }
4342     }
4343 
    /** Species representing {@link ByteVector}s of {@link VectorShape#S_64_BIT VectorShape.S_64_BIT}. */
    public static final VectorSpecies<Byte> SPECIES_64
        = new ByteSpecies(VectorShape.S_64_BIT,
                            Byte64Vector.class,
                            Byte64Vector.Byte64Mask.class,
                            Byte64Vector::new);

    /** Species representing {@link ByteVector}s of {@link VectorShape#S_128_BIT VectorShape.S_128_BIT}. */
    public static final VectorSpecies<Byte> SPECIES_128
        = new ByteSpecies(VectorShape.S_128_BIT,
                            Byte128Vector.class,
                            Byte128Vector.Byte128Mask.class,
                            Byte128Vector::new);

    /** Species representing {@link ByteVector}s of {@link VectorShape#S_256_BIT VectorShape.S_256_BIT}. */
    public static final VectorSpecies<Byte> SPECIES_256
        = new ByteSpecies(VectorShape.S_256_BIT,
                            Byte256Vector.class,
                            Byte256Vector.Byte256Mask.class,
                            Byte256Vector::new);

    /** Species representing {@link ByteVector}s of {@link VectorShape#S_512_BIT VectorShape.S_512_BIT}. */
    public static final VectorSpecies<Byte> SPECIES_512
        = new ByteSpecies(VectorShape.S_512_BIT,
                            Byte512Vector.class,
                            Byte512Vector.Byte512Mask.class,
                            Byte512Vector::new);

    /** Species representing {@link ByteVector}s of {@link VectorShape#S_Max_BIT VectorShape.S_Max_BIT}. */
    public static final VectorSpecies<Byte> SPECIES_MAX
        = new ByteSpecies(VectorShape.S_Max_BIT,
                            ByteMaxVector.class,
                            ByteMaxVector.ByteMaxMask.class,
                            ByteMaxVector::new);

    /**
     * Preferred species for {@link ByteVector}s.
     * A preferred species is a species of maximal bit-size for the platform.
     */
    // The cast reflects that every byte species created in this file is a
    // ByteSpecies; ofPreferred is expected to return one of the constants
    // above — NOTE(review): confirmed only by the declarations visible here.
    public static final VectorSpecies<Byte> SPECIES_PREFERRED
        = (ByteSpecies) VectorSpecies.ofPreferred(byte.class);
4385 }