1 /*
   2  * Copyright (c) 2017, 2021, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 package jdk.incubator.vector;
  26 
  27 import java.nio.ByteBuffer;
  28 import java.nio.ByteOrder;
  29 import java.nio.ReadOnlyBufferException;
  30 import java.util.Arrays;
  31 import java.util.Objects;
  32 import java.util.function.BinaryOperator;
  33 import java.util.function.Function;
  34 import java.util.function.UnaryOperator;
  35 
  36 import jdk.internal.misc.ScopedMemoryAccess;
  37 import jdk.internal.misc.Unsafe;
  38 import jdk.internal.vm.annotation.ForceInline;
  39 import jdk.internal.vm.vector.VectorSupport;
  40 
  41 import static jdk.internal.vm.vector.VectorSupport.*;
  42 import static jdk.incubator.vector.VectorIntrinsics.*;
  43 
  44 import static jdk.incubator.vector.VectorOperators.*;
  45 
  46 // -- This file was mechanically generated: Do not edit! -- //
  47 
  48 /**
  49  * A specialized {@link Vector} representing an ordered immutable sequence of
  50  * {@code byte} values.
  51  */
  52 @SuppressWarnings("cast")  // warning: redundant cast
  53 public abstract class ByteVector extends AbstractVector<Byte> {
  54 
    // Canonical constructor: hands the lane array to AbstractVector<Byte>.
    // The array must not be aliased elsewhere, since vectors are immutable.
    ByteVector(byte[] vec) {
        super(vec);
    }
  58 
  59     static final int FORBID_OPCODE_KIND = VO_ONLYFP;
  60 
    // Maps an operator token to its implementation opcode,
    // rejecting operators of FORBID_OPCODE_KIND (FP-only).
    @ForceInline
    static int opCode(Operator op) {
        return VectorOperators.opCode(op, VO_OPCODE_VALID, FORBID_OPCODE_KIND);
    }
  65     @ForceInline
  66     static int opCode(Operator op, int requireKind) {
  67         requireKind |= VO_OPCODE_VALID;
  68         return VectorOperators.opCode(op, requireKind, FORBID_OPCODE_KIND);
  69     }
    // Tests whether an operator carries the given kind bit(s).
    @ForceInline
    static boolean opKind(Operator op, int bit) {
        return VectorOperators.opKind(op, bit);
    }
  74 
  75     // Virtualized factories and operators,
  76     // coded with portable definitions.
  77     // These are all @ForceInline in case
  78     // they need to be used performantly.
  79     // The various shape-specific subclasses
  80     // also specialize them by wrapping
  81     // them in a call like this:
  82     //    return (Byte128Vector)
  83     //       super.bOp((Byte128Vector) o);
  84     // The purpose of that is to forcibly inline
  85     // the generic definition from this file
  86     // into a sharply type- and size-specific
  87     // wrapper in the subclass file, so that
  88     // the JIT can specialize the code.
  89     // The code is only inlined and expanded
  90     // if it gets hot.  Think of it as a cheap
  91     // and lazy version of C++ templates.
  92 
  93     // Virtualized getter
  94 
    /*package-private*/
    // Accessor for the backing lane array; implemented by shape-specific subclasses.
    abstract byte[] vec();
  97 
  98     // Virtualized constructors
  99 
    /**
     * Build a vector directly using my own constructor.
     * It is an error if the array is aliased elsewhere.
     *
     * @param vec the lane array to wrap (must not be shared)
     * @return a new vector backed by {@code vec}
     */
    /*package-private*/
    abstract ByteVector vectorFactory(byte[] vec);
 106 
    /**
     * Build a mask directly using my species.
     * It is an error if the array is aliased elsewhere.
     *
     * @param bits the per-lane boolean array to wrap (must not be shared)
     * @return a mask over this vector's species
     */
    /*package-private*/
    @ForceInline
    final
    AbstractMask<Byte> maskFactory(boolean[] bits) {
        return vspecies().maskFactory(bits);
    }
 117 
    // Constant loader (takes dummy as vector arg)
    // Lambda shape for vOp: produces the value of lane i.
    interface FVOp {
        byte apply(int i);
    }
 122 
 123     /*package-private*/
 124     @ForceInline
 125     final
 126     ByteVector vOp(FVOp f) {
 127         byte[] res = new byte[length()];
 128         for (int i = 0; i < res.length; i++) {
 129             res[i] = f.apply(i);
 130         }
 131         return vectorFactory(res);
 132     }
 133 
 134     @ForceInline
 135     final
 136     ByteVector vOp(VectorMask<Byte> m, FVOp f) {
 137         byte[] res = new byte[length()];
 138         boolean[] mbits = ((AbstractMask<Byte>)m).getBits();
 139         for (int i = 0; i < res.length; i++) {
 140             if (mbits[i]) {
 141                 res[i] = f.apply(i);
 142             }
 143         }
 144         return vectorFactory(res);
 145     }
 146 
 147     // Unary operator
 148 
    /*package-private*/
    // Lambda shape for uOp: maps (lane index, lane value) to a new lane value.
    interface FUnOp {
        byte apply(int i, byte a);
    }
 153 
    /*package-private*/
    // Lane-wise unary operation; specialized (and force-inlined) in subclasses.
    abstract
    ByteVector uOp(FUnOp f);
 157     @ForceInline
 158     final
 159     ByteVector uOpTemplate(FUnOp f) {
 160         byte[] vec = vec();
 161         byte[] res = new byte[length()];
 162         for (int i = 0; i < res.length; i++) {
 163             res[i] = f.apply(i, vec[i]);
 164         }
 165         return vectorFactory(res);
 166     }
 167 
    /*package-private*/
    // Masked lane-wise unary operation; unset lanes keep their old value.
    abstract
    ByteVector uOp(VectorMask<Byte> m,
                             FUnOp f);
 172     @ForceInline
 173     final
 174     ByteVector uOpTemplate(VectorMask<Byte> m,
 175                                      FUnOp f) {
 176         byte[] vec = vec();
 177         byte[] res = new byte[length()];
 178         boolean[] mbits = ((AbstractMask<Byte>)m).getBits();
 179         for (int i = 0; i < res.length; i++) {
 180             res[i] = mbits[i] ? f.apply(i, vec[i]) : vec[i];
 181         }
 182         return vectorFactory(res);
 183     }
 184 
 185     // Binary operator
 186 
    /*package-private*/
    // Lambda shape for bOp: maps (lane index, two lane values) to a result.
    interface FBinOp {
        byte apply(int i, byte a, byte b);
    }
 191 
    /*package-private*/
    // Lane-wise binary operation; specialized in shape-specific subclasses.
    abstract
    ByteVector bOp(Vector<Byte> o,
                             FBinOp f);
 196     @ForceInline
 197     final
 198     ByteVector bOpTemplate(Vector<Byte> o,
 199                                      FBinOp f) {
 200         byte[] res = new byte[length()];
 201         byte[] vec1 = this.vec();
 202         byte[] vec2 = ((ByteVector)o).vec();
 203         for (int i = 0; i < res.length; i++) {
 204             res[i] = f.apply(i, vec1[i], vec2[i]);
 205         }
 206         return vectorFactory(res);
 207     }
 208 
    /*package-private*/
    // Masked lane-wise binary operation; unset lanes keep this vector's value.
    abstract
    ByteVector bOp(Vector<Byte> o,
                             VectorMask<Byte> m,
                             FBinOp f);
 214     @ForceInline
 215     final
 216     ByteVector bOpTemplate(Vector<Byte> o,
 217                                      VectorMask<Byte> m,
 218                                      FBinOp f) {
 219         byte[] res = new byte[length()];
 220         byte[] vec1 = this.vec();
 221         byte[] vec2 = ((ByteVector)o).vec();
 222         boolean[] mbits = ((AbstractMask<Byte>)m).getBits();
 223         for (int i = 0; i < res.length; i++) {
 224             res[i] = mbits[i] ? f.apply(i, vec1[i], vec2[i]) : vec1[i];
 225         }
 226         return vectorFactory(res);
 227     }
 228 
 229     // Ternary operator
 230 
    /*package-private*/
    // Lambda shape for tOp: maps (lane index, three lane values) to a result.
    interface FTriOp {
        byte apply(int i, byte a, byte b, byte c);
    }
 235 
    /*package-private*/
    // Lane-wise ternary operation; specialized in shape-specific subclasses.
    abstract
    ByteVector tOp(Vector<Byte> o1,
                             Vector<Byte> o2,
                             FTriOp f);
 241     @ForceInline
 242     final
 243     ByteVector tOpTemplate(Vector<Byte> o1,
 244                                      Vector<Byte> o2,
 245                                      FTriOp f) {
 246         byte[] res = new byte[length()];
 247         byte[] vec1 = this.vec();
 248         byte[] vec2 = ((ByteVector)o1).vec();
 249         byte[] vec3 = ((ByteVector)o2).vec();
 250         for (int i = 0; i < res.length; i++) {
 251             res[i] = f.apply(i, vec1[i], vec2[i], vec3[i]);
 252         }
 253         return vectorFactory(res);
 254     }
 255 
    /*package-private*/
    // Masked lane-wise ternary operation; unset lanes keep this vector's value.
    abstract
    ByteVector tOp(Vector<Byte> o1,
                             Vector<Byte> o2,
                             VectorMask<Byte> m,
                             FTriOp f);
 262     @ForceInline
 263     final
 264     ByteVector tOpTemplate(Vector<Byte> o1,
 265                                      Vector<Byte> o2,
 266                                      VectorMask<Byte> m,
 267                                      FTriOp f) {
 268         byte[] res = new byte[length()];
 269         byte[] vec1 = this.vec();
 270         byte[] vec2 = ((ByteVector)o1).vec();
 271         byte[] vec3 = ((ByteVector)o2).vec();
 272         boolean[] mbits = ((AbstractMask<Byte>)m).getBits();
 273         for (int i = 0; i < res.length; i++) {
 274             res[i] = mbits[i] ? f.apply(i, vec1[i], vec2[i], vec3[i]) : vec1[i];
 275         }
 276         return vectorFactory(res);
 277     }
 278 
 279     // Reduction operator
 280 
    /*package-private*/
    // Lane-wise reduction seeded with v; specialized in shape-specific subclasses.
    abstract
    byte rOp(byte v, FBinOp f);
 284     @ForceInline
 285     final
 286     byte rOpTemplate(byte v, FBinOp f) {
 287         byte[] vec = vec();
 288         for (int i = 0; i < vec.length; i++) {
 289             v = f.apply(i, v, vec[i]);
 290         }
 291         return v;
 292     }
 293 
 294     // Memory reference
 295 
    /*package-private*/
    // Lambda shape for ldOp: reads lane i from a memory carrier M at offset.
    interface FLdOp<M> {
        byte apply(M memory, int offset, int i);
    }
 300 
 301     /*package-private*/
 302     @ForceInline
 303     final
 304     <M> ByteVector ldOp(M memory, int offset,
 305                                   FLdOp<M> f) {
 306         //dummy; no vec = vec();
 307         byte[] res = new byte[length()];
 308         for (int i = 0; i < res.length; i++) {
 309             res[i] = f.apply(memory, offset, i);
 310         }
 311         return vectorFactory(res);
 312     }
 313 
 314     /*package-private*/
 315     @ForceInline
 316     final
 317     <M> ByteVector ldOp(M memory, int offset,
 318                                   VectorMask<Byte> m,
 319                                   FLdOp<M> f) {
 320         //byte[] vec = vec();
 321         byte[] res = new byte[length()];
 322         boolean[] mbits = ((AbstractMask<Byte>)m).getBits();
 323         for (int i = 0; i < res.length; i++) {
 324             if (mbits[i]) {
 325                 res[i] = f.apply(memory, offset, i);
 326             }
 327         }
 328         return vectorFactory(res);
 329     }
 330 
    // Lambda shape for stOp: writes lane value a at lane i into carrier M.
    interface FStOp<M> {
        void apply(M memory, int offset, int i, byte a);
    }
 334 
 335     /*package-private*/
 336     @ForceInline
 337     final
 338     <M> void stOp(M memory, int offset,
 339                   FStOp<M> f) {
 340         byte[] vec = vec();
 341         for (int i = 0; i < vec.length; i++) {
 342             f.apply(memory, offset, i, vec[i]);
 343         }
 344     }
 345 
 346     /*package-private*/
 347     @ForceInline
 348     final
 349     <M> void stOp(M memory, int offset,
 350                   VectorMask<Byte> m,
 351                   FStOp<M> f) {
 352         byte[] vec = vec();
 353         boolean[] mbits = ((AbstractMask<Byte>)m).getBits();
 354         for (int i = 0; i < vec.length; i++) {
 355             if (mbits[i]) {
 356                 f.apply(memory, offset, i, vec[i]);
 357             }
 358         }
 359     }
 360 
 361     // Binary test
 362 
    /*package-private*/
    // Lambda shape for bTest: evaluates a comparison cond on paired lane values.
    interface FBinTest {
        boolean apply(int cond, int i, byte a, byte b);
    }
 367 
 368     /*package-private*/
 369     @ForceInline
 370     final
 371     AbstractMask<Byte> bTest(int cond,
 372                                   Vector<Byte> o,
 373                                   FBinTest f) {
 374         byte[] vec1 = vec();
 375         byte[] vec2 = ((ByteVector)o).vec();
 376         boolean[] bits = new boolean[length()];
 377         for (int i = 0; i < length(); i++){
 378             bits[i] = f.apply(cond, i, vec1[i], vec2[i]);
 379         }
 380         return maskFactory(bits);
 381     }
 382 
 383     /*package-private*/
 384     @ForceInline
 385     static byte rotateLeft(byte a, int n) {
 386         return (byte)(((((byte)a) & Byte.toUnsignedInt((byte)-1)) << (n & Byte.SIZE-1)) | ((((byte)a) & Byte.toUnsignedInt((byte)-1)) >>> (Byte.SIZE - (n & Byte.SIZE-1))));
 387     }
 388 
 389     /*package-private*/
 390     @ForceInline
 391     static byte rotateRight(byte a, int n) {
 392         return (byte)(((((byte)a) & Byte.toUnsignedInt((byte)-1)) >>> (n & Byte.SIZE-1)) | ((((byte)a) & Byte.toUnsignedInt((byte)-1)) << (Byte.SIZE - (n & Byte.SIZE-1))));
 393     }
 394 
    /*package-private*/
    // Species accessor, narrowed to the byte-specific species type.
    @Override
    abstract ByteSpecies vspecies();
 398 
 399     /*package-private*/
 400     @ForceInline
 401     static long toBits(byte e) {
 402         return  e;
 403     }
 404 
 405     /*package-private*/
 406     @ForceInline
 407     static byte fromBits(long bits) {
 408         return ((byte)bits);
 409     }
 410 
 411     // Static factories (other than memory operations)
 412 
 413     // Note: A surprising behavior in javadoc
 414     // sometimes makes a lone /** {@inheritDoc} */
 415     // comment drop the method altogether,
    // apparently if the method mentions a
    // parameter or return type of Vector<Byte>
 418     // instead of Vector<E> as originally specified.
 419     // Adding an empty HTML fragment appears to
 420     // nudge javadoc into providing the desired
 421     // inherited documentation.  We use the HTML
 422     // comment <!--workaround--> for this.
 423 
    /**
     * Returns a vector of the given species
     * where all lane elements are set to
     * zero, the default primitive value.
     *
     * @param species species of the desired zero vector
     * @return a zero vector
     */
    @ForceInline
    public static ByteVector zero(VectorSpecies<Byte> species) {
        ByteSpecies vsp = (ByteSpecies) species;
        // Intrinsic broadcast of the constant 0; the trailing lambda is the
        // scalar fallback used when the JIT does not intrinsify the call.
        return VectorSupport.broadcastCoerced(vsp.vectorType(), byte.class, species.length(),
                                0, vsp,
                                ((bits_, s_) -> s_.rvOp(i -> bits_)));
    }
 439 
    /**
     * Returns a vector of the same species as this one
     * where all lane elements are set to
     * the primitive value {@code e}.
     *
     * The contents of the current vector are discarded;
     * only the species is relevant to this operation.
     *
     * <p> This method returns the value of this expression:
     * {@code ByteVector.broadcast(this.species(), e)}.
     *
     * @apiNote
     * Unlike the similar method named {@code broadcast()}
     * in the supertype {@code Vector}, this method does not
     * need to validate its argument, and cannot throw
     * {@code IllegalArgumentException}.  This method is
     * therefore preferable to the supertype method.
     *
     * @param e the value to broadcast
     * @return a vector where all lane elements are set to
     *         the primitive value {@code e}
     * @see #broadcast(VectorSpecies,long)
     * @see Vector#broadcast(long)
     * @see VectorSpecies#broadcast(long)
     */
    // Implemented by shape-specific subclasses (see broadcastTemplate).
    public abstract ByteVector broadcast(byte e);
 466 
    /**
     * Returns a vector of the given species
     * where all lane elements are set to
     * the primitive value {@code e}.
     *
     * @param species species of the desired vector
     * @param e the value to broadcast
     * @return a vector where all lane elements are set to
     *         the primitive value {@code e}
     * @see #broadcast(long)
     * @see Vector#broadcast(long)
     * @see VectorSpecies#broadcast(long)
     */
    @ForceInline
    public static ByteVector broadcast(VectorSpecies<Byte> species, byte e) {
        ByteSpecies vsp = (ByteSpecies) species;  // checked cast to the byte species
        return vsp.broadcast(e);
    }
 485 
 486     /*package-private*/
 487     @ForceInline
 488     final ByteVector broadcastTemplate(byte e) {
 489         ByteSpecies vsp = vspecies();
 490         return vsp.broadcast(e);
 491     }
 492 
    /**
     * {@inheritDoc} <!--workaround-->
     * @apiNote
     * When working with vector subtypes like {@code ByteVector},
     * {@linkplain #broadcast(byte) the more strongly typed method}
     * is typically selected.  It can be explicitly selected
     * using a cast: {@code v.broadcast((byte)e)}.
     * The two expressions will produce numerically identical results.
     */
    // Implemented by shape-specific subclasses (see broadcastTemplate(long)).
    @Override
    public abstract ByteVector broadcast(long e);
 504 
    /**
     * Returns a vector of the given species
     * where all lane elements are set to
     * the primitive value {@code e}.
     *
     * The {@code long} value must be accurately representable
     * by the {@code ETYPE} of the vector species, so that
     * {@code e==(long)(ETYPE)e}.
     *
     * @param species species of the desired vector
     * @param e the value to broadcast
     * @return a vector where all lane elements are set to
     *         the primitive value {@code e}
     * @throws IllegalArgumentException
     *         if the given {@code long} value cannot
     *         be represented by the vector's {@code ETYPE}
     * @see #broadcast(VectorSpecies,byte)
     * @see VectorSpecies#checkValue(long)
     */
    @ForceInline
    public static ByteVector broadcast(VectorSpecies<Byte> species, long e) {
        ByteSpecies vsp = (ByteSpecies) species;  // checked cast to the byte species
        return vsp.broadcast(e);
    }
 529 
    /*package-private*/
    // Portable definition: defer to the species, which validates the long value.
    @ForceInline
    final ByteVector broadcastTemplate(long e) {
        return vspecies().broadcast(e);
    }
 535 
 536     // Unary lanewise support
 537 
    /**
     * {@inheritDoc} <!--workaround-->
     */
    // Implemented by shape-specific subclasses via lanewiseTemplate.
    public abstract
    ByteVector lanewise(VectorOperators.Unary op);
 543 
    @ForceInline
    final
    ByteVector lanewiseTemplate(VectorOperators.Unary op) {
        if (opKind(op, VO_SPECIAL)) {
            if (op == ZOMO) {
                // ZOMO (zero-or-minus-one): -1 wherever a lane is nonzero.
                return blend(broadcast(-1), compare(NE, 0));
            }
            if (op == NOT) {
                // NOT(a) == -1 ^ a, expressed via the binary XOR template.
                return broadcast(-1).lanewiseTemplate(XOR, this);
            } else if (op == NEG) {
                // FIXME: Support this in the JIT.
                // NEG(a) == 0 - a, expressed via the binary SUB template.
                return broadcast(0).lanewiseTemplate(SUB, this);
            }
        }
        int opc = opCode(op);
        // Intrinsic entry point; the lambda supplies the scalar fallback
        // implementation for each supported opcode.
        return VectorSupport.unaryOp(
            opc, getClass(), byte.class, length(),
            this,
            UN_IMPL.find(op, opc, (opc_) -> {
              switch (opc_) {
                case VECTOR_OP_NEG: return v0 ->
                        v0.uOp((i, a) -> (byte) -a);
                case VECTOR_OP_ABS: return v0 ->
                        v0.uOp((i, a) -> (byte) Math.abs(a));
                default: return null;
              }}));
    }
    // Cache of scalar fallback implementations, one per unary opcode.
    private static final
    ImplCache<Unary,UnaryOperator<ByteVector>> UN_IMPL
        = new ImplCache<>(Unary.class, ByteVector.class);
 574 
    /**
     * {@inheritDoc} <!--workaround-->
     */
    @ForceInline
    public final
    ByteVector lanewise(VectorOperators.Unary op,
                                  VectorMask<Byte> m) {
        // Masked form: compute unmasked, then blend with the original lanes.
        return blend(lanewise(op), m);
    }
 584 
 585     // Binary lanewise support
 586 
    /**
     * {@inheritDoc} <!--workaround-->
     * @see #lanewise(VectorOperators.Binary,byte)
     * @see #lanewise(VectorOperators.Binary,byte,VectorMask)
     */
    // Implemented by shape-specific subclasses via lanewiseTemplate.
    @Override
    public abstract
    ByteVector lanewise(VectorOperators.Binary op,
                                  Vector<Byte> v);
    @ForceInline
    final
    ByteVector lanewiseTemplate(VectorOperators.Binary op,
                                          Vector<Byte> v) {
        ByteVector that = (ByteVector) v;
        that.check(this);  // species of both operands must match
        if (opKind(op, VO_SPECIAL  | VO_SHIFT)) {
            if (op == FIRST_NONZERO) {
                // FIXME: Support this in the JIT.
                // FIRST_NONZERO(a,b) == (a!=0 ? a : b), rewritten as an OR
                // after zeroing b's lanes wherever a is nonzero.
                VectorMask<Byte> thisNZ
                    = this.viewAsIntegralLanes().compare(NE, (byte) 0);
                that = that.blend((byte) 0, thisNZ.cast(vspecies()));
                op = OR_UNCHECKED;
            }
            if (opKind(op, VO_SHIFT)) {
                // As per shift specification for Java, mask the shift count.
                // This allows the JIT to ignore some ISA details.
                that = that.lanewise(AND, SHIFT_MASK);
            }
            if (op == AND_NOT) {
                // FIXME: Support this in the JIT.
                // AND_NOT(a,b) == AND(a, NOT(b)).
                that = that.lanewise(NOT);
                op = AND;
            } else if (op == DIV) {
                // Integer division traps on zero; check all lanes up front.
                VectorMask<Byte> eqz = that.eq((byte)0);
                if (eqz.anyTrue()) {
                    throw that.divZeroException();
                }
            }
        }
        int opc = opCode(op);
        // Intrinsic entry point; the lambda table supplies scalar fallbacks.
        return VectorSupport.binaryOp(
            opc, getClass(), byte.class, length(),
            this, that,
            BIN_IMPL.find(op, opc, (opc_) -> {
              switch (opc_) {
                case VECTOR_OP_ADD: return (v0, v1) ->
                        v0.bOp(v1, (i, a, b) -> (byte)(a + b));
                case VECTOR_OP_SUB: return (v0, v1) ->
                        v0.bOp(v1, (i, a, b) -> (byte)(a - b));
                case VECTOR_OP_MUL: return (v0, v1) ->
                        v0.bOp(v1, (i, a, b) -> (byte)(a * b));
                case VECTOR_OP_DIV: return (v0, v1) ->
                        v0.bOp(v1, (i, a, b) -> (byte)(a / b));
                case VECTOR_OP_MAX: return (v0, v1) ->
                        v0.bOp(v1, (i, a, b) -> (byte)Math.max(a, b));
                case VECTOR_OP_MIN: return (v0, v1) ->
                        v0.bOp(v1, (i, a, b) -> (byte)Math.min(a, b));
                case VECTOR_OP_AND: return (v0, v1) ->
                        v0.bOp(v1, (i, a, b) -> (byte)(a & b));
                case VECTOR_OP_OR: return (v0, v1) ->
                        v0.bOp(v1, (i, a, b) -> (byte)(a | b));
                case VECTOR_OP_XOR: return (v0, v1) ->
                        v0.bOp(v1, (i, a, b) -> (byte)(a ^ b));
                case VECTOR_OP_LSHIFT: return (v0, v1) ->
                        v0.bOp(v1, (i, a, n) -> (byte)(a << n));
                case VECTOR_OP_RSHIFT: return (v0, v1) ->
                        v0.bOp(v1, (i, a, n) -> (byte)(a >> n));
                case VECTOR_OP_URSHIFT: return (v0, v1) ->
                        v0.bOp(v1, (i, a, n) -> (byte)((a & LSHR_SETUP_MASK) >>> n));
                case VECTOR_OP_LROTATE: return (v0, v1) ->
                        v0.bOp(v1, (i, a, n) -> rotateLeft(a, (int)n));
                case VECTOR_OP_RROTATE: return (v0, v1) ->
                        v0.bOp(v1, (i, a, n) -> rotateRight(a, (int)n));
                default: return null;
                }}));
    }
    // Cache of scalar fallback implementations, one per binary opcode.
    private static final
    ImplCache<Binary,BinaryOperator<ByteVector>> BIN_IMPL
        = new ImplCache<>(Binary.class, ByteVector.class);
 666 
    /**
     * {@inheritDoc} <!--workaround-->
     * @see #lanewise(VectorOperators.Binary,byte,VectorMask)
     */
    @ForceInline
    public final
    ByteVector lanewise(VectorOperators.Binary op,
                                  Vector<Byte> v,
                                  VectorMask<Byte> m) {
        ByteVector that = (ByteVector) v;
        if (op == DIV) {
            VectorMask<Byte> eqz = that.eq((byte)0);
            if (eqz.and(m).anyTrue()) {
                // A lane selected by the mask divides by zero: report it.
                throw that.divZeroException();
            }
            // suppress div/0 exceptions in unset lanes
            // (the masked NOT turns each zero divisor into -1, which cannot trap)
            that = that.lanewise(NOT, eqz);
            return blend(lanewise(DIV, that), m);
        }
        return blend(lanewise(op, v), m);
    }
 688     // FIXME: Maybe all of the public final methods in this file (the
 689     // simple ones that just call lanewise) should be pushed down to
 690     // the X-VectorBits template.  They can't optimize properly at
 691     // this level, and must rely on inlining.  Does it work?
 692     // (If it works, of course keep the code here.)
 693 
    /**
     * Combines the lane values of this vector
     * with the value of a broadcast scalar.
     *
     * This is a lane-wise binary operation which applies
     * the selected operation to each lane.
     * The return value will be equal to this expression:
     * {@code this.lanewise(op, this.broadcast(e))}.
     *
     * @param op the operation used to process lane values
     * @param e the input scalar
     * @return the result of applying the operation lane-wise
     *         to the two input vectors
     * @throws UnsupportedOperationException if this vector does
     *         not support the requested operation
     * @see #lanewise(VectorOperators.Binary,Vector)
     * @see #lanewise(VectorOperators.Binary,byte,VectorMask)
     */
    @ForceInline
    public final
    ByteVector lanewise(VectorOperators.Binary op,
                                  byte e) {
        if (opKind(op, VO_SHIFT) && (byte)(int)e == e) {
            // Shift counts that fit use the cheaper vector-by-scalar path.
            return lanewiseShift(op, (int) e);
        }
        if (op == AND_NOT) {
            // AND_NOT(a, e) == AND(a, ~e); fold the complement into the scalar.
            op = AND; e = (byte) ~e;
        }
        return lanewise(op, broadcast(e));
    }
 724 
    /**
     * Combines the lane values of this vector
     * with the value of a broadcast scalar,
     * with selection of lane elements controlled by a mask.
     *
     * This is a masked lane-wise binary operation which applies
     * the selected operation to each lane.
     * The return value will be equal to this expression:
     * {@code this.lanewise(op, this.broadcast(e), m)}.
     *
     * @param op the operation used to process lane values
     * @param e the input scalar
     * @param m the mask controlling lane selection
     * @return the result of applying the operation lane-wise
     *         to the input vector and the scalar
     * @throws UnsupportedOperationException if this vector does
     *         not support the requested operation
     * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
     * @see #lanewise(VectorOperators.Binary,byte)
     */
    @ForceInline
    public final
    ByteVector lanewise(VectorOperators.Binary op,
                                  byte e,
                                  VectorMask<Byte> m) {
        // Masked form: compute unmasked, then blend with the original lanes.
        return blend(lanewise(op, e), m);
    }
 752 
    /**
     * {@inheritDoc} <!--workaround-->
     * @apiNote
     * When working with vector subtypes like {@code ByteVector},
     * {@linkplain #lanewise(VectorOperators.Binary,byte)
     * the more strongly typed method}
     * is typically selected.  It can be explicitly selected
     * using a cast: {@code v.lanewise(op,(byte)e)}.
     * The two expressions will produce numerically identical results.
     */
    @ForceInline
    public final
    ByteVector lanewise(VectorOperators.Binary op,
                                  long e) {
        // Narrow to the lane type; reject values that do not round-trip.
        byte e1 = (byte) e;
        if ((long)e1 != e
            // allow shift ops to clip down their int parameters
            && !(opKind(op, VO_SHIFT) && (int)e1 == e)
            ) {
            vspecies().checkValue(e);  // for exception
        }
        return lanewise(op, e1);
    }
 776 
    /**
     * {@inheritDoc} <!--workaround-->
     * @apiNote
     * When working with vector subtypes like {@code ByteVector},
     * {@linkplain #lanewise(VectorOperators.Binary,byte,VectorMask)
     * the more strongly typed method}
     * is typically selected.  It can be explicitly selected
     * using a cast: {@code v.lanewise(op,(byte)e,m)}.
     * The two expressions will produce numerically identical results.
     */
    @ForceInline
    public final
    ByteVector lanewise(VectorOperators.Binary op,
                                  long e, VectorMask<Byte> m) {
        // Masked form: compute unmasked, then blend with the original lanes.
        return blend(lanewise(op, e), m);
    }
 793 
    /*package-private*/
    // Vector-by-scalar shift; specialized via lanewiseShiftTemplate in subclasses.
    abstract ByteVector
    lanewiseShift(VectorOperators.Binary op, int e);
 797 
    /*package-private*/
    @ForceInline
    final ByteVector
    lanewiseShiftTemplate(VectorOperators.Binary op, int e) {
        // Special handling for these.  FIXME: Refactor?
        assert(opKind(op, VO_SHIFT));
        // As per shift specification for Java, mask the shift count.
        e &= SHIFT_MASK;
        int opc = opCode(op);
        // Intrinsic entry point for vector-by-scalar shifts; the lambda
        // table supplies the scalar fallback per opcode.
        return VectorSupport.broadcastInt(
            opc, getClass(), byte.class, length(),
            this, e,
            BIN_INT_IMPL.find(op, opc, (opc_) -> {
              switch (opc_) {
                case VECTOR_OP_LSHIFT: return (v, n) ->
                        v.uOp((i, a) -> (byte)(a << n));
                case VECTOR_OP_RSHIFT: return (v, n) ->
                        v.uOp((i, a) -> (byte)(a >> n));
                case VECTOR_OP_URSHIFT: return (v, n) ->
                        v.uOp((i, a) -> (byte)((a & LSHR_SETUP_MASK) >>> n));
                case VECTOR_OP_LROTATE: return (v, n) ->
                        v.uOp((i, a) -> rotateLeft(a, (int)n));
                case VECTOR_OP_RROTATE: return (v, n) ->
                        v.uOp((i, a) -> rotateRight(a, (int)n));
                default: return null;
                }}));
    }
    // Cache of scalar fallback implementations for broadcast-int (shift)
    // operators, keyed by operator and filled on demand via find(...).
    private static final
    ImplCache<Binary,VectorBroadcastIntOp<ByteVector>> BIN_INT_IMPL
        = new ImplCache<>(Binary.class, ByteVector.class);
 828 
    // As per shift specification for Java, mask the shift count.
    // We mask 0x3F (long), 0x1F (int), 0x0F (short), 0x07 (byte).
    // The latter two maskings go beyond the JLS, but seem reasonable
    // since our lane types are first-class types, not just dressed
    // up ints.
    private static final int SHIFT_MASK = (Byte.SIZE - 1);
    // Also simulate >>> on sub-word variables with a mask.
    // 0xFF here zero-extends a byte lane so >>> acts as a logical shift.
    private static final int LSHR_SETUP_MASK = ((1 << Byte.SIZE) - 1);
 837 
 838     // Ternary lanewise support
 839 
 840     // Ternary operators come in eight variations:
 841     //   lanewise(op, [broadcast(e1)|v1], [broadcast(e2)|v2])
 842     //   lanewise(op, [broadcast(e1)|v1], [broadcast(e2)|v2], mask)
 843 
 844     // It is annoying to support all of these variations of masking
 845     // and broadcast, but it would be more surprising not to continue
 846     // the obvious pattern started by unary and binary.
 847 
 848    /**
 849      * {@inheritDoc} <!--workaround-->
 850      * @see #lanewise(VectorOperators.Ternary,byte,byte,VectorMask)
 851      * @see #lanewise(VectorOperators.Ternary,Vector,byte,VectorMask)
 852      * @see #lanewise(VectorOperators.Ternary,byte,Vector,VectorMask)
 853      * @see #lanewise(VectorOperators.Ternary,byte,byte)
 854      * @see #lanewise(VectorOperators.Ternary,Vector,byte)
 855      * @see #lanewise(VectorOperators.Ternary,byte,Vector)
 856      */
 857     @Override
 858     public abstract
 859     ByteVector lanewise(VectorOperators.Ternary op,
 860                                                   Vector<Byte> v1,
 861                                                   Vector<Byte> v2);
    @ForceInline
    final
    ByteVector lanewiseTemplate(VectorOperators.Ternary op,
                                          Vector<Byte> v1,
                                          Vector<Byte> v2) {
        ByteVector that = (ByteVector) v1;
        ByteVector tother = (ByteVector) v2;
        // It's a word: https://www.dictionary.com/browse/tother
        // See also Chapter 11 of Dickens, Our Mutual Friend:
        // "Totherest Governor," replied Mr Riderhood...
        that.check(this);
        tother.check(this);
        if (op == BITWISE_BLEND) {
            // FIXME: Support this in the JIT.
            // Bitwise blend via XOR/AND identity:
            // this ^ ((this ^ that) & tother) picks bits of `that` where
            // `tother` has 1-bits and bits of `this` where it has 0-bits.
            that = this.lanewise(XOR, that).lanewise(AND, tother);
            return this.lanewise(XOR, that);
        }
        int opc = opCode(op);
        return VectorSupport.ternaryOp(
            opc, getClass(), byte.class, length(),
            this, that, tother,
            TERN_IMPL.find(op, opc, (opc_) -> {
              switch (opc_) {
                // No ternary byte operator currently has a scalar fallback.
                default: return null;
                }}));
    }
    // Cache of scalar fallback implementations for ternary operators,
    // keyed by operator and filled on demand via find(...).
    private static final
    ImplCache<Ternary,TernaryOperation<ByteVector>> TERN_IMPL
        = new ImplCache<>(Ternary.class, ByteVector.class);
 891 
 892     /**
 893      * {@inheritDoc} <!--workaround-->
 894      * @see #lanewise(VectorOperators.Ternary,byte,byte,VectorMask)
 895      * @see #lanewise(VectorOperators.Ternary,Vector,byte,VectorMask)
 896      * @see #lanewise(VectorOperators.Ternary,byte,Vector,VectorMask)
 897      */
 898     @ForceInline
 899     public final
 900     ByteVector lanewise(VectorOperators.Ternary op,
 901                                   Vector<Byte> v1,
 902                                   Vector<Byte> v2,
 903                                   VectorMask<Byte> m) {
 904         return blend(lanewise(op, v1, v2), m);
 905     }
 906 
 907     /**
 908      * Combines the lane values of this vector
 909      * with the values of two broadcast scalars.
 910      *
 911      * This is a lane-wise ternary operation which applies
 912      * the selected operation to each lane.
 913      * The return value will be equal to this expression:
 914      * {@code this.lanewise(op, this.broadcast(e1), this.broadcast(e2))}.
 915      *
 916      * @param op the operation used to combine lane values
 917      * @param e1 the first input scalar
 918      * @param e2 the second input scalar
 919      * @return the result of applying the operation lane-wise
 920      *         to the input vector and the scalars
 921      * @throws UnsupportedOperationException if this vector does
 922      *         not support the requested operation
 923      * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
 924      * @see #lanewise(VectorOperators.Ternary,byte,byte,VectorMask)
 925      */
 926     @ForceInline
 927     public final
 928     ByteVector lanewise(VectorOperators.Ternary op, //(op,e1,e2)
 929                                   byte e1,
 930                                   byte e2) {
 931         return lanewise(op, broadcast(e1), broadcast(e2));
 932     }
 933 
 934     /**
 935      * Combines the lane values of this vector
 936      * with the values of two broadcast scalars,
 937      * with selection of lane elements controlled by a mask.
 938      *
 939      * This is a masked lane-wise ternary operation which applies
 940      * the selected operation to each lane.
 941      * The return value will be equal to this expression:
 942      * {@code this.lanewise(op, this.broadcast(e1), this.broadcast(e2), m)}.
 943      *
 944      * @param op the operation used to combine lane values
 945      * @param e1 the first input scalar
 946      * @param e2 the second input scalar
 947      * @param m the mask controlling lane selection
 948      * @return the result of applying the operation lane-wise
 949      *         to the input vector and the scalars
 950      * @throws UnsupportedOperationException if this vector does
 951      *         not support the requested operation
 952      * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
 953      * @see #lanewise(VectorOperators.Ternary,byte,byte)
 954      */
 955     @ForceInline
 956     public final
 957     ByteVector lanewise(VectorOperators.Ternary op, //(op,e1,e2,m)
 958                                   byte e1,
 959                                   byte e2,
 960                                   VectorMask<Byte> m) {
 961         return blend(lanewise(op, e1, e2), m);
 962     }
 963 
 964     /**
 965      * Combines the lane values of this vector
 966      * with the values of another vector and a broadcast scalar.
 967      *
 968      * This is a lane-wise ternary operation which applies
 969      * the selected operation to each lane.
 970      * The return value will be equal to this expression:
 971      * {@code this.lanewise(op, v1, this.broadcast(e2))}.
 972      *
 973      * @param op the operation used to combine lane values
 974      * @param v1 the other input vector
 975      * @param e2 the input scalar
 976      * @return the result of applying the operation lane-wise
 977      *         to the input vectors and the scalar
 978      * @throws UnsupportedOperationException if this vector does
 979      *         not support the requested operation
 980      * @see #lanewise(VectorOperators.Ternary,byte,byte)
 981      * @see #lanewise(VectorOperators.Ternary,Vector,byte,VectorMask)
 982      */
 983     @ForceInline
 984     public final
 985     ByteVector lanewise(VectorOperators.Ternary op, //(op,v1,e2)
 986                                   Vector<Byte> v1,
 987                                   byte e2) {
 988         return lanewise(op, v1, broadcast(e2));
 989     }
 990 
 991     /**
 992      * Combines the lane values of this vector
 993      * with the values of another vector and a broadcast scalar,
 994      * with selection of lane elements controlled by a mask.
 995      *
 996      * This is a masked lane-wise ternary operation which applies
 997      * the selected operation to each lane.
 998      * The return value will be equal to this expression:
 999      * {@code this.lanewise(op, v1, this.broadcast(e2), m)}.
1000      *
1001      * @param op the operation used to combine lane values
1002      * @param v1 the other input vector
1003      * @param e2 the input scalar
1004      * @param m the mask controlling lane selection
1005      * @return the result of applying the operation lane-wise
1006      *         to the input vectors and the scalar
1007      * @throws UnsupportedOperationException if this vector does
1008      *         not support the requested operation
1009      * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
1010      * @see #lanewise(VectorOperators.Ternary,byte,byte,VectorMask)
1011      * @see #lanewise(VectorOperators.Ternary,Vector,byte)
1012      */
1013     @ForceInline
1014     public final
1015     ByteVector lanewise(VectorOperators.Ternary op, //(op,v1,e2,m)
1016                                   Vector<Byte> v1,
1017                                   byte e2,
1018                                   VectorMask<Byte> m) {
1019         return blend(lanewise(op, v1, e2), m);
1020     }
1021 
1022     /**
1023      * Combines the lane values of this vector
1024      * with the values of another vector and a broadcast scalar.
1025      *
1026      * This is a lane-wise ternary operation which applies
1027      * the selected operation to each lane.
1028      * The return value will be equal to this expression:
1029      * {@code this.lanewise(op, this.broadcast(e1), v2)}.
1030      *
1031      * @param op the operation used to combine lane values
1032      * @param e1 the input scalar
1033      * @param v2 the other input vector
1034      * @return the result of applying the operation lane-wise
1035      *         to the input vectors and the scalar
1036      * @throws UnsupportedOperationException if this vector does
1037      *         not support the requested operation
1038      * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
1039      * @see #lanewise(VectorOperators.Ternary,byte,Vector,VectorMask)
1040      */
1041     @ForceInline
1042     public final
1043     ByteVector lanewise(VectorOperators.Ternary op, //(op,e1,v2)
1044                                   byte e1,
1045                                   Vector<Byte> v2) {
1046         return lanewise(op, broadcast(e1), v2);
1047     }
1048 
1049     /**
1050      * Combines the lane values of this vector
1051      * with the values of another vector and a broadcast scalar,
1052      * with selection of lane elements controlled by a mask.
1053      *
1054      * This is a masked lane-wise ternary operation which applies
1055      * the selected operation to each lane.
1056      * The return value will be equal to this expression:
1057      * {@code this.lanewise(op, this.broadcast(e1), v2, m)}.
1058      *
1059      * @param op the operation used to combine lane values
1060      * @param e1 the input scalar
1061      * @param v2 the other input vector
1062      * @param m the mask controlling lane selection
1063      * @return the result of applying the operation lane-wise
1064      *         to the input vectors and the scalar
1065      * @throws UnsupportedOperationException if this vector does
1066      *         not support the requested operation
1067      * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
1068      * @see #lanewise(VectorOperators.Ternary,byte,Vector)
1069      */
1070     @ForceInline
1071     public final
1072     ByteVector lanewise(VectorOperators.Ternary op, //(op,e1,v2,m)
1073                                   byte e1,
1074                                   Vector<Byte> v2,
1075                                   VectorMask<Byte> m) {
1076         return blend(lanewise(op, e1, v2), m);
1077     }
1078 
1079     // (Thus endeth the Great and Mighty Ternary Ogdoad.)
1080     // https://en.wikipedia.org/wiki/Ogdoad
1081 
1082     /// FULL-SERVICE BINARY METHODS: ADD, SUB, MUL, DIV
1083     //
1084     // These include masked and non-masked versions.
1085     // This subclass adds broadcast (masked or not).
1086 
1087     /**
1088      * {@inheritDoc} <!--workaround-->
1089      * @see #add(byte)
1090      */
1091     @Override
1092     @ForceInline
1093     public final ByteVector add(Vector<Byte> v) {
1094         return lanewise(ADD, v);
1095     }
1096 
1097     /**
1098      * Adds this vector to the broadcast of an input scalar.
1099      *
1100      * This is a lane-wise binary operation which applies
1101      * the primitive addition operation ({@code +}) to each lane.
1102      *
1103      * This method is also equivalent to the expression
1104      * {@link #lanewise(VectorOperators.Binary,byte)
1105      *    lanewise}{@code (}{@link VectorOperators#ADD
1106      *    ADD}{@code , e)}.
1107      *
1108      * @param e the input scalar
1109      * @return the result of adding each lane of this vector to the scalar
1110      * @see #add(Vector)
1111      * @see #broadcast(byte)
1112      * @see #add(byte,VectorMask)
1113      * @see VectorOperators#ADD
1114      * @see #lanewise(VectorOperators.Binary,Vector)
1115      * @see #lanewise(VectorOperators.Binary,byte)
1116      */
1117     @ForceInline
1118     public final
1119     ByteVector add(byte e) {
1120         return lanewise(ADD, e);
1121     }
1122 
1123     /**
1124      * {@inheritDoc} <!--workaround-->
1125      * @see #add(byte,VectorMask)
1126      */
1127     @Override
1128     @ForceInline
1129     public final ByteVector add(Vector<Byte> v,
1130                                           VectorMask<Byte> m) {
1131         return lanewise(ADD, v, m);
1132     }
1133 
1134     /**
1135      * Adds this vector to the broadcast of an input scalar,
1136      * selecting lane elements controlled by a mask.
1137      *
1138      * This is a masked lane-wise binary operation which applies
1139      * the primitive addition operation ({@code +}) to each lane.
1140      *
1141      * This method is also equivalent to the expression
1142      * {@link #lanewise(VectorOperators.Binary,byte,VectorMask)
1143      *    lanewise}{@code (}{@link VectorOperators#ADD
1144      *    ADD}{@code , s, m)}.
1145      *
1146      * @param e the input scalar
1147      * @param m the mask controlling lane selection
1148      * @return the result of adding each lane of this vector to the scalar
1149      * @see #add(Vector,VectorMask)
1150      * @see #broadcast(byte)
1151      * @see #add(byte)
1152      * @see VectorOperators#ADD
1153      * @see #lanewise(VectorOperators.Binary,Vector)
1154      * @see #lanewise(VectorOperators.Binary,byte)
1155      */
1156     @ForceInline
1157     public final ByteVector add(byte e,
1158                                           VectorMask<Byte> m) {
1159         return lanewise(ADD, e, m);
1160     }
1161 
1162     /**
1163      * {@inheritDoc} <!--workaround-->
1164      * @see #sub(byte)
1165      */
1166     @Override
1167     @ForceInline
1168     public final ByteVector sub(Vector<Byte> v) {
1169         return lanewise(SUB, v);
1170     }
1171 
1172     /**
1173      * Subtracts an input scalar from this vector.
1174      *
1175      * This is a masked lane-wise binary operation which applies
1176      * the primitive subtraction operation ({@code -}) to each lane.
1177      *
1178      * This method is also equivalent to the expression
1179      * {@link #lanewise(VectorOperators.Binary,byte)
1180      *    lanewise}{@code (}{@link VectorOperators#SUB
1181      *    SUB}{@code , e)}.
1182      *
1183      * @param e the input scalar
1184      * @return the result of subtracting the scalar from each lane of this vector
1185      * @see #sub(Vector)
1186      * @see #broadcast(byte)
1187      * @see #sub(byte,VectorMask)
1188      * @see VectorOperators#SUB
1189      * @see #lanewise(VectorOperators.Binary,Vector)
1190      * @see #lanewise(VectorOperators.Binary,byte)
1191      */
1192     @ForceInline
1193     public final ByteVector sub(byte e) {
1194         return lanewise(SUB, e);
1195     }
1196 
1197     /**
1198      * {@inheritDoc} <!--workaround-->
1199      * @see #sub(byte,VectorMask)
1200      */
1201     @Override
1202     @ForceInline
1203     public final ByteVector sub(Vector<Byte> v,
1204                                           VectorMask<Byte> m) {
1205         return lanewise(SUB, v, m);
1206     }
1207 
1208     /**
1209      * Subtracts an input scalar from this vector
1210      * under the control of a mask.
1211      *
1212      * This is a masked lane-wise binary operation which applies
1213      * the primitive subtraction operation ({@code -}) to each lane.
1214      *
1215      * This method is also equivalent to the expression
1216      * {@link #lanewise(VectorOperators.Binary,byte,VectorMask)
1217      *    lanewise}{@code (}{@link VectorOperators#SUB
1218      *    SUB}{@code , s, m)}.
1219      *
1220      * @param e the input scalar
1221      * @param m the mask controlling lane selection
1222      * @return the result of subtracting the scalar from each lane of this vector
1223      * @see #sub(Vector,VectorMask)
1224      * @see #broadcast(byte)
1225      * @see #sub(byte)
1226      * @see VectorOperators#SUB
1227      * @see #lanewise(VectorOperators.Binary,Vector)
1228      * @see #lanewise(VectorOperators.Binary,byte)
1229      */
1230     @ForceInline
1231     public final ByteVector sub(byte e,
1232                                           VectorMask<Byte> m) {
1233         return lanewise(SUB, e, m);
1234     }
1235 
1236     /**
1237      * {@inheritDoc} <!--workaround-->
1238      * @see #mul(byte)
1239      */
1240     @Override
1241     @ForceInline
1242     public final ByteVector mul(Vector<Byte> v) {
1243         return lanewise(MUL, v);
1244     }
1245 
1246     /**
1247      * Multiplies this vector by the broadcast of an input scalar.
1248      *
1249      * This is a lane-wise binary operation which applies
1250      * the primitive multiplication operation ({@code *}) to each lane.
1251      *
1252      * This method is also equivalent to the expression
1253      * {@link #lanewise(VectorOperators.Binary,byte)
1254      *    lanewise}{@code (}{@link VectorOperators#MUL
1255      *    MUL}{@code , e)}.
1256      *
1257      * @param e the input scalar
1258      * @return the result of multiplying this vector by the given scalar
1259      * @see #mul(Vector)
1260      * @see #broadcast(byte)
1261      * @see #mul(byte,VectorMask)
1262      * @see VectorOperators#MUL
1263      * @see #lanewise(VectorOperators.Binary,Vector)
1264      * @see #lanewise(VectorOperators.Binary,byte)
1265      */
1266     @ForceInline
1267     public final ByteVector mul(byte e) {
1268         return lanewise(MUL, e);
1269     }
1270 
1271     /**
1272      * {@inheritDoc} <!--workaround-->
1273      * @see #mul(byte,VectorMask)
1274      */
1275     @Override
1276     @ForceInline
1277     public final ByteVector mul(Vector<Byte> v,
1278                                           VectorMask<Byte> m) {
1279         return lanewise(MUL, v, m);
1280     }
1281 
1282     /**
1283      * Multiplies this vector by the broadcast of an input scalar,
1284      * selecting lane elements controlled by a mask.
1285      *
1286      * This is a masked lane-wise binary operation which applies
1287      * the primitive multiplication operation ({@code *}) to each lane.
1288      *
1289      * This method is also equivalent to the expression
1290      * {@link #lanewise(VectorOperators.Binary,byte,VectorMask)
1291      *    lanewise}{@code (}{@link VectorOperators#MUL
1292      *    MUL}{@code , s, m)}.
1293      *
1294      * @param e the input scalar
1295      * @param m the mask controlling lane selection
1296      * @return the result of muling each lane of this vector to the scalar
1297      * @see #mul(Vector,VectorMask)
1298      * @see #broadcast(byte)
1299      * @see #mul(byte)
1300      * @see VectorOperators#MUL
1301      * @see #lanewise(VectorOperators.Binary,Vector)
1302      * @see #lanewise(VectorOperators.Binary,byte)
1303      */
1304     @ForceInline
1305     public final ByteVector mul(byte e,
1306                                           VectorMask<Byte> m) {
1307         return lanewise(MUL, e, m);
1308     }
1309 
1310     /**
1311      * {@inheritDoc} <!--workaround-->
1312      * @apiNote If there is a zero divisor, {@code
1313      * ArithmeticException} will be thrown.
1314      */
1315     @Override
1316     @ForceInline
1317     public final ByteVector div(Vector<Byte> v) {
1318         return lanewise(DIV, v);
1319     }
1320 
1321     /**
1322      * Divides this vector by the broadcast of an input scalar.
1323      *
1324      * This is a lane-wise binary operation which applies
1325      * the primitive division operation ({@code /}) to each lane.
1326      *
1327      * This method is also equivalent to the expression
1328      * {@link #lanewise(VectorOperators.Binary,byte)
1329      *    lanewise}{@code (}{@link VectorOperators#DIV
1330      *    DIV}{@code , e)}.
1331      *
1332      * @apiNote If there is a zero divisor, {@code
1333      * ArithmeticException} will be thrown.
1334      *
1335      * @param e the input scalar
1336      * @return the result of dividing each lane of this vector by the scalar
1337      * @see #div(Vector)
1338      * @see #broadcast(byte)
1339      * @see #div(byte,VectorMask)
1340      * @see VectorOperators#DIV
1341      * @see #lanewise(VectorOperators.Binary,Vector)
1342      * @see #lanewise(VectorOperators.Binary,byte)
1343      */
1344     @ForceInline
1345     public final ByteVector div(byte e) {
1346         return lanewise(DIV, e);
1347     }
1348 
1349     /**
1350      * {@inheritDoc} <!--workaround-->
1351      * @see #div(byte,VectorMask)
1352      * @apiNote If there is a zero divisor, {@code
1353      * ArithmeticException} will be thrown.
1354      */
1355     @Override
1356     @ForceInline
1357     public final ByteVector div(Vector<Byte> v,
1358                                           VectorMask<Byte> m) {
1359         return lanewise(DIV, v, m);
1360     }
1361 
1362     /**
1363      * Divides this vector by the broadcast of an input scalar,
1364      * selecting lane elements controlled by a mask.
1365      *
1366      * This is a masked lane-wise binary operation which applies
1367      * the primitive division operation ({@code /}) to each lane.
1368      *
1369      * This method is also equivalent to the expression
1370      * {@link #lanewise(VectorOperators.Binary,byte,VectorMask)
1371      *    lanewise}{@code (}{@link VectorOperators#DIV
1372      *    DIV}{@code , s, m)}.
1373      *
1374      * @apiNote If there is a zero divisor, {@code
1375      * ArithmeticException} will be thrown.
1376      *
1377      * @param e the input scalar
1378      * @param m the mask controlling lane selection
1379      * @return the result of dividing each lane of this vector by the scalar
1380      * @see #div(Vector,VectorMask)
1381      * @see #broadcast(byte)
1382      * @see #div(byte)
1383      * @see VectorOperators#DIV
1384      * @see #lanewise(VectorOperators.Binary,Vector)
1385      * @see #lanewise(VectorOperators.Binary,byte)
1386      */
1387     @ForceInline
1388     public final ByteVector div(byte e,
1389                                           VectorMask<Byte> m) {
1390         return lanewise(DIV, e, m);
1391     }
1392 
1393     /// END OF FULL-SERVICE BINARY METHODS
1394 
1395     /// SECOND-TIER BINARY METHODS
1396     //
1397     // There are no masked versions.
1398 
1399     /**
1400      * {@inheritDoc} <!--workaround-->
1401      */
1402     @Override
1403     @ForceInline
1404     public final ByteVector min(Vector<Byte> v) {
1405         return lanewise(MIN, v);
1406     }
1407 
    // FIXME:  "broadcast of an input scalar" is really wordy.  Reduce?
    /**
     * Computes the smaller of this vector and the broadcast of an input scalar.
     *
     * This is a lane-wise binary operation which applies the
     * operation {@code Math.min()} to each pair of
     * corresponding lane values.
     *
     * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Binary,byte)
     *    lanewise}{@code (}{@link VectorOperators#MIN
     *    MIN}{@code , e)}.
     *
     * @param e the input scalar
     * @return the lane-wise minimum of this vector and the given scalar
     * @see #min(Vector)
     * @see #broadcast(byte)
     * @see VectorOperators#MIN
     * @see #lanewise(VectorOperators.Binary,byte,VectorMask)
     */
    @ForceInline
    public final ByteVector min(byte e) {
        return lanewise(MIN, e);
    }
1432 
1433     /**
1434      * {@inheritDoc} <!--workaround-->
1435      */
1436     @Override
1437     @ForceInline
1438     public final ByteVector max(Vector<Byte> v) {
1439         return lanewise(MAX, v);
1440     }
1441 
1442     /**
1443      * Computes the larger of this vector and the broadcast of an input scalar.
1444      *
1445      * This is a lane-wise binary operation which applies the
1446      * operation {@code Math.max()} to each pair of
1447      * corresponding lane values.
1448      *
1449      * This method is also equivalent to the expression
1450      * {@link #lanewise(VectorOperators.Binary,byte)
1451      *    lanewise}{@code (}{@link VectorOperators#MAX
1452      *    MAX}{@code , e)}.
1453      *
1454      * @param e the input scalar
1455      * @return the result of multiplying this vector by the given scalar
1456      * @see #max(Vector)
1457      * @see #broadcast(byte)
1458      * @see VectorOperators#MAX
1459      * @see #lanewise(VectorOperators.Binary,byte,VectorMask)
1460      */
1461     @ForceInline
1462     public final ByteVector max(byte e) {
1463         return lanewise(MAX, e);
1464     }
1465 
    // common bitwise operators: and, or, not (with scalar versions)
    /**
     * Computes the bitwise logical conjunction ({@code &})
     * of this vector and a second input vector.
     *
     * This is a lane-wise binary operation which applies
     * the primitive bitwise "and" operation ({@code &})
     * to each pair of corresponding lane values.
     *
     * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Binary,Vector)
     *    lanewise}{@code (}{@link VectorOperators#AND
     *    AND}{@code , v)}.
     *
     * <p>
     * This is not a full-service named operation like
     * {@link #add(Vector) add}.  A masked version of
     * this operation is not directly available
     * but may be obtained via the masked version of
     * {@code lanewise}.
     *
     * @param v a second input vector
     * @return the bitwise {@code &} of this vector and the second input vector
     * @see #and(byte)
     * @see #or(Vector)
     * @see #not()
     * @see VectorOperators#AND
     * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
     */
    @ForceInline
    public final ByteVector and(Vector<Byte> v) {
        return lanewise(AND, v);
    }
1499 
1500     /**
1501      * Computes the bitwise logical conjunction ({@code &})
1502      * of this vector and a scalar.
1503      *
1504      * This is a lane-wise binary operation which applies the
1505      * the primitive bitwise "and" operation ({@code &})
1506      * to each pair of corresponding lane values.
1507      *
1508      * This method is also equivalent to the expression
1509      * {@link #lanewise(VectorOperators.Binary,Vector)
1510      *    lanewise}{@code (}{@link VectorOperators#AND
1511      *    AND}{@code , e)}.
1512      *
1513      * @param e an input scalar
1514      * @return the bitwise {@code &} of this vector and scalar
1515      * @see #and(Vector)
1516      * @see VectorOperators#AND
1517      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
1518      */
1519     @ForceInline
1520     public final ByteVector and(byte e) {
1521         return lanewise(AND, e);
1522     }
1523 
1524     /**
1525      * Computes the bitwise logical disjunction ({@code |})
1526      * of this vector and a second input vector.
1527      *
1528      * This is a lane-wise binary operation which applies the
1529      * the primitive bitwise "or" operation ({@code |})
1530      * to each pair of corresponding lane values.
1531      *
1532      * This method is also equivalent to the expression
1533      * {@link #lanewise(VectorOperators.Binary,Vector)
1534      *    lanewise}{@code (}{@link VectorOperators#OR
1535      *    AND}{@code , v)}.
1536      *
1537      * <p>
1538      * This is not a full-service named operation like
1539      * {@link #add(Vector) add}.  A masked version of
1540      * this operation is not directly available
1541      * but may be obtained via the masked version of
1542      * {@code lanewise}.
1543      *
1544      * @param v a second input vector
1545      * @return the bitwise {@code |} of this vector and the second input vector
1546      * @see #or(byte)
1547      * @see #and(Vector)
1548      * @see #not()
1549      * @see VectorOperators#OR
1550      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
1551      */
1552     @ForceInline
1553     public final ByteVector or(Vector<Byte> v) {
1554         return lanewise(OR, v);
1555     }
1556 
1557     /**
1558      * Computes the bitwise logical disjunction ({@code |})
1559      * of this vector and a scalar.
1560      *
1561      * This is a lane-wise binary operation which applies the
1562      * the primitive bitwise "or" operation ({@code |})
1563      * to each pair of corresponding lane values.
1564      *
1565      * This method is also equivalent to the expression
1566      * {@link #lanewise(VectorOperators.Binary,Vector)
1567      *    lanewise}{@code (}{@link VectorOperators#OR
1568      *    OR}{@code , e)}.
1569      *
1570      * @param e an input scalar
1571      * @return the bitwise {@code |} of this vector and scalar
1572      * @see #or(Vector)
1573      * @see VectorOperators#OR
1574      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
1575      */
1576     @ForceInline
1577     public final ByteVector or(byte e) {
1578         return lanewise(OR, e);
1579     }
1580 
1581 
1582 
1583     /// UNARY METHODS
1584 
1585     /**
1586      * {@inheritDoc} <!--workaround-->
1587      */
1588     @Override
1589     @ForceInline
1590     public final
1591     ByteVector neg() {
1592         return lanewise(NEG);
1593     }
1594 
1595     /**
1596      * {@inheritDoc} <!--workaround-->
1597      */
1598     @Override
1599     @ForceInline
1600     public final
1601     ByteVector abs() {
1602         return lanewise(ABS);
1603     }
1604 
    // not (~)
    /**
     * Computes the bitwise logical complement ({@code ~})
     * of this vector.
     *
     * This is a lane-wise unary operation which applies
     * the primitive bitwise "not" operation ({@code ~})
     * to each lane value.
     *
     * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Unary)
     *    lanewise}{@code (}{@link VectorOperators#NOT
     *    NOT}{@code )}.
     *
     * <p>
     * This is not a full-service named operation like
     * {@link #add(Vector) add}.  A masked version of
     * this operation is not directly available
     * but may be obtained via the masked version of
     * {@code lanewise}.
     *
     * @return the bitwise complement {@code ~} of this vector
     * @see #and(Vector)
     * @see VectorOperators#NOT
     * @see #lanewise(VectorOperators.Unary,VectorMask)
     */
    @ForceInline
    public final ByteVector not() {
        return lanewise(NOT);
    }
1635 
1636 
1637     /// COMPARISONS
1638 
1639     /**
1640      * {@inheritDoc} <!--workaround-->
1641      */
1642     @Override
1643     @ForceInline
1644     public final
1645     VectorMask<Byte> eq(Vector<Byte> v) {
1646         return compare(EQ, v);
1647     }
1648 
1649     /**
1650      * Tests if this vector is equal to an input scalar.
1651      *
1652      * This is a lane-wise binary test operation which applies
1653      * the primitive equals operation ({@code ==}) to each lane.
1654      * The result is the same as {@code compare(VectorOperators.Comparison.EQ, e)}.
1655      *
1656      * @param e the input scalar
1657      * @return the result mask of testing if this vector
1658      *         is equal to {@code e}
1659      * @see #compare(VectorOperators.Comparison,byte)
1660      */
1661     @ForceInline
1662     public final
1663     VectorMask<Byte> eq(byte e) {
1664         return compare(EQ, e);
1665     }
1666 
1667     /**
1668      * {@inheritDoc} <!--workaround-->
1669      */
1670     @Override
1671     @ForceInline
1672     public final
1673     VectorMask<Byte> lt(Vector<Byte> v) {
1674         return compare(LT, v);
1675     }
1676 
1677     /**
1678      * Tests if this vector is less than an input scalar.
1679      *
1680      * This is a lane-wise binary test operation which applies
1681      * the primitive less than operation ({@code <}) to each lane.
1682      * The result is the same as {@code compare(VectorOperators.LT, e)}.
1683      *
1684      * @param e the input scalar
1685      * @return the mask result of testing if this vector
1686      *         is less than the input scalar
1687      * @see #compare(VectorOperators.Comparison,byte)
1688      */
1689     @ForceInline
1690     public final
1691     VectorMask<Byte> lt(byte e) {
1692         return compare(LT, e);
1693     }
1694 
1695     /**
1696      * {@inheritDoc} <!--workaround-->
1697      */
1698     @Override
1699     public abstract
1700     VectorMask<Byte> test(VectorOperators.Test op);
1701 
    /*package-private*/
    // Shared implementation for test(op): special test ops are rewritten as
    // comparisons against zero on the integral bit pattern of the lanes.
    @ForceInline
    final
    <M extends VectorMask<Byte>>
    M testTemplate(Class<M> maskType, Test op) {
        ByteSpecies vsp = vspecies();
        if (opKind(op, VO_SPECIAL)) {
            ByteVector bits = this.viewAsIntegralLanes();
            VectorMask<Byte> m;
            if (op == IS_DEFAULT) {
                // "default" lanes hold the zero bit pattern
                m = bits.compare(EQ, (byte) 0);
            } else if (op == IS_NEGATIVE) {
                // sign bit set <=> value < 0 in two's complement
                m = bits.compare(LT, (byte) 0);
            }
            else {
                throw new AssertionError(op);
            }
            return maskType.cast(m);
        }
        // NOTE(review): opc is unused before the unconditional throw;
        // presumably opCode(op) is retained for its operator-validation
        // side effect (it may throw for unsupported ops) -- confirm
        // before removing.
        int opc = opCode(op);
        throw new AssertionError(op);
    }
1724 
1725     /**
1726      * {@inheritDoc} <!--workaround-->
1727      */
1728     @Override
1729     @ForceInline
1730     public final
1731     VectorMask<Byte> test(VectorOperators.Test op,
1732                                   VectorMask<Byte> m) {
1733         return test(op).and(m);
1734     }
1735 
1736     /**
1737      * {@inheritDoc} <!--workaround-->
1738      */
1739     @Override
1740     public abstract
1741     VectorMask<Byte> compare(VectorOperators.Comparison op, Vector<Byte> v);
1742 
    /*package-private*/
    // Shared implementation for compare(op, v); intrinsic candidate with a
    // scalar per-lane fallback supplied as the trailing lambda.
    @ForceInline
    final
    <M extends VectorMask<Byte>>
    M compareTemplate(Class<M> maskType, Comparison op, Vector<Byte> v) {
        Objects.requireNonNull(v);
        ByteSpecies vsp = vspecies();
        ByteVector that = (ByteVector) v;
        that.check(this);  // both operands must share the same species
        int opc = opCode(op);
        return VectorSupport.compare(
            opc, getClass(), maskType, byte.class, length(),
            this, that,
            (cond, v0, v1) -> {
                AbstractMask<Byte> m
                    = v0.bTest(cond, v1, (cond_, i, a, b)
                               -> compareWithOp(cond, a, b));
                @SuppressWarnings("unchecked")
                M m2 = (M) m;
                return m2;
            });
    }
1765 
1766     @ForceInline
1767     private static boolean compareWithOp(int cond, byte a, byte b) {
1768         return switch (cond) {
1769             case BT_eq -> a == b;
1770             case BT_ne -> a != b;
1771             case BT_lt -> a < b;
1772             case BT_le -> a <= b;
1773             case BT_gt -> a > b;
1774             case BT_ge -> a >= b;
1775             case BT_ult -> Byte.compareUnsigned(a, b) < 0;
1776             case BT_ule -> Byte.compareUnsigned(a, b) <= 0;
1777             case BT_ugt -> Byte.compareUnsigned(a, b) > 0;
1778             case BT_uge -> Byte.compareUnsigned(a, b) >= 0;
1779             default -> throw new AssertionError();
1780         };
1781     }
1782 
1783     /**
1784      * {@inheritDoc} <!--workaround-->
1785      */
1786     @Override
1787     @ForceInline
1788     public final
1789     VectorMask<Byte> compare(VectorOperators.Comparison op,
1790                                   Vector<Byte> v,
1791                                   VectorMask<Byte> m) {
1792         return compare(op, v).and(m);
1793     }
1794 
1795     /**
1796      * Tests this vector by comparing it with an input scalar,
1797      * according to the given comparison operation.
1798      *
1799      * This is a lane-wise binary test operation which applies
1800      * the comparison operation to each lane.
1801      * <p>
1802      * The result is the same as
1803      * {@code compare(op, broadcast(species(), e))}.
1804      * That is, the scalar may be regarded as broadcast to
1805      * a vector of the same species, and then compared
1806      * against the original vector, using the selected
1807      * comparison operation.
1808      *
1809      * @param op the operation used to compare lane values
1810      * @param e the input scalar
1811      * @return the mask result of testing lane-wise if this vector
1812      *         compares to the input, according to the selected
1813      *         comparison operator
1814      * @see ByteVector#compare(VectorOperators.Comparison,Vector)
1815      * @see #eq(byte)
1816      * @see #lt(byte)
1817      */
1818     public abstract
1819     VectorMask<Byte> compare(Comparison op, byte e);
1820 
    /*package-private*/
    @ForceInline
    final
    <M extends VectorMask<Byte>>
    M compareTemplate(Class<M> maskType, Comparison op, byte e) {
        // Scalar comparison reduces to the vector form via broadcast.
        return compareTemplate(maskType, op, broadcast(e));
    }
1828 
1829     /**
1830      * Tests this vector by comparing it with an input scalar,
1831      * according to the given comparison operation,
1832      * in lanes selected by a mask.
1833      *
1834      * This is a masked lane-wise binary test operation which applies
1835      * to each pair of corresponding lane values.
1836      *
1837      * The returned result is equal to the expression
1838      * {@code compare(op,s).and(m)}.
1839      *
1840      * @param op the operation used to compare lane values
1841      * @param e the input scalar
1842      * @param m the mask controlling lane selection
1843      * @return the mask result of testing lane-wise if this vector
1844      *         compares to the input, according to the selected
1845      *         comparison operator,
1846      *         and only in the lanes selected by the mask
1847      * @see ByteVector#compare(VectorOperators.Comparison,Vector,VectorMask)
1848      */
1849     @ForceInline
1850     public final VectorMask<Byte> compare(VectorOperators.Comparison op,
1851                                                byte e,
1852                                                VectorMask<Byte> m) {
1853         return compare(op, e).and(m);
1854     }
1855 
1856     /**
1857      * {@inheritDoc} <!--workaround-->
1858      */
1859     @Override
1860     public abstract
1861     VectorMask<Byte> compare(Comparison op, long e);
1862 
    /*package-private*/
    @ForceInline
    final
    <M extends VectorMask<Byte>>
    M compareTemplate(Class<M> maskType, Comparison op, long e) {
        // broadcast(long) range-checks e before the vector comparison.
        return compareTemplate(maskType, op, broadcast(e));
    }
1870 
1871     /**
1872      * {@inheritDoc} <!--workaround-->
1873      */
1874     @Override
1875     @ForceInline
1876     public final
1877     VectorMask<Byte> compare(Comparison op, long e, VectorMask<Byte> m) {
1878         return compare(op, broadcast(e), m);
1879     }
1880 
1881 
1882 
1883     /**
1884      * {@inheritDoc} <!--workaround-->
1885      */
1886     @Override public abstract
1887     ByteVector blend(Vector<Byte> v, VectorMask<Byte> m);
1888 
    /*package-private*/
    // Shared implementation for blend(v, m); intrinsic candidate whose
    // scalar fallback selects the new value b wherever the mask is set.
    @ForceInline
    final
    <M extends VectorMask<Byte>>
    ByteVector
    blendTemplate(Class<M> maskType, ByteVector v, M m) {
        v.check(this);  // both vectors must share the same species
        return VectorSupport.blend(
            getClass(), maskType, byte.class, length(),
            this, v, m,
            (v0, v1, m_) -> v0.bOp(v1, m_, (i, a, b) -> b));
    }
1901 
1902     /**
1903      * {@inheritDoc} <!--workaround-->
1904      */
1905     @Override public abstract ByteVector addIndex(int scale);
1906 
    /*package-private*/
    // Shared implementation for addIndex: returns this + [0, scale, 2*scale, ...].
    @ForceInline
    final ByteVector addIndexTemplate(int scale) {
        ByteSpecies vsp = vspecies();
        // make sure VLENGTH*scale doesn't overflow:
        vsp.checkScale(scale);
        return VectorSupport.indexVector(
            getClass(), byte.class, length(),
            this, scale, vsp,
            (v, scale_, s)
            -> {
                // If the platform doesn't support an INDEX
                // instruction directly, load IOTA from memory
                // and multiply.
                ByteVector iota = s.iota();
                byte sc = (byte) scale_;
                return v.add(sc == 1 ? iota : iota.mul(sc));
            });
    }
1926 
1927     /**
1928      * Replaces selected lanes of this vector with
1929      * a scalar value
1930      * under the control of a mask.
1931      *
1932      * This is a masked lane-wise binary operation which
1933      * selects each lane value from one or the other input.
1934      *
1935      * The returned result is equal to the expression
1936      * {@code blend(broadcast(e),m)}.
1937      *
1938      * @param e the input scalar, containing the replacement lane value
1939      * @param m the mask controlling lane selection of the scalar
1940      * @return the result of blending the lane elements of this vector with
1941      *         the scalar value
1942      */
1943     @ForceInline
1944     public final ByteVector blend(byte e,
1945                                             VectorMask<Byte> m) {
1946         return blend(broadcast(e), m);
1947     }
1948 
1949     /**
1950      * Replaces selected lanes of this vector with
1951      * a scalar value
1952      * under the control of a mask.
1953      *
1954      * This is a masked lane-wise binary operation which
1955      * selects each lane value from one or the other input.
1956      *
1957      * The returned result is equal to the expression
1958      * {@code blend(broadcast(e),m)}.
1959      *
1960      * @param e the input scalar, containing the replacement lane value
1961      * @param m the mask controlling lane selection of the scalar
1962      * @return the result of blending the lane elements of this vector with
1963      *         the scalar value
1964      */
1965     @ForceInline
1966     public final ByteVector blend(long e,
1967                                             VectorMask<Byte> m) {
1968         return blend(broadcast(e), m);
1969     }
1970 
1971     /**
1972      * {@inheritDoc} <!--workaround-->
1973      */
1974     @Override
1975     public abstract
1976     ByteVector slice(int origin, Vector<Byte> v1);
1977 
    /*package-private*/
    // Shared implementation for slice(origin, v1): lanes below
    // (VLENGTH - origin) come from this vector shifted up by origin;
    // the remaining lanes are taken from the start of v1.
    final
    @ForceInline
    ByteVector sliceTemplate(int origin, Vector<Byte> v1) {
        ByteVector that = (ByteVector) v1;
        that.check(this);
        Objects.checkIndex(origin, length() + 1);  // origin in [0, VLENGTH]
        VectorShuffle<Byte> iota = iotaShuffle();
        VectorMask<Byte> blendMask = iota.toVector().compare(VectorOperators.LT, (broadcast((byte)(length() - origin))));
        iota = iotaShuffle(origin, 1, true);
        return that.rearrange(iota).blend(this.rearrange(iota), blendMask);
    }
1990 
1991     /**
1992      * {@inheritDoc} <!--workaround-->
1993      */
1994     @Override
1995     @ForceInline
1996     public final
1997     ByteVector slice(int origin,
1998                                Vector<Byte> w,
1999                                VectorMask<Byte> m) {
2000         return broadcast(0).blend(slice(origin, w), m);
2001     }
2002 
2003     /**
2004      * {@inheritDoc} <!--workaround-->
2005      */
2006     @Override
2007     public abstract
2008     ByteVector slice(int origin);
2009 
    /*package-private*/
    // Shared implementation for slice(origin): like the two-vector form,
    // but the lanes shifted in from "beyond the end" are zero.
    final
    @ForceInline
    ByteVector sliceTemplate(int origin) {
        Objects.checkIndex(origin, length() + 1);  // origin in [0, VLENGTH]
        VectorShuffle<Byte> iota = iotaShuffle();
        VectorMask<Byte> blendMask = iota.toVector().compare(VectorOperators.LT, (broadcast((byte)(length() - origin))));
        iota = iotaShuffle(origin, 1, true);
        return vspecies().zero().blend(this.rearrange(iota), blendMask);
    }
2020 
2021     /**
2022      * {@inheritDoc} <!--workaround-->
2023      */
2024     @Override
2025     public abstract
2026     ByteVector unslice(int origin, Vector<Byte> w, int part);
2027 
    /*package-private*/
    // Shared implementation for unslice(origin, w, part): deposits this
    // vector's lanes into w starting at lane origin; part selects whether
    // the low (0) or high (1) portion of the result is produced.
    final
    @ForceInline
    ByteVector
    unsliceTemplate(int origin, Vector<Byte> w, int part) {
        ByteVector that = (ByteVector) w;
        that.check(this);
        Objects.checkIndex(origin, length() + 1);  // origin in [0, VLENGTH]
        VectorShuffle<Byte> iota = iotaShuffle();
        VectorMask<Byte> blendMask = iota.toVector().compare((part == 0) ? VectorOperators.GE : VectorOperators.LT,
                                                                  (broadcast((byte)(origin))));
        iota = iotaShuffle(-origin, 1, true);
        return that.blend(this.rearrange(iota), blendMask);
    }
2042 
    /*package-private*/
    // Masked unslice: first blend this into a slice of w under the mask,
    // then unslice the combined vector back into position.
    final
    @ForceInline
    <M extends VectorMask<Byte>>
    ByteVector
    unsliceTemplate(Class<M> maskType, int origin, Vector<Byte> w, int part, M m) {
        ByteVector that = (ByteVector) w;
        that.check(this);
        ByteVector slice = that.sliceTemplate(origin, that);
        slice = slice.blendTemplate(maskType, this, m);
        return slice.unsliceTemplate(origin, w, part);
    }
2055 
2056     /**
2057      * {@inheritDoc} <!--workaround-->
2058      */
2059     @Override
2060     public abstract
2061     ByteVector unslice(int origin, Vector<Byte> w, int part, VectorMask<Byte> m);
2062 
2063     /**
2064      * {@inheritDoc} <!--workaround-->
2065      */
2066     @Override
2067     public abstract
2068     ByteVector unslice(int origin);
2069 
    /*package-private*/
    // Shared implementation for unslice(origin): shifts this vector's lanes
    // up by origin, filling the vacated low lanes with zero.
    final
    @ForceInline
    ByteVector
    unsliceTemplate(int origin) {
        Objects.checkIndex(origin, length() + 1);  // origin in [0, VLENGTH]
        VectorShuffle<Byte> iota = iotaShuffle();
        VectorMask<Byte> blendMask = iota.toVector().compare(VectorOperators.GE,
                                                                  (broadcast((byte)(origin))));
        iota = iotaShuffle(-origin, 1, true);
        return vspecies().zero().blend(this.rearrange(iota), blendMask);
    }
2082 
2083     private ArrayIndexOutOfBoundsException
2084     wrongPartForSlice(int part) {
2085         String msg = String.format("bad part number %d for slice operation",
2086                                    part);
2087         return new ArrayIndexOutOfBoundsException(msg);
2088     }
2089 
2090     /**
2091      * {@inheritDoc} <!--workaround-->
2092      */
2093     @Override
2094     public abstract
2095     ByteVector rearrange(VectorShuffle<Byte> m);
2096 
    /*package-private*/
    // Shared implementation for rearrange(shuffle); intrinsic candidate
    // whose scalar fallback gathers each lane from its shuffle source.
    @ForceInline
    final
    <S extends VectorShuffle<Byte>>
    ByteVector rearrangeTemplate(Class<S> shuffletype, S shuffle) {
        shuffle.checkIndexes();  // reject exceptional (negative) indexes
        return VectorSupport.rearrangeOp(
            getClass(), shuffletype, byte.class, length(),
            this, shuffle,
            (v1, s_) -> v1.uOp((i, a) -> {
                int ei = s_.laneSource(i);
                return v1.lane(ei);
            }));
    }
2111 
2112     /**
2113      * {@inheritDoc} <!--workaround-->
2114      */
2115     @Override
2116     public abstract
2117     ByteVector rearrange(VectorShuffle<Byte> s,
2118                                    VectorMask<Byte> m);
2119 
    /*package-private*/
    // Masked rearrange: compute the unmasked permutation (exceptional
    // sources yield 0), then verify no selected lane used an exceptional
    // index, and finally zero the unselected lanes.
    @ForceInline
    final
    <S extends VectorShuffle<Byte>>
    ByteVector rearrangeTemplate(Class<S> shuffletype,
                                           S shuffle,
                                           VectorMask<Byte> m) {
        ByteVector unmasked =
            VectorSupport.rearrangeOp(
                getClass(), shuffletype, byte.class, length(),
                this, shuffle,
                (v1, s_) -> v1.uOp((i, a) -> {
                    int ei = s_.laneSource(i);
                    return ei < 0 ? 0 : v1.lane(ei);
                }));
        VectorMask<Byte> valid = shuffle.laneIsValid();
        if (m.andNot(valid).anyTrue()) {
            // A selected lane has an exceptional index: checkIndexes()
            // is expected to throw the descriptive exception here.
            shuffle.checkIndexes();
            throw new AssertionError();
        }
        return broadcast((byte)0).blend(unmasked, m);
    }
2142 
2143     /**
2144      * {@inheritDoc} <!--workaround-->
2145      */
2146     @Override
2147     public abstract
2148     ByteVector rearrange(VectorShuffle<Byte> s,
2149                                    Vector<Byte> v);
2150 
    /*package-private*/
    // Two-vector rearrange: permute both inputs with the wrapped shuffle,
    // then take lanes with valid (non-exceptional) indexes from this
    // vector's result and the rest from the second vector's result.
    @ForceInline
    final
    <S extends VectorShuffle<Byte>>
    ByteVector rearrangeTemplate(Class<S> shuffletype,
                                           S shuffle,
                                           ByteVector v) {
        VectorMask<Byte> valid = shuffle.laneIsValid();
        @SuppressWarnings("unchecked")
        S ws = (S) shuffle.wrapIndexes();
        ByteVector r0 =
            VectorSupport.rearrangeOp(
                getClass(), shuffletype, byte.class, length(),
                this, ws,
                (v0, s_) -> v0.uOp((i, a) -> {
                    int ei = s_.laneSource(i);
                    return v0.lane(ei);
                }));
        ByteVector r1 =
            VectorSupport.rearrangeOp(
                getClass(), shuffletype, byte.class, length(),
                v, ws,
                (v1, s_) -> v1.uOp((i, a) -> {
                    int ei = s_.laneSource(i);
                    return v1.lane(ei);
                }));
        return r1.blend(r0, valid);
    }
2179 
2180     @ForceInline
2181     private final
2182     VectorShuffle<Byte> toShuffle0(ByteSpecies dsp) {
2183         byte[] a = toArray();
2184         int[] sa = new int[a.length];
2185         for (int i = 0; i < a.length; i++) {
2186             sa[i] = (int) a[i];
2187         }
2188         return VectorShuffle.fromArray(dsp, sa, 0);
2189     }
2190 
    /*package-private*/
    // Converts this vector to a shuffle of the same shape via the generic
    // CAST conversion; toShuffle0 is the scalar fallback.
    @ForceInline
    final
    VectorShuffle<Byte> toShuffleTemplate(Class<?> shuffleType) {
        ByteSpecies vsp = vspecies();
        return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST,
                                     getClass(), byte.class, length(),
                                     shuffleType, byte.class, length(),
                                     this, vsp,
                                     ByteVector::toShuffle0);
    }
2202 
2203     /**
2204      * {@inheritDoc} <!--workaround-->
2205      */
2206     @Override
2207     public abstract
2208     ByteVector selectFrom(Vector<Byte> v);
2209 
    /*package-private*/
    @ForceInline
    final ByteVector selectFromTemplate(ByteVector v) {
        // selectFrom is rearrange with the roles of the operands swapped:
        // this vector supplies the indexes, v supplies the values.
        return v.rearrange(this.toShuffle());
    }
2215 
2216     /**
2217      * {@inheritDoc} <!--workaround-->
2218      */
2219     @Override
2220     public abstract
2221     ByteVector selectFrom(Vector<Byte> s, VectorMask<Byte> m);
2222 
    /*package-private*/
    @ForceInline
    final ByteVector selectFromTemplate(ByteVector v,
                                                  AbstractMask<Byte> m) {
        // Masked form of selectFrom: unselected lanes follow rearrange(s, m).
        return v.rearrange(this.toShuffle(), m);
    }
2229 
2230     /// Ternary operations
2231 
2232     /**
2233      * Blends together the bits of two vectors under
2234      * the control of a third, which supplies mask bits.
2235      *
2236      * This is a lane-wise ternary operation which performs
2237      * a bitwise blending operation {@code (a&~c)|(b&c)}
2238      * to each lane.
2239      *
2240      * This method is also equivalent to the expression
2241      * {@link #lanewise(VectorOperators.Ternary,Vector,Vector)
2242      *    lanewise}{@code (}{@link VectorOperators#BITWISE_BLEND
2243      *    BITWISE_BLEND}{@code , bits, mask)}.
2244      *
2245      * @param bits input bits to blend into the current vector
2246      * @param mask a bitwise mask to enable blending of the input bits
2247      * @return the bitwise blend of the given bits into the current vector,
2248      *         under control of the bitwise mask
2249      * @see #bitwiseBlend(byte,byte)
2250      * @see #bitwiseBlend(byte,Vector)
2251      * @see #bitwiseBlend(Vector,byte)
2252      * @see VectorOperators#BITWISE_BLEND
2253      * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
2254      */
2255     @ForceInline
2256     public final
2257     ByteVector bitwiseBlend(Vector<Byte> bits, Vector<Byte> mask) {
2258         return lanewise(BITWISE_BLEND, bits, mask);
2259     }
2260 
2261     /**
2262      * Blends together the bits of a vector and a scalar under
2263      * the control of another scalar, which supplies mask bits.
2264      *
2265      * This is a lane-wise ternary operation which performs
2266      * a bitwise blending operation {@code (a&~c)|(b&c)}
2267      * to each lane.
2268      *
2269      * This method is also equivalent to the expression
2270      * {@link #lanewise(VectorOperators.Ternary,Vector,Vector)
2271      *    lanewise}{@code (}{@link VectorOperators#BITWISE_BLEND
2272      *    BITWISE_BLEND}{@code , bits, mask)}.
2273      *
2274      * @param bits input bits to blend into the current vector
2275      * @param mask a bitwise mask to enable blending of the input bits
2276      * @return the bitwise blend of the given bits into the current vector,
2277      *         under control of the bitwise mask
2278      * @see #bitwiseBlend(Vector,Vector)
2279      * @see VectorOperators#BITWISE_BLEND
2280      * @see #lanewise(VectorOperators.Ternary,byte,byte,VectorMask)
2281      */
2282     @ForceInline
2283     public final
2284     ByteVector bitwiseBlend(byte bits, byte mask) {
2285         return lanewise(BITWISE_BLEND, bits, mask);
2286     }
2287 
2288     /**
2289      * Blends together the bits of a vector and a scalar under
2290      * the control of another vector, which supplies mask bits.
2291      *
2292      * This is a lane-wise ternary operation which performs
2293      * a bitwise blending operation {@code (a&~c)|(b&c)}
2294      * to each lane.
2295      *
2296      * This method is also equivalent to the expression
2297      * {@link #lanewise(VectorOperators.Ternary,Vector,Vector)
2298      *    lanewise}{@code (}{@link VectorOperators#BITWISE_BLEND
2299      *    BITWISE_BLEND}{@code , bits, mask)}.
2300      *
2301      * @param bits input bits to blend into the current vector
2302      * @param mask a bitwise mask to enable blending of the input bits
2303      * @return the bitwise blend of the given bits into the current vector,
2304      *         under control of the bitwise mask
2305      * @see #bitwiseBlend(Vector,Vector)
2306      * @see VectorOperators#BITWISE_BLEND
2307      * @see #lanewise(VectorOperators.Ternary,byte,Vector,VectorMask)
2308      */
2309     @ForceInline
2310     public final
2311     ByteVector bitwiseBlend(byte bits, Vector<Byte> mask) {
2312         return lanewise(BITWISE_BLEND, bits, mask);
2313     }
2314 
2315     /**
2316      * Blends together the bits of two vectors under
2317      * the control of a scalar, which supplies mask bits.
2318      *
2319      * This is a lane-wise ternary operation which performs
2320      * a bitwise blending operation {@code (a&~c)|(b&c)}
2321      * to each lane.
2322      *
2323      * This method is also equivalent to the expression
2324      * {@link #lanewise(VectorOperators.Ternary,Vector,Vector)
2325      *    lanewise}{@code (}{@link VectorOperators#BITWISE_BLEND
2326      *    BITWISE_BLEND}{@code , bits, mask)}.
2327      *
2328      * @param bits input bits to blend into the current vector
2329      * @param mask a bitwise mask to enable blending of the input bits
2330      * @return the bitwise blend of the given bits into the current vector,
2331      *         under control of the bitwise mask
2332      * @see #bitwiseBlend(Vector,Vector)
2333      * @see VectorOperators#BITWISE_BLEND
2334      * @see #lanewise(VectorOperators.Ternary,Vector,byte,VectorMask)
2335      */
2336     @ForceInline
2337     public final
2338     ByteVector bitwiseBlend(Vector<Byte> bits, byte mask) {
2339         return lanewise(BITWISE_BLEND, bits, mask);
2340     }
2341 
2342 
2343     // Type specific horizontal reductions
2344 
2345     /**
2346      * Returns a value accumulated from all the lanes of this vector.
2347      *
2348      * This is an associative cross-lane reduction operation which
2349      * applies the specified operation to all the lane elements.
2350      * <p>
2351      * A few reduction operations do not support arbitrary reordering
2352      * of their operands, yet are included here because of their
2353      * usefulness.
2354      * <ul>
2355      * <li>
2356      * In the case of {@code FIRST_NONZERO}, the reduction returns
2357      * the value from the lowest-numbered non-zero lane.
2358      * <li>
2359      * All other reduction operations are fully commutative and
2360      * associative.  The implementation can choose any order of
2361      * processing, yet it will always produce the same result.
2362      * </ul>
2363      *
2364      * @param op the operation used to combine lane values
2365      * @return the accumulated result
2366      * @throws UnsupportedOperationException if this vector does
2367      *         not support the requested operation
2368      * @see #reduceLanes(VectorOperators.Associative,VectorMask)
2369      * @see #add(Vector)
2370      * @see #mul(Vector)
2371      * @see #min(Vector)
2372      * @see #max(Vector)
2373      * @see #and(Vector)
2374      * @see #or(Vector)
2375      * @see VectorOperators#XOR
2376      * @see VectorOperators#FIRST_NONZERO
2377      */
2378     public abstract byte reduceLanes(VectorOperators.Associative op);
2379 
2380     /**
2381      * Returns a value accumulated from selected lanes of this vector,
2382      * controlled by a mask.
2383      *
2384      * This is an associative cross-lane reduction operation which
2385      * applies the specified operation to the selected lane elements.
2386      * <p>
2387      * If no elements are selected, an operation-specific identity
2388      * value is returned.
2389      * <ul>
2390      * <li>
2391      * If the operation is
2392      *  {@code ADD}, {@code XOR}, {@code OR},
2393      * or {@code FIRST_NONZERO},
2394      * then the identity value is zero, the default {@code byte} value.
2395      * <li>
2396      * If the operation is {@code MUL},
2397      * then the identity value is one.
2398      * <li>
2399      * If the operation is {@code AND},
2400      * then the identity value is minus one (all bits set).
2401      * <li>
2402      * If the operation is {@code MAX},
2403      * then the identity value is {@code Byte.MIN_VALUE}.
2404      * <li>
2405      * If the operation is {@code MIN},
2406      * then the identity value is {@code Byte.MAX_VALUE}.
2407      * </ul>
2408      * <p>
2409      * A few reduction operations do not support arbitrary reordering
2410      * of their operands, yet are included here because of their
2411      * usefulness.
2412      * <ul>
2413      * <li>
2414      * In the case of {@code FIRST_NONZERO}, the reduction returns
2415      * the value from the lowest-numbered non-zero lane.
2416      * <li>
2417      * All other reduction operations are fully commutative and
2418      * associative.  The implementation can choose any order of
2419      * processing, yet it will always produce the same result.
2420      * </ul>
2421      *
2422      * @param op the operation used to combine lane values
2423      * @param m the mask controlling lane selection
2424      * @return the reduced result accumulated from the selected lane values
2425      * @throws UnsupportedOperationException if this vector does
2426      *         not support the requested operation
2427      * @see #reduceLanes(VectorOperators.Associative)
2428      */
2429     public abstract byte reduceLanes(VectorOperators.Associative op,
2430                                        VectorMask<Byte> m);
2431 
    /*package-private*/
    @ForceInline
    final
    byte reduceLanesTemplate(VectorOperators.Associative op,
                               VectorMask<Byte> m) {
        // Fill unselected lanes with the op's identity, then reduce them all.
        ByteVector v = reduceIdentityVector(op).blend(this, m);
        return v.reduceLanesTemplate(op);
    }
2440 
    /*package-private*/
    @ForceInline
    final
    byte reduceLanesTemplate(VectorOperators.Associative op) {
        // Unmasked reduction.  FIRST_NONZERO is handled in Java; all other
        // associative ops go through the reductionCoerced intrinsic with a
        // cached scalar fallback.
        if (op == FIRST_NONZERO) {
            // FIXME:  The JIT should handle this, and other scan ops also.
            VectorMask<Byte> thisNZ
                = this.viewAsIntegralLanes().compare(NE, (byte) 0);
            return this.lane(thisNZ.firstTrue());
        }
        int opc = opCode(op);
        return fromBits(VectorSupport.reductionCoerced(
            opc, getClass(), byte.class, length(),
            this,
            // Scalar fallback, built once per opcode and memoized in
            // REDUCE_IMPL; each case folds the lanes with rOp starting
            // from the operation's identity value.
            REDUCE_IMPL.find(op, opc, (opc_) -> {
              switch (opc_) {
              case VECTOR_OP_ADD: return v ->
                      toBits(v.rOp((byte)0, (i, a, b) -> (byte)(a + b)));
              case VECTOR_OP_MUL: return v ->
                      toBits(v.rOp((byte)1, (i, a, b) -> (byte)(a * b)));
              case VECTOR_OP_MIN: return v ->
                      toBits(v.rOp(MAX_OR_INF, (i, a, b) -> (byte) Math.min(a, b)));
              case VECTOR_OP_MAX: return v ->
                      toBits(v.rOp(MIN_OR_INF, (i, a, b) -> (byte) Math.max(a, b)));
              case VECTOR_OP_AND: return v ->
                      toBits(v.rOp((byte)-1, (i, a, b) -> (byte)(a & b)));
              case VECTOR_OP_OR: return v ->
                      toBits(v.rOp((byte)0, (i, a, b) -> (byte)(a | b)));
              case VECTOR_OP_XOR: return v ->
                      toBits(v.rOp((byte)0, (i, a, b) -> (byte)(a ^ b)));
              default: return null;
              }})));
    }
    // Cache of scalar reduction fallbacks, keyed by associative operator.
    private static final
    ImplCache<Associative,Function<ByteVector,Long>> REDUCE_IMPL
        = new ImplCache<>(Associative.class, ByteVector.class);
2477 
    // Returns a vector whose every lane holds the identity element of the
    // given associative operation (e.g. 0 for ADD/OR/XOR, 1 for MUL,
    // -1 for AND, and the saturating bounds for MIN/MAX).  Used by the
    // masked reduction to neutralize unselected lanes.
    private
    @ForceInline
    ByteVector reduceIdentityVector(VectorOperators.Associative op) {
        int opc = opCode(op);
        UnaryOperator<ByteVector> fn
            = REDUCE_ID_IMPL.find(op, opc, (opc_) -> {
                switch (opc_) {
                case VECTOR_OP_ADD:
                case VECTOR_OP_OR:
                case VECTOR_OP_XOR:
                    return v -> v.broadcast(0);
                case VECTOR_OP_MUL:
                    return v -> v.broadcast(1);
                case VECTOR_OP_AND:
                    return v -> v.broadcast(-1);
                case VECTOR_OP_MIN:
                    // identity for min is the largest possible value
                    return v -> v.broadcast(MAX_OR_INF);
                case VECTOR_OP_MAX:
                    // identity for max is the smallest possible value
                    return v -> v.broadcast(MIN_OR_INF);
                default: return null;
                }
            });
        return fn.apply(this);
    }
    // Cache of identity-vector factories, keyed by associative operator.
    private static final
    ImplCache<Associative,UnaryOperator<ByteVector>> REDUCE_ID_IMPL
        = new ImplCache<>(Associative.class, ByteVector.class);

    // MIN/MAX reduction identities.  The *_OR_INF names come from the
    // shared template across lane types (floating-point variants presumably
    // use infinities here — hence the suffix).
    private static final byte MIN_OR_INF = Byte.MIN_VALUE;
    private static final byte MAX_OR_INF = Byte.MAX_VALUE;
2508 
2509     public @Override abstract long reduceLanesToLong(VectorOperators.Associative op);
2510     public @Override abstract long reduceLanesToLong(VectorOperators.Associative op,
2511                                                      VectorMask<Byte> m);
2512 
2513     // Type specific accessors
2514 
2515     /**
2516      * Gets the lane element at lane index {@code i}
2517      *
2518      * @param i the lane index
2519      * @return the lane element at lane index {@code i}
2520      * @throws IllegalArgumentException if the index is is out of range
2521      * ({@code < 0 || >= length()})
2522      */
2523     public abstract byte lane(int i);
2524 
2525     /**
2526      * Replaces the lane element of this vector at lane index {@code i} with
2527      * value {@code e}.
2528      *
2529      * This is a cross-lane operation and behaves as if it returns the result
2530      * of blending this vector with an input vector that is the result of
2531      * broadcasting {@code e} and a mask that has only one lane set at lane
2532      * index {@code i}.
2533      *
2534      * @param i the lane index of the lane element to be replaced
2535      * @param e the value to be placed
2536      * @return the result of replacing the lane element of this vector at lane
2537      * index {@code i} with value {@code e}.
2538      * @throws IllegalArgumentException if the index is is out of range
2539      * ({@code < 0 || >= length()})
2540      */
2541     public abstract ByteVector withLane(int i, byte e);
2542 
2543     // Memory load operations
2544 
2545     /**
2546      * Returns an array of type {@code byte[]}
2547      * containing all the lane values.
2548      * The array length is the same as the vector length.
2549      * The array elements are stored in lane order.
2550      * <p>
2551      * This method behaves as if it stores
2552      * this vector into an allocated array
2553      * (using {@link #intoArray(byte[], int) intoArray})
2554      * and returns the array as follows:
2555      * <pre>{@code
2556      *   byte[] a = new byte[this.length()];
2557      *   this.intoArray(a, 0);
2558      *   return a;
2559      * }</pre>
2560      *
2561      * @return an array containing the lane values of this vector
2562      */
2563     @ForceInline
2564     @Override
2565     public final byte[] toArray() {
2566         byte[] a = new byte[vspecies().laneCount()];
2567         intoArray(a, 0);
2568         return a;
2569     }
2570 
    /** {@inheritDoc} <!--workaround-->
     * @implNote
     * When this method is used on vectors
     * of type {@code ByteVector},
     * there will be no loss of precision or range,
     * and so no {@code UnsupportedOperationException} will
     * be thrown.
     */
    @ForceInline
    @Override
    public final int[] toIntArray() {
        byte[] a = toArray();
        int[] res = new int[a.length];
        for (int i = 0; i < a.length; i++) {
            byte e = a[i];
            // widen each byte lane; per the @implNote the check cannot fail
            res[i] = (int) ByteSpecies.toIntegralChecked(e, true);
        }
        return res;
    }
2590 
    /** {@inheritDoc} <!--workaround-->
     * @implNote
     * When this method is used on vectors
     * of type {@code ByteVector},
     * there will be no loss of precision or range,
     * and so no {@code UnsupportedOperationException} will
     * be thrown.
     */
    @ForceInline
    @Override
    public final long[] toLongArray() {
        byte[] a = toArray();
        long[] res = new long[a.length];
        for (int i = 0; i < a.length; i++) {
            byte e = a[i];
            // widen each byte lane; per the @implNote the check cannot fail
            res[i] = ByteSpecies.toIntegralChecked(e, false);
        }
        return res;
    }
2610 
    /** {@inheritDoc} <!--workaround-->
     * @implNote
     * When this method is used on vectors
     * of type {@code ByteVector},
     * there will be no loss of precision.
     */
    @ForceInline
    @Override
    public final double[] toDoubleArray() {
        byte[] a = toArray();
        double[] res = new double[a.length];
        for (int i = 0; i < a.length; i++) {
            // every byte value is exactly representable as a double
            res[i] = (double) a[i];
        }
        return res;
    }
2627 
2628     /**
2629      * Loads a vector from a byte array starting at an offset.
2630      * Bytes are composed into primitive lane elements according
2631      * to the specified byte order.
2632      * The vector is arranged into lanes according to
2633      * <a href="Vector.html#lane-order">memory ordering</a>.
2634      * <p>
2635      * This method behaves as if it returns the result of calling
2636      * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
2637      * fromByteBuffer()} as follows:
2638      * <pre>{@code
2639      * var bb = ByteBuffer.wrap(a);
2640      * var m = species.maskAll(true);
2641      * return fromByteBuffer(species, bb, offset, bo, m);
2642      * }</pre>
2643      *
2644      * @param species species of desired vector
2645      * @param a the byte array
2646      * @param offset the offset into the array
2647      * @param bo the intended byte order
2648      * @return a vector loaded from a byte array
2649      * @throws IndexOutOfBoundsException
2650      *         if {@code offset+N*ESIZE < 0}
2651      *         or {@code offset+(N+1)*ESIZE > a.length}
2652      *         for any lane {@code N} in the vector
2653      */
2654     @ForceInline
2655     public static
2656     ByteVector fromByteArray(VectorSpecies<Byte> species,
2657                                        byte[] a, int offset,
2658                                        ByteOrder bo) {
2659         offset = checkFromIndexSize(offset, species.vectorByteSize(), a.length);
2660         ByteSpecies vsp = (ByteSpecies) species;
2661         return vsp.dummyVector().fromByteArray0(a, offset).maybeSwap(bo);
2662     }
2663 
2664     /**
2665      * Loads a vector from a byte array starting at an offset
2666      * and using a mask.
2667      * Lanes where the mask is unset are filled with the default
2668      * value of {@code byte} (zero).
2669      * Bytes are composed into primitive lane elements according
2670      * to the specified byte order.
2671      * The vector is arranged into lanes according to
2672      * <a href="Vector.html#lane-order">memory ordering</a>.
2673      * <p>
2674      * This method behaves as if it returns the result of calling
2675      * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
2676      * fromByteBuffer()} as follows:
2677      * <pre>{@code
2678      * var bb = ByteBuffer.wrap(a);
2679      * return fromByteBuffer(species, bb, offset, bo, m);
2680      * }</pre>
2681      *
2682      * @param species species of desired vector
2683      * @param a the byte array
2684      * @param offset the offset into the array
2685      * @param bo the intended byte order
2686      * @param m the mask controlling lane selection
2687      * @return a vector loaded from a byte array
2688      * @throws IndexOutOfBoundsException
2689      *         if {@code offset+N*ESIZE < 0}
2690      *         or {@code offset+(N+1)*ESIZE > a.length}
2691      *         for any lane {@code N} in the vector
2692      *         where the mask is set
2693      */
    @ForceInline
    public static
    ByteVector fromByteArray(VectorSpecies<Byte> species,
                                       byte[] a, int offset,
                                       ByteOrder bo,
                                       VectorMask<Byte> m) {
        ByteSpecies vsp = (ByteSpecies) species;
        if (offset >= 0 && offset <= (a.length - species.vectorByteSize())) {
            // Fast path: the full vector window is in bounds, so do an
            // unmasked load and blend with zero to clear unset lanes.
            ByteVector zero = vsp.zero();
            ByteVector v = zero.fromByteArray0(a, offset);
            return zero.blend(v.maybeSwap(bo), m);
        }

        // FIXME: optimize
        // Slow path: range-check only the lanes the mask selects, then
        // load lane-by-lane through a wrapping ByteBuffer.
        checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
        ByteBuffer wb = wrapper(a, bo);
        return vsp.ldOp(wb, offset, (AbstractMask<Byte>)m,
                   (wb_, o, i)  -> wb_.get(o + i * 1));
    }
2713 
2714     /**
2715      * Loads a vector from an array of type {@code byte[]}
2716      * starting at an offset.
2717      * For each vector lane, where {@code N} is the vector lane index, the
2718      * array element at index {@code offset + N} is placed into the
2719      * resulting vector at lane index {@code N}.
2720      *
2721      * @param species species of desired vector
2722      * @param a the array
2723      * @param offset the offset into the array
2724      * @return the vector loaded from an array
2725      * @throws IndexOutOfBoundsException
2726      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
2727      *         for any lane {@code N} in the vector
2728      */
2729     @ForceInline
2730     public static
2731     ByteVector fromArray(VectorSpecies<Byte> species,
2732                                    byte[] a, int offset) {
2733         offset = checkFromIndexSize(offset, species.length(), a.length);
2734         ByteSpecies vsp = (ByteSpecies) species;
2735         return vsp.dummyVector().fromArray0(a, offset);
2736     }
2737 
2738     /**
2739      * Loads a vector from an array of type {@code byte[]}
2740      * starting at an offset and using a mask.
2741      * Lanes where the mask is unset are filled with the default
2742      * value of {@code byte} (zero).
2743      * For each vector lane, where {@code N} is the vector lane index,
2744      * if the mask lane at index {@code N} is set then the array element at
2745      * index {@code offset + N} is placed into the resulting vector at lane index
2746      * {@code N}, otherwise the default element value is placed into the
2747      * resulting vector at lane index {@code N}.
2748      *
2749      * @param species species of desired vector
2750      * @param a the array
2751      * @param offset the offset into the array
2752      * @param m the mask controlling lane selection
2753      * @return the vector loaded from an array
2754      * @throws IndexOutOfBoundsException
2755      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
2756      *         for any lane {@code N} in the vector
2757      *         where the mask is set
2758      */
    @ForceInline
    public static
    ByteVector fromArray(VectorSpecies<Byte> species,
                                   byte[] a, int offset,
                                   VectorMask<Byte> m) {
        ByteSpecies vsp = (ByteSpecies) species;
        if (offset >= 0 && offset <= (a.length - species.length())) {
            // Fast path: the whole lane range is in bounds, so do an
            // unmasked load and blend with zero to clear unset lanes.
            ByteVector zero = vsp.zero();
            return zero.blend(zero.fromArray0(a, offset), m);
        }

        // FIXME: optimize
        // Slow path: range-check only set lanes, then load element-wise.
        checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
        return vsp.vOp(m, i -> a[offset + i]);
    }
2774 
2775     /**
2776      * Gathers a new vector composed of elements from an array of type
2777      * {@code byte[]},
2778      * using indexes obtained by adding a fixed {@code offset} to a
2779      * series of secondary offsets from an <em>index map</em>.
2780      * The index map is a contiguous sequence of {@code VLENGTH}
2781      * elements in a second array of {@code int}s, starting at a given
2782      * {@code mapOffset}.
2783      * <p>
2784      * For each vector lane, where {@code N} is the vector lane index,
2785      * the lane is loaded from the array
2786      * element {@code a[f(N)]}, where {@code f(N)} is the
2787      * index mapping expression
     * {@code offset + indexMap[mapOffset + N]}.
2789      *
2790      * @param species species of desired vector
2791      * @param a the array
2792      * @param offset the offset into the array, may be negative if relative
2793      * indexes in the index map compensate to produce a value within the
2794      * array bounds
2795      * @param indexMap the index map
2796      * @param mapOffset the offset into the index map
2797      * @return the vector loaded from the indexed elements of the array
2798      * @throws IndexOutOfBoundsException
2799      *         if {@code mapOffset+N < 0}
2800      *         or if {@code mapOffset+N >= indexMap.length},
2801      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
2802      *         is an invalid index into {@code a},
2803      *         for any lane {@code N} in the vector
2804      * @see ByteVector#toIntArray()
2805      */
    @ForceInline
    public static
    ByteVector fromArray(VectorSpecies<Byte> species,
                                   byte[] a, int offset,
                                   int[] indexMap, int mapOffset) {
        // Scalar gather: each lane n reads a[offset + indexMap[mapOffset + n]];
        // bounds checks come from the array accesses themselves.
        ByteSpecies vsp = (ByteSpecies) species;
        return vsp.vOp(n -> a[offset + indexMap[mapOffset + n]]);
    }
2814 
2815     /**
2816      * Gathers a new vector composed of elements from an array of type
2817      * {@code byte[]},
2818      * under the control of a mask, and
2819      * using indexes obtained by adding a fixed {@code offset} to a
2820      * series of secondary offsets from an <em>index map</em>.
2821      * The index map is a contiguous sequence of {@code VLENGTH}
2822      * elements in a second array of {@code int}s, starting at a given
2823      * {@code mapOffset}.
2824      * <p>
2825      * For each vector lane, where {@code N} is the vector lane index,
2826      * if the lane is set in the mask,
2827      * the lane is loaded from the array
2828      * element {@code a[f(N)]}, where {@code f(N)} is the
2829      * index mapping expression
     * {@code offset + indexMap[mapOffset + N]}.
2831      * Unset lanes in the resulting vector are set to zero.
2832      *
2833      * @param species species of desired vector
2834      * @param a the array
2835      * @param offset the offset into the array, may be negative if relative
2836      * indexes in the index map compensate to produce a value within the
2837      * array bounds
2838      * @param indexMap the index map
2839      * @param mapOffset the offset into the index map
2840      * @param m the mask controlling lane selection
2841      * @return the vector loaded from the indexed elements of the array
2842      * @throws IndexOutOfBoundsException
2843      *         if {@code mapOffset+N < 0}
2844      *         or if {@code mapOffset+N >= indexMap.length},
2845      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
2846      *         is an invalid index into {@code a},
2847      *         for any lane {@code N} in the vector
2848      *         where the mask is set
2849      * @see ByteVector#toIntArray()
2850      */
    @ForceInline
    public static
    ByteVector fromArray(VectorSpecies<Byte> species,
                                   byte[] a, int offset,
                                   int[] indexMap, int mapOffset,
                                   VectorMask<Byte> m) {
        // Masked scalar gather: only set lanes read through the index map;
        // unset lanes receive the default value (zero) from vOp.
        ByteSpecies vsp = (ByteSpecies) species;
        return vsp.vOp(m, n -> a[offset + indexMap[mapOffset + n]]);
    }
2860 
2861 
2862     /**
2863      * Loads a vector from an array of type {@code boolean[]}
2864      * starting at an offset.
2865      * For each vector lane, where {@code N} is the vector lane index, the
2866      * array element at index {@code offset + N}
2867      * is first converted to a {@code byte} value and then
2868      * placed into the resulting vector at lane index {@code N}.
2869      * <p>
2870      * A {@code boolean} value is converted to a {@code byte} value by applying the
2871      * expression {@code (byte) (b ? 1 : 0)}, where {@code b} is the {@code boolean} value.
2872      *
2873      * @param species species of desired vector
2874      * @param a the array
2875      * @param offset the offset into the array
2876      * @return the vector loaded from an array
2877      * @throws IndexOutOfBoundsException
2878      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
2879      *         for any lane {@code N} in the vector
2880      */
2881     @ForceInline
2882     public static
2883     ByteVector fromBooleanArray(VectorSpecies<Byte> species,
2884                                           boolean[] a, int offset) {
2885         offset = checkFromIndexSize(offset, species.length(), a.length);
2886         ByteSpecies vsp = (ByteSpecies) species;
2887         return vsp.dummyVector().fromBooleanArray0(a, offset);
2888     }
2889 
2890     /**
2891      * Loads a vector from an array of type {@code boolean[]}
2892      * starting at an offset and using a mask.
2893      * Lanes where the mask is unset are filled with the default
2894      * value of {@code byte} (zero).
2895      * For each vector lane, where {@code N} is the vector lane index,
2896      * if the mask lane at index {@code N} is set then the array element at
2897      * index {@code offset + N}
2898      * is first converted to a {@code byte} value and then
2899      * placed into the resulting vector at lane index
2900      * {@code N}, otherwise the default element value is placed into the
2901      * resulting vector at lane index {@code N}.
2902      * <p>
2903      * A {@code boolean} value is converted to a {@code byte} value by applying the
2904      * expression {@code (byte) (b ? 1 : 0)}, where {@code b} is the {@code boolean} value.
2905      *
2906      * @param species species of desired vector
2907      * @param a the array
2908      * @param offset the offset into the array
2909      * @param m the mask controlling lane selection
2910      * @return the vector loaded from an array
2911      * @throws IndexOutOfBoundsException
2912      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
2913      *         for any lane {@code N} in the vector
2914      *         where the mask is set
2915      */
    @ForceInline
    public static
    ByteVector fromBooleanArray(VectorSpecies<Byte> species,
                                          boolean[] a, int offset,
                                          VectorMask<Byte> m) {
        ByteSpecies vsp = (ByteSpecies) species;
        if (offset >= 0 && offset <= (a.length - species.length())) {
            // Fast path: the whole lane range is in bounds, so do an
            // unmasked load and blend with zero to clear unset lanes.
            ByteVector zero = vsp.zero();
            return zero.blend(zero.fromBooleanArray0(a, offset), m);
        }

        // FIXME: optimize
        // Slow path: range-check only set lanes; each selected lane
        // converts its boolean to (byte) 1 or 0.
        checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
        return vsp.vOp(m, i -> (byte) (a[offset + i] ? 1 : 0));
    }
2931 
2932     /**
2933      * Gathers a new vector composed of elements from an array of type
2934      * {@code boolean[]},
2935      * using indexes obtained by adding a fixed {@code offset} to a
2936      * series of secondary offsets from an <em>index map</em>.
2937      * The index map is a contiguous sequence of {@code VLENGTH}
2938      * elements in a second array of {@code int}s, starting at a given
2939      * {@code mapOffset}.
2940      * <p>
2941      * For each vector lane, where {@code N} is the vector lane index,
2942      * the lane is loaded from the expression
2943      * {@code (byte) (a[f(N)] ? 1 : 0)}, where {@code f(N)} is the
2944      * index mapping expression
     * {@code offset + indexMap[mapOffset + N]}.
2946      *
2947      * @param species species of desired vector
2948      * @param a the array
2949      * @param offset the offset into the array, may be negative if relative
2950      * indexes in the index map compensate to produce a value within the
2951      * array bounds
2952      * @param indexMap the index map
2953      * @param mapOffset the offset into the index map
2954      * @return the vector loaded from the indexed elements of the array
2955      * @throws IndexOutOfBoundsException
2956      *         if {@code mapOffset+N < 0}
2957      *         or if {@code mapOffset+N >= indexMap.length},
2958      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
2959      *         is an invalid index into {@code a},
2960      *         for any lane {@code N} in the vector
2961      * @see ByteVector#toIntArray()
2962      */
    @ForceInline
    public static
    ByteVector fromBooleanArray(VectorSpecies<Byte> species,
                                          boolean[] a, int offset,
                                          int[] indexMap, int mapOffset) {
        // FIXME: optimize
        // Scalar gather: each lane n reads a[offset + indexMap[mapOffset + n]]
        // and converts the boolean to (byte) 1 or 0.
        ByteSpecies vsp = (ByteSpecies) species;
        return vsp.vOp(n -> (byte) (a[offset + indexMap[mapOffset + n]] ? 1 : 0));
    }
2972 
2973     /**
2974      * Gathers a new vector composed of elements from an array of type
2975      * {@code boolean[]},
2976      * under the control of a mask, and
2977      * using indexes obtained by adding a fixed {@code offset} to a
2978      * series of secondary offsets from an <em>index map</em>.
2979      * The index map is a contiguous sequence of {@code VLENGTH}
2980      * elements in a second array of {@code int}s, starting at a given
2981      * {@code mapOffset}.
2982      * <p>
2983      * For each vector lane, where {@code N} is the vector lane index,
2984      * if the lane is set in the mask,
2985      * the lane is loaded from the expression
2986      * {@code (byte) (a[f(N)] ? 1 : 0)}, where {@code f(N)} is the
2987      * index mapping expression
     * {@code offset + indexMap[mapOffset + N]}.
2989      * Unset lanes in the resulting vector are set to zero.
2990      *
2991      * @param species species of desired vector
2992      * @param a the array
2993      * @param offset the offset into the array, may be negative if relative
2994      * indexes in the index map compensate to produce a value within the
2995      * array bounds
2996      * @param indexMap the index map
2997      * @param mapOffset the offset into the index map
2998      * @param m the mask controlling lane selection
2999      * @return the vector loaded from the indexed elements of the array
3000      * @throws IndexOutOfBoundsException
3001      *         if {@code mapOffset+N < 0}
3002      *         or if {@code mapOffset+N >= indexMap.length},
3003      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
3004      *         is an invalid index into {@code a},
3005      *         for any lane {@code N} in the vector
3006      *         where the mask is set
3007      * @see ByteVector#toIntArray()
3008      */
    @ForceInline
    public static
    ByteVector fromBooleanArray(VectorSpecies<Byte> species,
                                          boolean[] a, int offset,
                                          int[] indexMap, int mapOffset,
                                          VectorMask<Byte> m) {
        // FIXME: optimize
        // Masked scalar gather: only set lanes read through the index map;
        // unset lanes receive the default value (zero) from vOp.
        ByteSpecies vsp = (ByteSpecies) species;
        return vsp.vOp(m, n -> (byte) (a[offset + indexMap[mapOffset + n]] ? 1 : 0));
    }
3019 
3020     /**
3021      * Loads a vector from a {@linkplain ByteBuffer byte buffer}
3022      * starting at an offset into the byte buffer.
3023      * Bytes are composed into primitive lane elements according
3024      * to the specified byte order.
3025      * The vector is arranged into lanes according to
3026      * <a href="Vector.html#lane-order">memory ordering</a>.
3027      * <p>
3028      * This method behaves as if it returns the result of calling
3029      * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
3030      * fromByteBuffer()} as follows:
3031      * <pre>{@code
3032      * var m = species.maskAll(true);
3033      * return fromByteBuffer(species, bb, offset, bo, m);
3034      * }</pre>
3035      *
3036      * @param species species of desired vector
3037      * @param bb the byte buffer
3038      * @param offset the offset into the byte buffer
3039      * @param bo the intended byte order
3040      * @return a vector loaded from a byte buffer
3041      * @throws IndexOutOfBoundsException
3042      *         if {@code offset+N*1 < 0}
3043      *         or {@code offset+N*1 >= bb.limit()}
3044      *         for any lane {@code N} in the vector
3045      */
    @ForceInline
    public static
    ByteVector fromByteBuffer(VectorSpecies<Byte> species,
                                        ByteBuffer bb, int offset,
                                        ByteOrder bo) {
        // Bounds-check a full vector window against the buffer limit, load
        // through the intrinsic path, then apply maybeSwap(bo) to honor the
        // requested byte order.
        offset = checkFromIndexSize(offset, species.vectorByteSize(), bb.limit());
        ByteSpecies vsp = (ByteSpecies) species;
        return vsp.dummyVector().fromByteBuffer0(bb, offset).maybeSwap(bo);
    }
3055 
3056     /**
3057      * Loads a vector from a {@linkplain ByteBuffer byte buffer}
3058      * starting at an offset into the byte buffer
3059      * and using a mask.
3060      * Lanes where the mask is unset are filled with the default
3061      * value of {@code byte} (zero).
3062      * Bytes are composed into primitive lane elements according
3063      * to the specified byte order.
3064      * The vector is arranged into lanes according to
3065      * <a href="Vector.html#lane-order">memory ordering</a>.
3066      * <p>
3067      * The following pseudocode illustrates the behavior:
3068      * <pre>{@code
3069      * ByteBuffer eb = bb.duplicate()
3070      *     .position(offset);
3071      * byte[] ar = new byte[species.length()];
3072      * for (int n = 0; n < ar.length; n++) {
3073      *     if (m.laneIsSet(n)) {
3074      *         ar[n] = eb.get(n);
3075      *     }
3076      * }
3077      * ByteVector r = ByteVector.fromArray(species, ar, 0);
3078      * }</pre>
3079      * @implNote
3080      * The byte order argument is ignored.
3081      *
3082      * @param species species of desired vector
3083      * @param bb the byte buffer
3084      * @param offset the offset into the byte buffer
3085      * @param bo the intended byte order
3086      * @param m the mask controlling lane selection
3087      * @return a vector loaded from a byte buffer
3088      * @throws IndexOutOfBoundsException
3089      *         if {@code offset+N*1 < 0}
3090      *         or {@code offset+N*1 >= bb.limit()}
3091      *         for any lane {@code N} in the vector
3092      *         where the mask is set
3093      */
    @ForceInline
    public static
    ByteVector fromByteBuffer(VectorSpecies<Byte> species,
                                        ByteBuffer bb, int offset,
                                        ByteOrder bo,
                                        VectorMask<Byte> m) {
        ByteSpecies vsp = (ByteSpecies) species;
        if (offset >= 0 && offset <= (bb.limit() - species.vectorByteSize())) {
            // Fast path: the full vector window is within the buffer limit,
            // so do an unmasked load and blend with zero to clear unset lanes.
            ByteVector zero = vsp.zero();
            ByteVector v = zero.fromByteBuffer0(bb, offset);
            return zero.blend(v.maybeSwap(bo), m);
        }

        // FIXME: optimize
        // Slow path: range-check only the lanes the mask selects, then
        // load lane-by-lane from the (possibly re-ordered) buffer view.
        checkMaskFromIndexSize(offset, vsp, m, 1, bb.limit());
        ByteBuffer wb = wrapper(bb, bo);
        return vsp.ldOp(wb, offset, (AbstractMask<Byte>)m,
                   (wb_, o, i)  -> wb_.get(o + i * 1));
    }
3113 
3114     // Memory store operations
3115 
3116     /**
3117      * Stores this vector into an array of type {@code byte[]}
3118      * starting at an offset.
3119      * <p>
3120      * For each vector lane, where {@code N} is the vector lane index,
3121      * the lane element at index {@code N} is stored into the array
3122      * element {@code a[offset+N]}.
3123      *
3124      * @param a the array, of type {@code byte[]}
3125      * @param offset the offset into the array
3126      * @throws IndexOutOfBoundsException
3127      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
3128      *         for any lane {@code N} in the vector
3129      */
    @ForceInline
    public final
    void intoArray(byte[] a, int offset) {
        offset = checkFromIndexSize(offset, length(), a.length);
        ByteSpecies vsp = vspecies();
        // Intrinsified store; the trailing lambda is the scalar fallback
        // used when the intrinsic is not applied, writing lane i to
        // a[offset + i].
        VectorSupport.store(
            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
            a, arrayAddress(a, offset),
            this,
            a, offset,
            (arr, off, v)
            -> v.stOp(arr, off,
                      (arr_, off_, i, e) -> arr_[off_ + i] = e));
    }
3144 
3145     /**
3146      * Stores this vector into an array of type {@code byte[]}
3147      * starting at offset and using a mask.
3148      * <p>
3149      * For each vector lane, where {@code N} is the vector lane index,
3150      * the lane element at index {@code N} is stored into the array
3151      * element {@code a[offset+N]}.
3152      * If the mask lane at {@code N} is unset then the corresponding
3153      * array element {@code a[offset+N]} is left unchanged.
3154      * <p>
3155      * Array range checking is done for lanes where the mask is set.
3156      * Lanes where the mask is unset are not stored and do not need
3157      * to correspond to legitimate elements of {@code a}.
3158      * That is, unset lanes may correspond to array indexes less than
3159      * zero or beyond the end of the array.
3160      *
3161      * @param a the array, of type {@code byte[]}
3162      * @param offset the offset into the array
3163      * @param m the mask controlling lane storage
3164      * @throws IndexOutOfBoundsException
3165      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
3166      *         for any lane {@code N} in the vector
3167      *         where the mask is set
3168      */
    @ForceInline
    public final
    void intoArray(byte[] a, int offset,
                   VectorMask<Byte> m) {
        if (m.allTrue()) {
            // Fully-set mask: delegate to the faster unmasked store.
            intoArray(a, offset);
        } else {
            // FIXME: optimize
            ByteSpecies vsp = vspecies();
            // Bounds are checked only for lanes where the mask is set.
            checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
            stOp(a, offset, m, (arr, off, i, v) -> arr[off+i] = v);
        }
    }
3182 
3183     /**
3184      * Scatters this vector into an array of type {@code byte[]}
3185      * using indexes obtained by adding a fixed {@code offset} to a
3186      * series of secondary offsets from an <em>index map</em>.
3187      * The index map is a contiguous sequence of {@code VLENGTH}
3188      * elements in a second array of {@code int}s, starting at a given
3189      * {@code mapOffset}.
3190      * <p>
3191      * For each vector lane, where {@code N} is the vector lane index,
3192      * the lane element at index {@code N} is stored into the array
3193      * element {@code a[f(N)]}, where {@code f(N)} is the
3194      * index mapping expression
     * {@code offset + indexMap[mapOffset + N]}.
3196      *
3197      * @param a the array
3198      * @param offset an offset to combine with the index map offsets
3199      * @param indexMap the index map
3200      * @param mapOffset the offset into the index map
3201      * @throws IndexOutOfBoundsException
3202      *         if {@code mapOffset+N < 0}
3203      *         or if {@code mapOffset+N >= indexMap.length},
3204      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
3205      *         is an invalid index into {@code a},
3206      *         for any lane {@code N} in the vector
3207      * @see ByteVector#toIntArray()
3208      */
    @ForceInline
    public final
    void intoArray(byte[] a, int offset,
                   int[] indexMap, int mapOffset) {
        // FIXME: optimize
        // Scalar scatter: each lane e is stored at a[offset + indexMap[mapOffset + i]].
        stOp(a, offset,
             (arr, off, i, e) -> {
                 int j = indexMap[mapOffset + i];
                 arr[off + j] = e;
             });
    }
3219 
3220     /**
3221      * Scatters this vector into an array of type {@code byte[]},
3222      * under the control of a mask, and
3223      * using indexes obtained by adding a fixed {@code offset} to a
3224      * series of secondary offsets from an <em>index map</em>.
3225      * The index map is a contiguous sequence of {@code VLENGTH}
3226      * elements in a second array of {@code int}s, starting at a given
3227      * {@code mapOffset}.
3228      * <p>
3229      * For each vector lane, where {@code N} is the vector lane index,
3230      * if the mask lane at index {@code N} is set then
3231      * the lane element at index {@code N} is stored into the array
3232      * element {@code a[f(N)]}, where {@code f(N)} is the
3233      * index mapping expression
     * {@code offset + indexMap[mapOffset + N]}.
3235      *
3236      * @param a the array
3237      * @param offset an offset to combine with the index map offsets
3238      * @param indexMap the index map
3239      * @param mapOffset the offset into the index map
3240      * @param m the mask
3241      * @throws IndexOutOfBoundsException
3242      *         if {@code mapOffset+N < 0}
3243      *         or if {@code mapOffset+N >= indexMap.length},
3244      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
3245      *         is an invalid index into {@code a},
3246      *         for any lane {@code N} in the vector
3247      *         where the mask is set
3248      * @see ByteVector#toIntArray()
3249      */
    @ForceInline
    public final
    void intoArray(byte[] a, int offset,
                   int[] indexMap, int mapOffset,
                   VectorMask<Byte> m) {
        // Scalar masked scatter: only lanes where the mask is set are
        // stored, each at its mapped index.
        stOp(a, offset, m,
             (arr, off, i, e) -> {
                 int j = indexMap[mapOffset + i];
                 arr[off + j] = e;
             });
    }
3261 
3262 
3263     /**
3264      * Stores this vector into an array of type {@code boolean[]}
3265      * starting at an offset.
3266      * <p>
3267      * For each vector lane, where {@code N} is the vector lane index,
3268      * the lane element at index {@code N}
3269      * is first converted to a {@code boolean} value and then
3270      * stored into the array element {@code a[offset+N]}.
3271      * <p>
3272      * A {@code byte} value is converted to a {@code boolean} value by applying the
3273      * expression {@code (b & 1) != 0} where {@code b} is the byte value.
3274      *
3275      * @param a the array
3276      * @param offset the offset into the array
3277      * @throws IndexOutOfBoundsException
3278      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
3279      *         for any lane {@code N} in the vector
3280      */
    @ForceInline
    public final
    void intoBooleanArray(boolean[] a, int offset) {
        offset = checkFromIndexSize(offset, length(), a.length);
        ByteSpecies vsp = vspecies();
        // Normalize each lane to 0 or 1 so the stored boolean matches
        // the documented (b & 1) != 0 conversion.
        ByteVector normalized = this.and((byte) 1);
        VectorSupport.store(
            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
            a, booleanArrayAddress(a, offset),
            normalized,
            a, offset,
            // Scalar fallback, used only when the intrinsic does not apply.
            (arr, off, v)
            -> v.stOp(arr, off,
                      (arr_, off_, i, e) -> arr_[off_ + i] = (e & 1) != 0));
    }
3296 
3297     /**
3298      * Stores this vector into an array of type {@code boolean[]}
3299      * starting at offset and using a mask.
3300      * <p>
3301      * For each vector lane, where {@code N} is the vector lane index,
3302      * the lane element at index {@code N}
3303      * is first converted to a {@code boolean} value and then
3304      * stored into the array element {@code a[offset+N]}.
3305      * If the mask lane at {@code N} is unset then the corresponding
3306      * array element {@code a[offset+N]} is left unchanged.
3307      * <p>
3308      * A {@code byte} value is converted to a {@code boolean} value by applying the
3309      * expression {@code (b & 1) != 0} where {@code b} is the byte value.
3310      * <p>
3311      * Array range checking is done for lanes where the mask is set.
3312      * Lanes where the mask is unset are not stored and do not need
3313      * to correspond to legitimate elements of {@code a}.
3314      * That is, unset lanes may correspond to array indexes less than
3315      * zero or beyond the end of the array.
3316      *
3317      * @param a the array
3318      * @param offset the offset into the array
3319      * @param m the mask controlling lane storage
3320      * @throws IndexOutOfBoundsException
3321      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
3322      *         for any lane {@code N} in the vector
3323      *         where the mask is set
3324      */
    @ForceInline
    public final
    void intoBooleanArray(boolean[] a, int offset,
                          VectorMask<Byte> m) {
        if (m.allTrue()) {
            // Fully-set mask: delegate to the faster unmasked store.
            intoBooleanArray(a, offset);
        } else {
            // FIXME: optimize
            ByteSpecies vsp = vspecies();
            // Bounds are checked only for lanes where the mask is set.
            checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
            stOp(a, offset, m, (arr, off, i, e) -> arr[off+i] = (e & 1) != 0);
        }
    }
3338 
3339     /**
3340      * Scatters this vector into an array of type {@code boolean[]}
3341      * using indexes obtained by adding a fixed {@code offset} to a
3342      * series of secondary offsets from an <em>index map</em>.
3343      * The index map is a contiguous sequence of {@code VLENGTH}
3344      * elements in a second array of {@code int}s, starting at a given
3345      * {@code mapOffset}.
3346      * <p>
3347      * For each vector lane, where {@code N} is the vector lane index,
3348      * the lane element at index {@code N}
3349      * is first converted to a {@code boolean} value and then
3350      * stored into the array
3351      * element {@code a[f(N)]}, where {@code f(N)} is the
3352      * index mapping expression
     * {@code offset + indexMap[mapOffset + N]}.
3354      * <p>
3355      * A {@code byte} value is converted to a {@code boolean} value by applying the
3356      * expression {@code (b & 1) != 0} where {@code b} is the byte value.
3357      *
3358      * @param a the array
3359      * @param offset an offset to combine with the index map offsets
3360      * @param indexMap the index map
3361      * @param mapOffset the offset into the index map
3362      * @throws IndexOutOfBoundsException
3363      *         if {@code mapOffset+N < 0}
3364      *         or if {@code mapOffset+N >= indexMap.length},
3365      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
3366      *         is an invalid index into {@code a},
3367      *         for any lane {@code N} in the vector
3368      * @see ByteVector#toIntArray()
3369      */
    @ForceInline
    public final
    void intoBooleanArray(boolean[] a, int offset,
                          int[] indexMap, int mapOffset) {
        // FIXME: optimize
        // Scalar scatter: each lane is converted with (e & 1) != 0 and
        // stored at its mapped index.
        stOp(a, offset,
             (arr, off, i, e) -> {
                 int j = indexMap[mapOffset + i];
                 arr[off + j] = (e & 1) != 0;
             });
    }
3381 
3382     /**
3383      * Scatters this vector into an array of type {@code boolean[]},
3384      * under the control of a mask, and
3385      * using indexes obtained by adding a fixed {@code offset} to a
3386      * series of secondary offsets from an <em>index map</em>.
3387      * The index map is a contiguous sequence of {@code VLENGTH}
3388      * elements in a second array of {@code int}s, starting at a given
3389      * {@code mapOffset}.
3390      * <p>
3391      * For each vector lane, where {@code N} is the vector lane index,
3392      * if the mask lane at index {@code N} is set then
3393      * the lane element at index {@code N}
3394      * is first converted to a {@code boolean} value and then
3395      * stored into the array
3396      * element {@code a[f(N)]}, where {@code f(N)} is the
3397      * index mapping expression
     * {@code offset + indexMap[mapOffset + N]}.
3399      * <p>
3400      * A {@code byte} value is converted to a {@code boolean} value by applying the
3401      * expression {@code (b & 1) != 0} where {@code b} is the byte value.
3402      *
3403      * @param a the array
3404      * @param offset an offset to combine with the index map offsets
3405      * @param indexMap the index map
3406      * @param mapOffset the offset into the index map
3407      * @param m the mask
3408      * @throws IndexOutOfBoundsException
3409      *         if {@code mapOffset+N < 0}
3410      *         or if {@code mapOffset+N >= indexMap.length},
3411      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
3412      *         is an invalid index into {@code a},
3413      *         for any lane {@code N} in the vector
3414      *         where the mask is set
3415      * @see ByteVector#toIntArray()
3416      */
    @ForceInline
    public final
    void intoBooleanArray(boolean[] a, int offset,
                          int[] indexMap, int mapOffset,
                          VectorMask<Byte> m) {
        // FIXME: optimize
        // Scalar masked scatter: only set lanes are converted with
        // (e & 1) != 0 and stored at their mapped indexes.
        stOp(a, offset, m,
             (arr, off, i, e) -> {
                 int j = indexMap[mapOffset + i];
                 arr[off + j] = (e & 1) != 0;
             });
    }
3429 
3430     /**
3431      * {@inheritDoc} <!--workaround-->
3432      */
3433     @Override
3434     @ForceInline
3435     public final
3436     void intoByteArray(byte[] a, int offset,
3437                        ByteOrder bo) {
3438         offset = checkFromIndexSize(offset, byteSize(), a.length);
3439         maybeSwap(bo).intoByteArray0(a, offset);
3440     }
3441 
3442     /**
3443      * {@inheritDoc} <!--workaround-->
3444      */
3445     @Override
3446     @ForceInline
3447     public final
3448     void intoByteArray(byte[] a, int offset,
3449                        ByteOrder bo,
3450                        VectorMask<Byte> m) {
3451         if (m.allTrue()) {
3452             intoByteArray(a, offset, bo);
3453         } else {
3454             // FIXME: optimize
3455             ByteSpecies vsp = vspecies();
3456             checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
3457             ByteBuffer wb = wrapper(a, bo);
3458             this.stOp(wb, offset, m,
3459                     (wb_, o, i, e) -> wb_.put(o + i * 1, e));
3460         }
3461     }
3462 
3463     /**
3464      * {@inheritDoc} <!--workaround-->
3465      */
3466     @Override
3467     @ForceInline
3468     public final
3469     void intoByteBuffer(ByteBuffer bb, int offset,
3470                         ByteOrder bo) {
3471         if (bb.isReadOnly()) {
3472             throw new ReadOnlyBufferException();
3473         }
3474         offset = checkFromIndexSize(offset, byteSize(), bb.limit());
3475         maybeSwap(bo).intoByteBuffer0(bb, offset);
3476     }
3477 
3478     /**
3479      * {@inheritDoc} <!--workaround-->
3480      */
3481     @Override
3482     @ForceInline
3483     public final
3484     void intoByteBuffer(ByteBuffer bb, int offset,
3485                         ByteOrder bo,
3486                         VectorMask<Byte> m) {
3487         if (m.allTrue()) {
3488             intoByteBuffer(bb, offset, bo);
3489         } else {
3490             // FIXME: optimize
3491             if (bb.isReadOnly()) {
3492                 throw new ReadOnlyBufferException();
3493             }
3494             ByteSpecies vsp = vspecies();
3495             checkMaskFromIndexSize(offset, vsp, m, 1, bb.limit());
3496             ByteBuffer wb = wrapper(bb, bo);
3497             this.stOp(wb, offset, m,
3498                     (wb_, o, i, e) -> wb_.put(o + i * 1, e));
3499         }
3500     }
3501 
3502     // ================================================
3503 
3504     // Low-level memory operations.
3505     //
3506     // Note that all of these operations *must* inline into a context
3507     // where the exact species of the involved vector is a
3508     // compile-time constant.  Otherwise, the intrinsic generation
3509     // will fail and performance will suffer.
3510     //
3511     // In many cases this is achieved by re-deriving a version of the
3512     // method in each concrete subclass (per species).  The re-derived
3513     // method simply calls one of these generic methods, with exact
3514     // parameters for the controlling metadata, which is either a
3515     // typed vector or constant species instance.
3516 
3517     // Unchecked loading operations in native byte order.
3518     // Caller is responsible for applying index checks, masking, and
3519     // byte swapping.
3520 
    /*package-private*/
    abstract
    ByteVector fromArray0(byte[] a, int offset);
    @ForceInline
    final
    ByteVector fromArray0Template(byte[] a, int offset) {
        // Unchecked load from byte[] in native order; the caller has
        // already performed index checks, masking, and byte swapping.
        ByteSpecies vsp = vspecies();
        return VectorSupport.load(
            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
            a, arrayAddress(a, offset),
            a, offset, vsp,
            // Scalar fallback, used only when the intrinsic does not apply.
            (arr, off, s) -> s.ldOp(arr, off,
                                    (arr_, off_, i) -> arr_[off_ + i]));
    }
3535 
3536 
    /*package-private*/
    abstract
    ByteVector fromBooleanArray0(boolean[] a, int offset);
    @ForceInline
    final
    ByteVector fromBooleanArray0Template(boolean[] a, int offset) {
        // Unchecked load from boolean[]; true maps to (byte) 1 and
        // false to (byte) 0 in each lane.
        ByteSpecies vsp = vspecies();
        return VectorSupport.load(
            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
            a, booleanArrayAddress(a, offset),
            a, offset, vsp,
            (arr, off, s) -> s.ldOp(arr, off,
                                    (arr_, off_, i) -> (byte) (arr_[off_ + i] ? 1 : 0)));
    }
3551 
    @Override
    abstract
    ByteVector fromByteArray0(byte[] a, int offset);
    @ForceInline
    final
    ByteVector fromByteArray0Template(byte[] a, int offset) {
        // Unchecked load from a raw byte[] in native byte order; the
        // caller handles index checks and any byte swapping.
        ByteSpecies vsp = vspecies();
        return VectorSupport.load(
            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
            a, byteArrayAddress(a, offset),
            a, offset, vsp,
            (arr, off, s) -> {
                // Scalar fallback: read each lane through a wrapping buffer.
                ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
                return s.ldOp(wb, off,
                        (wb_, o, i) -> wb_.get(o + i * 1));
            });
    }
3569 
    abstract
    ByteVector fromByteBuffer0(ByteBuffer bb, int offset);
    @ForceInline
    final
    ByteVector fromByteBuffer0Template(ByteBuffer bb, int offset) {
        // Unchecked load from a ByteBuffer in native byte order, routed
        // through ScopedMemoryAccess so direct-buffer sessions stay valid.
        ByteSpecies vsp = vspecies();
        return ScopedMemoryAccess.loadFromByteBuffer(
                vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
                bb, offset, vsp,
                (buf, off, s) -> {
                    // Scalar fallback: read each lane through a wrapping buffer.
                    ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
                    return s.ldOp(wb, off,
                            (wb_, o, i) -> wb_.get(o + i * 1));
                });
    }
3585 
3586     // Unchecked storing operations in native byte order.
3587     // Caller is responsible for applying index checks, masking, and
3588     // byte swapping.
3589 
    abstract
    void intoArray0(byte[] a, int offset);
    @ForceInline
    final
    void intoArray0Template(byte[] a, int offset) {
        // Unchecked store into byte[] in native order; the caller has
        // already performed index checks, masking, and byte swapping.
        ByteSpecies vsp = vspecies();
        VectorSupport.store(
            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
            a, arrayAddress(a, offset),
            this, a, offset,
            // Scalar fallback, used only when the intrinsic does not apply.
            (arr, off, v)
            -> v.stOp(arr, off,
                      (arr_, off_, i, e) -> arr_[off_+i] = e));
    }
3604 
3605     abstract
3606     void intoByteArray0(byte[] a, int offset);
3607     @ForceInline
3608     final
3609     void intoByteArray0Template(byte[] a, int offset) {
3610         ByteSpecies vsp = vspecies();
3611         VectorSupport.store(
3612             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3613             a, byteArrayAddress(a, offset),
3614             this, a, offset,
3615             (arr, off, v) -> {
3616                 ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
3617                 v.stOp(wb, off,
3618                         (tb_, o, i, e) -> tb_.put(o + i * 1, e));
3619             });
3620     }
3621 
    @ForceInline
    final
    void intoByteBuffer0(ByteBuffer bb, int offset) {
        // Unchecked store into a ByteBuffer in native byte order, routed
        // through ScopedMemoryAccess so direct-buffer sessions stay valid.
        ByteSpecies vsp = vspecies();
        ScopedMemoryAccess.storeIntoByteBuffer(
                vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
                this, bb, offset,
                (buf, off, v) -> {
                    // Scalar fallback: write each lane through a wrapping buffer.
                    ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
                    v.stOp(wb, off,
                            (wb_, o, i, e) -> wb_.put(o + i * 1, e));
                });
    }
3635 
3636     // End of low-level memory operations.
3637 
    private static
    void checkMaskFromIndexSize(int offset,
                                ByteSpecies vsp,
                                VectorMask<Byte> m,
                                int scale,
                                int limit) {
        // Bounds-check only the lanes where the mask is set, using the
        // species iota vector to derive each lane's effective index.
        ((AbstractMask<Byte>)m)
            .checkIndexByLane(offset, limit, vsp.iota(), scale);
    }
3647 
3648     @ForceInline
3649     private void conditionalStoreNYI(int offset,
3650                                      ByteSpecies vsp,
3651                                      VectorMask<Byte> m,
3652                                      int scale,
3653                                      int limit) {
3654         if (offset < 0 || offset + vsp.laneCount() * scale > limit) {
3655             String msg =
3656                 String.format("unimplemented: store @%d in [0..%d), %s in %s",
3657                               offset, limit, m, vsp);
3658             throw new AssertionError(msg);
3659         }
3660     }
3661 
    /*package-private*/
    @Override
    @ForceInline
    final
    ByteVector maybeSwap(ByteOrder bo) {
        // Byte lanes are single bytes, so byte order never matters and
        // the vector is returned unchanged.
        return this;
    }
3669 
    // log2 of the byte[] element scale, and the base offset of byte[]
    // storage, for Unsafe-style addressing.
    static final int ARRAY_SHIFT =
        31 - Integer.numberOfLeadingZeros(Unsafe.ARRAY_BYTE_INDEX_SCALE);
    static final long ARRAY_BASE =
        Unsafe.ARRAY_BYTE_BASE_OFFSET;

    @ForceInline
    static long arrayAddress(byte[] a, int index) {
        // Raw offset of a[index]: base plus index scaled by element size.
        return ARRAY_BASE + (((long)index) << ARRAY_SHIFT);
    }
3679 
3680 
    // log2 of the boolean[] element scale, and the base offset of
    // boolean[] storage, for Unsafe-style addressing.
    static final int ARRAY_BOOLEAN_SHIFT =
            31 - Integer.numberOfLeadingZeros(Unsafe.ARRAY_BOOLEAN_INDEX_SCALE);
    static final long ARRAY_BOOLEAN_BASE =
            Unsafe.ARRAY_BOOLEAN_BASE_OFFSET;

    @ForceInline
    static long booleanArrayAddress(boolean[] a, int index) {
        // Raw offset of a[index]: base plus index scaled by element size.
        return ARRAY_BOOLEAN_BASE + (((long)index) << ARRAY_BOOLEAN_SHIFT);
    }
3690 
    @ForceInline
    static long byteArrayAddress(byte[] a, int index) {
        // Raw byte offset of a[index]; scale is 1, so no shift is needed.
        return Unsafe.ARRAY_BYTE_BASE_OFFSET + index;
    }
3695 
3696     // ================================================
3697 
3698     /// Reinterpreting view methods:
3699     //   lanewise reinterpret: viewAsXVector()
3700     //   keep shape, redraw lanes: reinterpretAsEs()
3701 
3702     /**
3703      * {@inheritDoc} <!--workaround-->
3704      */
3705     @ForceInline
3706     @Override
3707     public final ByteVector reinterpretAsBytes() {
3708         return this;
3709     }
3710 
3711     /**
3712      * {@inheritDoc} <!--workaround-->
3713      */
3714     @ForceInline
3715     @Override
3716     public final ByteVector viewAsIntegralLanes() {
3717         return this;
3718     }
3719 
3720     /**
3721      * {@inheritDoc} <!--workaround-->
3722      *
3723      * @implNote This method always throws
3724      * {@code UnsupportedOperationException}, because there is no floating
3725      * point type of the same size as {@code byte}.  The return type
3726      * of this method is arbitrarily designated as
3727      * {@code Vector<?>}.  Future versions of this API may change the return
3728      * type if additional floating point types become available.
3729      */
3730     @ForceInline
3731     @Override
3732     public final
3733     Vector<?>
3734     viewAsFloatingLanes() {
3735         LaneType flt = LaneType.BYTE.asFloating();
3736         // asFloating() will throw UnsupportedOperationException for the unsupported type byte
3737         throw new AssertionError("Cannot reach here");
3738     }
3739 
3740     // ================================================
3741 
3742     /// Object methods: toString, equals, hashCode
3743     //
3744     // Object methods are defined as if via Arrays.toString, etc.,
3745     // is applied to the array of elements.  Two equal vectors
3746     // are required to have equal species and equal lane values.
3747 
3748     /**
3749      * Returns a string representation of this vector, of the form
3750      * {@code "[0,1,2...]"}, reporting the lane values of this vector,
3751      * in lane order.
3752      *
3753      * The string is produced as if by a call to {@link
3754      * java.util.Arrays#toString(byte[]) Arrays.toString()},
3755      * as appropriate to the {@code byte} array returned by
3756      * {@link #toArray this.toArray()}.
3757      *
3758      * @return a string of the form {@code "[0,1,2...]"}
3759      * reporting the lane values of this vector
3760      */
3761     @Override
3762     @ForceInline
3763     public final
3764     String toString() {
3765         // now that toArray is strongly typed, we can define this
3766         return Arrays.toString(toArray());
3767     }
3768 
3769     /**
3770      * {@inheritDoc} <!--workaround-->
3771      */
3772     @Override
3773     @ForceInline
3774     public final
3775     boolean equals(Object obj) {
3776         if (obj instanceof Vector) {
3777             Vector<?> that = (Vector<?>) obj;
3778             if (this.species().equals(that.species())) {
3779                 return this.eq(that.check(this.species())).allTrue();
3780             }
3781         }
3782         return false;
3783     }
3784 
3785     /**
3786      * {@inheritDoc} <!--workaround-->
3787      */
3788     @Override
3789     @ForceInline
3790     public final
3791     int hashCode() {
3792         // now that toArray is strongly typed, we can define this
3793         return Objects.hash(species(), Arrays.hashCode(toArray()));
3794     }
3795 
3796     // ================================================
3797 
3798     // Species
3799 
3800     /**
3801      * Class representing {@link ByteVector}'s of the same {@link VectorShape VectorShape}.
3802      */
3803     /*package-private*/
3804     static final class ByteSpecies extends AbstractSpecies<Byte> {
        // Species are created once per shape; the element size is
        // sanity-checked against the byte lane width.
        private ByteSpecies(VectorShape shape,
                Class<? extends ByteVector> vectorType,
                Class<? extends AbstractMask<Byte>> maskType,
                Function<Object, ByteVector> vectorFactory) {
            super(shape, LaneType.of(byte.class),
                  vectorType, maskType,
                  vectorFactory);
            assert(this.elementSize() == Byte.SIZE);
        }
3814 
3815         // Specializing overrides:
3816 
        @Override
        @ForceInline
        public final Class<Byte> elementType() {
            // Primitive lane type of this species.
            return byte.class;
        }
3822 
        @Override
        @ForceInline
        final Class<Byte> genericElementType() {
            // Boxed counterpart, used by the generic Vector<Byte> API.
            return Byte.class;
        }
3828 
        @SuppressWarnings("unchecked")
        @Override
        @ForceInline
        public final Class<? extends ByteVector> vectorType() {
            // Narrowing cast is safe: the constructor only accepts
            // ByteVector subclasses.
            return (Class<? extends ByteVector>) vectorType;
        }
3835 
        @Override
        @ForceInline
        public final long checkValue(long e) {
            // longToElementBits throws if e does not fit in a byte.
            longToElementBits(e);  // only for exception
            return e;
        }
3842 
        /*package-private*/
        @Override
        @ForceInline
        final ByteVector broadcastBits(long bits) {
            // Intrinsic broadcast; the lambda is the scalar fallback that
            // replicates the bits into every lane.
            return (ByteVector)
                VectorSupport.broadcastCoerced(
                    vectorType, byte.class, laneCount,
                    bits, this,
                    (bits_, s_) -> s_.rvOp(i -> bits_));
        }
3853 
        /*package-private*/
        @ForceInline
        final ByteVector broadcast(byte e) {
            // Exact-typed broadcast: no range check is needed for byte.
            return broadcastBits(toBits(e));
        }
3859 
        @Override
        @ForceInline
        public final ByteVector broadcast(long e) {
            // longToElementBits throws if e does not fit in a byte.
            return broadcastBits(longToElementBits(e));
        }
3865 
3866         /*package-private*/
3867         final @Override
3868         @ForceInline
3869         long longToElementBits(long value) {
3870             // Do the conversion, and then test it for failure.
3871             byte e = (byte) value;
3872             if ((long) e != value) {
3873                 throw badElementBits(value, e);
3874             }
3875             return toBits(e);
3876         }
3877 
3878         /*package-private*/
3879         @ForceInline
3880         static long toIntegralChecked(byte e, boolean convertToInt) {
3881             long value = convertToInt ? (int) e : (long) e;
3882             if ((byte) value != e) {
3883                 throw badArrayBits(e, convertToInt, value);
3884             }
3885             return value;
3886         }
3887 
3888         /* this non-public one is for internal conversions */
3889         @Override
3890         @ForceInline
3891         final ByteVector fromIntValues(int[] values) {
3892             VectorIntrinsics.requireLength(values.length, laneCount);
3893             byte[] va = new byte[laneCount()];
3894             for (int i = 0; i < va.length; i++) {
3895                 int lv = values[i];
3896                 byte v = (byte) lv;
3897                 va[i] = v;
3898                 if ((int)v != lv) {
3899                     throw badElementBits(lv, v);
3900                 }
3901             }
3902             return dummyVector().fromArray0(va, 0);
3903         }
3904 
3905         // Virtual constructors
3906 
        @ForceInline
        @Override final
        public ByteVector fromArray(Object a, int offset) {
            // User entry point:  Be careful with inputs.
            // Untyped species hook: cast and delegate to the strongly
            // typed factory, which performs all range checks.
            return ByteVector
                .fromArray(this, (byte[]) a, offset);
        }
3914 
        @ForceInline
        @Override final
        ByteVector dummyVector() {
            // Covariant narrowing of the cached all-zero dummy vector.
            return (ByteVector) super.dummyVector();
        }
3920 
3921         /*package-private*/
3922         final @Override
3923         @ForceInline
3924         ByteVector rvOp(RVOp f) {
3925             byte[] res = new byte[laneCount()];
3926             for (int i = 0; i < res.length; i++) {
3927                 byte bits = (byte) f.apply(i);
3928                 res[i] = fromBits(bits);
3929             }
3930             return dummyVector().vectorFactory(res);
3931         }
3932 
3933         ByteVector vOp(FVOp f) {
3934             byte[] res = new byte[laneCount()];
3935             for (int i = 0; i < res.length; i++) {
3936                 res[i] = f.apply(i);
3937             }
3938             return dummyVector().vectorFactory(res);
3939         }
3940 
3941         ByteVector vOp(VectorMask<Byte> m, FVOp f) {
3942             byte[] res = new byte[laneCount()];
3943             boolean[] mbits = ((AbstractMask<Byte>)m).getBits();
3944             for (int i = 0; i < res.length; i++) {
3945                 if (mbits[i]) {
3946                     res[i] = f.apply(i);
3947                 }
3948             }
3949             return dummyVector().vectorFactory(res);
3950         }
3951 
        /*package-private*/
        @ForceInline
        <M> ByteVector ldOp(M memory, int offset,
                                      FLdOp<M> f) {
            // Delegate the generic load loop to the dummy vector.
            return dummyVector().ldOp(memory, offset, f);
        }
3958 
        /*package-private*/
        @ForceInline
        <M> ByteVector ldOp(M memory, int offset,
                                      AbstractMask<Byte> m,
                                      FLdOp<M> f) {
            // Delegate the masked generic load loop to the dummy vector.
            return dummyVector().ldOp(memory, offset, m, f);
        }
3966 
        /*package-private*/
        @ForceInline
        <M> void stOp(M memory, int offset, FStOp<M> f) {
            // Delegate the generic store loop to the dummy vector.
            dummyVector().stOp(memory, offset, f);
        }
3972 
        /*package-private*/
        @ForceInline
        <M> void stOp(M memory, int offset,
                      AbstractMask<Byte> m,
                      FStOp<M> f) {
            // Delegate the masked generic store loop to the dummy vector.
            dummyVector().stOp(memory, offset, m, f);
        }
3980 
3981         // N.B. Make sure these constant vectors and
3982         // masks load up correctly into registers.
3983         //
3984         // Also, see if we can avoid all that switching.
3985         // Could we cache both vectors and both masks in
3986         // this species object?
3987 
3988         // Zero and iota vector access
3989         @Override
3990         @ForceInline
3991         public final ByteVector zero() {
3992             if ((Class<?>) vectorType() == ByteMaxVector.class)
3993                 return ByteMaxVector.ZERO;
3994             switch (vectorBitSize()) {
3995                 case 64: return Byte64Vector.ZERO;
3996                 case 128: return Byte128Vector.ZERO;
3997                 case 256: return Byte256Vector.ZERO;
3998                 case 512: return Byte512Vector.ZERO;
3999             }
4000             throw new AssertionError();
4001         }
4002 
4003         @Override
4004         @ForceInline
4005         public final ByteVector iota() {
4006             if ((Class<?>) vectorType() == ByteMaxVector.class)
4007                 return ByteMaxVector.IOTA;
4008             switch (vectorBitSize()) {
4009                 case 64: return Byte64Vector.IOTA;
4010                 case 128: return Byte128Vector.IOTA;
4011                 case 256: return Byte256Vector.IOTA;
4012                 case 512: return Byte512Vector.IOTA;
4013             }
4014             throw new AssertionError();
4015         }
4016 
4017         // Mask access
4018         @Override
4019         @ForceInline
4020         public final VectorMask<Byte> maskAll(boolean bit) {
4021             if ((Class<?>) vectorType() == ByteMaxVector.class)
4022                 return ByteMaxVector.ByteMaxMask.maskAll(bit);
4023             switch (vectorBitSize()) {
4024                 case 64: return Byte64Vector.Byte64Mask.maskAll(bit);
4025                 case 128: return Byte128Vector.Byte128Mask.maskAll(bit);
4026                 case 256: return Byte256Vector.Byte256Mask.maskAll(bit);
4027                 case 512: return Byte512Vector.Byte512Mask.maskAll(bit);
4028             }
4029             throw new AssertionError();
4030         }
4031     }
4032 
4033     /**
4034      * Finds a species for an element type of {@code byte} and shape.
4035      *
4036      * @param s the shape
4037      * @return a species for an element type of {@code byte} and shape
4038      * @throws IllegalArgumentException if no such species exists for the shape
4039      */
4040     static ByteSpecies species(VectorShape s) {
4041         Objects.requireNonNull(s);
4042         switch (s) {
4043             case S_64_BIT: return (ByteSpecies) SPECIES_64;
4044             case S_128_BIT: return (ByteSpecies) SPECIES_128;
4045             case S_256_BIT: return (ByteSpecies) SPECIES_256;
4046             case S_512_BIT: return (ByteSpecies) SPECIES_512;
4047             case S_Max_BIT: return (ByteSpecies) SPECIES_MAX;
4048             default: throw new IllegalArgumentException("Bad shape: " + s);
4049         }
4050     }
4051 
    /** Species representing {@link ByteVector}s of {@link VectorShape#S_64_BIT VectorShape.S_64_BIT}. */
    public static final VectorSpecies<Byte> SPECIES_64
        = new ByteSpecies(VectorShape.S_64_BIT,
                            Byte64Vector.class,
                            Byte64Vector.Byte64Mask.class,
                            Byte64Vector::new);

    /** Species representing {@link ByteVector}s of {@link VectorShape#S_128_BIT VectorShape.S_128_BIT}. */
    public static final VectorSpecies<Byte> SPECIES_128
        = new ByteSpecies(VectorShape.S_128_BIT,
                            Byte128Vector.class,
                            Byte128Vector.Byte128Mask.class,
                            Byte128Vector::new);

    /** Species representing {@link ByteVector}s of {@link VectorShape#S_256_BIT VectorShape.S_256_BIT}. */
    public static final VectorSpecies<Byte> SPECIES_256
        = new ByteSpecies(VectorShape.S_256_BIT,
                            Byte256Vector.class,
                            Byte256Vector.Byte256Mask.class,
                            Byte256Vector::new);

    /** Species representing {@link ByteVector}s of {@link VectorShape#S_512_BIT VectorShape.S_512_BIT}. */
    public static final VectorSpecies<Byte> SPECIES_512
        = new ByteSpecies(VectorShape.S_512_BIT,
                            Byte512Vector.class,
                            Byte512Vector.Byte512Mask.class,
                            Byte512Vector::new);

    /** Species representing {@link ByteVector}s of {@link VectorShape#S_Max_BIT VectorShape.S_Max_BIT}. */
    public static final VectorSpecies<Byte> SPECIES_MAX
        = new ByteSpecies(VectorShape.S_Max_BIT,
                            ByteMaxVector.class,
                            ByteMaxVector.ByteMaxMask.class,
                            ByteMaxVector::new);

    /**
     * Preferred species for {@link ByteVector}s.
     * A preferred species is a species of maximal bit-size for the platform.
     */
    // The cast to the package-private ByteSpecies type asserts, at class
    // initialization, that ofPreferred wires up the expected implementation.
    public static final VectorSpecies<Byte> SPECIES_PREFERRED
        = (ByteSpecies) VectorSpecies.ofPreferred(byte.class);
4093 }