1 /*
   2  * Copyright (c) 2017, 2021, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 package jdk.incubator.vector;
  26 
  27 import java.nio.ByteBuffer;
  28 import java.nio.ByteOrder;
  29 import java.nio.ReadOnlyBufferException;
  30 import java.util.Arrays;
  31 import java.util.Objects;
  32 import java.util.function.Function;
  33 import java.util.function.UnaryOperator;
  34 
  35 import jdk.internal.misc.ScopedMemoryAccess;
  36 import jdk.internal.misc.Unsafe;
  37 import jdk.internal.vm.annotation.ForceInline;
  38 import jdk.internal.vm.vector.VectorSupport;
  39 
  40 import static jdk.internal.vm.vector.VectorSupport.*;
  41 import static jdk.incubator.vector.VectorIntrinsics.*;
  42 
  43 import static jdk.incubator.vector.VectorOperators.*;
  44 
  45 // -- This file was mechanically generated: Do not edit! -- //
  46 
  47 /**
  48  * A specialized {@link Vector} representing an ordered immutable sequence of
  49  * {@code byte} values.
  50  */
  51 @SuppressWarnings("cast")  // warning: redundant cast
  52 public abstract class ByteVector extends AbstractVector<Byte> {
  53 
    // Lanes are stored in a fixed-size byte[] owned by AbstractVector.
    ByteVector(byte[] vec) {
        super(vec);
    }
  57 
  58     static final int FORBID_OPCODE_KIND = VO_ONLYFP;
  59 
    @ForceInline
    static int opCode(Operator op) {
        // Validate op for this lane type, rejecting FP-only kinds.
        return VectorOperators.opCode(op, VO_OPCODE_VALID, FORBID_OPCODE_KIND);
    }
    @ForceInline
    static int opCode(Operator op, int requireKind) {
        // Caller's extra kind bits are required in addition to basic validity.
        requireKind |= VO_OPCODE_VALID;
        return VectorOperators.opCode(op, requireKind, FORBID_OPCODE_KIND);
    }
    @ForceInline
    static boolean opKind(Operator op, int bit) {
        // True if op carries any of the given kind bits.
        return VectorOperators.opKind(op, bit);
    }
  73 
  74     // Virtualized factories and operators,
  75     // coded with portable definitions.
  76     // These are all @ForceInline in case
  77     // they need to be used performantly.
  78     // The various shape-specific subclasses
  79     // also specialize them by wrapping
  80     // them in a call like this:
  81     //    return (Byte128Vector)
  82     //       super.bOp((Byte128Vector) o);
  83     // The purpose of that is to forcibly inline
  84     // the generic definition from this file
  85     // into a sharply type- and size-specific
  86     // wrapper in the subclass file, so that
  87     // the JIT can specialize the code.
  88     // The code is only inlined and expanded
  89     // if it gets hot.  Think of it as a cheap
  90     // and lazy version of C++ templates.
  91 
  92     // Virtualized getter
  93 
    /*package-private*/
    // Accessor for the backing byte[]; each shape-specific subclass returns its own storage.
    abstract byte[] vec();
  96 
  97     // Virtualized constructors
  98 
  99     /**
 100      * Build a vector directly using my own constructor.
 101      * It is an error if the array is aliased elsewhere.
 102      */
 103     /*package-private*/
 104     abstract ByteVector vectorFactory(byte[] vec);
 105 
 106     /**
 107      * Build a mask directly using my species.
 108      * It is an error if the array is aliased elsewhere.
 109      */
 110     /*package-private*/
 111     @ForceInline
 112     final
 113     AbstractMask<Byte> maskFactory(boolean[] bits) {
 114         return vspecies().maskFactory(bits);
 115     }
 116 
    // Constant loader (takes dummy as vector arg)
    // Lane generator: maps a lane index to its byte value.
    interface FVOp {
        byte apply(int i);
    }
 121 
    /*package-private*/
    @ForceInline
    final
    // Fill a fresh vector lane-by-lane from the generator f.
    ByteVector vOp(FVOp f) {
        byte[] res = new byte[length()];
        for (int i = 0; i < res.length; i++) {
            res[i] = f.apply(i);
        }
        return vectorFactory(res);
    }
 132 
    @ForceInline
    final
    // Masked fill: unset lanes are left at the array default, zero.
    ByteVector vOp(VectorMask<Byte> m, FVOp f) {
        byte[] res = new byte[length()];
        boolean[] mbits = ((AbstractMask<Byte>)m).getBits();
        for (int i = 0; i < res.length; i++) {
            if (mbits[i]) {
                res[i] = f.apply(i);
            }
        }
        return vectorFactory(res);
    }
 145 
 146     // Unary operator
 147 
    /*package-private*/
    // Unary lane function: (laneIndex, laneValue) -> new lane value.
    interface FUnOp {
        byte apply(int i, byte a);
    }
 152 
    /*package-private*/
    // Lane-wise unary map; each shape subclass wraps uOpTemplate for sharp typing.
    abstract
    ByteVector uOp(FUnOp f);
    @ForceInline
    final
    // Portable fallback: apply f to every lane of this vector.
    ByteVector uOpTemplate(FUnOp f) {
        byte[] vec = vec();
        byte[] res = new byte[length()];
        for (int i = 0; i < res.length; i++) {
            res[i] = f.apply(i, vec[i]);
        }
        return vectorFactory(res);
    }
 166 
    /*package-private*/
    // Masked lane-wise unary map; each shape subclass wraps uOpTemplate.
    abstract
    ByteVector uOp(VectorMask<Byte> m,
                             FUnOp f);
    @ForceInline
    final
    ByteVector uOpTemplate(VectorMask<Byte> m,
                                     FUnOp f) {
        if (m == null) {
            return uOpTemplate(f);  // no mask: apply to every lane
        }
        byte[] vec = vec();
        byte[] res = new byte[length()];
        boolean[] mbits = ((AbstractMask<Byte>)m).getBits();
        for (int i = 0; i < res.length; i++) {
            // Unset lanes pass the original lane value through unchanged.
            res[i] = mbits[i] ? f.apply(i, vec[i]) : vec[i];
        }
        return vectorFactory(res);
    }
 186 
 187     // Binary operator
 188 
    /*package-private*/
    // Binary lane function: (laneIndex, laneA, laneB) -> new lane value.
    interface FBinOp {
        byte apply(int i, byte a, byte b);
    }
 193 
    /*package-private*/
    // Lane-wise binary combine with another vector; specialized per shape.
    abstract
    ByteVector bOp(Vector<Byte> o,
                             FBinOp f);
    @ForceInline
    final
    // Portable fallback: combine this and o lane-by-lane via f.
    ByteVector bOpTemplate(Vector<Byte> o,
                                     FBinOp f) {
        byte[] res = new byte[length()];
        byte[] vec1 = this.vec();
        byte[] vec2 = ((ByteVector)o).vec();
        for (int i = 0; i < res.length; i++) {
            res[i] = f.apply(i, vec1[i], vec2[i]);
        }
        return vectorFactory(res);
    }
 210 
    /*package-private*/
    // Masked lane-wise binary combine; specialized per shape.
    abstract
    ByteVector bOp(Vector<Byte> o,
                             VectorMask<Byte> m,
                             FBinOp f);
    @ForceInline
    final
    ByteVector bOpTemplate(Vector<Byte> o,
                                     VectorMask<Byte> m,
                                     FBinOp f) {
        if (m == null) {
            return bOpTemplate(o, f);  // no mask: combine every lane
        }
        byte[] res = new byte[length()];
        byte[] vec1 = this.vec();
        byte[] vec2 = ((ByteVector)o).vec();
        boolean[] mbits = ((AbstractMask<Byte>)m).getBits();
        for (int i = 0; i < res.length; i++) {
            // Unset lanes keep this vector's original lane value.
            res[i] = mbits[i] ? f.apply(i, vec1[i], vec2[i]) : vec1[i];
        }
        return vectorFactory(res);
    }
 233 
 234     // Ternary operator
 235 
    /*package-private*/
    // Ternary lane function: (laneIndex, laneA, laneB, laneC) -> new lane value.
    interface FTriOp {
        byte apply(int i, byte a, byte b, byte c);
    }
 240 
    /*package-private*/
    // Lane-wise ternary combine; specialized per shape.
    abstract
    ByteVector tOp(Vector<Byte> o1,
                             Vector<Byte> o2,
                             FTriOp f);
    @ForceInline
    final
    // Portable fallback: combine this, o1, and o2 lane-by-lane via f.
    ByteVector tOpTemplate(Vector<Byte> o1,
                                     Vector<Byte> o2,
                                     FTriOp f) {
        byte[] res = new byte[length()];
        byte[] vec1 = this.vec();
        byte[] vec2 = ((ByteVector)o1).vec();
        byte[] vec3 = ((ByteVector)o2).vec();
        for (int i = 0; i < res.length; i++) {
            res[i] = f.apply(i, vec1[i], vec2[i], vec3[i]);
        }
        return vectorFactory(res);
    }
 260 
    /*package-private*/
    // Masked lane-wise ternary combine; specialized per shape.
    abstract
    ByteVector tOp(Vector<Byte> o1,
                             Vector<Byte> o2,
                             VectorMask<Byte> m,
                             FTriOp f);
    @ForceInline
    final
    ByteVector tOpTemplate(Vector<Byte> o1,
                                     Vector<Byte> o2,
                                     VectorMask<Byte> m,
                                     FTriOp f) {
        if (m == null) {
            return tOpTemplate(o1, o2, f);  // no mask: combine every lane
        }
        byte[] res = new byte[length()];
        byte[] vec1 = this.vec();
        byte[] vec2 = ((ByteVector)o1).vec();
        byte[] vec3 = ((ByteVector)o2).vec();
        boolean[] mbits = ((AbstractMask<Byte>)m).getBits();
        for (int i = 0; i < res.length; i++) {
            // Unset lanes keep this vector's original lane value.
            res[i] = mbits[i] ? f.apply(i, vec1[i], vec2[i], vec3[i]) : vec1[i];
        }
        return vectorFactory(res);
    }
 286 
 287     // Reduction operator
 288 
    /*package-private*/
    // Masked reduction: fold all lanes into v using f; specialized per shape.
    abstract
    byte rOp(byte v, VectorMask<Byte> m, FBinOp f);
 292 
    @ForceInline
    final
    // Portable masked reduction; v is the caller-supplied initial (identity) value.
    byte rOpTemplate(byte v, VectorMask<Byte> m, FBinOp f) {
        if (m == null) {
            return rOpTemplate(v, f);  // no mask: fold every lane
        }
        byte[] vec = vec();
        boolean[] mbits = ((AbstractMask<Byte>)m).getBits();
        for (int i = 0; i < vec.length; i++) {
            // Unset lanes are skipped; the accumulator passes through.
            v = mbits[i] ? f.apply(i, v, vec[i]) : v;
        }
        return v;
    }
 306 
    @ForceInline
    final
    // Portable unmasked reduction: left-fold all lanes into v using f.
    byte rOpTemplate(byte v, FBinOp f) {
        byte[] vec = vec();
        for (int i = 0; i < vec.length; i++) {
            v = f.apply(i, v, vec[i]);
        }
        return v;
    }
 316 
 317     // Memory reference
 318 
    /*package-private*/
    // Lane loader: (memory, baseOffset, laneIndex) -> loaded byte.
    interface FLdOp<M> {
        byte apply(M memory, int offset, int i);
    }
 323 
    /*package-private*/
    @ForceInline
    final
    // Load a fresh vector from memory, one lane at a time via f.
    <M> ByteVector ldOp(M memory, int offset,
                                  FLdOp<M> f) {
        //dummy; no vec = vec();
        byte[] res = new byte[length()];
        for (int i = 0; i < res.length; i++) {
            res[i] = f.apply(memory, offset, i);
        }
        return vectorFactory(res);
    }
 336 
    /*package-private*/
    @ForceInline
    final
    // Masked load: unset lanes are not read and stay at the array default, zero.
    <M> ByteVector ldOp(M memory, int offset,
                                  VectorMask<Byte> m,
                                  FLdOp<M> f) {
        //byte[] vec = vec();
        byte[] res = new byte[length()];
        boolean[] mbits = ((AbstractMask<Byte>)m).getBits();
        for (int i = 0; i < res.length; i++) {
            if (mbits[i]) {
                res[i] = f.apply(memory, offset, i);
            }
        }
        return vectorFactory(res);
    }
 353 
    // Lane storer: writes lane value a for lane i at (memory, baseOffset).
    interface FStOp<M> {
        void apply(M memory, int offset, int i, byte a);
    }
 357 
    /*package-private*/
    @ForceInline
    final
    // Store every lane of this vector to memory via f.
    <M> void stOp(M memory, int offset,
                  FStOp<M> f) {
        byte[] vec = vec();
        for (int i = 0; i < vec.length; i++) {
            f.apply(memory, offset, i, vec[i]);
        }
    }
 368 
    /*package-private*/
    @ForceInline
    final
    // Masked store: unset lanes are not written to memory.
    <M> void stOp(M memory, int offset,
                  VectorMask<Byte> m,
                  FStOp<M> f) {
        byte[] vec = vec();
        boolean[] mbits = ((AbstractMask<Byte>)m).getBits();
        for (int i = 0; i < vec.length; i++) {
            if (mbits[i]) {
                f.apply(memory, offset, i, vec[i]);
            }
        }
    }
 383 
 384     // Binary test
 385 
    /*package-private*/
    // Lane comparison: (condCode, laneIndex, laneA, laneB) -> mask bit.
    interface FBinTest {
        boolean apply(int cond, int i, byte a, byte b);
    }
 390 
    /*package-private*/
    @ForceInline
    final
    // Build a mask by testing corresponding lanes of this and o with f.
    AbstractMask<Byte> bTest(int cond,
                                  Vector<Byte> o,
                                  FBinTest f) {
        byte[] vec1 = vec();
        byte[] vec2 = ((ByteVector)o).vec();
        boolean[] bits = new boolean[length()];
        for (int i = 0; i < length(); i++){
            bits[i] = f.apply(cond, i, vec1[i], vec2[i]);
        }
        return maskFactory(bits);
    }
 405 
 406     /*package-private*/
 407     @ForceInline
 408     static byte rotateLeft(byte a, int n) {
 409         return (byte)(((((byte)a) & Byte.toUnsignedInt((byte)-1)) << (n & Byte.SIZE-1)) | ((((byte)a) & Byte.toUnsignedInt((byte)-1)) >>> (Byte.SIZE - (n & Byte.SIZE-1))));
 410     }
 411 
 412     /*package-private*/
 413     @ForceInline
 414     static byte rotateRight(byte a, int n) {
 415         return (byte)(((((byte)a) & Byte.toUnsignedInt((byte)-1)) >>> (n & Byte.SIZE-1)) | ((((byte)a) & Byte.toUnsignedInt((byte)-1)) << (Byte.SIZE - (n & Byte.SIZE-1))));
 416     }
 417 
    /*package-private*/
    @Override
    // Covariant narrowing of the species accessor to the byte lane type.
    abstract ByteSpecies vspecies();
 421 
 422     /*package-private*/
 423     @ForceInline
 424     static long toBits(byte e) {
 425         return  e;
 426     }
 427 
 428     /*package-private*/
 429     @ForceInline
 430     static byte fromBits(long bits) {
 431         return ((byte)bits);
 432     }
 433 
 434     // Static factories (other than memory operations)
 435 
 436     // Note: A surprising behavior in javadoc
 437     // sometimes makes a lone /** {@inheritDoc} */
 438     // comment drop the method altogether,
 439     // apparently if the method mentions an
 440     // parameter or return type of Vector<Byte>
 441     // instead of Vector<E> as originally specified.
 442     // Adding an empty HTML fragment appears to
 443     // nudge javadoc into providing the desired
 444     // inherited documentation.  We use the HTML
 445     // comment <!--workaround--> for this.
 446 
 447     /**
 448      * Returns a vector of the given species
 449      * where all lane elements are set to
 450      * zero, the default primitive value.
 451      *
 452      * @param species species of the desired zero vector
 453      * @return a zero vector
 454      */
 455     @ForceInline
 456     public static ByteVector zero(VectorSpecies<Byte> species) {
 457         ByteSpecies vsp = (ByteSpecies) species;
 458         return VectorSupport.broadcastCoerced(vsp.vectorType(), byte.class, species.length(),
 459                                 0, vsp,
 460                                 ((bits_, s_) -> s_.rvOp(i -> bits_)));
 461     }
 462 
 463     /**
 464      * Returns a vector of the same species as this one
 465      * where all lane elements are set to
 466      * the primitive value {@code e}.
 467      *
 468      * The contents of the current vector are discarded;
 469      * only the species is relevant to this operation.
 470      *
 471      * <p> This method returns the value of this expression:
 472      * {@code ByteVector.broadcast(this.species(), e)}.
 473      *
 474      * @apiNote
 475      * Unlike the similar method named {@code broadcast()}
 476      * in the supertype {@code Vector}, this method does not
 477      * need to validate its argument, and cannot throw
 478      * {@code IllegalArgumentException}.  This method is
 479      * therefore preferable to the supertype method.
 480      *
 481      * @param e the value to broadcast
 482      * @return a vector where all lane elements are set to
 483      *         the primitive value {@code e}
 484      * @see #broadcast(VectorSpecies,long)
 485      * @see Vector#broadcast(long)
 486      * @see VectorSpecies#broadcast(long)
 487      */
 488     public abstract ByteVector broadcast(byte e);
 489 
 490     /**
 491      * Returns a vector of the given species
 492      * where all lane elements are set to
 493      * the primitive value {@code e}.
 494      *
 495      * @param species species of the desired vector
 496      * @param e the value to broadcast
 497      * @return a vector where all lane elements are set to
 498      *         the primitive value {@code e}
 499      * @see #broadcast(long)
 500      * @see Vector#broadcast(long)
 501      * @see VectorSpecies#broadcast(long)
 502      */
 503     @ForceInline
 504     public static ByteVector broadcast(VectorSpecies<Byte> species, byte e) {
 505         ByteSpecies vsp = (ByteSpecies) species;
 506         return vsp.broadcast(e);
 507     }
 508 
    /*package-private*/
    @ForceInline
    // Shared body for subclass broadcast(byte) implementations.
    final ByteVector broadcastTemplate(byte e) {
        ByteSpecies vsp = vspecies();
        return vsp.broadcast(e);
    }
 515 
 516     /**
 517      * {@inheritDoc} <!--workaround-->
 518      * @apiNote
 519      * When working with vector subtypes like {@code ByteVector},
 520      * {@linkplain #broadcast(byte) the more strongly typed method}
 521      * is typically selected.  It can be explicitly selected
 522      * using a cast: {@code v.broadcast((byte)e)}.
 523      * The two expressions will produce numerically identical results.
 524      */
 525     @Override
 526     public abstract ByteVector broadcast(long e);
 527 
 528     /**
 529      * Returns a vector of the given species
 530      * where all lane elements are set to
 531      * the primitive value {@code e}.
 532      *
 533      * The {@code long} value must be accurately representable
 534      * by the {@code ETYPE} of the vector species, so that
 535      * {@code e==(long)(ETYPE)e}.
 536      *
 537      * @param species species of the desired vector
 538      * @param e the value to broadcast
 539      * @return a vector where all lane elements are set to
 540      *         the primitive value {@code e}
 541      * @throws IllegalArgumentException
 542      *         if the given {@code long} value cannot
 543      *         be represented by the vector's {@code ETYPE}
 544      * @see #broadcast(VectorSpecies,byte)
 545      * @see VectorSpecies#checkValue(long)
 546      */
 547     @ForceInline
 548     public static ByteVector broadcast(VectorSpecies<Byte> species, long e) {
 549         ByteSpecies vsp = (ByteSpecies) species;
 550         return vsp.broadcast(e);
 551     }
 552 
    /*package-private*/
    @ForceInline
    // Shared body for subclass broadcast(long) implementations.
    final ByteVector broadcastTemplate(long e) {
        return vspecies().broadcast(e);
    }
 558 
 559     // Unary lanewise support
 560 
 561     /**
 562      * {@inheritDoc} <!--workaround-->
 563      */
 564     public abstract
 565     ByteVector lanewise(VectorOperators.Unary op);
 566 
    @ForceInline
    final
    ByteVector lanewiseTemplate(VectorOperators.Unary op) {
        if (opKind(op, VO_SPECIAL)) {
            if (op == ZOMO) {
                // Zero-or-minus-one: nonzero lanes become -1, zero lanes stay 0.
                return blend(broadcast(-1), compare(NE, 0));
            }
            if (op == NOT) {
                // Bitwise complement as x ^ -1.
                return broadcast(-1).lanewise(XOR, this);
            } else if (op == NEG) {
                // FIXME: Support this in the JIT.
                // Negation as 0 - x.
                return broadcast(0).lanewise(SUB, this);
            }
        }
        int opc = opCode(op);
        // Intrinsic dispatch; the cached lambda is the scalar fallback.
        return VectorSupport.unaryOp(
            opc, getClass(), null, byte.class, length(),
            this, null,
            UN_IMPL.find(op, opc, ByteVector::unaryOperations));
    }
 587 
 588     /**
 589      * {@inheritDoc} <!--workaround-->
 590      */
 591     @Override
 592     public abstract
 593     ByteVector lanewise(VectorOperators.Unary op,
 594                                   VectorMask<Byte> m);
    @ForceInline
    final
    ByteVector lanewiseTemplate(VectorOperators.Unary op,
                                          Class<? extends VectorMask<Byte>> maskClass,
                                          VectorMask<Byte> m) {
        m.check(maskClass, this);  // mask must match this vector's species
        if (opKind(op, VO_SPECIAL)) {
            if (op == ZOMO) {
                // Zero-or-minus-one under mask: only set lanes are rewritten.
                return blend(broadcast(-1), compare(NE, 0, m));
            }
            if (op == NOT) {
                return lanewise(XOR, broadcast(-1), m);
            } else if (op == NEG) {
                // Two's-complement negation under mask: (~x) + 1.
                return lanewise(NOT, m).lanewise(ADD, broadcast(1), m);
            }
        }
        int opc = opCode(op);
        // Intrinsic dispatch; the cached lambda is the scalar fallback.
        return VectorSupport.unaryOp(
            opc, getClass(), maskClass, byte.class, length(),
            this, m,
            UN_IMPL.find(op, opc, ByteVector::unaryOperations));
    }
 617 
    // Per-opcode cache of scalar fallback lambdas for unary ops.
    private static final
    ImplCache<Unary, UnaryOperation<ByteVector, VectorMask<Byte>>>
        UN_IMPL = new ImplCache<>(Unary.class, ByteVector.class);
 621 
    // Scalar fallback bodies for unary intrinsics, selected by opcode.
    private static UnaryOperation<ByteVector, VectorMask<Byte>> unaryOperations(int opc_) {
        switch (opc_) {
            case VECTOR_OP_NEG: return (v0, m) ->
                    v0.uOp(m, (i, a) -> (byte) -a);
            case VECTOR_OP_ABS: return (v0, m) ->
                    v0.uOp(m, (i, a) -> (byte) Math.abs(a));
            default: return null;  // no scalar fallback registered for this opcode
        }
    }
 631 
 632     // Binary lanewise support
 633 
 634     /**
 635      * {@inheritDoc} <!--workaround-->
 636      * @see #lanewise(VectorOperators.Binary,byte)
 637      * @see #lanewise(VectorOperators.Binary,byte,VectorMask)
 638      */
 639     @Override
 640     public abstract
 641     ByteVector lanewise(VectorOperators.Binary op,
 642                                   Vector<Byte> v);
    @ForceInline
    final
    ByteVector lanewiseTemplate(VectorOperators.Binary op,
                                          Vector<Byte> v) {
        ByteVector that = (ByteVector) v;
        that.check(this);  // both operands must share a species

        if (opKind(op, VO_SPECIAL  | VO_SHIFT)) {
            if (op == FIRST_NONZERO) {
                // FIXME: Support this in the JIT.
                // Rewrite as OR: zero out that's lanes wherever this is nonzero.
                VectorMask<Byte> thisNZ
                    = this.viewAsIntegralLanes().compare(NE, (byte) 0);
                that = that.blend((byte) 0, thisNZ.cast(vspecies()));
                op = OR_UNCHECKED;
            }
            if (opKind(op, VO_SHIFT)) {
                // As per shift specification for Java, mask the shift count.
                // This allows the JIT to ignore some ISA details.
                that = that.lanewise(AND, SHIFT_MASK);
            }
            if (op == AND_NOT) {
                // FIXME: Support this in the JIT.
                // Lower AND_NOT to AND with the complemented operand.
                that = that.lanewise(NOT);
                op = AND;
            } else if (op == DIV) {
                // Pre-check all divisor lanes so division faults eagerly.
                VectorMask<Byte> eqz = that.eq((byte) 0);
                if (eqz.anyTrue()) {
                    throw that.divZeroException();
                }
            }
        }

        int opc = opCode(op);
        // Intrinsic dispatch; the cached lambda is the scalar fallback.
        return VectorSupport.binaryOp(
            opc, getClass(), null, byte.class, length(),
            this, that, null,
            BIN_IMPL.find(op, opc, ByteVector::binaryOperations));
    }
 681 
 682     /**
 683      * {@inheritDoc} <!--workaround-->
 684      * @see #lanewise(VectorOperators.Binary,byte,VectorMask)
 685      */
 686     @Override
 687     public abstract
 688     ByteVector lanewise(VectorOperators.Binary op,
 689                                   Vector<Byte> v,
 690                                   VectorMask<Byte> m);
    @ForceInline
    final
    ByteVector lanewiseTemplate(VectorOperators.Binary op,
                                          Class<? extends VectorMask<Byte>> maskClass,
                                          Vector<Byte> v, VectorMask<Byte> m) {
        ByteVector that = (ByteVector) v;
        that.check(this);           // both operands must share a species
        m.check(maskClass, this);   // mask must match this vector's species

        if (opKind(op, VO_SPECIAL  | VO_SHIFT)) {
            if (op == FIRST_NONZERO) {
                // FIXME: Support this in the JIT.
                // Rewrite as OR: zero out that's lanes wherever this is nonzero.
                VectorMask<Byte> thisNZ
                    = this.viewAsIntegralLanes().compare(NE, (byte) 0);
                that = that.blend((byte) 0, thisNZ.cast(vspecies()));
                op = OR_UNCHECKED;
            }
            if (opKind(op, VO_SHIFT)) {
                // As per shift specification for Java, mask the shift count.
                // This allows the JIT to ignore some ISA details.
                that = that.lanewise(AND, SHIFT_MASK);
            }
            if (op == AND_NOT) {
                // FIXME: Support this in the JIT.
                // Lower AND_NOT to AND with the complemented operand.
                that = that.lanewise(NOT);
                op = AND;
            } else if (op == DIV) {
                // Only set lanes may fault; check divisor zeros under the mask.
                VectorMask<Byte> eqz = that.eq((byte)0);
                if (eqz.and(m).anyTrue()) {
                    throw that.divZeroException();
                }
                // suppress div/0 exceptions in unset lanes
                that = that.lanewise(NOT, eqz);
            }
        }

        int opc = opCode(op);
        // Intrinsic dispatch; the cached lambda is the scalar fallback.
        return VectorSupport.binaryOp(
            opc, getClass(), maskClass, byte.class, length(),
            this, that, m,
            BIN_IMPL.find(op, opc, ByteVector::binaryOperations));
    }
 733 
    // Per-opcode cache of scalar fallback lambdas for binary ops.
    private static final
    ImplCache<Binary, BinaryOperation<ByteVector, VectorMask<Byte>>>
        BIN_IMPL = new ImplCache<>(Binary.class, ByteVector.class);
 737 
    // Scalar fallback bodies for binary intrinsics, selected by opcode.
    private static BinaryOperation<ByteVector, VectorMask<Byte>> binaryOperations(int opc_) {
        switch (opc_) {
            case VECTOR_OP_ADD: return (v0, v1, vm) ->
                    v0.bOp(v1, vm, (i, a, b) -> (byte)(a + b));
            case VECTOR_OP_SUB: return (v0, v1, vm) ->
                    v0.bOp(v1, vm, (i, a, b) -> (byte)(a - b));
            case VECTOR_OP_MUL: return (v0, v1, vm) ->
                    v0.bOp(v1, vm, (i, a, b) -> (byte)(a * b));
            case VECTOR_OP_DIV: return (v0, v1, vm) ->
                    v0.bOp(v1, vm, (i, a, b) -> (byte)(a / b));
            case VECTOR_OP_MAX: return (v0, v1, vm) ->
                    v0.bOp(v1, vm, (i, a, b) -> (byte)Math.max(a, b));
            case VECTOR_OP_MIN: return (v0, v1, vm) ->
                    v0.bOp(v1, vm, (i, a, b) -> (byte)Math.min(a, b));
            case VECTOR_OP_AND: return (v0, v1, vm) ->
                    v0.bOp(v1, vm, (i, a, b) -> (byte)(a & b));
            case VECTOR_OP_OR: return (v0, v1, vm) ->
                    v0.bOp(v1, vm, (i, a, b) -> (byte)(a | b));
            case VECTOR_OP_XOR: return (v0, v1, vm) ->
                    v0.bOp(v1, vm, (i, a, b) -> (byte)(a ^ b));
            case VECTOR_OP_LSHIFT: return (v0, v1, vm) ->
                    v0.bOp(v1, vm, (i, a, n) -> (byte)(a << n));
            case VECTOR_OP_RSHIFT: return (v0, v1, vm) ->
                    v0.bOp(v1, vm, (i, a, n) -> (byte)(a >> n));
            // Mask first so the promoted int is the zero-extended byte (logical shift).
            case VECTOR_OP_URSHIFT: return (v0, v1, vm) ->
                    v0.bOp(v1, vm, (i, a, n) -> (byte)((a & LSHR_SETUP_MASK) >>> n));
            case VECTOR_OP_LROTATE: return (v0, v1, vm) ->
                    v0.bOp(v1, vm, (i, a, n) -> rotateLeft(a, (int)n));
            case VECTOR_OP_RROTATE: return (v0, v1, vm) ->
                    v0.bOp(v1, vm, (i, a, n) -> rotateRight(a, (int)n));
            default: return null;  // no scalar fallback registered for this opcode
        }
    }
 771 
 772     // FIXME: Maybe all of the public final methods in this file (the
 773     // simple ones that just call lanewise) should be pushed down to
 774     // the X-VectorBits template.  They can't optimize properly at
 775     // this level, and must rely on inlining.  Does it work?
 776     // (If it works, of course keep the code here.)
 777 
 778     /**
 779      * Combines the lane values of this vector
 780      * with the value of a broadcast scalar.
 781      *
 782      * This is a lane-wise binary operation which applies
 783      * the selected operation to each lane.
 784      * The return value will be equal to this expression:
 785      * {@code this.lanewise(op, this.broadcast(e))}.
 786      *
 787      * @param op the operation used to process lane values
 788      * @param e the input scalar
 789      * @return the result of applying the operation lane-wise
 790      *         to the two input vectors
 791      * @throws UnsupportedOperationException if this vector does
 792      *         not support the requested operation
 793      * @see #lanewise(VectorOperators.Binary,Vector)
 794      * @see #lanewise(VectorOperators.Binary,byte,VectorMask)
 795      */
 796     @ForceInline
 797     public final
 798     ByteVector lanewise(VectorOperators.Binary op,
 799                                   byte e) {
 800         if (opKind(op, VO_SHIFT) && (byte)(int)e == e) {
 801             return lanewiseShift(op, (int) e);
 802         }
 803         if (op == AND_NOT) {
 804             op = AND; e = (byte) ~e;
 805         }
 806         return lanewise(op, broadcast(e));
 807     }
 808 
 809     /**
 810      * Combines the lane values of this vector
 811      * with the value of a broadcast scalar,
 812      * with selection of lane elements controlled by a mask.
 813      *
 814      * This is a masked lane-wise binary operation which applies
 815      * the selected operation to each lane.
 816      * The return value will be equal to this expression:
 817      * {@code this.lanewise(op, this.broadcast(e), m)}.
 818      *
 819      * @param op the operation used to process lane values
 820      * @param e the input scalar
 821      * @param m the mask controlling lane selection
 822      * @return the result of applying the operation lane-wise
 823      *         to the input vector and the scalar
 824      * @throws UnsupportedOperationException if this vector does
 825      *         not support the requested operation
 826      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
 827      * @see #lanewise(VectorOperators.Binary,byte)
 828      */
 829     @ForceInline
 830     public final
 831     ByteVector lanewise(VectorOperators.Binary op,
 832                                   byte e,
 833                                   VectorMask<Byte> m) {
 834         if (opKind(op, VO_SHIFT) && (byte)(int)e == e) {
 835             return lanewiseShift(op, (int) e, m);
 836         }
 837         if (op == AND_NOT) {
 838             op = AND; e = (byte) ~e;
 839         }
 840         return lanewise(op, broadcast(e), m);
 841     }
 842 
 843     /**
 844      * {@inheritDoc} <!--workaround-->
 845      * @apiNote
 846      * When working with vector subtypes like {@code ByteVector},
 847      * {@linkplain #lanewise(VectorOperators.Binary,byte)
 848      * the more strongly typed method}
 849      * is typically selected.  It can be explicitly selected
 850      * using a cast: {@code v.lanewise(op,(byte)e)}.
 851      * The two expressions will produce numerically identical results.
 852      */
 853     @ForceInline
 854     public final
 855     ByteVector lanewise(VectorOperators.Binary op,
 856                                   long e) {
 857         byte e1 = (byte) e;
 858         if ((long)e1 != e
 859             // allow shift ops to clip down their int parameters
 860             && !(opKind(op, VO_SHIFT) && (int)e1 == e)) {
 861             vspecies().checkValue(e);  // for exception
 862         }
 863         return lanewise(op, e1);
 864     }
 865 
 866     /**
 867      * {@inheritDoc} <!--workaround-->
 868      * @apiNote
 869      * When working with vector subtypes like {@code ByteVector},
 870      * {@linkplain #lanewise(VectorOperators.Binary,byte,VectorMask)
 871      * the more strongly typed method}
 872      * is typically selected.  It can be explicitly selected
 873      * using a cast: {@code v.lanewise(op,(byte)e,m)}.
 874      * The two expressions will produce numerically identical results.
 875      */
 876     @ForceInline
 877     public final
 878     ByteVector lanewise(VectorOperators.Binary op,
 879                                   long e, VectorMask<Byte> m) {
 880         byte e1 = (byte) e;
 881         if ((long)e1 != e
 882             // allow shift ops to clip down their int parameters
 883             && !(opKind(op, VO_SHIFT) && (int)e1 == e)) {
 884             vspecies().checkValue(e);  // for exception
 885         }
 886         return lanewise(op, e1, m);
 887     }
 888 
    /*package-private*/
    // Shift-family lanewise with an int shift count; implemented by
    // concrete species via lanewiseShiftTemplate.
    abstract ByteVector
    lanewiseShift(VectorOperators.Binary op, int e);
 892 
    /*package-private*/
    // Intrinsic entry for unmasked shift-by-scalar: masks the count to
    // the lane width, then hands off to VectorSupport.broadcastInt with
    // a cached Java fallback lambda.
    @ForceInline
    final ByteVector
    lanewiseShiftTemplate(VectorOperators.Binary op, int e) {
        // Special handling for these.  FIXME: Refactor?
        assert(opKind(op, VO_SHIFT));
        // As per shift specification for Java, mask the shift count.
        e &= SHIFT_MASK;
        int opc = opCode(op);
        return VectorSupport.broadcastInt(
            opc, getClass(), null, byte.class, length(),
            this, e, null,
            BIN_INT_IMPL.find(op, opc, ByteVector::broadcastIntOperations));
    }
 907 
    /*package-private*/
    // Masked variant of lanewiseShift; implemented by concrete species
    // via the masked lanewiseShiftTemplate.
    abstract ByteVector
    lanewiseShift(VectorOperators.Binary op, int e, VectorMask<Byte> m);
 911 
    /*package-private*/
    // Intrinsic entry for masked shift-by-scalar; validates the mask
    // class, masks the count, then calls VectorSupport.broadcastInt.
    @ForceInline
    final ByteVector
    lanewiseShiftTemplate(VectorOperators.Binary op,
                          Class<? extends VectorMask<Byte>> maskClass,
                          int e, VectorMask<Byte> m) {
        m.check(maskClass, this);
        assert(opKind(op, VO_SHIFT));
        // As per shift specification for Java, mask the shift count.
        e &= SHIFT_MASK;
        int opc = opCode(op);
        return VectorSupport.broadcastInt(
            opc, getClass(), maskClass, byte.class, length(),
            this, e, m,
            BIN_INT_IMPL.find(op, opc, ByteVector::broadcastIntOperations));
    }
 928 
    // Cache of Java fallback implementations for the broadcast-int
    // (shift/rotate) operators, keyed by operator token.
    private static final
    ImplCache<Binary,VectorBroadcastIntOp<ByteVector, VectorMask<Byte>>> BIN_INT_IMPL
        = new ImplCache<>(Binary.class, ByteVector.class);
 932 
    // Java fallback bodies for shift/rotate-by-scalar, used when the
    // intrinsic is not available; returns null for unknown opcodes.
    private static VectorBroadcastIntOp<ByteVector, VectorMask<Byte>> broadcastIntOperations(int opc_) {
        switch (opc_) {
            case VECTOR_OP_LSHIFT: return (v, n, m) ->
                    v.uOp(m, (i, a) -> (byte)(a << n));
            case VECTOR_OP_RSHIFT: return (v, n, m) ->  // arithmetic >>
                    v.uOp(m, (i, a) -> (byte)(a >> n));
            case VECTOR_OP_URSHIFT: return (v, n, m) ->
                    // mask off sign-extension bits so >>> acts on 8 bits
                    v.uOp(m, (i, a) -> (byte)((a & LSHR_SETUP_MASK) >>> n));
            case VECTOR_OP_LROTATE: return (v, n, m) ->
                    v.uOp(m, (i, a) -> rotateLeft(a, (int)n));
            case VECTOR_OP_RROTATE: return (v, n, m) ->
                    v.uOp(m, (i, a) -> rotateRight(a, (int)n));
            default: return null;
        }
    }
 948 
    // As per shift specification for Java, mask the shift count.
    // We mask 0X3F (long), 0X1F (int), 0x0F (short), 0x7 (byte).
    // The latter two maskings go beyond the JLS, but seem reasonable
    // since our lane types are first-class types, not just dressed
    // up ints.
    private static final int SHIFT_MASK = (Byte.SIZE - 1);  // 0x7
    // Also simulate >>> on sub-word variables with a mask.
    private static final int LSHR_SETUP_MASK = ((1 << Byte.SIZE) - 1);  // 0xFF
 957 
 958     // Ternary lanewise support
 959 
 960     // Ternary operators come in eight variations:
 961     //   lanewise(op, [broadcast(e1)|v1], [broadcast(e2)|v2])
 962     //   lanewise(op, [broadcast(e1)|v1], [broadcast(e2)|v2], mask)
 963 
 964     // It is annoying to support all of these variations of masking
 965     // and broadcast, but it would be more surprising not to continue
 966     // the obvious pattern started by unary and binary.
 967 
 968    /**
 969      * {@inheritDoc} <!--workaround-->
 970      * @see #lanewise(VectorOperators.Ternary,byte,byte,VectorMask)
 971      * @see #lanewise(VectorOperators.Ternary,Vector,byte,VectorMask)
 972      * @see #lanewise(VectorOperators.Ternary,byte,Vector,VectorMask)
 973      * @see #lanewise(VectorOperators.Ternary,byte,byte)
 974      * @see #lanewise(VectorOperators.Ternary,Vector,byte)
 975      * @see #lanewise(VectorOperators.Ternary,byte,Vector)
 976      */
 977     @Override
 978     public abstract
 979     ByteVector lanewise(VectorOperators.Ternary op,
 980                                                   Vector<Byte> v1,
 981                                                   Vector<Byte> v2);
    @ForceInline
    final
    ByteVector lanewiseTemplate(VectorOperators.Ternary op,
                                          Vector<Byte> v1,
                                          Vector<Byte> v2) {
        ByteVector that = (ByteVector) v1;
        ByteVector tother = (ByteVector) v2;
        // It's a word: https://www.dictionary.com/browse/tother
        // See also Chapter 11 of Dickens, Our Mutual Friend:
        // "Totherest Governor," replied Mr Riderhood...
        that.check(this);
        tother.check(this);
        if (op == BITWISE_BLEND) {
            // FIXME: Support this in the JIT.
            // blend(a,b,c) == a ^ ((a ^ b) & c): where a bit of c is
            // set the result takes b's bit, otherwise a's bit.
            that = this.lanewise(XOR, that).lanewise(AND, tother);
            return this.lanewise(XOR, that);
        }
        int opc = opCode(op);
        return VectorSupport.ternaryOp(
            opc, getClass(), null, byte.class, length(),
            this, that, tother, null,
            TERN_IMPL.find(op, opc, ByteVector::ternaryOperations));
    }
1005 
1006     /**
1007      * {@inheritDoc} <!--workaround-->
1008      * @see #lanewise(VectorOperators.Ternary,byte,byte,VectorMask)
1009      * @see #lanewise(VectorOperators.Ternary,Vector,byte,VectorMask)
1010      * @see #lanewise(VectorOperators.Ternary,byte,Vector,VectorMask)
1011      */
1012     @Override
1013     public abstract
1014     ByteVector lanewise(VectorOperators.Ternary op,
1015                                   Vector<Byte> v1,
1016                                   Vector<Byte> v2,
1017                                   VectorMask<Byte> m);
    @ForceInline
    final
    ByteVector lanewiseTemplate(VectorOperators.Ternary op,
                                          Class<? extends VectorMask<Byte>> maskClass,
                                          Vector<Byte> v1,
                                          Vector<Byte> v2,
                                          VectorMask<Byte> m) {
        ByteVector that = (ByteVector) v1;
        ByteVector tother = (ByteVector) v2;
        // It's a word: https://www.dictionary.com/browse/tother
        // See also Chapter 11 of Dickens, Our Mutual Friend:
        // "Totherest Governor," replied Mr Riderhood...
        that.check(this);
        tother.check(this);
        m.check(maskClass, this);

        if (op == BITWISE_BLEND) {
            // FIXME: Support this in the JIT.
            // blend(a,b,c) == a ^ ((a ^ b) & c); only the final XOR is
            // masked, so unselected lanes keep their original values.
            that = this.lanewise(XOR, that).lanewise(AND, tother);
            return this.lanewise(XOR, that, m);
        }
        int opc = opCode(op);
        return VectorSupport.ternaryOp(
            opc, getClass(), maskClass, byte.class, length(),
            this, that, tother, m,
            TERN_IMPL.find(op, opc, ByteVector::ternaryOperations));
    }
1045 
    // Cache of Java fallback implementations for ternary operators,
    // keyed by operator token.
    private static final
    ImplCache<Ternary, TernaryOperation<ByteVector, VectorMask<Byte>>>
        TERN_IMPL = new ImplCache<>(Ternary.class, ByteVector.class);
1049 
1050     private static TernaryOperation<ByteVector, VectorMask<Byte>> ternaryOperations(int opc_) {
1051         switch (opc_) {
1052             default: return null;
1053         }
1054     }
1055 
1056     /**
1057      * Combines the lane values of this vector
1058      * with the values of two broadcast scalars.
1059      *
1060      * This is a lane-wise ternary operation which applies
1061      * the selected operation to each lane.
1062      * The return value will be equal to this expression:
1063      * {@code this.lanewise(op, this.broadcast(e1), this.broadcast(e2))}.
1064      *
1065      * @param op the operation used to combine lane values
1066      * @param e1 the first input scalar
1067      * @param e2 the second input scalar
1068      * @return the result of applying the operation lane-wise
1069      *         to the input vector and the scalars
1070      * @throws UnsupportedOperationException if this vector does
1071      *         not support the requested operation
1072      * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
1073      * @see #lanewise(VectorOperators.Ternary,byte,byte,VectorMask)
1074      */
1075     @ForceInline
1076     public final
1077     ByteVector lanewise(VectorOperators.Ternary op, //(op,e1,e2)
1078                                   byte e1,
1079                                   byte e2) {
1080         return lanewise(op, broadcast(e1), broadcast(e2));
1081     }
1082 
1083     /**
1084      * Combines the lane values of this vector
1085      * with the values of two broadcast scalars,
1086      * with selection of lane elements controlled by a mask.
1087      *
1088      * This is a masked lane-wise ternary operation which applies
1089      * the selected operation to each lane.
1090      * The return value will be equal to this expression:
1091      * {@code this.lanewise(op, this.broadcast(e1), this.broadcast(e2), m)}.
1092      *
1093      * @param op the operation used to combine lane values
1094      * @param e1 the first input scalar
1095      * @param e2 the second input scalar
1096      * @param m the mask controlling lane selection
1097      * @return the result of applying the operation lane-wise
1098      *         to the input vector and the scalars
1099      * @throws UnsupportedOperationException if this vector does
1100      *         not support the requested operation
1101      * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
1102      * @see #lanewise(VectorOperators.Ternary,byte,byte)
1103      */
1104     @ForceInline
1105     public final
1106     ByteVector lanewise(VectorOperators.Ternary op, //(op,e1,e2,m)
1107                                   byte e1,
1108                                   byte e2,
1109                                   VectorMask<Byte> m) {
1110         return lanewise(op, broadcast(e1), broadcast(e2), m);
1111     }
1112 
1113     /**
1114      * Combines the lane values of this vector
1115      * with the values of another vector and a broadcast scalar.
1116      *
1117      * This is a lane-wise ternary operation which applies
1118      * the selected operation to each lane.
1119      * The return value will be equal to this expression:
1120      * {@code this.lanewise(op, v1, this.broadcast(e2))}.
1121      *
1122      * @param op the operation used to combine lane values
1123      * @param v1 the other input vector
1124      * @param e2 the input scalar
1125      * @return the result of applying the operation lane-wise
1126      *         to the input vectors and the scalar
1127      * @throws UnsupportedOperationException if this vector does
1128      *         not support the requested operation
1129      * @see #lanewise(VectorOperators.Ternary,byte,byte)
1130      * @see #lanewise(VectorOperators.Ternary,Vector,byte,VectorMask)
1131      */
1132     @ForceInline
1133     public final
1134     ByteVector lanewise(VectorOperators.Ternary op, //(op,v1,e2)
1135                                   Vector<Byte> v1,
1136                                   byte e2) {
1137         return lanewise(op, v1, broadcast(e2));
1138     }
1139 
1140     /**
1141      * Combines the lane values of this vector
1142      * with the values of another vector and a broadcast scalar,
1143      * with selection of lane elements controlled by a mask.
1144      *
1145      * This is a masked lane-wise ternary operation which applies
1146      * the selected operation to each lane.
1147      * The return value will be equal to this expression:
1148      * {@code this.lanewise(op, v1, this.broadcast(e2), m)}.
1149      *
1150      * @param op the operation used to combine lane values
1151      * @param v1 the other input vector
1152      * @param e2 the input scalar
1153      * @param m the mask controlling lane selection
1154      * @return the result of applying the operation lane-wise
1155      *         to the input vectors and the scalar
1156      * @throws UnsupportedOperationException if this vector does
1157      *         not support the requested operation
1158      * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
1159      * @see #lanewise(VectorOperators.Ternary,byte,byte,VectorMask)
1160      * @see #lanewise(VectorOperators.Ternary,Vector,byte)
1161      */
1162     @ForceInline
1163     public final
1164     ByteVector lanewise(VectorOperators.Ternary op, //(op,v1,e2,m)
1165                                   Vector<Byte> v1,
1166                                   byte e2,
1167                                   VectorMask<Byte> m) {
1168         return lanewise(op, v1, broadcast(e2), m);
1169     }
1170 
1171     /**
1172      * Combines the lane values of this vector
1173      * with the values of another vector and a broadcast scalar.
1174      *
1175      * This is a lane-wise ternary operation which applies
1176      * the selected operation to each lane.
1177      * The return value will be equal to this expression:
1178      * {@code this.lanewise(op, this.broadcast(e1), v2)}.
1179      *
1180      * @param op the operation used to combine lane values
1181      * @param e1 the input scalar
1182      * @param v2 the other input vector
1183      * @return the result of applying the operation lane-wise
1184      *         to the input vectors and the scalar
1185      * @throws UnsupportedOperationException if this vector does
1186      *         not support the requested operation
1187      * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
1188      * @see #lanewise(VectorOperators.Ternary,byte,Vector,VectorMask)
1189      */
1190     @ForceInline
1191     public final
1192     ByteVector lanewise(VectorOperators.Ternary op, //(op,e1,v2)
1193                                   byte e1,
1194                                   Vector<Byte> v2) {
1195         return lanewise(op, broadcast(e1), v2);
1196     }
1197 
1198     /**
1199      * Combines the lane values of this vector
1200      * with the values of another vector and a broadcast scalar,
1201      * with selection of lane elements controlled by a mask.
1202      *
1203      * This is a masked lane-wise ternary operation which applies
1204      * the selected operation to each lane.
1205      * The return value will be equal to this expression:
1206      * {@code this.lanewise(op, this.broadcast(e1), v2, m)}.
1207      *
1208      * @param op the operation used to combine lane values
1209      * @param e1 the input scalar
1210      * @param v2 the other input vector
1211      * @param m the mask controlling lane selection
1212      * @return the result of applying the operation lane-wise
1213      *         to the input vectors and the scalar
1214      * @throws UnsupportedOperationException if this vector does
1215      *         not support the requested operation
1216      * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
1217      * @see #lanewise(VectorOperators.Ternary,byte,Vector)
1218      */
1219     @ForceInline
1220     public final
1221     ByteVector lanewise(VectorOperators.Ternary op, //(op,e1,v2,m)
1222                                   byte e1,
1223                                   Vector<Byte> v2,
1224                                   VectorMask<Byte> m) {
1225         return lanewise(op, broadcast(e1), v2, m);
1226     }
1227 
1228     // (Thus endeth the Great and Mighty Ternary Ogdoad.)
1229     // https://en.wikipedia.org/wiki/Ogdoad
1230 
1231     /// FULL-SERVICE BINARY METHODS: ADD, SUB, MUL, DIV
1232     //
1233     // These include masked and non-masked versions.
1234     // This subclass adds broadcast (masked or not).
1235 
1236     /**
1237      * {@inheritDoc} <!--workaround-->
1238      * @see #add(byte)
1239      */
1240     @Override
1241     @ForceInline
1242     public final ByteVector add(Vector<Byte> v) {
1243         return lanewise(ADD, v);
1244     }
1245 
1246     /**
1247      * Adds this vector to the broadcast of an input scalar.
1248      *
1249      * This is a lane-wise binary operation which applies
1250      * the primitive addition operation ({@code +}) to each lane.
1251      *
1252      * This method is also equivalent to the expression
1253      * {@link #lanewise(VectorOperators.Binary,byte)
1254      *    lanewise}{@code (}{@link VectorOperators#ADD
1255      *    ADD}{@code , e)}.
1256      *
1257      * @param e the input scalar
1258      * @return the result of adding each lane of this vector to the scalar
1259      * @see #add(Vector)
1260      * @see #broadcast(byte)
1261      * @see #add(byte,VectorMask)
1262      * @see VectorOperators#ADD
1263      * @see #lanewise(VectorOperators.Binary,Vector)
1264      * @see #lanewise(VectorOperators.Binary,byte)
1265      */
1266     @ForceInline
1267     public final
1268     ByteVector add(byte e) {
1269         return lanewise(ADD, e);
1270     }
1271 
1272     /**
1273      * {@inheritDoc} <!--workaround-->
1274      * @see #add(byte,VectorMask)
1275      */
1276     @Override
1277     @ForceInline
1278     public final ByteVector add(Vector<Byte> v,
1279                                           VectorMask<Byte> m) {
1280         return lanewise(ADD, v, m);
1281     }
1282 
1283     /**
1284      * Adds this vector to the broadcast of an input scalar,
1285      * selecting lane elements controlled by a mask.
1286      *
1287      * This is a masked lane-wise binary operation which applies
1288      * the primitive addition operation ({@code +}) to each lane.
1289      *
1290      * This method is also equivalent to the expression
1291      * {@link #lanewise(VectorOperators.Binary,byte,VectorMask)
1292      *    lanewise}{@code (}{@link VectorOperators#ADD
1293      *    ADD}{@code , s, m)}.
1294      *
1295      * @param e the input scalar
1296      * @param m the mask controlling lane selection
1297      * @return the result of adding each lane of this vector to the scalar
1298      * @see #add(Vector,VectorMask)
1299      * @see #broadcast(byte)
1300      * @see #add(byte)
1301      * @see VectorOperators#ADD
1302      * @see #lanewise(VectorOperators.Binary,Vector)
1303      * @see #lanewise(VectorOperators.Binary,byte)
1304      */
1305     @ForceInline
1306     public final ByteVector add(byte e,
1307                                           VectorMask<Byte> m) {
1308         return lanewise(ADD, e, m);
1309     }
1310 
1311     /**
1312      * {@inheritDoc} <!--workaround-->
1313      * @see #sub(byte)
1314      */
1315     @Override
1316     @ForceInline
1317     public final ByteVector sub(Vector<Byte> v) {
1318         return lanewise(SUB, v);
1319     }
1320 
1321     /**
1322      * Subtracts an input scalar from this vector.
1323      *
1324      * This is a masked lane-wise binary operation which applies
1325      * the primitive subtraction operation ({@code -}) to each lane.
1326      *
1327      * This method is also equivalent to the expression
1328      * {@link #lanewise(VectorOperators.Binary,byte)
1329      *    lanewise}{@code (}{@link VectorOperators#SUB
1330      *    SUB}{@code , e)}.
1331      *
1332      * @param e the input scalar
1333      * @return the result of subtracting the scalar from each lane of this vector
1334      * @see #sub(Vector)
1335      * @see #broadcast(byte)
1336      * @see #sub(byte,VectorMask)
1337      * @see VectorOperators#SUB
1338      * @see #lanewise(VectorOperators.Binary,Vector)
1339      * @see #lanewise(VectorOperators.Binary,byte)
1340      */
1341     @ForceInline
1342     public final ByteVector sub(byte e) {
1343         return lanewise(SUB, e);
1344     }
1345 
1346     /**
1347      * {@inheritDoc} <!--workaround-->
1348      * @see #sub(byte,VectorMask)
1349      */
1350     @Override
1351     @ForceInline
1352     public final ByteVector sub(Vector<Byte> v,
1353                                           VectorMask<Byte> m) {
1354         return lanewise(SUB, v, m);
1355     }
1356 
1357     /**
1358      * Subtracts an input scalar from this vector
1359      * under the control of a mask.
1360      *
1361      * This is a masked lane-wise binary operation which applies
1362      * the primitive subtraction operation ({@code -}) to each lane.
1363      *
1364      * This method is also equivalent to the expression
1365      * {@link #lanewise(VectorOperators.Binary,byte,VectorMask)
1366      *    lanewise}{@code (}{@link VectorOperators#SUB
1367      *    SUB}{@code , s, m)}.
1368      *
1369      * @param e the input scalar
1370      * @param m the mask controlling lane selection
1371      * @return the result of subtracting the scalar from each lane of this vector
1372      * @see #sub(Vector,VectorMask)
1373      * @see #broadcast(byte)
1374      * @see #sub(byte)
1375      * @see VectorOperators#SUB
1376      * @see #lanewise(VectorOperators.Binary,Vector)
1377      * @see #lanewise(VectorOperators.Binary,byte)
1378      */
1379     @ForceInline
1380     public final ByteVector sub(byte e,
1381                                           VectorMask<Byte> m) {
1382         return lanewise(SUB, e, m);
1383     }
1384 
1385     /**
1386      * {@inheritDoc} <!--workaround-->
1387      * @see #mul(byte)
1388      */
1389     @Override
1390     @ForceInline
1391     public final ByteVector mul(Vector<Byte> v) {
1392         return lanewise(MUL, v);
1393     }
1394 
1395     /**
1396      * Multiplies this vector by the broadcast of an input scalar.
1397      *
1398      * This is a lane-wise binary operation which applies
1399      * the primitive multiplication operation ({@code *}) to each lane.
1400      *
1401      * This method is also equivalent to the expression
1402      * {@link #lanewise(VectorOperators.Binary,byte)
1403      *    lanewise}{@code (}{@link VectorOperators#MUL
1404      *    MUL}{@code , e)}.
1405      *
1406      * @param e the input scalar
1407      * @return the result of multiplying this vector by the given scalar
1408      * @see #mul(Vector)
1409      * @see #broadcast(byte)
1410      * @see #mul(byte,VectorMask)
1411      * @see VectorOperators#MUL
1412      * @see #lanewise(VectorOperators.Binary,Vector)
1413      * @see #lanewise(VectorOperators.Binary,byte)
1414      */
1415     @ForceInline
1416     public final ByteVector mul(byte e) {
1417         return lanewise(MUL, e);
1418     }
1419 
1420     /**
1421      * {@inheritDoc} <!--workaround-->
1422      * @see #mul(byte,VectorMask)
1423      */
1424     @Override
1425     @ForceInline
1426     public final ByteVector mul(Vector<Byte> v,
1427                                           VectorMask<Byte> m) {
1428         return lanewise(MUL, v, m);
1429     }
1430 
1431     /**
1432      * Multiplies this vector by the broadcast of an input scalar,
1433      * selecting lane elements controlled by a mask.
1434      *
1435      * This is a masked lane-wise binary operation which applies
1436      * the primitive multiplication operation ({@code *}) to each lane.
1437      *
1438      * This method is also equivalent to the expression
1439      * {@link #lanewise(VectorOperators.Binary,byte,VectorMask)
1440      *    lanewise}{@code (}{@link VectorOperators#MUL
1441      *    MUL}{@code , s, m)}.
1442      *
1443      * @param e the input scalar
1444      * @param m the mask controlling lane selection
1445      * @return the result of muling each lane of this vector to the scalar
1446      * @see #mul(Vector,VectorMask)
1447      * @see #broadcast(byte)
1448      * @see #mul(byte)
1449      * @see VectorOperators#MUL
1450      * @see #lanewise(VectorOperators.Binary,Vector)
1451      * @see #lanewise(VectorOperators.Binary,byte)
1452      */
1453     @ForceInline
1454     public final ByteVector mul(byte e,
1455                                           VectorMask<Byte> m) {
1456         return lanewise(MUL, e, m);
1457     }
1458 
1459     /**
1460      * {@inheritDoc} <!--workaround-->
1461      * @apiNote If there is a zero divisor, {@code
1462      * ArithmeticException} will be thrown.
1463      */
1464     @Override
1465     @ForceInline
1466     public final ByteVector div(Vector<Byte> v) {
1467         return lanewise(DIV, v);
1468     }
1469 
1470     /**
1471      * Divides this vector by the broadcast of an input scalar.
1472      *
1473      * This is a lane-wise binary operation which applies
1474      * the primitive division operation ({@code /}) to each lane.
1475      *
1476      * This method is also equivalent to the expression
1477      * {@link #lanewise(VectorOperators.Binary,byte)
1478      *    lanewise}{@code (}{@link VectorOperators#DIV
1479      *    DIV}{@code , e)}.
1480      *
1481      * @apiNote If there is a zero divisor, {@code
1482      * ArithmeticException} will be thrown.
1483      *
1484      * @param e the input scalar
1485      * @return the result of dividing each lane of this vector by the scalar
1486      * @see #div(Vector)
1487      * @see #broadcast(byte)
1488      * @see #div(byte,VectorMask)
1489      * @see VectorOperators#DIV
1490      * @see #lanewise(VectorOperators.Binary,Vector)
1491      * @see #lanewise(VectorOperators.Binary,byte)
1492      */
1493     @ForceInline
1494     public final ByteVector div(byte e) {
1495         return lanewise(DIV, e);
1496     }
1497 
1498     /**
1499      * {@inheritDoc} <!--workaround-->
1500      * @see #div(byte,VectorMask)
1501      * @apiNote If there is a zero divisor, {@code
1502      * ArithmeticException} will be thrown.
1503      */
1504     @Override
1505     @ForceInline
1506     public final ByteVector div(Vector<Byte> v,
1507                                           VectorMask<Byte> m) {
1508         return lanewise(DIV, v, m);
1509     }
1510 
1511     /**
1512      * Divides this vector by the broadcast of an input scalar,
1513      * selecting lane elements controlled by a mask.
1514      *
1515      * This is a masked lane-wise binary operation which applies
1516      * the primitive division operation ({@code /}) to each lane.
1517      *
1518      * This method is also equivalent to the expression
1519      * {@link #lanewise(VectorOperators.Binary,byte,VectorMask)
1520      *    lanewise}{@code (}{@link VectorOperators#DIV
1521      *    DIV}{@code , s, m)}.
1522      *
1523      * @apiNote If there is a zero divisor, {@code
1524      * ArithmeticException} will be thrown.
1525      *
1526      * @param e the input scalar
1527      * @param m the mask controlling lane selection
1528      * @return the result of dividing each lane of this vector by the scalar
1529      * @see #div(Vector,VectorMask)
1530      * @see #broadcast(byte)
1531      * @see #div(byte)
1532      * @see VectorOperators#DIV
1533      * @see #lanewise(VectorOperators.Binary,Vector)
1534      * @see #lanewise(VectorOperators.Binary,byte)
1535      */
1536     @ForceInline
1537     public final ByteVector div(byte e,
1538                                           VectorMask<Byte> m) {
1539         return lanewise(DIV, e, m);
1540     }
1541 
1542     /// END OF FULL-SERVICE BINARY METHODS
1543 
1544     /// SECOND-TIER BINARY METHODS
1545     //
1546     // There are no masked versions.
1547 
1548     /**
1549      * {@inheritDoc} <!--workaround-->
1550      */
1551     @Override
1552     @ForceInline
1553     public final ByteVector min(Vector<Byte> v) {
1554         return lanewise(MIN, v);
1555     }
1556 
    // FIXME:  "broadcast of an input scalar" is really wordy.  Reduce?
    /**
     * Computes the smaller of this vector and the broadcast of an input scalar.
     *
     * This is a lane-wise binary operation which applies the
     * operation {@code Math.min()} to each pair of
     * corresponding lane values.
     *
     * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Binary,byte)
     *    lanewise}{@code (}{@link VectorOperators#MIN
     *    MIN}{@code , e)}.
     *
     * @param e the input scalar
     * @return the result of computing the smaller of each lane of this vector and the scalar
     * @see #min(Vector)
     * @see #broadcast(byte)
     * @see VectorOperators#MIN
     * @see #lanewise(VectorOperators.Binary,byte,VectorMask)
     */
    @ForceInline
    public final ByteVector min(byte e) {
        return lanewise(MIN, e);
    }
1581 
1582     /**
1583      * {@inheritDoc} <!--workaround-->
1584      */
1585     @Override
1586     @ForceInline
1587     public final ByteVector max(Vector<Byte> v) {
1588         return lanewise(MAX, v);
1589     }
1590 
1591     /**
1592      * Computes the larger of this vector and the broadcast of an input scalar.
1593      *
1594      * This is a lane-wise binary operation which applies the
1595      * operation {@code Math.max()} to each pair of
1596      * corresponding lane values.
1597      *
1598      * This method is also equivalent to the expression
1599      * {@link #lanewise(VectorOperators.Binary,byte)
1600      *    lanewise}{@code (}{@link VectorOperators#MAX
1601      *    MAX}{@code , e)}.
1602      *
1603      * @param e the input scalar
1604      * @return the result of multiplying this vector by the given scalar
1605      * @see #max(Vector)
1606      * @see #broadcast(byte)
1607      * @see VectorOperators#MAX
1608      * @see #lanewise(VectorOperators.Binary,byte,VectorMask)
1609      */
1610     @ForceInline
1611     public final ByteVector max(byte e) {
1612         return lanewise(MAX, e);
1613     }
1614 
    // common bitwise operators: and, or, not (with scalar versions)
    /**
     * Computes the bitwise logical conjunction ({@code &})
     * of this vector and a second input vector.
     *
     * This is a lane-wise binary operation which applies
     * the primitive bitwise "and" operation ({@code &})
     * to each pair of corresponding lane values.
     *
     * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Binary,Vector)
     *    lanewise}{@code (}{@link VectorOperators#AND
     *    AND}{@code , v)}.
     *
     * <p>
     * This is not a full-service named operation like
     * {@link #add(Vector) add}.  A masked version of
     * this operation is not directly available
     * but may be obtained via the masked version of
     * {@code lanewise}.
     *
     * @param v a second input vector
     * @return the bitwise {@code &} of this vector and the second input vector
     * @see #and(byte)
     * @see #or(Vector)
     * @see #not()
     * @see VectorOperators#AND
     * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
     */
    @ForceInline
    public final ByteVector and(Vector<Byte> v) {
        return lanewise(AND, v);
    }
1648 
1649     /**
1650      * Computes the bitwise logical conjunction ({@code &})
1651      * of this vector and a scalar.
1652      *
1653      * This is a lane-wise binary operation which applies the
1654      * the primitive bitwise "and" operation ({@code &})
1655      * to each pair of corresponding lane values.
1656      *
1657      * This method is also equivalent to the expression
1658      * {@link #lanewise(VectorOperators.Binary,Vector)
1659      *    lanewise}{@code (}{@link VectorOperators#AND
1660      *    AND}{@code , e)}.
1661      *
1662      * @param e an input scalar
1663      * @return the bitwise {@code &} of this vector and scalar
1664      * @see #and(Vector)
1665      * @see VectorOperators#AND
1666      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
1667      */
1668     @ForceInline
1669     public final ByteVector and(byte e) {
1670         return lanewise(AND, e);
1671     }
1672 
1673     /**
1674      * Computes the bitwise logical disjunction ({@code |})
1675      * of this vector and a second input vector.
1676      *
1677      * This is a lane-wise binary operation which applies the
1678      * the primitive bitwise "or" operation ({@code |})
1679      * to each pair of corresponding lane values.
1680      *
1681      * This method is also equivalent to the expression
1682      * {@link #lanewise(VectorOperators.Binary,Vector)
1683      *    lanewise}{@code (}{@link VectorOperators#OR
1684      *    AND}{@code , v)}.
1685      *
1686      * <p>
1687      * This is not a full-service named operation like
1688      * {@link #add(Vector) add}.  A masked version of
1689      * this operation is not directly available
1690      * but may be obtained via the masked version of
1691      * {@code lanewise}.
1692      *
1693      * @param v a second input vector
1694      * @return the bitwise {@code |} of this vector and the second input vector
1695      * @see #or(byte)
1696      * @see #and(Vector)
1697      * @see #not()
1698      * @see VectorOperators#OR
1699      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
1700      */
1701     @ForceInline
1702     public final ByteVector or(Vector<Byte> v) {
1703         return lanewise(OR, v);
1704     }
1705 
1706     /**
1707      * Computes the bitwise logical disjunction ({@code |})
1708      * of this vector and a scalar.
1709      *
1710      * This is a lane-wise binary operation which applies the
1711      * the primitive bitwise "or" operation ({@code |})
1712      * to each pair of corresponding lane values.
1713      *
1714      * This method is also equivalent to the expression
1715      * {@link #lanewise(VectorOperators.Binary,Vector)
1716      *    lanewise}{@code (}{@link VectorOperators#OR
1717      *    OR}{@code , e)}.
1718      *
1719      * @param e an input scalar
1720      * @return the bitwise {@code |} of this vector and scalar
1721      * @see #or(Vector)
1722      * @see VectorOperators#OR
1723      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
1724      */
1725     @ForceInline
1726     public final ByteVector or(byte e) {
1727         return lanewise(OR, e);
1728     }
1729 
1730 
1731 
1732     /// UNARY METHODS
1733 
1734     /**
1735      * {@inheritDoc} <!--workaround-->
1736      */
1737     @Override
1738     @ForceInline
1739     public final
1740     ByteVector neg() {
1741         return lanewise(NEG);
1742     }
1743 
1744     /**
1745      * {@inheritDoc} <!--workaround-->
1746      */
1747     @Override
1748     @ForceInline
1749     public final
1750     ByteVector abs() {
1751         return lanewise(ABS);
1752     }
1753 
    // not (~)
    /**
     * Computes the bitwise logical complement ({@code ~})
     * of this vector.
     *
     * This is a lane-wise unary operation which applies
     * the primitive bitwise "not" operation ({@code ~})
     * to each lane value.
     *
     * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Unary)
     *    lanewise}{@code (}{@link VectorOperators#NOT
     *    NOT}{@code )}.
     *
     * <p>
     * This is not a full-service named operation like
     * {@link #add(Vector) add}.  A masked version of
     * this operation is not directly available
     * but may be obtained via the masked version of
     * {@code lanewise}.
     *
     * @return the bitwise complement {@code ~} of this vector
     * @see #and(Vector)
     * @see VectorOperators#NOT
     * @see #lanewise(VectorOperators.Unary,VectorMask)
     */
    @ForceInline
    public final ByteVector not() {
        return lanewise(NOT);
    }
1784 
1785 
1786     /// COMPARISONS
1787 
1788     /**
1789      * {@inheritDoc} <!--workaround-->
1790      */
1791     @Override
1792     @ForceInline
1793     public final
1794     VectorMask<Byte> eq(Vector<Byte> v) {
1795         return compare(EQ, v);
1796     }
1797 
1798     /**
1799      * Tests if this vector is equal to an input scalar.
1800      *
1801      * This is a lane-wise binary test operation which applies
1802      * the primitive equals operation ({@code ==}) to each lane.
1803      * The result is the same as {@code compare(VectorOperators.Comparison.EQ, e)}.
1804      *
1805      * @param e the input scalar
1806      * @return the result mask of testing if this vector
1807      *         is equal to {@code e}
1808      * @see #compare(VectorOperators.Comparison,byte)
1809      */
1810     @ForceInline
1811     public final
1812     VectorMask<Byte> eq(byte e) {
1813         return compare(EQ, e);
1814     }
1815 
1816     /**
1817      * {@inheritDoc} <!--workaround-->
1818      */
1819     @Override
1820     @ForceInline
1821     public final
1822     VectorMask<Byte> lt(Vector<Byte> v) {
1823         return compare(LT, v);
1824     }
1825 
1826     /**
1827      * Tests if this vector is less than an input scalar.
1828      *
1829      * This is a lane-wise binary test operation which applies
1830      * the primitive less than operation ({@code <}) to each lane.
1831      * The result is the same as {@code compare(VectorOperators.LT, e)}.
1832      *
1833      * @param e the input scalar
1834      * @return the mask result of testing if this vector
1835      *         is less than the input scalar
1836      * @see #compare(VectorOperators.Comparison,byte)
1837      */
1838     @ForceInline
1839     public final
1840     VectorMask<Byte> lt(byte e) {
1841         return compare(LT, e);
1842     }
1843 
1844     /**
1845      * {@inheritDoc} <!--workaround-->
1846      */
1847     @Override
1848     public abstract
1849     VectorMask<Byte> test(VectorOperators.Test op);
1850 
    /*package-private*/
    // Shared implementation behind test(Test): rewrites the "special"
    // test operators as comparisons against zero on the lane bits.
    @ForceInline
    final
    <M extends VectorMask<Byte>>
    M testTemplate(Class<M> maskType, Test op) {
        ByteSpecies vsp = vspecies();  // NOTE(review): unused here; kept for template symmetry — confirm
        if (opKind(op, VO_SPECIAL)) {
            ByteVector bits = this.viewAsIntegralLanes();
            VectorMask<Byte> m;
            if (op == IS_DEFAULT) {
                // a lane is "default" iff all of its bits are zero
                m = bits.compare(EQ, (byte) 0);
            } else if (op == IS_NEGATIVE) {
                // a lane is "negative" iff its sign bit is set
                m = bits.compare(LT, (byte) 0);
            }
            else {
                throw new AssertionError(op);
            }
            return maskType.cast(m);
        }
        int opc = opCode(op);
        // No non-special test operations are defined for this lane type;
        // any other operator token is a programming error.
        throw new AssertionError(op);
    }
1873 
1874     /**
1875      * {@inheritDoc} <!--workaround-->
1876      */
1877     @Override
1878     @ForceInline
1879     public final
1880     VectorMask<Byte> test(VectorOperators.Test op,
1881                                   VectorMask<Byte> m) {
1882         return test(op).and(m);
1883     }
1884 
1885     /**
1886      * {@inheritDoc} <!--workaround-->
1887      */
1888     @Override
1889     public abstract
1890     VectorMask<Byte> compare(VectorOperators.Comparison op, Vector<Byte> v);
1891 
    /*package-private*/
    // Shared implementation behind the unmasked vector-vector compare.
    @ForceInline
    final
    <M extends VectorMask<Byte>>
    M compareTemplate(Class<M> maskType, Comparison op, Vector<Byte> v) {
        ByteVector that = (ByteVector) v;
        that.check(this);  // both operands must belong to the same species
        int opc = opCode(op);
        return VectorSupport.compare(
            opc, getClass(), maskType, byte.class, length(),
            this, that, null,
            // Fallback implementation: test each lane pair with the
            // requested scalar condition.
            (cond, v0, v1, m1) -> {
                AbstractMask<Byte> m
                    = v0.bTest(cond, v1, (cond_, i, a, b)
                               -> compareWithOp(cond, a, b));
                @SuppressWarnings("unchecked")
                M m2 = (M) m;
                return m2;
            });
    }
1912 
    /*package-private*/
    // Shared implementation behind the masked vector-vector compare.
    @ForceInline
    final
    <M extends VectorMask<Byte>>
    M compareTemplate(Class<M> maskType, Comparison op, Vector<Byte> v, M m) {
        ByteVector that = (ByteVector) v;
        that.check(this);        // both operands must belong to the same species
        m.check(maskType, this); // the mask must match species and mask type
        int opc = opCode(op);
        return VectorSupport.compare(
            opc, getClass(), maskType, byte.class, length(),
            this, that, m,
            // Fallback implementation: compute the full comparison mask,
            // then intersect it with the governing mask.
            (cond, v0, v1, m1) -> {
                AbstractMask<Byte> cmpM
                    = v0.bTest(cond, v1, (cond_, i, a, b)
                               -> compareWithOp(cond, a, b));
                @SuppressWarnings("unchecked")
                M m2 = (M) cmpM.and(m1);
                return m2;
            });
    }
1934 
1935     @ForceInline
1936     private static boolean compareWithOp(int cond, byte a, byte b) {
1937         return switch (cond) {
1938             case BT_eq -> a == b;
1939             case BT_ne -> a != b;
1940             case BT_lt -> a < b;
1941             case BT_le -> a <= b;
1942             case BT_gt -> a > b;
1943             case BT_ge -> a >= b;
1944             case BT_ult -> Byte.compareUnsigned(a, b) < 0;
1945             case BT_ule -> Byte.compareUnsigned(a, b) <= 0;
1946             case BT_ugt -> Byte.compareUnsigned(a, b) > 0;
1947             case BT_uge -> Byte.compareUnsigned(a, b) >= 0;
1948             default -> throw new AssertionError();
1949         };
1950     }
1951 
1952     /**
1953      * Tests this vector by comparing it with an input scalar,
1954      * according to the given comparison operation.
1955      *
1956      * This is a lane-wise binary test operation which applies
1957      * the comparison operation to each lane.
1958      * <p>
1959      * The result is the same as
1960      * {@code compare(op, broadcast(species(), e))}.
1961      * That is, the scalar may be regarded as broadcast to
1962      * a vector of the same species, and then compared
1963      * against the original vector, using the selected
1964      * comparison operation.
1965      *
1966      * @param op the operation used to compare lane values
1967      * @param e the input scalar
1968      * @return the mask result of testing lane-wise if this vector
1969      *         compares to the input, according to the selected
1970      *         comparison operator
1971      * @see ByteVector#compare(VectorOperators.Comparison,Vector)
1972      * @see #eq(byte)
1973      * @see #lt(byte)
1974      */
1975     public abstract
1976     VectorMask<Byte> compare(Comparison op, byte e);
1977 
1978     /*package-private*/
1979     @ForceInline
1980     final
1981     <M extends VectorMask<Byte>>
1982     M compareTemplate(Class<M> maskType, Comparison op, byte e) {
1983         return compareTemplate(maskType, op, broadcast(e));
1984     }
1985 
1986     /**
1987      * Tests this vector by comparing it with an input scalar,
1988      * according to the given comparison operation,
1989      * in lanes selected by a mask.
1990      *
1991      * This is a masked lane-wise binary test operation which applies
1992      * to each pair of corresponding lane values.
1993      *
1994      * The returned result is equal to the expression
1995      * {@code compare(op,s).and(m)}.
1996      *
1997      * @param op the operation used to compare lane values
1998      * @param e the input scalar
1999      * @param m the mask controlling lane selection
2000      * @return the mask result of testing lane-wise if this vector
2001      *         compares to the input, according to the selected
2002      *         comparison operator,
2003      *         and only in the lanes selected by the mask
2004      * @see ByteVector#compare(VectorOperators.Comparison,Vector,VectorMask)
2005      */
2006     @ForceInline
2007     public final VectorMask<Byte> compare(VectorOperators.Comparison op,
2008                                                byte e,
2009                                                VectorMask<Byte> m) {
2010         return compare(op, broadcast(e), m);
2011     }
2012 
2013     /**
2014      * {@inheritDoc} <!--workaround-->
2015      */
2016     @Override
2017     public abstract
2018     VectorMask<Byte> compare(Comparison op, long e);
2019 
2020     /*package-private*/
2021     @ForceInline
2022     final
2023     <M extends VectorMask<Byte>>
2024     M compareTemplate(Class<M> maskType, Comparison op, long e) {
2025         return compareTemplate(maskType, op, broadcast(e));
2026     }
2027 
2028     /**
2029      * {@inheritDoc} <!--workaround-->
2030      */
2031     @Override
2032     @ForceInline
2033     public final
2034     VectorMask<Byte> compare(Comparison op, long e, VectorMask<Byte> m) {
2035         return compare(op, broadcast(e), m);
2036     }
2037 
2038 
2039 
2040     /**
2041      * {@inheritDoc} <!--workaround-->
2042      */
2043     @Override public abstract
2044     ByteVector blend(Vector<Byte> v, VectorMask<Byte> m);
2045 
    /*package-private*/
    // Shared implementation behind blend: lanes where the mask is set
    // come from v, the rest from this vector.
    @ForceInline
    final
    <M extends VectorMask<Byte>>
    ByteVector
    blendTemplate(Class<M> maskType, ByteVector v, M m) {
        v.check(this);  // both vectors must belong to the same species
        return VectorSupport.blend(
            getClass(), maskType, byte.class, length(),
            this, v, m,
            // Fallback: a masked binary op that selects the second
            // operand in the lanes enabled by the mask.
            (v0, v1, m_) -> v0.bOp(v1, m_, (i, a, b) -> b));
    }
2058 
2059     /**
2060      * {@inheritDoc} <!--workaround-->
2061      */
2062     @Override public abstract ByteVector addIndex(int scale);
2063 
    /*package-private*/
    // Shared implementation behind addIndex: computes this + iota*scale
    // lane-wise, preferring a hardware INDEX instruction when available.
    @ForceInline
    final ByteVector addIndexTemplate(int scale) {
        ByteSpecies vsp = vspecies();
        // make sure VLENGTH*scale doesn't overflow:
        vsp.checkScale(scale);
        return VectorSupport.indexVector(
            getClass(), byte.class, length(),
            this, scale, vsp,
            (v, scale_, s)
            -> {
                // If the platform doesn't support an INDEX
                // instruction directly, load IOTA from memory
                // and multiply.
                ByteVector iota = s.iota();
                byte sc = (byte) scale_;
                // Skip the multiply entirely for the common scale of 1.
                return v.add(sc == 1 ? iota : iota.mul(sc));
            });
    }
2083 
2084     /**
2085      * Replaces selected lanes of this vector with
2086      * a scalar value
2087      * under the control of a mask.
2088      *
2089      * This is a masked lane-wise binary operation which
2090      * selects each lane value from one or the other input.
2091      *
2092      * The returned result is equal to the expression
2093      * {@code blend(broadcast(e),m)}.
2094      *
2095      * @param e the input scalar, containing the replacement lane value
2096      * @param m the mask controlling lane selection of the scalar
2097      * @return the result of blending the lane elements of this vector with
2098      *         the scalar value
2099      */
2100     @ForceInline
2101     public final ByteVector blend(byte e,
2102                                             VectorMask<Byte> m) {
2103         return blend(broadcast(e), m);
2104     }
2105 
2106     /**
2107      * Replaces selected lanes of this vector with
2108      * a scalar value
2109      * under the control of a mask.
2110      *
2111      * This is a masked lane-wise binary operation which
2112      * selects each lane value from one or the other input.
2113      *
2114      * The returned result is equal to the expression
2115      * {@code blend(broadcast(e),m)}.
2116      *
2117      * @param e the input scalar, containing the replacement lane value
2118      * @param m the mask controlling lane selection of the scalar
2119      * @return the result of blending the lane elements of this vector with
2120      *         the scalar value
2121      */
2122     @ForceInline
2123     public final ByteVector blend(long e,
2124                                             VectorMask<Byte> m) {
2125         return blend(broadcast(e), m);
2126     }
2127 
2128     /**
2129      * {@inheritDoc} <!--workaround-->
2130      */
2131     @Override
2132     public abstract
2133     ByteVector slice(int origin, Vector<Byte> v1);
2134 
2135     /*package-private*/
2136     final
2137     @ForceInline
2138     ByteVector sliceTemplate(int origin, Vector<Byte> v1) {
2139         ByteVector that = (ByteVector) v1;
2140         that.check(this);
2141         Objects.checkIndex(origin, length() + 1);
2142         VectorShuffle<Byte> iota = iotaShuffle();
2143         VectorMask<Byte> blendMask = iota.toVector().compare(VectorOperators.LT, (broadcast((byte)(length() - origin))));
2144         iota = iotaShuffle(origin, 1, true);
2145         return that.rearrange(iota).blend(this.rearrange(iota), blendMask);
2146     }
2147 
2148     /**
2149      * {@inheritDoc} <!--workaround-->
2150      */
2151     @Override
2152     @ForceInline
2153     public final
2154     ByteVector slice(int origin,
2155                                Vector<Byte> w,
2156                                VectorMask<Byte> m) {
2157         return broadcast(0).blend(slice(origin, w), m);
2158     }
2159 
2160     /**
2161      * {@inheritDoc} <!--workaround-->
2162      */
2163     @Override
2164     public abstract
2165     ByteVector slice(int origin);
2166 
2167     /*package-private*/
2168     final
2169     @ForceInline
2170     ByteVector sliceTemplate(int origin) {
2171         Objects.checkIndex(origin, length() + 1);
2172         VectorShuffle<Byte> iota = iotaShuffle();
2173         VectorMask<Byte> blendMask = iota.toVector().compare(VectorOperators.LT, (broadcast((byte)(length() - origin))));
2174         iota = iotaShuffle(origin, 1, true);
2175         return vspecies().zero().blend(this.rearrange(iota), blendMask);
2176     }
2177 
2178     /**
2179      * {@inheritDoc} <!--workaround-->
2180      */
2181     @Override
2182     public abstract
2183     ByteVector unslice(int origin, Vector<Byte> w, int part);
2184 
2185     /*package-private*/
2186     final
2187     @ForceInline
2188     ByteVector
2189     unsliceTemplate(int origin, Vector<Byte> w, int part) {
2190         ByteVector that = (ByteVector) w;
2191         that.check(this);
2192         Objects.checkIndex(origin, length() + 1);
2193         VectorShuffle<Byte> iota = iotaShuffle();
2194         VectorMask<Byte> blendMask = iota.toVector().compare((part == 0) ? VectorOperators.GE : VectorOperators.LT,
2195                                                                   (broadcast((byte)(origin))));
2196         iota = iotaShuffle(-origin, 1, true);
2197         return that.blend(this.rearrange(iota), blendMask);
2198     }
2199 
2200     /*package-private*/
2201     final
2202     @ForceInline
2203     <M extends VectorMask<Byte>>
2204     ByteVector
2205     unsliceTemplate(Class<M> maskType, int origin, Vector<Byte> w, int part, M m) {
2206         ByteVector that = (ByteVector) w;
2207         that.check(this);
2208         ByteVector slice = that.sliceTemplate(origin, that);
2209         slice = slice.blendTemplate(maskType, this, m);
2210         return slice.unsliceTemplate(origin, w, part);
2211     }
2212 
2213     /**
2214      * {@inheritDoc} <!--workaround-->
2215      */
2216     @Override
2217     public abstract
2218     ByteVector unslice(int origin, Vector<Byte> w, int part, VectorMask<Byte> m);
2219 
2220     /**
2221      * {@inheritDoc} <!--workaround-->
2222      */
2223     @Override
2224     public abstract
2225     ByteVector unslice(int origin);
2226 
2227     /*package-private*/
2228     final
2229     @ForceInline
2230     ByteVector
2231     unsliceTemplate(int origin) {
2232         Objects.checkIndex(origin, length() + 1);
2233         VectorShuffle<Byte> iota = iotaShuffle();
2234         VectorMask<Byte> blendMask = iota.toVector().compare(VectorOperators.GE,
2235                                                                   (broadcast((byte)(origin))));
2236         iota = iotaShuffle(-origin, 1, true);
2237         return vspecies().zero().blend(this.rearrange(iota), blendMask);
2238     }
2239 
2240     private ArrayIndexOutOfBoundsException
2241     wrongPartForSlice(int part) {
2242         String msg = String.format("bad part number %d for slice operation",
2243                                    part);
2244         return new ArrayIndexOutOfBoundsException(msg);
2245     }
2246 
2247     /**
2248      * {@inheritDoc} <!--workaround-->
2249      */
2250     @Override
2251     public abstract
2252     ByteVector rearrange(VectorShuffle<Byte> m);
2253 
    /*package-private*/
    // Shared implementation behind the unmasked rearrange: permutes the
    // lanes of this vector according to the shuffle's source indexes.
    @ForceInline
    final
    <S extends VectorShuffle<Byte>>
    ByteVector rearrangeTemplate(Class<S> shuffletype, S shuffle) {
        shuffle.checkIndexes();  // reject any exceptional (negative) indexes
        return VectorSupport.rearrangeOp(
            getClass(), shuffletype, null, byte.class, length(),
            this, shuffle, null,
            // Fallback: gather each result lane from its source lane.
            (v1, s_, m_) -> v1.uOp((i, a) -> {
                int ei = s_.laneSource(i);
                return v1.lane(ei);
            }));
    }
2268 
2269     /**
2270      * {@inheritDoc} <!--workaround-->
2271      */
2272     @Override
2273     public abstract
2274     ByteVector rearrange(VectorShuffle<Byte> s,
2275                                    VectorMask<Byte> m);
2276 
    /*package-private*/
    // Shared implementation behind the masked rearrange: permutes lanes
    // selected by the mask; unselected lanes become zero.
    @ForceInline
    final
    <S extends VectorShuffle<Byte>, M extends VectorMask<Byte>>
    ByteVector rearrangeTemplate(Class<S> shuffletype,
                                           Class<M> masktype,
                                           S shuffle,
                                           M m) {

        m.check(masktype, this);  // the mask must match species and mask type
        VectorMask<Byte> valid = shuffle.laneIsValid();
        if (m.andNot(valid).anyTrue()) {
            // Some enabled lane has an exceptional source index, so
            // checkIndexes() is guaranteed to throw; the AssertionError
            // below is unreachable.
            shuffle.checkIndexes();
            throw new AssertionError();
        }
        return VectorSupport.rearrangeOp(
                   getClass(), shuffletype, masktype, byte.class, length(),
                   this, shuffle, m,
                   // Fallback: gather enabled lanes, zero the rest.
                   (v1, s_, m_) -> v1.uOp((i, a) -> {
                        int ei = s_.laneSource(i);
                        return ei < 0  || !m_.laneIsSet(i) ? 0 : v1.lane(ei);
                   }));
    }
2300 
2301     /**
2302      * {@inheritDoc} <!--workaround-->
2303      */
2304     @Override
2305     public abstract
2306     ByteVector rearrange(VectorShuffle<Byte> s,
2307                                    Vector<Byte> v);
2308 
    /*package-private*/
    // Shared implementation behind the two-vector rearrange: in-range
    // shuffle indexes select lanes from this vector, exceptional indexes
    // (after wrapping) select lanes from the second vector v.
    @ForceInline
    final
    <S extends VectorShuffle<Byte>>
    ByteVector rearrangeTemplate(Class<S> shuffletype,
                                           S shuffle,
                                           ByteVector v) {
        VectorMask<Byte> valid = shuffle.laneIsValid();
        @SuppressWarnings("unchecked")
        S ws = (S) shuffle.wrapIndexes();  // fold exceptional indexes into range
        // r0: lanes gathered from this vector using the wrapped indexes.
        ByteVector r0 =
            VectorSupport.rearrangeOp(
                getClass(), shuffletype, null, byte.class, length(),
                this, ws, null,
                (v0, s_, m_) -> v0.uOp((i, a) -> {
                    int ei = s_.laneSource(i);
                    return v0.lane(ei);
                }));
        // r1: lanes gathered from the second vector using the same indexes.
        ByteVector r1 =
            VectorSupport.rearrangeOp(
                getClass(), shuffletype, null, byte.class, length(),
                v, ws, null,
                (v1, s_, m_) -> v1.uOp((i, a) -> {
                    int ei = s_.laneSource(i);
                    return v1.lane(ei);
                }));
        // Originally-valid indexes take r0 (this vector), the rest take r1.
        return r1.blend(r0, valid);
    }
2337 
2338     @ForceInline
2339     private final
2340     VectorShuffle<Byte> toShuffle0(ByteSpecies dsp) {
2341         byte[] a = toArray();
2342         int[] sa = new int[a.length];
2343         for (int i = 0; i < a.length; i++) {
2344             sa[i] = (int) a[i];
2345         }
2346         return VectorShuffle.fromArray(dsp, sa, 0);
2347     }
2348 
    /*package-private*/
    // Shared implementation behind toShuffle: reinterprets this vector's
    // lanes as shuffle source indexes via the CAST conversion intrinsic,
    // with toShuffle0 as the fallback.
    @ForceInline
    final
    VectorShuffle<Byte> toShuffleTemplate(Class<?> shuffleType) {
        ByteSpecies vsp = vspecies();
        return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST,
                                     getClass(), byte.class, length(),
                                     shuffleType, byte.class, length(),
                                     this, vsp,
                                     ByteVector::toShuffle0);
    }
2360 
2361     /**
2362      * {@inheritDoc} <!--workaround-->
2363      */
2364     @Override
2365     public abstract
2366     ByteVector selectFrom(Vector<Byte> v);
2367 
2368     /*package-private*/
2369     @ForceInline
2370     final ByteVector selectFromTemplate(ByteVector v) {
2371         return v.rearrange(this.toShuffle());
2372     }
2373 
2374     /**
2375      * {@inheritDoc} <!--workaround-->
2376      */
2377     @Override
2378     public abstract
2379     ByteVector selectFrom(Vector<Byte> s, VectorMask<Byte> m);
2380 
2381     /*package-private*/
2382     @ForceInline
2383     final ByteVector selectFromTemplate(ByteVector v,
2384                                                   AbstractMask<Byte> m) {
2385         return v.rearrange(this.toShuffle(), m);
2386     }
2387 
2388     /// Ternary operations
2389 
2390     /**
2391      * Blends together the bits of two vectors under
2392      * the control of a third, which supplies mask bits.
2393      *
2394      * This is a lane-wise ternary operation which performs
2395      * a bitwise blending operation {@code (a&~c)|(b&c)}
2396      * to each lane.
2397      *
2398      * This method is also equivalent to the expression
2399      * {@link #lanewise(VectorOperators.Ternary,Vector,Vector)
2400      *    lanewise}{@code (}{@link VectorOperators#BITWISE_BLEND
2401      *    BITWISE_BLEND}{@code , bits, mask)}.
2402      *
2403      * @param bits input bits to blend into the current vector
2404      * @param mask a bitwise mask to enable blending of the input bits
2405      * @return the bitwise blend of the given bits into the current vector,
2406      *         under control of the bitwise mask
2407      * @see #bitwiseBlend(byte,byte)
2408      * @see #bitwiseBlend(byte,Vector)
2409      * @see #bitwiseBlend(Vector,byte)
2410      * @see VectorOperators#BITWISE_BLEND
2411      * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
2412      */
2413     @ForceInline
2414     public final
2415     ByteVector bitwiseBlend(Vector<Byte> bits, Vector<Byte> mask) {
2416         return lanewise(BITWISE_BLEND, bits, mask);
2417     }
2418 
2419     /**
2420      * Blends together the bits of a vector and a scalar under
2421      * the control of another scalar, which supplies mask bits.
2422      *
2423      * This is a lane-wise ternary operation which performs
2424      * a bitwise blending operation {@code (a&~c)|(b&c)}
2425      * to each lane.
2426      *
2427      * This method is also equivalent to the expression
2428      * {@link #lanewise(VectorOperators.Ternary,Vector,Vector)
2429      *    lanewise}{@code (}{@link VectorOperators#BITWISE_BLEND
2430      *    BITWISE_BLEND}{@code , bits, mask)}.
2431      *
2432      * @param bits input bits to blend into the current vector
2433      * @param mask a bitwise mask to enable blending of the input bits
2434      * @return the bitwise blend of the given bits into the current vector,
2435      *         under control of the bitwise mask
2436      * @see #bitwiseBlend(Vector,Vector)
2437      * @see VectorOperators#BITWISE_BLEND
2438      * @see #lanewise(VectorOperators.Ternary,byte,byte,VectorMask)
2439      */
2440     @ForceInline
2441     public final
2442     ByteVector bitwiseBlend(byte bits, byte mask) {
2443         return lanewise(BITWISE_BLEND, bits, mask);
2444     }
2445 
2446     /**
2447      * Blends together the bits of a vector and a scalar under
2448      * the control of another vector, which supplies mask bits.
2449      *
2450      * This is a lane-wise ternary operation which performs
2451      * a bitwise blending operation {@code (a&~c)|(b&c)}
2452      * to each lane.
2453      *
2454      * This method is also equivalent to the expression
2455      * {@link #lanewise(VectorOperators.Ternary,Vector,Vector)
2456      *    lanewise}{@code (}{@link VectorOperators#BITWISE_BLEND
2457      *    BITWISE_BLEND}{@code , bits, mask)}.
2458      *
2459      * @param bits input bits to blend into the current vector
2460      * @param mask a bitwise mask to enable blending of the input bits
2461      * @return the bitwise blend of the given bits into the current vector,
2462      *         under control of the bitwise mask
2463      * @see #bitwiseBlend(Vector,Vector)
2464      * @see VectorOperators#BITWISE_BLEND
2465      * @see #lanewise(VectorOperators.Ternary,byte,Vector,VectorMask)
2466      */
2467     @ForceInline
2468     public final
2469     ByteVector bitwiseBlend(byte bits, Vector<Byte> mask) {
2470         return lanewise(BITWISE_BLEND, bits, mask);
2471     }
2472 
2473     /**
2474      * Blends together the bits of two vectors under
2475      * the control of a scalar, which supplies mask bits.
2476      *
2477      * This is a lane-wise ternary operation which performs
2478      * a bitwise blending operation {@code (a&~c)|(b&c)}
2479      * to each lane.
2480      *
2481      * This method is also equivalent to the expression
2482      * {@link #lanewise(VectorOperators.Ternary,Vector,Vector)
2483      *    lanewise}{@code (}{@link VectorOperators#BITWISE_BLEND
2484      *    BITWISE_BLEND}{@code , bits, mask)}.
2485      *
2486      * @param bits input bits to blend into the current vector
2487      * @param mask a bitwise mask to enable blending of the input bits
2488      * @return the bitwise blend of the given bits into the current vector,
2489      *         under control of the bitwise mask
2490      * @see #bitwiseBlend(Vector,Vector)
2491      * @see VectorOperators#BITWISE_BLEND
2492      * @see #lanewise(VectorOperators.Ternary,Vector,byte,VectorMask)
2493      */
2494     @ForceInline
2495     public final
2496     ByteVector bitwiseBlend(Vector<Byte> bits, byte mask) {
2497         return lanewise(BITWISE_BLEND, bits, mask);
2498     }
2499 
2500 
2501     // Type specific horizontal reductions
2502 
2503     /**
2504      * Returns a value accumulated from all the lanes of this vector.
2505      *
2506      * This is an associative cross-lane reduction operation which
2507      * applies the specified operation to all the lane elements.
2508      * <p>
2509      * A few reduction operations do not support arbitrary reordering
2510      * of their operands, yet are included here because of their
2511      * usefulness.
2512      * <ul>
2513      * <li>
2514      * In the case of {@code FIRST_NONZERO}, the reduction returns
2515      * the value from the lowest-numbered non-zero lane.
2516      * <li>
2517      * All other reduction operations are fully commutative and
2518      * associative.  The implementation can choose any order of
2519      * processing, yet it will always produce the same result.
2520      * </ul>
2521      *
2522      * @param op the operation used to combine lane values
2523      * @return the accumulated result
2524      * @throws UnsupportedOperationException if this vector does
2525      *         not support the requested operation
2526      * @see #reduceLanes(VectorOperators.Associative,VectorMask)
2527      * @see #add(Vector)
2528      * @see #mul(Vector)
2529      * @see #min(Vector)
2530      * @see #max(Vector)
2531      * @see #and(Vector)
2532      * @see #or(Vector)
2533      * @see VectorOperators#XOR
2534      * @see VectorOperators#FIRST_NONZERO
2535      */
2536     public abstract byte reduceLanes(VectorOperators.Associative op);
2537 
2538     /**
2539      * Returns a value accumulated from selected lanes of this vector,
2540      * controlled by a mask.
2541      *
2542      * This is an associative cross-lane reduction operation which
2543      * applies the specified operation to the selected lane elements.
2544      * <p>
2545      * If no elements are selected, an operation-specific identity
2546      * value is returned.
2547      * <ul>
2548      * <li>
2549      * If the operation is
2550      *  {@code ADD}, {@code XOR}, {@code OR},
2551      * or {@code FIRST_NONZERO},
2552      * then the identity value is zero, the default {@code byte} value.
2553      * <li>
2554      * If the operation is {@code MUL},
2555      * then the identity value is one.
2556      * <li>
2557      * If the operation is {@code AND},
2558      * then the identity value is minus one (all bits set).
2559      * <li>
2560      * If the operation is {@code MAX},
2561      * then the identity value is {@code Byte.MIN_VALUE}.
2562      * <li>
2563      * If the operation is {@code MIN},
2564      * then the identity value is {@code Byte.MAX_VALUE}.
2565      * </ul>
2566      * <p>
2567      * A few reduction operations do not support arbitrary reordering
2568      * of their operands, yet are included here because of their
2569      * usefulness.
2570      * <ul>
2571      * <li>
2572      * In the case of {@code FIRST_NONZERO}, the reduction returns
2573      * the value from the lowest-numbered non-zero lane.
2574      * <li>
2575      * All other reduction operations are fully commutative and
2576      * associative.  The implementation can choose any order of
2577      * processing, yet it will always produce the same result.
2578      * </ul>
2579      *
2580      * @param op the operation used to combine lane values
2581      * @param m the mask controlling lane selection
2582      * @return the reduced result accumulated from the selected lane values
2583      * @throws UnsupportedOperationException if this vector does
2584      *         not support the requested operation
2585      * @see #reduceLanes(VectorOperators.Associative)
2586      */
2587     public abstract byte reduceLanes(VectorOperators.Associative op,
2588                                        VectorMask<Byte> m);
2589 
    /*package-private*/
    // Shared masked-reduction implementation used by the shape-specific
    // subclasses.  Delegates to the VectorSupport intrinsic, with a
    // scalar fallback looked up from REDUCE_IMPL.
    @ForceInline
    final
    byte reduceLanesTemplate(VectorOperators.Associative op,
                               Class<? extends VectorMask<Byte>> maskClass,
                               VectorMask<Byte> m) {
        m.check(maskClass, this);
        if (op == FIRST_NONZERO) {
            // FIRST_NONZERO cannot be reordered like the other ops.
            // Emulate the masked form by blending the identity vector
            // (zero) into the unselected lanes, then reducing unmasked.
            ByteVector v = reduceIdentityVector(op).blend(this, m);
            return v.reduceLanesTemplate(op);
        }
        int opc = opCode(op);
        return fromBits(VectorSupport.reductionCoerced(
            opc, getClass(), maskClass, byte.class, length(),
            this, m,
            REDUCE_IMPL.find(op, opc, ByteVector::reductionOperations)));
    }
2607 
2608     /*package-private*/
2609     @ForceInline
2610     final
2611     byte reduceLanesTemplate(VectorOperators.Associative op) {
2612         if (op == FIRST_NONZERO) {
2613             // FIXME:  The JIT should handle this, and other scan ops alos.
2614             VectorMask<Byte> thisNZ
2615                 = this.viewAsIntegralLanes().compare(NE, (byte) 0);
2616             return this.lane(thisNZ.firstTrue());
2617         }
2618         int opc = opCode(op);
2619         return fromBits(VectorSupport.reductionCoerced(
2620             opc, getClass(), null, byte.class, length(),
2621             this, null,
2622             REDUCE_IMPL.find(op, opc, ByteVector::reductionOperations)));
2623     }
2624 
    // Cache of scalar reduction fallbacks, keyed by associative operator,
    // consulted when the JIT does not intrinsify reductionCoerced.
    private static final
    ImplCache<Associative, ReductionOperation<ByteVector, VectorMask<Byte>>>
        REDUCE_IMPL = new ImplCache<>(Associative.class, ByteVector.class);

    // Scalar fallback for each reduction opcode.  Each entry folds the
    // selected lanes with rOp, seeded with that operation's identity
    // value (0 for ADD/OR/XOR, 1 for MUL, -1 for AND, and the extreme
    // byte values for MIN/MAX).  Returns null for unknown opcodes.
    private static ReductionOperation<ByteVector, VectorMask<Byte>> reductionOperations(int opc_) {
        switch (opc_) {
            case VECTOR_OP_ADD: return (v, m) ->
                    toBits(v.rOp((byte)0, m, (i, a, b) -> (byte)(a + b)));
            case VECTOR_OP_MUL: return (v, m) ->
                    toBits(v.rOp((byte)1, m, (i, a, b) -> (byte)(a * b)));
            case VECTOR_OP_MIN: return (v, m) ->
                    toBits(v.rOp(MAX_OR_INF, m, (i, a, b) -> (byte) Math.min(a, b)));
            case VECTOR_OP_MAX: return (v, m) ->
                    toBits(v.rOp(MIN_OR_INF, m, (i, a, b) -> (byte) Math.max(a, b)));
            case VECTOR_OP_AND: return (v, m) ->
                    toBits(v.rOp((byte)-1, m, (i, a, b) -> (byte)(a & b)));
            case VECTOR_OP_OR: return (v, m) ->
                    toBits(v.rOp((byte)0, m, (i, a, b) -> (byte)(a | b)));
            case VECTOR_OP_XOR: return (v, m) ->
                    toBits(v.rOp((byte)0, m, (i, a, b) -> (byte)(a ^ b)));
            default: return null;
        }
    }
2648 
    // Returns a vector whose every lane holds the identity value of the
    // given associative operation (0 for ADD/OR/XOR, 1 for MUL, -1 for
    // AND, extreme byte values for MIN/MAX).  Used by the masked
    // FIRST_NONZERO path in reduceLanesTemplate to fill unselected lanes.
    private
    @ForceInline
    ByteVector reduceIdentityVector(VectorOperators.Associative op) {
        int opc = opCode(op);
        UnaryOperator<ByteVector> fn
            = REDUCE_ID_IMPL.find(op, opc, (opc_) -> {
                switch (opc_) {
                case VECTOR_OP_ADD:
                case VECTOR_OP_OR:
                case VECTOR_OP_XOR:
                    return v -> v.broadcast(0);
                case VECTOR_OP_MUL:
                    return v -> v.broadcast(1);
                case VECTOR_OP_AND:
                    return v -> v.broadcast(-1);
                case VECTOR_OP_MIN:
                    return v -> v.broadcast(MAX_OR_INF);
                case VECTOR_OP_MAX:
                    return v -> v.broadcast(MIN_OR_INF);
                default: return null;
                }
            });
        return fn.apply(this);
    }
    // Cache of identity-vector factories, keyed by associative operator.
    private static final
    ImplCache<Associative,UnaryOperator<ByteVector>> REDUCE_ID_IMPL
        = new ImplCache<>(Associative.class, ByteVector.class);

    // MIN/MAX identities; for integral types these are the extreme
    // byte values (the "_OR_INF" naming is shared with floating types,
    // where infinities are used instead).
    private static final byte MIN_OR_INF = Byte.MIN_VALUE;
    private static final byte MAX_OR_INF = Byte.MAX_VALUE;
2679 
    // Javadoc inherited from Vector; the byte reduction result is
    // returned widened to long.
    public @Override abstract long reduceLanesToLong(VectorOperators.Associative op);
    public @Override abstract long reduceLanesToLong(VectorOperators.Associative op,
                                                     VectorMask<Byte> m);
2683 
2684     // Type specific accessors
2685 
2686     /**
2687      * Gets the lane element at lane index {@code i}
2688      *
2689      * @param i the lane index
2690      * @return the lane element at lane index {@code i}
2691      * @throws IllegalArgumentException if the index is is out of range
2692      * ({@code < 0 || >= length()})
2693      */
2694     public abstract byte lane(int i);
2695 
2696     /**
2697      * Replaces the lane element of this vector at lane index {@code i} with
2698      * value {@code e}.
2699      *
2700      * This is a cross-lane operation and behaves as if it returns the result
2701      * of blending this vector with an input vector that is the result of
2702      * broadcasting {@code e} and a mask that has only one lane set at lane
2703      * index {@code i}.
2704      *
2705      * @param i the lane index of the lane element to be replaced
2706      * @param e the value to be placed
2707      * @return the result of replacing the lane element of this vector at lane
2708      * index {@code i} with value {@code e}.
2709      * @throws IllegalArgumentException if the index is is out of range
2710      * ({@code < 0 || >= length()})
2711      */
2712     public abstract ByteVector withLane(int i, byte e);
2713 
2714     // Memory load operations
2715 
2716     /**
2717      * Returns an array of type {@code byte[]}
2718      * containing all the lane values.
2719      * The array length is the same as the vector length.
2720      * The array elements are stored in lane order.
2721      * <p>
2722      * This method behaves as if it stores
2723      * this vector into an allocated array
2724      * (using {@link #intoArray(byte[], int) intoArray})
2725      * and returns the array as follows:
2726      * <pre>{@code
2727      *   byte[] a = new byte[this.length()];
2728      *   this.intoArray(a, 0);
2729      *   return a;
2730      * }</pre>
2731      *
2732      * @return an array containing the lane values of this vector
2733      */
2734     @ForceInline
2735     @Override
2736     public final byte[] toArray() {
2737         byte[] a = new byte[vspecies().laneCount()];
2738         intoArray(a, 0);
2739         return a;
2740     }
2741 
    /** {@inheritDoc} <!--workaround-->
     * @implNote
     * When this method is used on vectors
     * of type {@code ByteVector},
     * there will be no loss of precision or range,
     * and so no {@code UnsupportedOperationException} will
     * be thrown.
     */
    @ForceInline
    @Override
    public final int[] toIntArray() {
        byte[] a = toArray();
        int[] res = new int[a.length];
        for (int i = 0; i < a.length; i++) {
            byte e = a[i];
            // byte always fits in int; the check can never fail here.
            res[i] = (int) ByteSpecies.toIntegralChecked(e, true);
        }
        return res;
    }
2761 
    /** {@inheritDoc} <!--workaround-->
     * @implNote
     * When this method is used on vectors
     * of type {@code ByteVector},
     * there will be no loss of precision or range,
     * and so no {@code UnsupportedOperationException} will
     * be thrown.
     */
    @ForceInline
    @Override
    public final long[] toLongArray() {
        byte[] a = toArray();
        long[] res = new long[a.length];
        for (int i = 0; i < a.length; i++) {
            byte e = a[i];
            // byte always fits in long; the check can never fail here.
            res[i] = ByteSpecies.toIntegralChecked(e, false);
        }
        return res;
    }
2781 
    /** {@inheritDoc} <!--workaround-->
     * @implNote
     * When this method is used on vectors
     * of type {@code ByteVector},
     * there will be no loss of precision.
     */
    @ForceInline
    @Override
    public final double[] toDoubleArray() {
        byte[] a = toArray();
        double[] res = new double[a.length];
        for (int i = 0; i < a.length; i++) {
            // Every byte value is exactly representable as a double.
            res[i] = (double) a[i];
        }
        return res;
    }
2798 
2799     /**
2800      * Loads a vector from a byte array starting at an offset.
2801      * Bytes are composed into primitive lane elements according
2802      * to the specified byte order.
2803      * The vector is arranged into lanes according to
2804      * <a href="Vector.html#lane-order">memory ordering</a>.
2805      * <p>
2806      * This method behaves as if it returns the result of calling
2807      * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
2808      * fromByteBuffer()} as follows:
2809      * <pre>{@code
2810      * var bb = ByteBuffer.wrap(a);
2811      * var m = species.maskAll(true);
2812      * return fromByteBuffer(species, bb, offset, bo, m);
2813      * }</pre>
2814      *
2815      * @param species species of desired vector
2816      * @param a the byte array
2817      * @param offset the offset into the array
2818      * @param bo the intended byte order
2819      * @return a vector loaded from a byte array
2820      * @throws IndexOutOfBoundsException
2821      *         if {@code offset+N*ESIZE < 0}
2822      *         or {@code offset+(N+1)*ESIZE > a.length}
2823      *         for any lane {@code N} in the vector
2824      */
2825     @ForceInline
2826     public static
2827     ByteVector fromByteArray(VectorSpecies<Byte> species,
2828                                        byte[] a, int offset,
2829                                        ByteOrder bo) {
2830         offset = checkFromIndexSize(offset, species.vectorByteSize(), a.length);
2831         ByteSpecies vsp = (ByteSpecies) species;
2832         return vsp.dummyVector().fromByteArray0(a, offset).maybeSwap(bo);
2833     }
2834 
2835     /**
2836      * Loads a vector from a byte array starting at an offset
2837      * and using a mask.
2838      * Lanes where the mask is unset are filled with the default
2839      * value of {@code byte} (zero).
2840      * Bytes are composed into primitive lane elements according
2841      * to the specified byte order.
2842      * The vector is arranged into lanes according to
2843      * <a href="Vector.html#lane-order">memory ordering</a>.
2844      * <p>
2845      * This method behaves as if it returns the result of calling
2846      * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
2847      * fromByteBuffer()} as follows:
2848      * <pre>{@code
2849      * var bb = ByteBuffer.wrap(a);
2850      * return fromByteBuffer(species, bb, offset, bo, m);
2851      * }</pre>
2852      *
2853      * @param species species of desired vector
2854      * @param a the byte array
2855      * @param offset the offset into the array
2856      * @param bo the intended byte order
2857      * @param m the mask controlling lane selection
2858      * @return a vector loaded from a byte array
2859      * @throws IndexOutOfBoundsException
2860      *         if {@code offset+N*ESIZE < 0}
2861      *         or {@code offset+(N+1)*ESIZE > a.length}
2862      *         for any lane {@code N} in the vector
2863      *         where the mask is set
2864      */
2865     @ForceInline
2866     public static
2867     ByteVector fromByteArray(VectorSpecies<Byte> species,
2868                                        byte[] a, int offset,
2869                                        ByteOrder bo,
2870                                        VectorMask<Byte> m) {
2871         ByteSpecies vsp = (ByteSpecies) species;
2872         if (offset >= 0 && offset <= (a.length - species.vectorByteSize())) {
2873             return vsp.dummyVector().fromByteArray0(a, offset, m).maybeSwap(bo);
2874         }
2875 
2876         // FIXME: optimize
2877         checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
2878         ByteBuffer wb = wrapper(a, bo);
2879         return vsp.ldOp(wb, offset, (AbstractMask<Byte>)m,
2880                    (wb_, o, i)  -> wb_.get(o + i * 1));
2881     }
2882 
2883     /**
2884      * Loads a vector from an array of type {@code byte[]}
2885      * starting at an offset.
2886      * For each vector lane, where {@code N} is the vector lane index, the
2887      * array element at index {@code offset + N} is placed into the
2888      * resulting vector at lane index {@code N}.
2889      *
2890      * @param species species of desired vector
2891      * @param a the array
2892      * @param offset the offset into the array
2893      * @return the vector loaded from an array
2894      * @throws IndexOutOfBoundsException
2895      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
2896      *         for any lane {@code N} in the vector
2897      */
2898     @ForceInline
2899     public static
2900     ByteVector fromArray(VectorSpecies<Byte> species,
2901                                    byte[] a, int offset) {
2902         offset = checkFromIndexSize(offset, species.length(), a.length);
2903         ByteSpecies vsp = (ByteSpecies) species;
2904         return vsp.dummyVector().fromArray0(a, offset);
2905     }
2906 
2907     /**
2908      * Loads a vector from an array of type {@code byte[]}
2909      * starting at an offset and using a mask.
2910      * Lanes where the mask is unset are filled with the default
2911      * value of {@code byte} (zero).
2912      * For each vector lane, where {@code N} is the vector lane index,
2913      * if the mask lane at index {@code N} is set then the array element at
2914      * index {@code offset + N} is placed into the resulting vector at lane index
2915      * {@code N}, otherwise the default element value is placed into the
2916      * resulting vector at lane index {@code N}.
2917      *
2918      * @param species species of desired vector
2919      * @param a the array
2920      * @param offset the offset into the array
2921      * @param m the mask controlling lane selection
2922      * @return the vector loaded from an array
2923      * @throws IndexOutOfBoundsException
2924      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
2925      *         for any lane {@code N} in the vector
2926      *         where the mask is set
2927      */
2928     @ForceInline
2929     public static
2930     ByteVector fromArray(VectorSpecies<Byte> species,
2931                                    byte[] a, int offset,
2932                                    VectorMask<Byte> m) {
2933         ByteSpecies vsp = (ByteSpecies) species;
2934         if (offset >= 0 && offset <= (a.length - species.length())) {
2935             return vsp.dummyVector().fromArray0(a, offset, m);
2936         }
2937 
2938         // FIXME: optimize
2939         checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
2940         return vsp.vOp(m, i -> a[offset + i]);
2941     }
2942 
2943     /**
2944      * Gathers a new vector composed of elements from an array of type
2945      * {@code byte[]},
2946      * using indexes obtained by adding a fixed {@code offset} to a
2947      * series of secondary offsets from an <em>index map</em>.
2948      * The index map is a contiguous sequence of {@code VLENGTH}
2949      * elements in a second array of {@code int}s, starting at a given
2950      * {@code mapOffset}.
2951      * <p>
2952      * For each vector lane, where {@code N} is the vector lane index,
2953      * the lane is loaded from the array
2954      * element {@code a[f(N)]}, where {@code f(N)} is the
2955      * index mapping expression
2956      * {@code offset + indexMap[mapOffset + N]]}.
2957      *
2958      * @param species species of desired vector
2959      * @param a the array
2960      * @param offset the offset into the array, may be negative if relative
2961      * indexes in the index map compensate to produce a value within the
2962      * array bounds
2963      * @param indexMap the index map
2964      * @param mapOffset the offset into the index map
2965      * @return the vector loaded from the indexed elements of the array
2966      * @throws IndexOutOfBoundsException
2967      *         if {@code mapOffset+N < 0}
2968      *         or if {@code mapOffset+N >= indexMap.length},
2969      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
2970      *         is an invalid index into {@code a},
2971      *         for any lane {@code N} in the vector
2972      * @see ByteVector#toIntArray()
2973      */
2974     @ForceInline
2975     public static
2976     ByteVector fromArray(VectorSpecies<Byte> species,
2977                                    byte[] a, int offset,
2978                                    int[] indexMap, int mapOffset) {
2979         ByteSpecies vsp = (ByteSpecies) species;
2980         return vsp.vOp(n -> a[offset + indexMap[mapOffset + n]]);
2981     }
2982 
2983     /**
2984      * Gathers a new vector composed of elements from an array of type
2985      * {@code byte[]},
2986      * under the control of a mask, and
2987      * using indexes obtained by adding a fixed {@code offset} to a
2988      * series of secondary offsets from an <em>index map</em>.
2989      * The index map is a contiguous sequence of {@code VLENGTH}
2990      * elements in a second array of {@code int}s, starting at a given
2991      * {@code mapOffset}.
2992      * <p>
2993      * For each vector lane, where {@code N} is the vector lane index,
2994      * if the lane is set in the mask,
2995      * the lane is loaded from the array
2996      * element {@code a[f(N)]}, where {@code f(N)} is the
2997      * index mapping expression
2998      * {@code offset + indexMap[mapOffset + N]]}.
2999      * Unset lanes in the resulting vector are set to zero.
3000      *
3001      * @param species species of desired vector
3002      * @param a the array
3003      * @param offset the offset into the array, may be negative if relative
3004      * indexes in the index map compensate to produce a value within the
3005      * array bounds
3006      * @param indexMap the index map
3007      * @param mapOffset the offset into the index map
3008      * @param m the mask controlling lane selection
3009      * @return the vector loaded from the indexed elements of the array
3010      * @throws IndexOutOfBoundsException
3011      *         if {@code mapOffset+N < 0}
3012      *         or if {@code mapOffset+N >= indexMap.length},
3013      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
3014      *         is an invalid index into {@code a},
3015      *         for any lane {@code N} in the vector
3016      *         where the mask is set
3017      * @see ByteVector#toIntArray()
3018      */
3019     @ForceInline
3020     public static
3021     ByteVector fromArray(VectorSpecies<Byte> species,
3022                                    byte[] a, int offset,
3023                                    int[] indexMap, int mapOffset,
3024                                    VectorMask<Byte> m) {
3025         ByteSpecies vsp = (ByteSpecies) species;
3026         return vsp.vOp(m, n -> a[offset + indexMap[mapOffset + n]]);
3027     }
3028 
3029 
3030     /**
3031      * Loads a vector from an array of type {@code boolean[]}
3032      * starting at an offset.
3033      * For each vector lane, where {@code N} is the vector lane index, the
3034      * array element at index {@code offset + N}
3035      * is first converted to a {@code byte} value and then
3036      * placed into the resulting vector at lane index {@code N}.
3037      * <p>
3038      * A {@code boolean} value is converted to a {@code byte} value by applying the
3039      * expression {@code (byte) (b ? 1 : 0)}, where {@code b} is the {@code boolean} value.
3040      *
3041      * @param species species of desired vector
3042      * @param a the array
3043      * @param offset the offset into the array
3044      * @return the vector loaded from an array
3045      * @throws IndexOutOfBoundsException
3046      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
3047      *         for any lane {@code N} in the vector
3048      */
3049     @ForceInline
3050     public static
3051     ByteVector fromBooleanArray(VectorSpecies<Byte> species,
3052                                           boolean[] a, int offset) {
3053         offset = checkFromIndexSize(offset, species.length(), a.length);
3054         ByteSpecies vsp = (ByteSpecies) species;
3055         return vsp.dummyVector().fromBooleanArray0(a, offset);
3056     }
3057 
3058     /**
3059      * Loads a vector from an array of type {@code boolean[]}
3060      * starting at an offset and using a mask.
3061      * Lanes where the mask is unset are filled with the default
3062      * value of {@code byte} (zero).
3063      * For each vector lane, where {@code N} is the vector lane index,
3064      * if the mask lane at index {@code N} is set then the array element at
3065      * index {@code offset + N}
3066      * is first converted to a {@code byte} value and then
3067      * placed into the resulting vector at lane index
3068      * {@code N}, otherwise the default element value is placed into the
3069      * resulting vector at lane index {@code N}.
3070      * <p>
3071      * A {@code boolean} value is converted to a {@code byte} value by applying the
3072      * expression {@code (byte) (b ? 1 : 0)}, where {@code b} is the {@code boolean} value.
3073      *
3074      * @param species species of desired vector
3075      * @param a the array
3076      * @param offset the offset into the array
3077      * @param m the mask controlling lane selection
3078      * @return the vector loaded from an array
3079      * @throws IndexOutOfBoundsException
3080      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
3081      *         for any lane {@code N} in the vector
3082      *         where the mask is set
3083      */
3084     @ForceInline
3085     public static
3086     ByteVector fromBooleanArray(VectorSpecies<Byte> species,
3087                                           boolean[] a, int offset,
3088                                           VectorMask<Byte> m) {
3089         ByteSpecies vsp = (ByteSpecies) species;
3090         if (offset >= 0 && offset <= (a.length - species.length())) {
3091             ByteVector zero = vsp.zero();
3092             return vsp.dummyVector().fromBooleanArray0(a, offset, m);
3093         }
3094 
3095         // FIXME: optimize
3096         checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
3097         return vsp.vOp(m, i -> (byte) (a[offset + i] ? 1 : 0));
3098     }
3099 
3100     /**
3101      * Gathers a new vector composed of elements from an array of type
3102      * {@code boolean[]},
3103      * using indexes obtained by adding a fixed {@code offset} to a
3104      * series of secondary offsets from an <em>index map</em>.
3105      * The index map is a contiguous sequence of {@code VLENGTH}
3106      * elements in a second array of {@code int}s, starting at a given
3107      * {@code mapOffset}.
3108      * <p>
3109      * For each vector lane, where {@code N} is the vector lane index,
3110      * the lane is loaded from the expression
3111      * {@code (byte) (a[f(N)] ? 1 : 0)}, where {@code f(N)} is the
3112      * index mapping expression
3113      * {@code offset + indexMap[mapOffset + N]]}.
3114      *
3115      * @param species species of desired vector
3116      * @param a the array
3117      * @param offset the offset into the array, may be negative if relative
3118      * indexes in the index map compensate to produce a value within the
3119      * array bounds
3120      * @param indexMap the index map
3121      * @param mapOffset the offset into the index map
3122      * @return the vector loaded from the indexed elements of the array
3123      * @throws IndexOutOfBoundsException
3124      *         if {@code mapOffset+N < 0}
3125      *         or if {@code mapOffset+N >= indexMap.length},
3126      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
3127      *         is an invalid index into {@code a},
3128      *         for any lane {@code N} in the vector
3129      * @see ByteVector#toIntArray()
3130      */
3131     @ForceInline
3132     public static
3133     ByteVector fromBooleanArray(VectorSpecies<Byte> species,
3134                                           boolean[] a, int offset,
3135                                           int[] indexMap, int mapOffset) {
3136         // FIXME: optimize
3137         ByteSpecies vsp = (ByteSpecies) species;
3138         return vsp.vOp(n -> (byte) (a[offset + indexMap[mapOffset + n]] ? 1 : 0));
3139     }
3140 
3141     /**
3142      * Gathers a new vector composed of elements from an array of type
3143      * {@code boolean[]},
3144      * under the control of a mask, and
3145      * using indexes obtained by adding a fixed {@code offset} to a
3146      * series of secondary offsets from an <em>index map</em>.
3147      * The index map is a contiguous sequence of {@code VLENGTH}
3148      * elements in a second array of {@code int}s, starting at a given
3149      * {@code mapOffset}.
3150      * <p>
3151      * For each vector lane, where {@code N} is the vector lane index,
3152      * if the lane is set in the mask,
3153      * the lane is loaded from the expression
3154      * {@code (byte) (a[f(N)] ? 1 : 0)}, where {@code f(N)} is the
3155      * index mapping expression
3156      * {@code offset + indexMap[mapOffset + N]]}.
3157      * Unset lanes in the resulting vector are set to zero.
3158      *
3159      * @param species species of desired vector
3160      * @param a the array
3161      * @param offset the offset into the array, may be negative if relative
3162      * indexes in the index map compensate to produce a value within the
3163      * array bounds
3164      * @param indexMap the index map
3165      * @param mapOffset the offset into the index map
3166      * @param m the mask controlling lane selection
3167      * @return the vector loaded from the indexed elements of the array
3168      * @throws IndexOutOfBoundsException
3169      *         if {@code mapOffset+N < 0}
3170      *         or if {@code mapOffset+N >= indexMap.length},
3171      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
3172      *         is an invalid index into {@code a},
3173      *         for any lane {@code N} in the vector
3174      *         where the mask is set
3175      * @see ByteVector#toIntArray()
3176      */
3177     @ForceInline
3178     public static
3179     ByteVector fromBooleanArray(VectorSpecies<Byte> species,
3180                                           boolean[] a, int offset,
3181                                           int[] indexMap, int mapOffset,
3182                                           VectorMask<Byte> m) {
3183         // FIXME: optimize
3184         ByteSpecies vsp = (ByteSpecies) species;
3185         return vsp.vOp(m, n -> (byte) (a[offset + indexMap[mapOffset + n]] ? 1 : 0));
3186     }
3187 
3188     /**
3189      * Loads a vector from a {@linkplain ByteBuffer byte buffer}
3190      * starting at an offset into the byte buffer.
3191      * Bytes are composed into primitive lane elements according
3192      * to the specified byte order.
3193      * The vector is arranged into lanes according to
3194      * <a href="Vector.html#lane-order">memory ordering</a>.
3195      * <p>
3196      * This method behaves as if it returns the result of calling
3197      * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
3198      * fromByteBuffer()} as follows:
3199      * <pre>{@code
3200      * var m = species.maskAll(true);
3201      * return fromByteBuffer(species, bb, offset, bo, m);
3202      * }</pre>
3203      *
3204      * @param species species of desired vector
3205      * @param bb the byte buffer
3206      * @param offset the offset into the byte buffer
3207      * @param bo the intended byte order
3208      * @return a vector loaded from a byte buffer
3209      * @throws IndexOutOfBoundsException
3210      *         if {@code offset+N*1 < 0}
3211      *         or {@code offset+N*1 >= bb.limit()}
3212      *         for any lane {@code N} in the vector
3213      */
3214     @ForceInline
3215     public static
3216     ByteVector fromByteBuffer(VectorSpecies<Byte> species,
3217                                         ByteBuffer bb, int offset,
3218                                         ByteOrder bo) {
3219         offset = checkFromIndexSize(offset, species.vectorByteSize(), bb.limit());
3220         ByteSpecies vsp = (ByteSpecies) species;
3221         return vsp.dummyVector().fromByteBuffer0(bb, offset).maybeSwap(bo);
3222     }
3223 
3224     /**
3225      * Loads a vector from a {@linkplain ByteBuffer byte buffer}
3226      * starting at an offset into the byte buffer
3227      * and using a mask.
3228      * Lanes where the mask is unset are filled with the default
3229      * value of {@code byte} (zero).
3230      * Bytes are composed into primitive lane elements according
3231      * to the specified byte order.
3232      * The vector is arranged into lanes according to
3233      * <a href="Vector.html#lane-order">memory ordering</a>.
3234      * <p>
3235      * The following pseudocode illustrates the behavior:
3236      * <pre>{@code
     * ByteBuffer eb = bb.duplicate();
     * byte[] ar = new byte[species.length()];
     * for (int n = 0; n < ar.length; n++) {
     *     if (m.laneIsSet(n)) {
     *         ar[n] = eb.get(offset + n);
     *     }
     * }
3245      * ByteVector r = ByteVector.fromArray(species, ar, 0);
3246      * }</pre>
3247      * @implNote
3248      * The byte order argument is ignored.
3249      *
3250      * @param species species of desired vector
3251      * @param bb the byte buffer
3252      * @param offset the offset into the byte buffer
3253      * @param bo the intended byte order
3254      * @param m the mask controlling lane selection
3255      * @return a vector loaded from a byte buffer
3256      * @throws IndexOutOfBoundsException
3257      *         if {@code offset+N*1 < 0}
3258      *         or {@code offset+N*1 >= bb.limit()}
3259      *         for any lane {@code N} in the vector
3260      *         where the mask is set
3261      */
3262     @ForceInline
3263     public static
3264     ByteVector fromByteBuffer(VectorSpecies<Byte> species,
3265                                         ByteBuffer bb, int offset,
3266                                         ByteOrder bo,
3267                                         VectorMask<Byte> m) {
3268         ByteSpecies vsp = (ByteSpecies) species;
3269         if (offset >= 0 && offset <= (bb.limit() - species.vectorByteSize())) {
3270             return vsp.dummyVector().fromByteBuffer0(bb, offset, m).maybeSwap(bo);
3271         }
3272 
3273         // FIXME: optimize
3274         checkMaskFromIndexSize(offset, vsp, m, 1, bb.limit());
3275         ByteBuffer wb = wrapper(bb, bo);
3276         return vsp.ldOp(wb, offset, (AbstractMask<Byte>)m,
3277                    (wb_, o, i)  -> wb_.get(o + i * 1));
3278     }
3279 
3280     // Memory store operations
3281 
3282     /**
3283      * Stores this vector into an array of type {@code byte[]}
3284      * starting at an offset.
3285      * <p>
3286      * For each vector lane, where {@code N} is the vector lane index,
3287      * the lane element at index {@code N} is stored into the array
3288      * element {@code a[offset+N]}.
3289      *
3290      * @param a the array, of type {@code byte[]}
3291      * @param offset the offset into the array
3292      * @throws IndexOutOfBoundsException
3293      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
3294      *         for any lane {@code N} in the vector
3295      */
    @ForceInline
    public final
    void intoArray(byte[] a, int offset) {
        // Range-check the whole span up front; the helper returns the
        // validated offset.
        offset = checkFromIndexSize(offset, length(), a.length);
        ByteSpecies vsp = vspecies();
        // Intrinsic store; the trailing lambda is the scalar fallback used
        // when the intrinsic is not applied.
        VectorSupport.store(
            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
            a, arrayAddress(a, offset),
            this,
            a, offset,
            (arr, off, v)
            -> v.stOp(arr, off,
                      (arr_, off_, i, e) -> arr_[off_ + i] = e));
    }
3310 
3311     /**
3312      * Stores this vector into an array of type {@code byte[]}
3313      * starting at offset and using a mask.
3314      * <p>
3315      * For each vector lane, where {@code N} is the vector lane index,
3316      * the lane element at index {@code N} is stored into the array
3317      * element {@code a[offset+N]}.
3318      * If the mask lane at {@code N} is unset then the corresponding
3319      * array element {@code a[offset+N]} is left unchanged.
3320      * <p>
3321      * Array range checking is done for lanes where the mask is set.
3322      * Lanes where the mask is unset are not stored and do not need
3323      * to correspond to legitimate elements of {@code a}.
3324      * That is, unset lanes may correspond to array indexes less than
3325      * zero or beyond the end of the array.
3326      *
3327      * @param a the array, of type {@code byte[]}
3328      * @param offset the offset into the array
3329      * @param m the mask controlling lane storage
3330      * @throws IndexOutOfBoundsException
3331      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
3332      *         for any lane {@code N} in the vector
3333      *         where the mask is set
3334      */
3335     @ForceInline
3336     public final
3337     void intoArray(byte[] a, int offset,
3338                    VectorMask<Byte> m) {
3339         if (m.allTrue()) {
3340             intoArray(a, offset);
3341         } else {
3342             ByteSpecies vsp = vspecies();
3343             checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
3344             intoArray0(a, offset, m);
3345         }
3346     }
3347 
3348     /**
3349      * Scatters this vector into an array of type {@code byte[]}
3350      * using indexes obtained by adding a fixed {@code offset} to a
3351      * series of secondary offsets from an <em>index map</em>.
3352      * The index map is a contiguous sequence of {@code VLENGTH}
3353      * elements in a second array of {@code int}s, starting at a given
3354      * {@code mapOffset}.
3355      * <p>
3356      * For each vector lane, where {@code N} is the vector lane index,
3357      * the lane element at index {@code N} is stored into the array
3358      * element {@code a[f(N)]}, where {@code f(N)} is the
3359      * index mapping expression
     * {@code offset + indexMap[mapOffset + N]}.
3361      *
3362      * @param a the array
3363      * @param offset an offset to combine with the index map offsets
3364      * @param indexMap the index map
3365      * @param mapOffset the offset into the index map
3366      * @throws IndexOutOfBoundsException
3367      *         if {@code mapOffset+N < 0}
3368      *         or if {@code mapOffset+N >= indexMap.length},
3369      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
3370      *         is an invalid index into {@code a},
3371      *         for any lane {@code N} in the vector
3372      * @see ByteVector#toIntArray()
3373      */
3374     @ForceInline
3375     public final
3376     void intoArray(byte[] a, int offset,
3377                    int[] indexMap, int mapOffset) {
3378         stOp(a, offset,
3379              (arr, off, i, e) -> {
3380                  int j = indexMap[mapOffset + i];
3381                  arr[off + j] = e;
3382              });
3383     }
3384 
3385     /**
3386      * Scatters this vector into an array of type {@code byte[]},
3387      * under the control of a mask, and
3388      * using indexes obtained by adding a fixed {@code offset} to a
3389      * series of secondary offsets from an <em>index map</em>.
3390      * The index map is a contiguous sequence of {@code VLENGTH}
3391      * elements in a second array of {@code int}s, starting at a given
3392      * {@code mapOffset}.
3393      * <p>
3394      * For each vector lane, where {@code N} is the vector lane index,
3395      * if the mask lane at index {@code N} is set then
3396      * the lane element at index {@code N} is stored into the array
3397      * element {@code a[f(N)]}, where {@code f(N)} is the
3398      * index mapping expression
     * {@code offset + indexMap[mapOffset + N]}.
3400      *
3401      * @param a the array
3402      * @param offset an offset to combine with the index map offsets
3403      * @param indexMap the index map
3404      * @param mapOffset the offset into the index map
3405      * @param m the mask
3406      * @throws IndexOutOfBoundsException
3407      *         if {@code mapOffset+N < 0}
3408      *         or if {@code mapOffset+N >= indexMap.length},
3409      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
3410      *         is an invalid index into {@code a},
3411      *         for any lane {@code N} in the vector
3412      *         where the mask is set
3413      * @see ByteVector#toIntArray()
3414      */
3415     @ForceInline
3416     public final
3417     void intoArray(byte[] a, int offset,
3418                    int[] indexMap, int mapOffset,
3419                    VectorMask<Byte> m) {
3420         stOp(a, offset, m,
3421              (arr, off, i, e) -> {
3422                  int j = indexMap[mapOffset + i];
3423                  arr[off + j] = e;
3424              });
3425     }
3426 
3427 
3428     /**
3429      * Stores this vector into an array of type {@code boolean[]}
3430      * starting at an offset.
3431      * <p>
3432      * For each vector lane, where {@code N} is the vector lane index,
3433      * the lane element at index {@code N}
3434      * is first converted to a {@code boolean} value and then
3435      * stored into the array element {@code a[offset+N]}.
3436      * <p>
3437      * A {@code byte} value is converted to a {@code boolean} value by applying the
3438      * expression {@code (b & 1) != 0} where {@code b} is the byte value.
3439      *
3440      * @param a the array
3441      * @param offset the offset into the array
3442      * @throws IndexOutOfBoundsException
3443      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
3444      *         for any lane {@code N} in the vector
3445      */
    @ForceInline
    public final
    void intoBooleanArray(boolean[] a, int offset) {
        // Range-check the whole span up front.
        offset = checkFromIndexSize(offset, length(), a.length);
        ByteSpecies vsp = vspecies();
        // Normalize each lane to 0 or 1 so the stored booleans agree with
        // the documented (b & 1) != 0 conversion.
        ByteVector normalized = this.and((byte) 1);
        VectorSupport.store(
            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
            a, booleanArrayAddress(a, offset),
            normalized,
            a, offset,
            (arr, off, v)
            -> v.stOp(arr, off,
                      (arr_, off_, i, e) -> arr_[off_ + i] = (e & 1) != 0));
    }
3461 
3462     /**
3463      * Stores this vector into an array of type {@code boolean[]}
3464      * starting at offset and using a mask.
3465      * <p>
3466      * For each vector lane, where {@code N} is the vector lane index,
3467      * the lane element at index {@code N}
3468      * is first converted to a {@code boolean} value and then
3469      * stored into the array element {@code a[offset+N]}.
3470      * If the mask lane at {@code N} is unset then the corresponding
3471      * array element {@code a[offset+N]} is left unchanged.
3472      * <p>
3473      * A {@code byte} value is converted to a {@code boolean} value by applying the
3474      * expression {@code (b & 1) != 0} where {@code b} is the byte value.
3475      * <p>
3476      * Array range checking is done for lanes where the mask is set.
3477      * Lanes where the mask is unset are not stored and do not need
3478      * to correspond to legitimate elements of {@code a}.
3479      * That is, unset lanes may correspond to array indexes less than
3480      * zero or beyond the end of the array.
3481      *
3482      * @param a the array
3483      * @param offset the offset into the array
3484      * @param m the mask controlling lane storage
3485      * @throws IndexOutOfBoundsException
3486      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
3487      *         for any lane {@code N} in the vector
3488      *         where the mask is set
3489      */
3490     @ForceInline
3491     public final
3492     void intoBooleanArray(boolean[] a, int offset,
3493                           VectorMask<Byte> m) {
3494         if (m.allTrue()) {
3495             intoBooleanArray(a, offset);
3496         } else {
3497             ByteSpecies vsp = vspecies();
3498             checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
3499             intoBooleanArray0(a, offset, m);
3500         }
3501     }
3502 
3503     /**
3504      * Scatters this vector into an array of type {@code boolean[]}
3505      * using indexes obtained by adding a fixed {@code offset} to a
3506      * series of secondary offsets from an <em>index map</em>.
3507      * The index map is a contiguous sequence of {@code VLENGTH}
3508      * elements in a second array of {@code int}s, starting at a given
3509      * {@code mapOffset}.
3510      * <p>
3511      * For each vector lane, where {@code N} is the vector lane index,
3512      * the lane element at index {@code N}
3513      * is first converted to a {@code boolean} value and then
3514      * stored into the array
3515      * element {@code a[f(N)]}, where {@code f(N)} is the
3516      * index mapping expression
     * {@code offset + indexMap[mapOffset + N]}.
3518      * <p>
3519      * A {@code byte} value is converted to a {@code boolean} value by applying the
3520      * expression {@code (b & 1) != 0} where {@code b} is the byte value.
3521      *
3522      * @param a the array
3523      * @param offset an offset to combine with the index map offsets
3524      * @param indexMap the index map
3525      * @param mapOffset the offset into the index map
3526      * @throws IndexOutOfBoundsException
3527      *         if {@code mapOffset+N < 0}
3528      *         or if {@code mapOffset+N >= indexMap.length},
3529      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
3530      *         is an invalid index into {@code a},
3531      *         for any lane {@code N} in the vector
3532      * @see ByteVector#toIntArray()
3533      */
3534     @ForceInline
3535     public final
3536     void intoBooleanArray(boolean[] a, int offset,
3537                           int[] indexMap, int mapOffset) {
3538         // FIXME: optimize
3539         stOp(a, offset,
3540              (arr, off, i, e) -> {
3541                  int j = indexMap[mapOffset + i];
3542                  arr[off + j] = (e & 1) != 0;
3543              });
3544     }
3545 
3546     /**
3547      * Scatters this vector into an array of type {@code boolean[]},
3548      * under the control of a mask, and
3549      * using indexes obtained by adding a fixed {@code offset} to a
3550      * series of secondary offsets from an <em>index map</em>.
3551      * The index map is a contiguous sequence of {@code VLENGTH}
3552      * elements in a second array of {@code int}s, starting at a given
3553      * {@code mapOffset}.
3554      * <p>
3555      * For each vector lane, where {@code N} is the vector lane index,
3556      * if the mask lane at index {@code N} is set then
3557      * the lane element at index {@code N}
3558      * is first converted to a {@code boolean} value and then
3559      * stored into the array
3560      * element {@code a[f(N)]}, where {@code f(N)} is the
3561      * index mapping expression
     * {@code offset + indexMap[mapOffset + N]}.
3563      * <p>
3564      * A {@code byte} value is converted to a {@code boolean} value by applying the
3565      * expression {@code (b & 1) != 0} where {@code b} is the byte value.
3566      *
3567      * @param a the array
3568      * @param offset an offset to combine with the index map offsets
3569      * @param indexMap the index map
3570      * @param mapOffset the offset into the index map
3571      * @param m the mask
3572      * @throws IndexOutOfBoundsException
3573      *         if {@code mapOffset+N < 0}
3574      *         or if {@code mapOffset+N >= indexMap.length},
3575      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
3576      *         is an invalid index into {@code a},
3577      *         for any lane {@code N} in the vector
3578      *         where the mask is set
3579      * @see ByteVector#toIntArray()
3580      */
3581     @ForceInline
3582     public final
3583     void intoBooleanArray(boolean[] a, int offset,
3584                           int[] indexMap, int mapOffset,
3585                           VectorMask<Byte> m) {
3586         // FIXME: optimize
3587         stOp(a, offset, m,
3588              (arr, off, i, e) -> {
3589                  int j = indexMap[mapOffset + i];
3590                  arr[off + j] = (e & 1) != 0;
3591              });
3592     }
3593 
3594     /**
3595      * {@inheritDoc} <!--workaround-->
3596      */
3597     @Override
3598     @ForceInline
3599     public final
3600     void intoByteArray(byte[] a, int offset,
3601                        ByteOrder bo) {
3602         offset = checkFromIndexSize(offset, byteSize(), a.length);
3603         maybeSwap(bo).intoByteArray0(a, offset);
3604     }
3605 
3606     /**
3607      * {@inheritDoc} <!--workaround-->
3608      */
3609     @Override
3610     @ForceInline
3611     public final
3612     void intoByteArray(byte[] a, int offset,
3613                        ByteOrder bo,
3614                        VectorMask<Byte> m) {
3615         if (m.allTrue()) {
3616             intoByteArray(a, offset, bo);
3617         } else {
3618             ByteSpecies vsp = vspecies();
3619             checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
3620             maybeSwap(bo).intoByteArray0(a, offset, m);
3621         }
3622     }
3623 
3624     /**
3625      * {@inheritDoc} <!--workaround-->
3626      */
3627     @Override
3628     @ForceInline
3629     public final
3630     void intoByteBuffer(ByteBuffer bb, int offset,
3631                         ByteOrder bo) {
3632         if (ScopedMemoryAccess.isReadOnly(bb)) {
3633             throw new ReadOnlyBufferException();
3634         }
3635         offset = checkFromIndexSize(offset, byteSize(), bb.limit());
3636         maybeSwap(bo).intoByteBuffer0(bb, offset);
3637     }
3638 
3639     /**
3640      * {@inheritDoc} <!--workaround-->
3641      */
3642     @Override
3643     @ForceInline
3644     public final
3645     void intoByteBuffer(ByteBuffer bb, int offset,
3646                         ByteOrder bo,
3647                         VectorMask<Byte> m) {
3648         if (m.allTrue()) {
3649             intoByteBuffer(bb, offset, bo);
3650         } else {
3651             if (bb.isReadOnly()) {
3652                 throw new ReadOnlyBufferException();
3653             }
3654             ByteSpecies vsp = vspecies();
3655             checkMaskFromIndexSize(offset, vsp, m, 1, bb.limit());
3656             maybeSwap(bo).intoByteBuffer0(bb, offset, m);
3657         }
3658     }
3659 
3660     // ================================================
3661 
3662     // Low-level memory operations.
3663     //
3664     // Note that all of these operations *must* inline into a context
3665     // where the exact species of the involved vector is a
3666     // compile-time constant.  Otherwise, the intrinsic generation
3667     // will fail and performance will suffer.
3668     //
3669     // In many cases this is achieved by re-deriving a version of the
3670     // method in each concrete subclass (per species).  The re-derived
3671     // method simply calls one of these generic methods, with exact
3672     // parameters for the controlling metadata, which is either a
3673     // typed vector or constant species instance.
3674 
3675     // Unchecked loading operations in native byte order.
3676     // Caller is responsible for applying index checks, masking, and
3677     // byte swapping.
3678 
    /*package-private*/
    abstract
    ByteVector fromArray0(byte[] a, int offset);
    @ForceInline
    final
    ByteVector fromArray0Template(byte[] a, int offset) {
        // Unchecked contiguous load in native order; callers have already
        // validated the range.  The lambda is the scalar fallback used when
        // the intrinsic is not applied.
        ByteSpecies vsp = vspecies();
        return VectorSupport.load(
            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
            a, arrayAddress(a, offset),
            a, offset, vsp,
            (arr, off, s) -> s.ldOp(arr, off,
                                    (arr_, off_, i) -> arr_[off_ + i]));
    }
3693 
    /*package-private*/
    abstract
    ByteVector fromArray0(byte[] a, int offset, VectorMask<Byte> m);
    @ForceInline
    final
    <M extends VectorMask<Byte>>
    ByteVector fromArray0Template(Class<M> maskClass, byte[] a, int offset, M m) {
        // Masked unchecked load; the mask class is passed explicitly so the
        // intrinsic can specialize on it.
        m.check(species());
        ByteSpecies vsp = vspecies();
        return VectorSupport.loadMasked(
            vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
            a, arrayAddress(a, offset), m,
            a, offset, vsp,
            (arr, off, s, vm) -> s.ldOp(arr, off, vm,
                                        (arr_, off_, i) -> arr_[off_ + i]));
    }
3710 
3711 
3712 
    /*package-private*/
    abstract
    ByteVector fromBooleanArray0(boolean[] a, int offset);
    @ForceInline
    final
    ByteVector fromBooleanArray0Template(boolean[] a, int offset) {
        // Unchecked boolean load: true -> 1, false -> 0, in native order.
        ByteSpecies vsp = vspecies();
        return VectorSupport.load(
            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
            a, booleanArrayAddress(a, offset),
            a, offset, vsp,
            (arr, off, s) -> s.ldOp(arr, off,
                                    (arr_, off_, i) -> (byte) (arr_[off_ + i] ? 1 : 0)));
    }
3727 
    /*package-private*/
    abstract
    ByteVector fromBooleanArray0(boolean[] a, int offset, VectorMask<Byte> m);
    @ForceInline
    final
    <M extends VectorMask<Byte>>
    ByteVector fromBooleanArray0Template(Class<M> maskClass, boolean[] a, int offset, M m) {
        // Masked unchecked boolean load: set lanes read true -> 1,
        // false -> 0; the mask class lets the intrinsic specialize.
        m.check(species());
        ByteSpecies vsp = vspecies();
        return VectorSupport.loadMasked(
            vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
            a, booleanArrayAddress(a, offset), m,
            a, offset, vsp,
            (arr, off, s, vm) -> s.ldOp(arr, off, vm,
                                        (arr_, off_, i) -> (byte) (arr_[off_ + i] ? 1 : 0)));
    }
3744 
    @Override
    abstract
    ByteVector fromByteArray0(byte[] a, int offset);
    @ForceInline
    final
    ByteVector fromByteArray0Template(byte[] a, int offset) {
        // Unchecked load from raw bytes; the scalar fallback views the
        // array through a ByteBuffer in native byte order.
        ByteSpecies vsp = vspecies();
        return VectorSupport.load(
            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
            a, byteArrayAddress(a, offset),
            a, offset, vsp,
            (arr, off, s) -> {
                ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
                return s.ldOp(wb, off,
                        (wb_, o, i) -> wb_.get(o + i * 1));
            });
    }
3762 
    abstract
    ByteVector fromByteArray0(byte[] a, int offset, VectorMask<Byte> m);
    @ForceInline
    final
    <M extends VectorMask<Byte>>
    ByteVector fromByteArray0Template(Class<M> maskClass, byte[] a, int offset, M m) {
        // Masked unchecked load from raw bytes via a native-order
        // ByteBuffer view in the scalar fallback.
        ByteSpecies vsp = vspecies();
        m.check(vsp);
        return VectorSupport.loadMasked(
            vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
            a, byteArrayAddress(a, offset), m,
            a, offset, vsp,
            (arr, off, s, vm) -> {
                ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
                return s.ldOp(wb, off, vm,
                        (wb_, o, i) -> wb_.get(o + i * 1));
            });
    }
3781 
    abstract
    ByteVector fromByteBuffer0(ByteBuffer bb, int offset);
    @ForceInline
    final
    ByteVector fromByteBuffer0Template(ByteBuffer bb, int offset) {
        // Unchecked buffer load; ScopedMemoryAccess keeps any backing
        // memory session alive for the duration of the access.
        ByteSpecies vsp = vspecies();
        return ScopedMemoryAccess.loadFromByteBuffer(
                vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
                bb, offset, vsp,
                (buf, off, s) -> {
                    ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
                    return s.ldOp(wb, off,
                            (wb_, o, i) -> wb_.get(o + i * 1));
                });
    }
3797 
    abstract
    ByteVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Byte> m);
    @ForceInline
    final
    <M extends VectorMask<Byte>>
    ByteVector fromByteBuffer0Template(Class<M> maskClass, ByteBuffer bb, int offset, M m) {
        // Masked unchecked buffer load through ScopedMemoryAccess
        // (session-safe); the mask class lets the intrinsic specialize.
        ByteSpecies vsp = vspecies();
        m.check(vsp);
        return ScopedMemoryAccess.loadFromByteBufferMasked(
                vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
                bb, offset, m, vsp,
                (buf, off, s, vm) -> {
                    ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
                    return s.ldOp(wb, off, vm,
                            (wb_, o, i) -> wb_.get(o + i * 1));
                });
    }
3815 
3816     // Unchecked storing operations in native byte order.
3817     // Caller is responsible for applying index checks, masking, and
3818     // byte swapping.
3819 
    abstract
    void intoArray0(byte[] a, int offset);
    @ForceInline
    final
    void intoArray0Template(byte[] a, int offset) {
        // Unchecked contiguous store in native order; callers have already
        // validated the range.
        ByteSpecies vsp = vspecies();
        VectorSupport.store(
            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
            a, arrayAddress(a, offset),
            this, a, offset,
            (arr, off, v)
            -> v.stOp(arr, off,
                      (arr_, off_, i, e) -> arr_[off_+i] = e));
    }
3834 
    abstract
    void intoArray0(byte[] a, int offset, VectorMask<Byte> m);
    @ForceInline
    final
    <M extends VectorMask<Byte>>
    void intoArray0Template(Class<M> maskClass, byte[] a, int offset, M m) {
        // Masked unchecked store; only set lanes are written.
        m.check(species());
        ByteSpecies vsp = vspecies();
        VectorSupport.storeMasked(
            vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
            a, arrayAddress(a, offset),
            this, m, a, offset,
            (arr, off, v, vm)
            -> v.stOp(arr, off, vm,
                      (arr_, off_, i, e) -> arr_[off_ + i] = e));
    }
3851 
3852 
    abstract
    void intoBooleanArray0(boolean[] a, int offset, VectorMask<Byte> m);
    @ForceInline
    final
    <M extends VectorMask<Byte>>
    void intoBooleanArray0Template(Class<M> maskClass, boolean[] a, int offset, M m) {
        m.check(species());
        ByteSpecies vsp = vspecies();
        // Normalize lanes to 0 or 1 first so the stored booleans agree with
        // the documented (e & 1) != 0 conversion.
        ByteVector normalized = this.and((byte) 1);
        VectorSupport.storeMasked(
            vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
            a, booleanArrayAddress(a, offset),
            normalized, m, a, offset,
            (arr, off, v, vm)
            -> v.stOp(arr, off, vm,
                      (arr_, off_, i, e) -> arr_[off_ + i] = (e & 1) != 0));
    }
3870 
    abstract
    void intoByteArray0(byte[] a, int offset);
    @ForceInline
    final
    void intoByteArray0Template(byte[] a, int offset) {
        // Unchecked store into raw bytes; the scalar fallback writes
        // through a native-order ByteBuffer view of the array.
        ByteSpecies vsp = vspecies();
        VectorSupport.store(
            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
            a, byteArrayAddress(a, offset),
            this, a, offset,
            (arr, off, v) -> {
                ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
                v.stOp(wb, off,
                        (tb_, o, i, e) -> tb_.put(o + i * 1, e));
            });
    }
3887 
    abstract
    void intoByteArray0(byte[] a, int offset, VectorMask<Byte> m);
    @ForceInline
    final
    <M extends VectorMask<Byte>>
    void intoByteArray0Template(Class<M> maskClass, byte[] a, int offset, M m) {
        // Masked unchecked store into raw bytes via a native-order
        // ByteBuffer view; only set lanes are written.
        ByteSpecies vsp = vspecies();
        m.check(vsp);
        VectorSupport.storeMasked(
            vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
            a, byteArrayAddress(a, offset),
            this, m, a, offset,
            (arr, off, v, vm) -> {
                ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
                v.stOp(wb, off, vm,
                        (tb_, o, i, e) -> tb_.put(o + i * 1, e));
            });
    }
3906 
    @ForceInline
    final
    void intoByteBuffer0(ByteBuffer bb, int offset) {
        // Unchecked buffer store; ScopedMemoryAccess keeps any backing
        // memory session alive for the duration of the access.
        ByteSpecies vsp = vspecies();
        ScopedMemoryAccess.storeIntoByteBuffer(
                vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
                this, bb, offset,
                (buf, off, v) -> {
                    ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
                    v.stOp(wb, off,
                            (wb_, o, i, e) -> wb_.put(o + i * 1, e));
                });
    }
3920 
    abstract
    void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Byte> m);
    // Masked variant of the unchecked ByteBuffer store: only lanes selected
    // by m are written.  The mask is checked against this species first.
    @ForceInline
    final
    <M extends VectorMask<Byte>>
    void intoByteBuffer0Template(Class<M> maskClass, ByteBuffer bb, int offset, M m) {
        ByteSpecies vsp = vspecies();
        m.check(vsp);  // reject masks of a different species
        ScopedMemoryAccess.storeIntoByteBufferMasked(
                vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
                this, m, bb, offset,
                (buf, off, v, vm) -> {
                    // Java fallback: masked lane-by-lane store through a
                    // native-order view of the buffer.
                    ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
                    v.stOp(wb, off, vm,
                            (wb_, o, i, e) -> wb_.put(o + i * 1, e));
                });
    }
3938 
3939 
3940     // End of low-level memory operations.
3941 
3942     private static
3943     void checkMaskFromIndexSize(int offset,
3944                                 ByteSpecies vsp,
3945                                 VectorMask<Byte> m,
3946                                 int scale,
3947                                 int limit) {
3948         ((AbstractMask<Byte>)m)
3949             .checkIndexByLane(offset, limit, vsp.iota(), scale);
3950     }
3951 
3952     @ForceInline
3953     private void conditionalStoreNYI(int offset,
3954                                      ByteSpecies vsp,
3955                                      VectorMask<Byte> m,
3956                                      int scale,
3957                                      int limit) {
3958         if (offset < 0 || offset + vsp.laneCount() * scale > limit) {
3959             String msg =
3960                 String.format("unimplemented: store @%d in [0..%d), %s in %s",
3961                               offset, limit, m, vsp);
3962             throw new AssertionError(msg);
3963         }
3964     }
3965 
    /*package-private*/
    // A single byte has no internal byte order, so any requested
    // ByteOrder yields the same vector: this is an identity operation.
    @Override
    @ForceInline
    final
    ByteVector maybeSwap(ByteOrder bo) {
        return this;
    }
3973 
    // Addressing constants for byte[] elements via Unsafe:
    // ARRAY_SHIFT is log2 of Unsafe.ARRAY_BYTE_INDEX_SCALE.
    static final int ARRAY_SHIFT =
        31 - Integer.numberOfLeadingZeros(Unsafe.ARRAY_BYTE_INDEX_SCALE);
    static final long ARRAY_BASE =
        Unsafe.ARRAY_BYTE_BASE_OFFSET;

    // Returns the Unsafe offset of a[index].
    @ForceInline
    static long arrayAddress(byte[] a, int index) {
        return ARRAY_BASE + (((long)index) << ARRAY_SHIFT);
    }
3983 
3984 
    // Addressing constants for boolean[] elements via Unsafe:
    // ARRAY_BOOLEAN_SHIFT is log2 of Unsafe.ARRAY_BOOLEAN_INDEX_SCALE.
    static final int ARRAY_BOOLEAN_SHIFT =
            31 - Integer.numberOfLeadingZeros(Unsafe.ARRAY_BOOLEAN_INDEX_SCALE);
    static final long ARRAY_BOOLEAN_BASE =
            Unsafe.ARRAY_BOOLEAN_BASE_OFFSET;

    // Returns the Unsafe offset of a[index].
    @ForceInline
    static long booleanArrayAddress(boolean[] a, int index) {
        return ARRAY_BOOLEAN_BASE + (((long)index) << ARRAY_BOOLEAN_SHIFT);
    }
3994 
    // Returns the Unsafe offset of a[index] when the payload is raw bytes:
    // the index scale is 1, so no shift is needed (unlike arrayAddress).
    @ForceInline
    static long byteArrayAddress(byte[] a, int index) {
        return Unsafe.ARRAY_BYTE_BASE_OFFSET + index;
    }
3999 
4000     // ================================================
4001 
4002     /// Reinterpreting view methods:
4003     //   lanewise reinterpret: viewAsXVector()
4004     //   keep shape, redraw lanes: reinterpretAsEs()
4005 
4006     /**
4007      * {@inheritDoc} <!--workaround-->
4008      */
4009     @ForceInline
4010     @Override
4011     public final ByteVector reinterpretAsBytes() {
4012         return this;
4013     }
4014 
4015     /**
4016      * {@inheritDoc} <!--workaround-->
4017      */
4018     @ForceInline
4019     @Override
4020     public final ByteVector viewAsIntegralLanes() {
4021         return this;
4022     }
4023 
4024     /**
4025      * {@inheritDoc} <!--workaround-->
4026      *
4027      * @implNote This method always throws
4028      * {@code UnsupportedOperationException}, because there is no floating
4029      * point type of the same size as {@code byte}.  The return type
4030      * of this method is arbitrarily designated as
4031      * {@code Vector<?>}.  Future versions of this API may change the return
4032      * type if additional floating point types become available.
4033      */
4034     @ForceInline
4035     @Override
4036     public final
4037     Vector<?>
4038     viewAsFloatingLanes() {
4039         LaneType flt = LaneType.BYTE.asFloating();
4040         // asFloating() will throw UnsupportedOperationException for the unsupported type byte
4041         throw new AssertionError("Cannot reach here");
4042     }
4043 
4044     // ================================================
4045 
4046     /// Object methods: toString, equals, hashCode
4047     //
4048     // Object methods are defined as if via Arrays.toString, etc.,
4049     // is applied to the array of elements.  Two equal vectors
4050     // are required to have equal species and equal lane values.
4051 
4052     /**
4053      * Returns a string representation of this vector, of the form
4054      * {@code "[0,1,2...]"}, reporting the lane values of this vector,
4055      * in lane order.
4056      *
4057      * The string is produced as if by a call to {@link
4058      * java.util.Arrays#toString(byte[]) Arrays.toString()},
4059      * as appropriate to the {@code byte} array returned by
4060      * {@link #toArray this.toArray()}.
4061      *
4062      * @return a string of the form {@code "[0,1,2...]"}
4063      * reporting the lane values of this vector
4064      */
4065     @Override
4066     @ForceInline
4067     public final
4068     String toString() {
4069         // now that toArray is strongly typed, we can define this
4070         return Arrays.toString(toArray());
4071     }
4072 
4073     /**
4074      * {@inheritDoc} <!--workaround-->
4075      */
4076     @Override
4077     @ForceInline
4078     public final
4079     boolean equals(Object obj) {
4080         if (obj instanceof Vector) {
4081             Vector<?> that = (Vector<?>) obj;
4082             if (this.species().equals(that.species())) {
4083                 return this.eq(that.check(this.species())).allTrue();
4084             }
4085         }
4086         return false;
4087     }
4088 
4089     /**
4090      * {@inheritDoc} <!--workaround-->
4091      */
4092     @Override
4093     @ForceInline
4094     public final
4095     int hashCode() {
4096         // now that toArray is strongly typed, we can define this
4097         return Objects.hash(species(), Arrays.hashCode(toArray()));
4098     }
4099 
4100     // ================================================
4101 
4102     // Species
4103 
4104     /**
4105      * Class representing {@link ByteVector}'s of the same {@link VectorShape VectorShape}.
4106      */
4107     /*package-private*/
4108     static final class ByteSpecies extends AbstractSpecies<Byte> {
4109         private ByteSpecies(VectorShape shape,
4110                 Class<? extends ByteVector> vectorType,
4111                 Class<? extends AbstractMask<Byte>> maskType,
4112                 Function<Object, ByteVector> vectorFactory) {
4113             super(shape, LaneType.of(byte.class),
4114                   vectorType, maskType,
4115                   vectorFactory);
4116             assert(this.elementSize() == Byte.SIZE);
4117         }
4118 
4119         // Specializing overrides:
4120 
4121         @Override
4122         @ForceInline
4123         public final Class<Byte> elementType() {
4124             return byte.class;
4125         }
4126 
4127         @Override
4128         @ForceInline
4129         final Class<Byte> genericElementType() {
4130             return Byte.class;
4131         }
4132 
4133         @SuppressWarnings("unchecked")
4134         @Override
4135         @ForceInline
4136         public final Class<? extends ByteVector> vectorType() {
4137             return (Class<? extends ByteVector>) vectorType;
4138         }
4139 
4140         @Override
4141         @ForceInline
4142         public final long checkValue(long e) {
4143             longToElementBits(e);  // only for exception
4144             return e;
4145         }
4146 
4147         /*package-private*/
4148         @Override
4149         @ForceInline
4150         final ByteVector broadcastBits(long bits) {
4151             return (ByteVector)
4152                 VectorSupport.broadcastCoerced(
4153                     vectorType, byte.class, laneCount,
4154                     bits, this,
4155                     (bits_, s_) -> s_.rvOp(i -> bits_));
4156         }
4157 
4158         /*package-private*/
4159         @ForceInline
4160         final ByteVector broadcast(byte e) {
4161             return broadcastBits(toBits(e));
4162         }
4163 
4164         @Override
4165         @ForceInline
4166         public final ByteVector broadcast(long e) {
4167             return broadcastBits(longToElementBits(e));
4168         }
4169 
4170         /*package-private*/
4171         final @Override
4172         @ForceInline
4173         long longToElementBits(long value) {
4174             // Do the conversion, and then test it for failure.
4175             byte e = (byte) value;
4176             if ((long) e != value) {
4177                 throw badElementBits(value, e);
4178             }
4179             return toBits(e);
4180         }
4181 
4182         /*package-private*/
4183         @ForceInline
4184         static long toIntegralChecked(byte e, boolean convertToInt) {
4185             long value = convertToInt ? (int) e : (long) e;
4186             if ((byte) value != e) {
4187                 throw badArrayBits(e, convertToInt, value);
4188             }
4189             return value;
4190         }
4191 
4192         /* this non-public one is for internal conversions */
4193         @Override
4194         @ForceInline
4195         final ByteVector fromIntValues(int[] values) {
4196             VectorIntrinsics.requireLength(values.length, laneCount);
4197             byte[] va = new byte[laneCount()];
4198             for (int i = 0; i < va.length; i++) {
4199                 int lv = values[i];
4200                 byte v = (byte) lv;
4201                 va[i] = v;
4202                 if ((int)v != lv) {
4203                     throw badElementBits(lv, v);
4204                 }
4205             }
4206             return dummyVector().fromArray0(va, 0);
4207         }
4208 
4209         // Virtual constructors
4210 
4211         @ForceInline
4212         @Override final
4213         public ByteVector fromArray(Object a, int offset) {
4214             // User entry point:  Be careful with inputs.
4215             return ByteVector
4216                 .fromArray(this, (byte[]) a, offset);
4217         }
4218 
4219         @ForceInline
4220         @Override final
4221         ByteVector dummyVector() {
4222             return (ByteVector) super.dummyVector();
4223         }
4224 
4225         /*package-private*/
4226         final @Override
4227         @ForceInline
4228         ByteVector rvOp(RVOp f) {
4229             byte[] res = new byte[laneCount()];
4230             for (int i = 0; i < res.length; i++) {
4231                 byte bits = (byte) f.apply(i);
4232                 res[i] = fromBits(bits);
4233             }
4234             return dummyVector().vectorFactory(res);
4235         }
4236 
4237         ByteVector vOp(FVOp f) {
4238             byte[] res = new byte[laneCount()];
4239             for (int i = 0; i < res.length; i++) {
4240                 res[i] = f.apply(i);
4241             }
4242             return dummyVector().vectorFactory(res);
4243         }
4244 
4245         ByteVector vOp(VectorMask<Byte> m, FVOp f) {
4246             byte[] res = new byte[laneCount()];
4247             boolean[] mbits = ((AbstractMask<Byte>)m).getBits();
4248             for (int i = 0; i < res.length; i++) {
4249                 if (mbits[i]) {
4250                     res[i] = f.apply(i);
4251                 }
4252             }
4253             return dummyVector().vectorFactory(res);
4254         }
4255 
4256         /*package-private*/
4257         @ForceInline
4258         <M> ByteVector ldOp(M memory, int offset,
4259                                       FLdOp<M> f) {
4260             return dummyVector().ldOp(memory, offset, f);
4261         }
4262 
4263         /*package-private*/
4264         @ForceInline
4265         <M> ByteVector ldOp(M memory, int offset,
4266                                       VectorMask<Byte> m,
4267                                       FLdOp<M> f) {
4268             return dummyVector().ldOp(memory, offset, m, f);
4269         }
4270 
4271         /*package-private*/
4272         @ForceInline
4273         <M> void stOp(M memory, int offset, FStOp<M> f) {
4274             dummyVector().stOp(memory, offset, f);
4275         }
4276 
4277         /*package-private*/
4278         @ForceInline
4279         <M> void stOp(M memory, int offset,
4280                       AbstractMask<Byte> m,
4281                       FStOp<M> f) {
4282             dummyVector().stOp(memory, offset, m, f);
4283         }
4284 
4285         // N.B. Make sure these constant vectors and
4286         // masks load up correctly into registers.
4287         //
4288         // Also, see if we can avoid all that switching.
4289         // Could we cache both vectors and both masks in
4290         // this species object?
4291 
4292         // Zero and iota vector access
4293         @Override
4294         @ForceInline
4295         public final ByteVector zero() {
4296             if ((Class<?>) vectorType() == ByteMaxVector.class)
4297                 return ByteMaxVector.ZERO;
4298             switch (vectorBitSize()) {
4299                 case 64: return Byte64Vector.ZERO;
4300                 case 128: return Byte128Vector.ZERO;
4301                 case 256: return Byte256Vector.ZERO;
4302                 case 512: return Byte512Vector.ZERO;
4303             }
4304             throw new AssertionError();
4305         }
4306 
4307         @Override
4308         @ForceInline
4309         public final ByteVector iota() {
4310             if ((Class<?>) vectorType() == ByteMaxVector.class)
4311                 return ByteMaxVector.IOTA;
4312             switch (vectorBitSize()) {
4313                 case 64: return Byte64Vector.IOTA;
4314                 case 128: return Byte128Vector.IOTA;
4315                 case 256: return Byte256Vector.IOTA;
4316                 case 512: return Byte512Vector.IOTA;
4317             }
4318             throw new AssertionError();
4319         }
4320 
4321         // Mask access
4322         @Override
4323         @ForceInline
4324         public final VectorMask<Byte> maskAll(boolean bit) {
4325             if ((Class<?>) vectorType() == ByteMaxVector.class)
4326                 return ByteMaxVector.ByteMaxMask.maskAll(bit);
4327             switch (vectorBitSize()) {
4328                 case 64: return Byte64Vector.Byte64Mask.maskAll(bit);
4329                 case 128: return Byte128Vector.Byte128Mask.maskAll(bit);
4330                 case 256: return Byte256Vector.Byte256Mask.maskAll(bit);
4331                 case 512: return Byte512Vector.Byte512Mask.maskAll(bit);
4332             }
4333             throw new AssertionError();
4334         }
4335     }
4336 
4337     /**
4338      * Finds a species for an element type of {@code byte} and shape.
4339      *
4340      * @param s the shape
4341      * @return a species for an element type of {@code byte} and shape
4342      * @throws IllegalArgumentException if no such species exists for the shape
4343      */
4344     static ByteSpecies species(VectorShape s) {
4345         Objects.requireNonNull(s);
4346         switch (s) {
4347             case S_64_BIT: return (ByteSpecies) SPECIES_64;
4348             case S_128_BIT: return (ByteSpecies) SPECIES_128;
4349             case S_256_BIT: return (ByteSpecies) SPECIES_256;
4350             case S_512_BIT: return (ByteSpecies) SPECIES_512;
4351             case S_Max_BIT: return (ByteSpecies) SPECIES_MAX;
4352             default: throw new IllegalArgumentException("Bad shape: " + s);
4353         }
4354     }
4355 
    /** Species representing {@link ByteVector}s of {@link VectorShape#S_64_BIT VectorShape.S_64_BIT}. */
    public static final VectorSpecies<Byte> SPECIES_64
        = new ByteSpecies(VectorShape.S_64_BIT,
                            Byte64Vector.class,
                            Byte64Vector.Byte64Mask.class,
                            Byte64Vector::new);

    /** Species representing {@link ByteVector}s of {@link VectorShape#S_128_BIT VectorShape.S_128_BIT}. */
    public static final VectorSpecies<Byte> SPECIES_128
        = new ByteSpecies(VectorShape.S_128_BIT,
                            Byte128Vector.class,
                            Byte128Vector.Byte128Mask.class,
                            Byte128Vector::new);

    /** Species representing {@link ByteVector}s of {@link VectorShape#S_256_BIT VectorShape.S_256_BIT}. */
    public static final VectorSpecies<Byte> SPECIES_256
        = new ByteSpecies(VectorShape.S_256_BIT,
                            Byte256Vector.class,
                            Byte256Vector.Byte256Mask.class,
                            Byte256Vector::new);

    /** Species representing {@link ByteVector}s of {@link VectorShape#S_512_BIT VectorShape.S_512_BIT}. */
    public static final VectorSpecies<Byte> SPECIES_512
        = new ByteSpecies(VectorShape.S_512_BIT,
                            Byte512Vector.class,
                            Byte512Vector.Byte512Mask.class,
                            Byte512Vector::new);

    /** Species representing {@link ByteVector}s of {@link VectorShape#S_Max_BIT VectorShape.S_Max_BIT}. */
    public static final VectorSpecies<Byte> SPECIES_MAX
        = new ByteSpecies(VectorShape.S_Max_BIT,
                            ByteMaxVector.class,
                            ByteMaxVector.ByteMaxMask.class,
                            ByteMaxVector::new);

    /**
     * Preferred species for {@link ByteVector}s.
     * A preferred species is a species of maximal bit-size for the platform.
     */
    // The cast asserts that ofPreferred(byte.class) yields one of the
    // ByteSpecies singletons defined above.
    public static final VectorSpecies<Byte> SPECIES_PREFERRED
        = (ByteSpecies) VectorSpecies.ofPreferred(byte.class);
4397 }