1 /*
   2  * Copyright (c) 2017, 2022, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 package jdk.incubator.vector;
  26 
  27 import java.nio.ByteOrder;
  28 import java.util.Arrays;
  29 import java.util.Objects;
  30 import java.util.function.Function;
  31 
  32 import jdk.incubator.foreign.MemorySegment;
  33 import jdk.incubator.foreign.ValueLayout;
  34 import jdk.internal.access.foreign.MemorySegmentProxy;
  35 import jdk.internal.misc.ScopedMemoryAccess;
  36 import jdk.internal.misc.Unsafe;
  37 import jdk.internal.vm.annotation.ForceInline;
  38 import jdk.internal.vm.vector.VectorSupport;
  39 
  40 import static jdk.internal.vm.vector.VectorSupport.*;
  41 import static jdk.incubator.vector.VectorIntrinsics.*;
  42 
  43 import static jdk.incubator.vector.VectorOperators.*;
  44 
  45 // -- This file was mechanically generated: Do not edit! -- //
  46 
  47 /**
  48  * A specialized {@link Vector} representing an ordered immutable sequence of
  49  * {@code byte} values.
  50  */
  51 @SuppressWarnings("cast")  // warning: redundant cast
  52 public abstract class ByteVector extends AbstractVector<Byte> {
  53 
    // Sole constructor: each shape-specific subclass hands in its backing
    // byte[] payload.  Vectors are immutable, so the array must not be
    // aliased elsewhere after construction.
    ByteVector(byte[] vec) {
        super(vec);
    }
  57 
    // Operator-kind bits that are never valid for byte lanes
    // (floating-point-only operators).
    static final int FORBID_OPCODE_KIND = VO_ONLYFP;

    // Layout used for single-element MemorySegment get/set; byte-aligned.
    static final ValueLayout.OfByte ELEMENT_LAYOUT = ValueLayout.JAVA_BYTE.withBitAlignment(8);

    // Maps a lanewise operator to its intrinsic opcode, rejecting
    // operators that are invalid for byte vectors.
    @ForceInline
    static int opCode(Operator op) {
        return VectorOperators.opCode(op, VO_OPCODE_VALID, FORBID_OPCODE_KIND);
    }
    // As above, but the caller may require additional operator-kind bits.
    @ForceInline
    static int opCode(Operator op, int requireKind) {
        requireKind |= VO_OPCODE_VALID;
        return VectorOperators.opCode(op, requireKind, FORBID_OPCODE_KIND);
    }
    // Tests whether the operator carries the given kind bit(s).
    @ForceInline
    static boolean opKind(Operator op, int bit) {
        return VectorOperators.opKind(op, bit);
    }
  75 
  76     // Virtualized factories and operators,
  77     // coded with portable definitions.
  78     // These are all @ForceInline in case
  79     // they need to be used performantly.
  80     // The various shape-specific subclasses
  81     // also specialize them by wrapping
  82     // them in a call like this:
  83     //    return (Byte128Vector)
  84     //       super.bOp((Byte128Vector) o);
  85     // The purpose of that is to forcibly inline
  86     // the generic definition from this file
  87     // into a sharply type- and size-specific
  88     // wrapper in the subclass file, so that
  89     // the JIT can specialize the code.
  90     // The code is only inlined and expanded
  91     // if it gets hot.  Think of it as a cheap
  92     // and lazy version of C++ templates.
  93 
  94     // Virtualized getter
  95 
    /*package-private*/
    // Virtualized getter for the backing array; each shape-specific
    // subclass returns its fixed-size payload.
    abstract byte[] vec();

    // Virtualized constructors

    /**
     * Build a vector directly using my own constructor.
     * It is an error if the array is aliased elsewhere.
     */
    /*package-private*/
    abstract ByteVector vectorFactory(byte[] vec);

    /**
     * Build a mask directly using my species.
     * It is an error if the array is aliased elsewhere.
     */
    /*package-private*/
    @ForceInline
    final
    AbstractMask<Byte> maskFactory(boolean[] bits) {
        return vspecies().maskFactory(bits);
    }
 118 
    // Constant loader (takes dummy as vector arg)
    // Per-lane generator: produces the value for lane i.
    interface FVOp {
        byte apply(int i);
    }

    /*package-private*/
    @ForceInline
    final
    ByteVector vOp(FVOp f) {
        // Build a fresh vector by asking f for each lane value in turn.
        byte[] res = new byte[length()];
        for (int i = 0; i < res.length; i++) {
            res[i] = f.apply(i);
        }
        return vectorFactory(res);
    }

    @ForceInline
    final
    ByteVector vOp(VectorMask<Byte> m, FVOp f) {
        // Masked variant: lanes whose mask bit is clear stay zero
        // (the array default).
        byte[] res = new byte[length()];
        boolean[] mbits = ((AbstractMask<Byte>)m).getBits();
        for (int i = 0; i < res.length; i++) {
            if (mbits[i]) {
                res[i] = f.apply(i);
            }
        }
        return vectorFactory(res);
    }
 147 
    // Unary operator

    /*package-private*/
    interface FUnOp {
        byte apply(int i, byte a);
    }

    /*package-private*/
    abstract
    ByteVector uOp(FUnOp f);
    // Portable scalar fallback for lane-wise unary ops; subclasses wrap it
    // in a shape-specific override so the JIT can specialize the loop.
    @ForceInline
    final
    ByteVector uOpTemplate(FUnOp f) {
        byte[] vec = vec();
        byte[] res = new byte[length()];
        for (int i = 0; i < res.length; i++) {
            res[i] = f.apply(i, vec[i]);
        }
        return vectorFactory(res);
    }

    /*package-private*/
    abstract
    ByteVector uOp(VectorMask<Byte> m,
                             FUnOp f);
    @ForceInline
    final
    ByteVector uOpTemplate(VectorMask<Byte> m,
                                     FUnOp f) {
        // A null mask means "all lanes"; delegate to the unmasked form.
        if (m == null) {
            return uOpTemplate(f);
        }
        byte[] vec = vec();
        byte[] res = new byte[length()];
        boolean[] mbits = ((AbstractMask<Byte>)m).getBits();
        for (int i = 0; i < res.length; i++) {
            // Unset lanes keep their original value.
            res[i] = mbits[i] ? f.apply(i, vec[i]) : vec[i];
        }
        return vectorFactory(res);
    }
 188 
    // Binary operator

    /*package-private*/
    interface FBinOp {
        byte apply(int i, byte a, byte b);
    }

    /*package-private*/
    abstract
    ByteVector bOp(Vector<Byte> o,
                             FBinOp f);
    // Portable scalar fallback for lane-wise binary ops; subclasses wrap it
    // in a shape-specific override so the JIT can specialize the loop.
    @ForceInline
    final
    ByteVector bOpTemplate(Vector<Byte> o,
                                     FBinOp f) {
        byte[] res = new byte[length()];
        byte[] vec1 = this.vec();
        byte[] vec2 = ((ByteVector)o).vec();
        for (int i = 0; i < res.length; i++) {
            res[i] = f.apply(i, vec1[i], vec2[i]);
        }
        return vectorFactory(res);
    }

    /*package-private*/
    abstract
    ByteVector bOp(Vector<Byte> o,
                             VectorMask<Byte> m,
                             FBinOp f);
    @ForceInline
    final
    ByteVector bOpTemplate(Vector<Byte> o,
                                     VectorMask<Byte> m,
                                     FBinOp f) {
        // A null mask means "all lanes"; delegate to the unmasked form.
        if (m == null) {
            return bOpTemplate(o, f);
        }
        byte[] res = new byte[length()];
        byte[] vec1 = this.vec();
        byte[] vec2 = ((ByteVector)o).vec();
        boolean[] mbits = ((AbstractMask<Byte>)m).getBits();
        for (int i = 0; i < res.length; i++) {
            // Unset lanes keep this vector's original value.
            res[i] = mbits[i] ? f.apply(i, vec1[i], vec2[i]) : vec1[i];
        }
        return vectorFactory(res);
    }
 235 
    // Ternary operator

    /*package-private*/
    interface FTriOp {
        byte apply(int i, byte a, byte b, byte c);
    }

    /*package-private*/
    abstract
    ByteVector tOp(Vector<Byte> o1,
                             Vector<Byte> o2,
                             FTriOp f);
    // Portable scalar fallback for lane-wise ternary ops (e.g. fused forms);
    // subclasses wrap it in a shape-specific override for JIT specialization.
    @ForceInline
    final
    ByteVector tOpTemplate(Vector<Byte> o1,
                                     Vector<Byte> o2,
                                     FTriOp f) {
        byte[] res = new byte[length()];
        byte[] vec1 = this.vec();
        byte[] vec2 = ((ByteVector)o1).vec();
        byte[] vec3 = ((ByteVector)o2).vec();
        for (int i = 0; i < res.length; i++) {
            res[i] = f.apply(i, vec1[i], vec2[i], vec3[i]);
        }
        return vectorFactory(res);
    }

    /*package-private*/
    abstract
    ByteVector tOp(Vector<Byte> o1,
                             Vector<Byte> o2,
                             VectorMask<Byte> m,
                             FTriOp f);
    @ForceInline
    final
    ByteVector tOpTemplate(Vector<Byte> o1,
                                     Vector<Byte> o2,
                                     VectorMask<Byte> m,
                                     FTriOp f) {
        // A null mask means "all lanes"; delegate to the unmasked form.
        if (m == null) {
            return tOpTemplate(o1, o2, f);
        }
        byte[] res = new byte[length()];
        byte[] vec1 = this.vec();
        byte[] vec2 = ((ByteVector)o1).vec();
        byte[] vec3 = ((ByteVector)o2).vec();
        boolean[] mbits = ((AbstractMask<Byte>)m).getBits();
        for (int i = 0; i < res.length; i++) {
            // Unset lanes keep this vector's original value.
            res[i] = mbits[i] ? f.apply(i, vec1[i], vec2[i], vec3[i]) : vec1[i];
        }
        return vectorFactory(res);
    }
 288 
    // Reduction operator

    /*package-private*/
    abstract
    byte rOp(byte v, VectorMask<Byte> m, FBinOp f);

    // Masked scalar fold: lanes with a clear mask bit are skipped, leaving
    // the accumulator v unchanged for those positions.
    @ForceInline
    final
    byte rOpTemplate(byte v, VectorMask<Byte> m, FBinOp f) {
        // A null mask means "all lanes"; delegate to the unmasked fold.
        if (m == null) {
            return rOpTemplate(v, f);
        }
        byte[] vec = vec();
        boolean[] mbits = ((AbstractMask<Byte>)m).getBits();
        for (int i = 0; i < vec.length; i++) {
            v = mbits[i] ? f.apply(i, v, vec[i]) : v;
        }
        return v;
    }

    // Unmasked scalar fold; v is the caller-supplied initial/identity value.
    @ForceInline
    final
    byte rOpTemplate(byte v, FBinOp f) {
        byte[] vec = vec();
        for (int i = 0; i < vec.length; i++) {
            v = f.apply(i, v, vec[i]);
        }
        return v;
    }
 318 
    // Memory reference

    /*package-private*/
    // Lane loader from a generic memory carrier M (e.g. an array).
    interface FLdOp<M> {
        byte apply(M memory, int offset, int i);
    }

    /*package-private*/
    @ForceInline
    final
    <M> ByteVector ldOp(M memory, int offset,
                                  FLdOp<M> f) {
        //dummy; no vec = vec();
        byte[] res = new byte[length()];
        for (int i = 0; i < res.length; i++) {
            res[i] = f.apply(memory, offset, i);
        }
        return vectorFactory(res);
    }

    /*package-private*/
    @ForceInline
    final
    <M> ByteVector ldOp(M memory, int offset,
                                  VectorMask<Byte> m,
                                  FLdOp<M> f) {
        //byte[] vec = vec();
        // Masked load: lanes with a clear mask bit are not read and stay zero.
        byte[] res = new byte[length()];
        boolean[] mbits = ((AbstractMask<Byte>)m).getBits();
        for (int i = 0; i < res.length; i++) {
            if (mbits[i]) {
                res[i] = f.apply(memory, offset, i);
            }
        }
        return vectorFactory(res);
    }

    /*package-private*/
    // Lane loader from a MemorySegment, addressed by a long byte offset.
    interface FLdLongOp {
        byte apply(MemorySegment memory, long offset, int i);
    }

    /*package-private*/
    @ForceInline
    final
    ByteVector ldLongOp(MemorySegment memory, long offset,
                                  FLdLongOp f) {
        //dummy; no vec = vec();
        byte[] res = new byte[length()];
        for (int i = 0; i < res.length; i++) {
            res[i] = f.apply(memory, offset, i);
        }
        return vectorFactory(res);
    }

    /*package-private*/
    @ForceInline
    final
    ByteVector ldLongOp(MemorySegment memory, long offset,
                                  VectorMask<Byte> m,
                                  FLdLongOp f) {
        //byte[] vec = vec();
        // Masked segment load: unread lanes stay zero.
        byte[] res = new byte[length()];
        boolean[] mbits = ((AbstractMask<Byte>)m).getBits();
        for (int i = 0; i < res.length; i++) {
            if (mbits[i]) {
                res[i] = f.apply(memory, offset, i);
            }
        }
        return vectorFactory(res);
    }

    // Reads lane i from the segment; the stride is 1 byte per lane.
    static byte memorySegmentGet(MemorySegment ms, long o, int i) {
        return ms.get(ELEMENT_LAYOUT, o + i * 1L);
    }
 394 
    // Lane storer into a generic memory carrier M (e.g. an array).
    interface FStOp<M> {
        void apply(M memory, int offset, int i, byte a);
    }

    /*package-private*/
    @ForceInline
    final
    <M> void stOp(M memory, int offset,
                  FStOp<M> f) {
        byte[] vec = vec();
        for (int i = 0; i < vec.length; i++) {
            f.apply(memory, offset, i, vec[i]);
        }
    }

    /*package-private*/
    @ForceInline
    final
    <M> void stOp(M memory, int offset,
                  VectorMask<Byte> m,
                  FStOp<M> f) {
        // Masked store: lanes with a clear mask bit are not written.
        byte[] vec = vec();
        boolean[] mbits = ((AbstractMask<Byte>)m).getBits();
        for (int i = 0; i < vec.length; i++) {
            if (mbits[i]) {
                f.apply(memory, offset, i, vec[i]);
            }
        }
    }

    // Lane storer into a MemorySegment, addressed by a long byte offset.
    interface FStLongOp {
        void apply(MemorySegment memory, long offset, int i, byte a);
    }

    /*package-private*/
    @ForceInline
    final
    void stLongOp(MemorySegment memory, long offset,
                  FStLongOp f) {
        byte[] vec = vec();
        for (int i = 0; i < vec.length; i++) {
            f.apply(memory, offset, i, vec[i]);
        }
    }

    /*package-private*/
    @ForceInline
    final
    void stLongOp(MemorySegment memory, long offset,
                  VectorMask<Byte> m,
                  FStLongOp f) {
        // Masked segment store: lanes with a clear mask bit are not written.
        byte[] vec = vec();
        boolean[] mbits = ((AbstractMask<Byte>)m).getBits();
        for (int i = 0; i < vec.length; i++) {
            if (mbits[i]) {
                f.apply(memory, offset, i, vec[i]);
            }
        }
    }

    // Writes lane value e at lane i; the stride is 1 byte per lane.
    static void memorySegmentSet(MemorySegment ms, long o, int i, byte e) {
        ms.set(ELEMENT_LAYOUT, o + i * 1L, e);
    }
 458 
    // Binary test

    /*package-private*/
    // Per-lane comparison; cond selects the comparison kind.
    interface FBinTest {
        boolean apply(int cond, int i, byte a, byte b);
    }

    /*package-private*/
    // Scalar fallback for lane-wise comparisons: applies f to each lane
    // pair and collects the results into a mask.
    @ForceInline
    final
    AbstractMask<Byte> bTest(int cond,
                                  Vector<Byte> o,
                                  FBinTest f) {
        byte[] vec1 = vec();
        byte[] vec2 = ((ByteVector)o).vec();
        boolean[] bits = new boolean[length()];
        for (int i = 0; i < length(); i++){
            bits[i] = f.apply(cond, i, vec1[i], vec2[i]);
        }
        return maskFactory(bits);
    }
 480 
 481     /*package-private*/
 482     @ForceInline
 483     static byte rotateLeft(byte a, int n) {
 484         return (byte)(((((byte)a) & Byte.toUnsignedInt((byte)-1)) << (n & Byte.SIZE-1)) | ((((byte)a) & Byte.toUnsignedInt((byte)-1)) >>> (Byte.SIZE - (n & Byte.SIZE-1))));
 485     }
 486 
 487     /*package-private*/
 488     @ForceInline
 489     static byte rotateRight(byte a, int n) {
 490         return (byte)(((((byte)a) & Byte.toUnsignedInt((byte)-1)) >>> (n & Byte.SIZE-1)) | ((((byte)a) & Byte.toUnsignedInt((byte)-1)) << (Byte.SIZE - (n & Byte.SIZE-1))));
 491     }
 492 
    /*package-private*/
    @Override
    abstract ByteSpecies vspecies();

    /*package-private*/
    // Widens a byte lane value to the 64-bit bit carrier used by
    // species-generic code (implicit sign extension).
    @ForceInline
    static long toBits(byte e) {
        return  e;
    }

    /*package-private*/
    // Narrows a 64-bit bit carrier back to a byte lane value.
    @ForceInline
    static byte fromBits(long bits) {
        return ((byte)bits);
    }
 508 
 509     static ByteVector expandHelper(Vector<Byte> v, VectorMask<Byte> m) {
 510         VectorSpecies<Byte> vsp = m.vectorSpecies();
 511         ByteVector r  = (ByteVector) vsp.zero();
 512         ByteVector vi = (ByteVector) v;
 513         if (m.allTrue()) {
 514             return vi;
 515         }
 516         for (int i = 0, j = 0; i < vsp.length(); i++) {
 517             if (m.laneIsSet(i)) {
 518                 r = r.withLane(i, vi.lane(j++));
 519             }
 520         }
 521         return r;
 522     }
 523 
 524     static ByteVector compressHelper(Vector<Byte> v, VectorMask<Byte> m) {
 525         VectorSpecies<Byte> vsp = m.vectorSpecies();
 526         ByteVector r  = (ByteVector) vsp.zero();
 527         ByteVector vi = (ByteVector) v;
 528         if (m.allTrue()) {
 529             return vi;
 530         }
 531         for (int i = 0, j = 0; i < vsp.length(); i++) {
 532             if (m.laneIsSet(i)) {
 533                 r = r.withLane(j++, vi.lane(i));
 534             }
 535         }
 536         return r;
 537     }
 538 
 539     // Static factories (other than memory operations)
 540 
 541     // Note: A surprising behavior in javadoc
 542     // sometimes makes a lone /** {@inheritDoc} */
 543     // comment drop the method altogether,
    // apparently if the method mentions a
    // parameter or return type of Vector<Byte>
 546     // instead of Vector<E> as originally specified.
 547     // Adding an empty HTML fragment appears to
 548     // nudge javadoc into providing the desired
 549     // inherited documentation.  We use the HTML
 550     // comment <!--workaround--> for this.
 551 
    /**
     * Returns a vector of the given species
     * where all lane elements are set to
     * zero, the default primitive value.
     *
     * @param species species of the desired zero vector
     * @return a zero vector
     */
    @ForceInline
    public static ByteVector zero(VectorSpecies<Byte> species) {
        ByteSpecies vsp = (ByteSpecies) species;
        // Broadcast the bit pattern 0 through the JIT intrinsic; the
        // trailing lambda is the scalar fallback used when the intrinsic
        // is not available.
        return VectorSupport.fromBitsCoerced(vsp.vectorType(), byte.class, species.length(),
                                0, MODE_BROADCAST, vsp,
                                ((bits_, s_) -> s_.rvOp(i -> bits_)));
    }
 567 
    /**
     * Returns a vector of the same species as this one
     * where all lane elements are set to
     * the primitive value {@code e}.
     *
     * The contents of the current vector are discarded;
     * only the species is relevant to this operation.
     *
     * <p> This method returns the value of this expression:
     * {@code ByteVector.broadcast(this.species(), e)}.
     *
     * @apiNote
     * Unlike the similar method named {@code broadcast()}
     * in the supertype {@code Vector}, this method does not
     * need to validate its argument, and cannot throw
     * {@code IllegalArgumentException}.  This method is
     * therefore preferable to the supertype method.
     *
     * @param e the value to broadcast
     * @return a vector where all lane elements are set to
     *         the primitive value {@code e}
     * @see #broadcast(VectorSpecies,long)
     * @see Vector#broadcast(long)
     * @see VectorSpecies#broadcast(long)
     */
    public abstract ByteVector broadcast(byte e);

    /**
     * Returns a vector of the given species
     * where all lane elements are set to
     * the primitive value {@code e}.
     *
     * @param species species of the desired vector
     * @param e the value to broadcast
     * @return a vector where all lane elements are set to
     *         the primitive value {@code e}
     * @see #broadcast(long)
     * @see Vector#broadcast(long)
     * @see VectorSpecies#broadcast(long)
     */
    @ForceInline
    public static ByteVector broadcast(VectorSpecies<Byte> species, byte e) {
        ByteSpecies vsp = (ByteSpecies) species;
        return vsp.broadcast(e);
    }

    /*package-private*/
    // Shared implementation behind the abstract broadcast(byte) in the
    // shape-specific subclasses.
    @ForceInline
    final ByteVector broadcastTemplate(byte e) {
        ByteSpecies vsp = vspecies();
        return vsp.broadcast(e);
    }
 620 
    /**
     * {@inheritDoc} <!--workaround-->
     * @apiNote
     * When working with vector subtypes like {@code ByteVector},
     * {@linkplain #broadcast(byte) the more strongly typed method}
     * is typically selected.  It can be explicitly selected
     * using a cast: {@code v.broadcast((byte)e)}.
     * The two expressions will produce numerically identical results.
     */
    @Override
    public abstract ByteVector broadcast(long e);

    /**
     * Returns a vector of the given species
     * where all lane elements are set to
     * the primitive value {@code e}.
     *
     * The {@code long} value must be accurately representable
     * by the {@code ETYPE} of the vector species, so that
     * {@code e==(long)(ETYPE)e}.
     *
     * @param species species of the desired vector
     * @param e the value to broadcast
     * @return a vector where all lane elements are set to
     *         the primitive value {@code e}
     * @throws IllegalArgumentException
     *         if the given {@code long} value cannot
     *         be represented by the vector's {@code ETYPE}
     * @see #broadcast(VectorSpecies,byte)
     * @see VectorSpecies#checkValue(long)
     */
    @ForceInline
    public static ByteVector broadcast(VectorSpecies<Byte> species, long e) {
        ByteSpecies vsp = (ByteSpecies) species;
        return vsp.broadcast(e);
    }

    /*package-private*/
    // Shared implementation behind the abstract broadcast(long) in the
    // shape-specific subclasses; range checking per the javadoc above.
    @ForceInline
    final ByteVector broadcastTemplate(long e) {
        return vspecies().broadcast(e);
    }
 663 
    // Unary lanewise support

    /**
     * {@inheritDoc} <!--workaround-->
     */
    public abstract
    ByteVector lanewise(VectorOperators.Unary op);

    @ForceInline
    final
    ByteVector lanewiseTemplate(VectorOperators.Unary op) {
        if (opKind(op, VO_SPECIAL)) {
            // Operators without a direct intrinsic opcode are rewritten
            // in terms of supported operations.
            if (op == ZOMO) {
                // ZOMO: zero lanes stay zero, nonzero lanes become -1.
                return blend(broadcast(-1), compare(NE, 0));
            }
            if (op == NOT) {
                // Bitwise NOT is XOR with all-ones.
                return broadcast(-1).lanewise(XOR, this);
            }
        }
        int opc = opCode(op);
        return VectorSupport.unaryOp(
            opc, getClass(), null, byte.class, length(),
            this, null,
            UN_IMPL.find(op, opc, ByteVector::unaryOperations));
    }
 689 
    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    public abstract
    ByteVector lanewise(VectorOperators.Unary op,
                                  VectorMask<Byte> m);
    @ForceInline
    final
    ByteVector lanewiseTemplate(VectorOperators.Unary op,
                                          Class<? extends VectorMask<Byte>> maskClass,
                                          VectorMask<Byte> m) {
        m.check(maskClass, this);  // mask must match the declared class and species
        if (opKind(op, VO_SPECIAL)) {
            if (op == ZOMO) {
                // ZOMO: zero lanes stay zero, nonzero lanes become -1.
                return blend(broadcast(-1), compare(NE, 0, m));
            }
            if (op == NOT) {
                // Bitwise NOT is XOR with all-ones.
                return lanewise(XOR, broadcast(-1), m);
            }
        }
        int opc = opCode(op);
        return VectorSupport.unaryOp(
            opc, getClass(), maskClass, byte.class, length(),
            this, m,
            UN_IMPL.find(op, opc, ByteVector::unaryOperations));
    }

    // Cache of scalar fallback implementations, one slot per unary opcode.
    private static final
    ImplCache<Unary, UnaryOperation<ByteVector, VectorMask<Byte>>>
        UN_IMPL = new ImplCache<>(Unary.class, ByteVector.class);

    // Scalar fallback bodies for the unary intrinsic, selected by opcode.
    private static UnaryOperation<ByteVector, VectorMask<Byte>> unaryOperations(int opc_) {
        switch (opc_) {
            case VECTOR_OP_NEG: return (v0, m) ->
                    v0.uOp(m, (i, a) -> (byte) -a);
            case VECTOR_OP_ABS: return (v0, m) ->
                    v0.uOp(m, (i, a) -> (byte) Math.abs(a));
            case VECTOR_OP_BIT_COUNT: return (v0, m) ->
                    v0.uOp(m, (i, a) -> (byte) bitCount(a));
            case VECTOR_OP_TZ_COUNT: return (v0, m) ->
                    v0.uOp(m, (i, a) -> (byte) numberOfTrailingZeros(a));
            case VECTOR_OP_LZ_COUNT: return (v0, m) ->
                    v0.uOp(m, (i, a) -> (byte) numberOfLeadingZeros(a));
            case VECTOR_OP_REVERSE: return (v0, m) ->
                    v0.uOp(m, (i, a) -> reverse(a));
            case VECTOR_OP_REVERSE_BYTES: return (v0, m) ->
                    // Byte-swap of a one-byte lane is the identity.
                    v0.uOp(m, (i, a) -> a);
            default: return null;
        }
    }
 741 
    // Binary lanewise support

    /**
     * {@inheritDoc} <!--workaround-->
     * @see #lanewise(VectorOperators.Binary,byte)
     * @see #lanewise(VectorOperators.Binary,byte,VectorMask)
     */
    @Override
    public abstract
    ByteVector lanewise(VectorOperators.Binary op,
                                  Vector<Byte> v);
    @ForceInline
    final
    ByteVector lanewiseTemplate(VectorOperators.Binary op,
                                          Vector<Byte> v) {
        ByteVector that = (ByteVector) v;
        that.check(this);

        if (opKind(op, VO_SPECIAL  | VO_SHIFT)) {
            if (op == FIRST_NONZERO) {
                // FIXME: Support this in the JIT.
                // Rewrite as OR after zeroing that's lanes where this is nonzero.
                VectorMask<Byte> thisNZ
                    = this.viewAsIntegralLanes().compare(NE, (byte) 0);
                that = that.blend((byte) 0, thisNZ.cast(vspecies()));
                op = OR_UNCHECKED;
            }
            if (opKind(op, VO_SHIFT)) {
                // As per shift specification for Java, mask the shift count.
                // This allows the JIT to ignore some ISA details.
                that = that.lanewise(AND, SHIFT_MASK);
            }
            if (op == AND_NOT) {
                // FIXME: Support this in the JIT.
                that = that.lanewise(NOT);
                op = AND;
            } else if (op == DIV) {
                // Division throws eagerly if any divisor lane is zero.
                VectorMask<Byte> eqz = that.eq((byte) 0);
                if (eqz.anyTrue()) {
                    throw that.divZeroException();
                }
            }
        }

        int opc = opCode(op);
        return VectorSupport.binaryOp(
            opc, getClass(), null, byte.class, length(),
            this, that, null,
            BIN_IMPL.find(op, opc, ByteVector::binaryOperations));
    }
 791 
    /**
     * {@inheritDoc} <!--workaround-->
     * @see #lanewise(VectorOperators.Binary,byte,VectorMask)
     */
    @Override
    public abstract
    ByteVector lanewise(VectorOperators.Binary op,
                                  Vector<Byte> v,
                                  VectorMask<Byte> m);
    @ForceInline
    final
    ByteVector lanewiseTemplate(VectorOperators.Binary op,
                                          Class<? extends VectorMask<Byte>> maskClass,
                                          Vector<Byte> v, VectorMask<Byte> m) {
        ByteVector that = (ByteVector) v;
        that.check(this);
        m.check(maskClass, this);  // mask must match the declared class and species

        if (opKind(op, VO_SPECIAL  | VO_SHIFT)) {
            if (op == FIRST_NONZERO) {
                // FIXME: Support this in the JIT.
                // Rewrite as OR after zeroing that's lanes where this is nonzero.
                VectorMask<Byte> thisNZ
                    = this.viewAsIntegralLanes().compare(NE, (byte) 0);
                that = that.blend((byte) 0, thisNZ.cast(vspecies()));
                op = OR_UNCHECKED;
            }
            if (opKind(op, VO_SHIFT)) {
                // As per shift specification for Java, mask the shift count.
                // This allows the JIT to ignore some ISA details.
                that = that.lanewise(AND, SHIFT_MASK);
            }
            if (op == AND_NOT) {
                // FIXME: Support this in the JIT.
                that = that.lanewise(NOT);
                op = AND;
            } else if (op == DIV) {
                // Only zero divisors in SET lanes raise an exception.
                VectorMask<Byte> eqz = that.eq((byte)0);
                if (eqz.and(m).anyTrue()) {
                    throw that.divZeroException();
                }
                // suppress div/0 exceptions in unset lanes
                that = that.lanewise(NOT, eqz);
            }
        }

        int opc = opCode(op);
        return VectorSupport.binaryOp(
            opc, getClass(), maskClass, byte.class, length(),
            this, that, m,
            BIN_IMPL.find(op, opc, ByteVector::binaryOperations));
    }
 843 
    // Cache of scalar fallback implementations, one slot per binary opcode.
    private static final
    ImplCache<Binary, BinaryOperation<ByteVector, VectorMask<Byte>>>
        BIN_IMPL = new ImplCache<>(Binary.class, ByteVector.class);

    // Scalar fallback bodies for the binary intrinsic, selected by opcode.
    private static BinaryOperation<ByteVector, VectorMask<Byte>> binaryOperations(int opc_) {
        switch (opc_) {
            case VECTOR_OP_ADD: return (v0, v1, vm) ->
                    v0.bOp(v1, vm, (i, a, b) -> (byte)(a + b));
            case VECTOR_OP_SUB: return (v0, v1, vm) ->
                    v0.bOp(v1, vm, (i, a, b) -> (byte)(a - b));
            case VECTOR_OP_MUL: return (v0, v1, vm) ->
                    v0.bOp(v1, vm, (i, a, b) -> (byte)(a * b));
            case VECTOR_OP_DIV: return (v0, v1, vm) ->
                    v0.bOp(v1, vm, (i, a, b) -> (byte)(a / b));
            case VECTOR_OP_MAX: return (v0, v1, vm) ->
                    v0.bOp(v1, vm, (i, a, b) -> (byte)Math.max(a, b));
            case VECTOR_OP_MIN: return (v0, v1, vm) ->
                    v0.bOp(v1, vm, (i, a, b) -> (byte)Math.min(a, b));
            case VECTOR_OP_AND: return (v0, v1, vm) ->
                    v0.bOp(v1, vm, (i, a, b) -> (byte)(a & b));
            case VECTOR_OP_OR: return (v0, v1, vm) ->
                    v0.bOp(v1, vm, (i, a, b) -> (byte)(a | b));
            case VECTOR_OP_XOR: return (v0, v1, vm) ->
                    v0.bOp(v1, vm, (i, a, b) -> (byte)(a ^ b));
            case VECTOR_OP_LSHIFT: return (v0, v1, vm) ->
                    v0.bOp(v1, vm, (i, a, n) -> (byte)(a << n));
            case VECTOR_OP_RSHIFT: return (v0, v1, vm) ->
                    v0.bOp(v1, vm, (i, a, n) -> (byte)(a >> n));
            case VECTOR_OP_URSHIFT: return (v0, v1, vm) ->
                    // Mask first so >>> acts on the zero-extended 8-bit value.
                    v0.bOp(v1, vm, (i, a, n) -> (byte)((a & LSHR_SETUP_MASK) >>> n));
            case VECTOR_OP_LROTATE: return (v0, v1, vm) ->
                    v0.bOp(v1, vm, (i, a, n) -> rotateLeft(a, (int)n));
            case VECTOR_OP_RROTATE: return (v0, v1, vm) ->
                    v0.bOp(v1, vm, (i, a, n) -> rotateRight(a, (int)n));
            default: return null;
        }
    }
 881 
 882     // FIXME: Maybe all of the public final methods in this file (the
 883     // simple ones that just call lanewise) should be pushed down to
 884     // the X-VectorBits template.  They can't optimize properly at
 885     // this level, and must rely on inlining.  Does it work?
 886     // (If it works, of course keep the code here.)
 887 
 888     /**
 889      * Combines the lane values of this vector
 890      * with the value of a broadcast scalar.
 891      *
 892      * This is a lane-wise binary operation which applies
 893      * the selected operation to each lane.
 894      * The return value will be equal to this expression:
 895      * {@code this.lanewise(op, this.broadcast(e))}.
 896      *
 897      * @param op the operation used to process lane values
 898      * @param e the input scalar
 899      * @return the result of applying the operation lane-wise
 900      *         to the two input vectors
 901      * @throws UnsupportedOperationException if this vector does
 902      *         not support the requested operation
 903      * @see #lanewise(VectorOperators.Binary,Vector)
 904      * @see #lanewise(VectorOperators.Binary,byte,VectorMask)
 905      */
 906     @ForceInline
 907     public final
 908     ByteVector lanewise(VectorOperators.Binary op,
 909                                   byte e) {
 910         if (opKind(op, VO_SHIFT) && (byte)(int)e == e) {
 911             return lanewiseShift(op, (int) e);
 912         }
 913         if (op == AND_NOT) {
 914             op = AND; e = (byte) ~e;
 915         }
 916         return lanewise(op, broadcast(e));
 917     }
 918 
 919     /**
 920      * Combines the lane values of this vector
 921      * with the value of a broadcast scalar,
 922      * with selection of lane elements controlled by a mask.
 923      *
 924      * This is a masked lane-wise binary operation which applies
 925      * the selected operation to each lane.
 926      * The return value will be equal to this expression:
 927      * {@code this.lanewise(op, this.broadcast(e), m)}.
 928      *
 929      * @param op the operation used to process lane values
 930      * @param e the input scalar
 931      * @param m the mask controlling lane selection
 932      * @return the result of applying the operation lane-wise
 933      *         to the input vector and the scalar
 934      * @throws UnsupportedOperationException if this vector does
 935      *         not support the requested operation
 936      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
 937      * @see #lanewise(VectorOperators.Binary,byte)
 938      */
 939     @ForceInline
 940     public final
 941     ByteVector lanewise(VectorOperators.Binary op,
 942                                   byte e,
 943                                   VectorMask<Byte> m) {
 944         if (opKind(op, VO_SHIFT) && (byte)(int)e == e) {
 945             return lanewiseShift(op, (int) e, m);
 946         }
 947         if (op == AND_NOT) {
 948             op = AND; e = (byte) ~e;
 949         }
 950         return lanewise(op, broadcast(e), m);
 951     }
 952 
 953     /**
 954      * {@inheritDoc} <!--workaround-->
 955      * @apiNote
 956      * When working with vector subtypes like {@code ByteVector},
 957      * {@linkplain #lanewise(VectorOperators.Binary,byte)
 958      * the more strongly typed method}
 959      * is typically selected.  It can be explicitly selected
 960      * using a cast: {@code v.lanewise(op,(byte)e)}.
 961      * The two expressions will produce numerically identical results.
 962      */
 963     @ForceInline
 964     public final
 965     ByteVector lanewise(VectorOperators.Binary op,
 966                                   long e) {
 967         byte e1 = (byte) e;
 968         if ((long)e1 != e
 969             // allow shift ops to clip down their int parameters
 970             && !(opKind(op, VO_SHIFT) && (int)e1 == e)) {
 971             vspecies().checkValue(e);  // for exception
 972         }
 973         return lanewise(op, e1);
 974     }
 975 
 976     /**
 977      * {@inheritDoc} <!--workaround-->
 978      * @apiNote
 979      * When working with vector subtypes like {@code ByteVector},
 980      * {@linkplain #lanewise(VectorOperators.Binary,byte,VectorMask)
 981      * the more strongly typed method}
 982      * is typically selected.  It can be explicitly selected
 983      * using a cast: {@code v.lanewise(op,(byte)e,m)}.
 984      * The two expressions will produce numerically identical results.
 985      */
 986     @ForceInline
 987     public final
 988     ByteVector lanewise(VectorOperators.Binary op,
 989                                   long e, VectorMask<Byte> m) {
 990         byte e1 = (byte) e;
 991         if ((long)e1 != e
 992             // allow shift ops to clip down their int parameters
 993             && !(opKind(op, VO_SHIFT) && (int)e1 == e)) {
 994             vspecies().checkValue(e);  // for exception
 995         }
 996         return lanewise(op, e1, m);
 997     }
 998 
    /*package-private*/
    // Specialized scalar-shift entry point; op must be a VO_SHIFT op
    // (asserted in lanewiseShiftTemplate, which implementations delegate to).
    abstract ByteVector
    lanewiseShift(VectorOperators.Binary op, int e);
1002 
1003     /*package-private*/
1004     @ForceInline
1005     final ByteVector
1006     lanewiseShiftTemplate(VectorOperators.Binary op, int e) {
1007         // Special handling for these.  FIXME: Refactor?
1008         assert(opKind(op, VO_SHIFT));
1009         // As per shift specification for Java, mask the shift count.
1010         e &= SHIFT_MASK;
1011         int opc = opCode(op);
1012         return VectorSupport.broadcastInt(
1013             opc, getClass(), null, byte.class, length(),
1014             this, e, null,
1015             BIN_INT_IMPL.find(op, opc, ByteVector::broadcastIntOperations));
1016     }
1017 
    /*package-private*/
    // Masked variant of the scalar-shift entry point; op must be a
    // VO_SHIFT op (asserted in the masked lanewiseShiftTemplate).
    abstract ByteVector
    lanewiseShift(VectorOperators.Binary op, int e, VectorMask<Byte> m);
1021 
1022     /*package-private*/
1023     @ForceInline
1024     final ByteVector
1025     lanewiseShiftTemplate(VectorOperators.Binary op,
1026                           Class<? extends VectorMask<Byte>> maskClass,
1027                           int e, VectorMask<Byte> m) {
1028         m.check(maskClass, this);
1029         assert(opKind(op, VO_SHIFT));
1030         // As per shift specification for Java, mask the shift count.
1031         e &= SHIFT_MASK;
1032         int opc = opCode(op);
1033         return VectorSupport.broadcastInt(
1034             opc, getClass(), maskClass, byte.class, length(),
1035             this, e, m,
1036             BIN_INT_IMPL.find(op, opc, ByteVector::broadcastIntOperations));
1037     }
1038 
    // Cache of scalar fallback implementations for vector-by-scalar
    // shift/rotate ops; entries are produced on demand by
    // broadcastIntOperations(int).
    private static final
    ImplCache<Binary,VectorBroadcastIntOp<ByteVector, VectorMask<Byte>>> BIN_INT_IMPL
        = new ImplCache<>(Binary.class, ByteVector.class);
1042 
1043     private static VectorBroadcastIntOp<ByteVector, VectorMask<Byte>> broadcastIntOperations(int opc_) {
1044         switch (opc_) {
1045             case VECTOR_OP_LSHIFT: return (v, n, m) ->
1046                     v.uOp(m, (i, a) -> (byte)(a << n));
1047             case VECTOR_OP_RSHIFT: return (v, n, m) ->
1048                     v.uOp(m, (i, a) -> (byte)(a >> n));
1049             case VECTOR_OP_URSHIFT: return (v, n, m) ->
1050                     v.uOp(m, (i, a) -> (byte)((a & LSHR_SETUP_MASK) >>> n));
1051             case VECTOR_OP_LROTATE: return (v, n, m) ->
1052                     v.uOp(m, (i, a) -> rotateLeft(a, (int)n));
1053             case VECTOR_OP_RROTATE: return (v, n, m) ->
1054                     v.uOp(m, (i, a) -> rotateRight(a, (int)n));
1055             default: return null;
1056         }
1057     }
1058 
    // As per shift specification for Java, mask the shift count.
    // We mask 0x3F (long), 0x1F (int), 0x0F (short), 0x07 (byte).
    // The latter two maskings go beyond the JLS, but seem reasonable
    // since our lane types are first-class types, not just dressed
    // up ints.
    private static final int SHIFT_MASK = (Byte.SIZE - 1);
    // Also simulate >>> on sub-word variables with a mask: the operand
    // is masked to its unsigned byte range before the int-level shift,
    // so no sign-extended high bits leak into the result.
    private static final int LSHR_SETUP_MASK = ((1 << Byte.SIZE) - 1);
1067 
1068     // Ternary lanewise support
1069 
1070     // Ternary operators come in eight variations:
1071     //   lanewise(op, [broadcast(e1)|v1], [broadcast(e2)|v2])
1072     //   lanewise(op, [broadcast(e1)|v1], [broadcast(e2)|v2], mask)
1073 
1074     // It is annoying to support all of these variations of masking
1075     // and broadcast, but it would be more surprising not to continue
1076     // the obvious pattern started by unary and binary.
1077 
1078    /**
1079      * {@inheritDoc} <!--workaround-->
1080      * @see #lanewise(VectorOperators.Ternary,byte,byte,VectorMask)
1081      * @see #lanewise(VectorOperators.Ternary,Vector,byte,VectorMask)
1082      * @see #lanewise(VectorOperators.Ternary,byte,Vector,VectorMask)
1083      * @see #lanewise(VectorOperators.Ternary,byte,byte)
1084      * @see #lanewise(VectorOperators.Ternary,Vector,byte)
1085      * @see #lanewise(VectorOperators.Ternary,byte,Vector)
1086      */
1087     @Override
1088     public abstract
1089     ByteVector lanewise(VectorOperators.Ternary op,
1090                                                   Vector<Byte> v1,
1091                                                   Vector<Byte> v2);
1092     @ForceInline
1093     final
1094     ByteVector lanewiseTemplate(VectorOperators.Ternary op,
1095                                           Vector<Byte> v1,
1096                                           Vector<Byte> v2) {
1097         ByteVector that = (ByteVector) v1;
1098         ByteVector tother = (ByteVector) v2;
1099         // It's a word: https://www.dictionary.com/browse/tother
1100         // See also Chapter 11 of Dickens, Our Mutual Friend:
1101         // "Totherest Governor," replied Mr Riderhood...
1102         that.check(this);
1103         tother.check(this);
1104         if (op == BITWISE_BLEND) {
1105             // FIXME: Support this in the JIT.
1106             that = this.lanewise(XOR, that).lanewise(AND, tother);
1107             return this.lanewise(XOR, that);
1108         }
1109         int opc = opCode(op);
1110         return VectorSupport.ternaryOp(
1111             opc, getClass(), null, byte.class, length(),
1112             this, that, tother, null,
1113             TERN_IMPL.find(op, opc, ByteVector::ternaryOperations));
1114     }
1115 
1116     /**
1117      * {@inheritDoc} <!--workaround-->
1118      * @see #lanewise(VectorOperators.Ternary,byte,byte,VectorMask)
1119      * @see #lanewise(VectorOperators.Ternary,Vector,byte,VectorMask)
1120      * @see #lanewise(VectorOperators.Ternary,byte,Vector,VectorMask)
1121      */
1122     @Override
1123     public abstract
1124     ByteVector lanewise(VectorOperators.Ternary op,
1125                                   Vector<Byte> v1,
1126                                   Vector<Byte> v2,
1127                                   VectorMask<Byte> m);
1128     @ForceInline
1129     final
1130     ByteVector lanewiseTemplate(VectorOperators.Ternary op,
1131                                           Class<? extends VectorMask<Byte>> maskClass,
1132                                           Vector<Byte> v1,
1133                                           Vector<Byte> v2,
1134                                           VectorMask<Byte> m) {
1135         ByteVector that = (ByteVector) v1;
1136         ByteVector tother = (ByteVector) v2;
1137         // It's a word: https://www.dictionary.com/browse/tother
1138         // See also Chapter 11 of Dickens, Our Mutual Friend:
1139         // "Totherest Governor," replied Mr Riderhood...
1140         that.check(this);
1141         tother.check(this);
1142         m.check(maskClass, this);
1143 
1144         if (op == BITWISE_BLEND) {
1145             // FIXME: Support this in the JIT.
1146             that = this.lanewise(XOR, that).lanewise(AND, tother);
1147             return this.lanewise(XOR, that, m);
1148         }
1149         int opc = opCode(op);
1150         return VectorSupport.ternaryOp(
1151             opc, getClass(), maskClass, byte.class, length(),
1152             this, that, tother, m,
1153             TERN_IMPL.find(op, opc, ByteVector::ternaryOperations));
1154     }
1155 
    // Cache of scalar fallback implementations for ternary ops;
    // entries come from ternaryOperations(int), which currently
    // supplies none for this lane type.
    private static final
    ImplCache<Ternary, TernaryOperation<ByteVector, VectorMask<Byte>>>
        TERN_IMPL = new ImplCache<>(Ternary.class, ByteVector.class);
1159 
1160     private static TernaryOperation<ByteVector, VectorMask<Byte>> ternaryOperations(int opc_) {
1161         switch (opc_) {
1162             default: return null;
1163         }
1164     }
1165 
1166     /**
1167      * Combines the lane values of this vector
1168      * with the values of two broadcast scalars.
1169      *
1170      * This is a lane-wise ternary operation which applies
1171      * the selected operation to each lane.
1172      * The return value will be equal to this expression:
1173      * {@code this.lanewise(op, this.broadcast(e1), this.broadcast(e2))}.
1174      *
1175      * @param op the operation used to combine lane values
1176      * @param e1 the first input scalar
1177      * @param e2 the second input scalar
1178      * @return the result of applying the operation lane-wise
1179      *         to the input vector and the scalars
1180      * @throws UnsupportedOperationException if this vector does
1181      *         not support the requested operation
1182      * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
1183      * @see #lanewise(VectorOperators.Ternary,byte,byte,VectorMask)
1184      */
1185     @ForceInline
1186     public final
1187     ByteVector lanewise(VectorOperators.Ternary op, //(op,e1,e2)
1188                                   byte e1,
1189                                   byte e2) {
1190         return lanewise(op, broadcast(e1), broadcast(e2));
1191     }
1192 
1193     /**
1194      * Combines the lane values of this vector
1195      * with the values of two broadcast scalars,
1196      * with selection of lane elements controlled by a mask.
1197      *
1198      * This is a masked lane-wise ternary operation which applies
1199      * the selected operation to each lane.
1200      * The return value will be equal to this expression:
1201      * {@code this.lanewise(op, this.broadcast(e1), this.broadcast(e2), m)}.
1202      *
1203      * @param op the operation used to combine lane values
1204      * @param e1 the first input scalar
1205      * @param e2 the second input scalar
1206      * @param m the mask controlling lane selection
1207      * @return the result of applying the operation lane-wise
1208      *         to the input vector and the scalars
1209      * @throws UnsupportedOperationException if this vector does
1210      *         not support the requested operation
1211      * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
1212      * @see #lanewise(VectorOperators.Ternary,byte,byte)
1213      */
1214     @ForceInline
1215     public final
1216     ByteVector lanewise(VectorOperators.Ternary op, //(op,e1,e2,m)
1217                                   byte e1,
1218                                   byte e2,
1219                                   VectorMask<Byte> m) {
1220         return lanewise(op, broadcast(e1), broadcast(e2), m);
1221     }
1222 
1223     /**
1224      * Combines the lane values of this vector
1225      * with the values of another vector and a broadcast scalar.
1226      *
1227      * This is a lane-wise ternary operation which applies
1228      * the selected operation to each lane.
1229      * The return value will be equal to this expression:
1230      * {@code this.lanewise(op, v1, this.broadcast(e2))}.
1231      *
1232      * @param op the operation used to combine lane values
1233      * @param v1 the other input vector
1234      * @param e2 the input scalar
1235      * @return the result of applying the operation lane-wise
1236      *         to the input vectors and the scalar
1237      * @throws UnsupportedOperationException if this vector does
1238      *         not support the requested operation
1239      * @see #lanewise(VectorOperators.Ternary,byte,byte)
1240      * @see #lanewise(VectorOperators.Ternary,Vector,byte,VectorMask)
1241      */
1242     @ForceInline
1243     public final
1244     ByteVector lanewise(VectorOperators.Ternary op, //(op,v1,e2)
1245                                   Vector<Byte> v1,
1246                                   byte e2) {
1247         return lanewise(op, v1, broadcast(e2));
1248     }
1249 
1250     /**
1251      * Combines the lane values of this vector
1252      * with the values of another vector and a broadcast scalar,
1253      * with selection of lane elements controlled by a mask.
1254      *
1255      * This is a masked lane-wise ternary operation which applies
1256      * the selected operation to each lane.
1257      * The return value will be equal to this expression:
1258      * {@code this.lanewise(op, v1, this.broadcast(e2), m)}.
1259      *
1260      * @param op the operation used to combine lane values
1261      * @param v1 the other input vector
1262      * @param e2 the input scalar
1263      * @param m the mask controlling lane selection
1264      * @return the result of applying the operation lane-wise
1265      *         to the input vectors and the scalar
1266      * @throws UnsupportedOperationException if this vector does
1267      *         not support the requested operation
1268      * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
1269      * @see #lanewise(VectorOperators.Ternary,byte,byte,VectorMask)
1270      * @see #lanewise(VectorOperators.Ternary,Vector,byte)
1271      */
1272     @ForceInline
1273     public final
1274     ByteVector lanewise(VectorOperators.Ternary op, //(op,v1,e2,m)
1275                                   Vector<Byte> v1,
1276                                   byte e2,
1277                                   VectorMask<Byte> m) {
1278         return lanewise(op, v1, broadcast(e2), m);
1279     }
1280 
1281     /**
1282      * Combines the lane values of this vector
1283      * with the values of another vector and a broadcast scalar.
1284      *
1285      * This is a lane-wise ternary operation which applies
1286      * the selected operation to each lane.
1287      * The return value will be equal to this expression:
1288      * {@code this.lanewise(op, this.broadcast(e1), v2)}.
1289      *
1290      * @param op the operation used to combine lane values
1291      * @param e1 the input scalar
1292      * @param v2 the other input vector
1293      * @return the result of applying the operation lane-wise
1294      *         to the input vectors and the scalar
1295      * @throws UnsupportedOperationException if this vector does
1296      *         not support the requested operation
1297      * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
1298      * @see #lanewise(VectorOperators.Ternary,byte,Vector,VectorMask)
1299      */
1300     @ForceInline
1301     public final
1302     ByteVector lanewise(VectorOperators.Ternary op, //(op,e1,v2)
1303                                   byte e1,
1304                                   Vector<Byte> v2) {
1305         return lanewise(op, broadcast(e1), v2);
1306     }
1307 
1308     /**
1309      * Combines the lane values of this vector
1310      * with the values of another vector and a broadcast scalar,
1311      * with selection of lane elements controlled by a mask.
1312      *
1313      * This is a masked lane-wise ternary operation which applies
1314      * the selected operation to each lane.
1315      * The return value will be equal to this expression:
1316      * {@code this.lanewise(op, this.broadcast(e1), v2, m)}.
1317      *
1318      * @param op the operation used to combine lane values
1319      * @param e1 the input scalar
1320      * @param v2 the other input vector
1321      * @param m the mask controlling lane selection
1322      * @return the result of applying the operation lane-wise
1323      *         to the input vectors and the scalar
1324      * @throws UnsupportedOperationException if this vector does
1325      *         not support the requested operation
1326      * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
1327      * @see #lanewise(VectorOperators.Ternary,byte,Vector)
1328      */
1329     @ForceInline
1330     public final
1331     ByteVector lanewise(VectorOperators.Ternary op, //(op,e1,v2,m)
1332                                   byte e1,
1333                                   Vector<Byte> v2,
1334                                   VectorMask<Byte> m) {
1335         return lanewise(op, broadcast(e1), v2, m);
1336     }
1337 
1338     // (Thus endeth the Great and Mighty Ternary Ogdoad.)
1339     // https://en.wikipedia.org/wiki/Ogdoad
1340 
1341     /// FULL-SERVICE BINARY METHODS: ADD, SUB, MUL, DIV
1342     //
1343     // These include masked and non-masked versions.
1344     // This subclass adds broadcast (masked or not).
1345 
1346     /**
1347      * {@inheritDoc} <!--workaround-->
1348      * @see #add(byte)
1349      */
1350     @Override
1351     @ForceInline
1352     public final ByteVector add(Vector<Byte> v) {
1353         return lanewise(ADD, v);
1354     }
1355 
1356     /**
1357      * Adds this vector to the broadcast of an input scalar.
1358      *
1359      * This is a lane-wise binary operation which applies
1360      * the primitive addition operation ({@code +}) to each lane.
1361      *
1362      * This method is also equivalent to the expression
1363      * {@link #lanewise(VectorOperators.Binary,byte)
1364      *    lanewise}{@code (}{@link VectorOperators#ADD
1365      *    ADD}{@code , e)}.
1366      *
1367      * @param e the input scalar
1368      * @return the result of adding each lane of this vector to the scalar
1369      * @see #add(Vector)
1370      * @see #broadcast(byte)
1371      * @see #add(byte,VectorMask)
1372      * @see VectorOperators#ADD
1373      * @see #lanewise(VectorOperators.Binary,Vector)
1374      * @see #lanewise(VectorOperators.Binary,byte)
1375      */
1376     @ForceInline
1377     public final
1378     ByteVector add(byte e) {
1379         return lanewise(ADD, e);
1380     }
1381 
1382     /**
1383      * {@inheritDoc} <!--workaround-->
1384      * @see #add(byte,VectorMask)
1385      */
1386     @Override
1387     @ForceInline
1388     public final ByteVector add(Vector<Byte> v,
1389                                           VectorMask<Byte> m) {
1390         return lanewise(ADD, v, m);
1391     }
1392 
1393     /**
1394      * Adds this vector to the broadcast of an input scalar,
1395      * selecting lane elements controlled by a mask.
1396      *
1397      * This is a masked lane-wise binary operation which applies
1398      * the primitive addition operation ({@code +}) to each lane.
1399      *
1400      * This method is also equivalent to the expression
1401      * {@link #lanewise(VectorOperators.Binary,byte,VectorMask)
1402      *    lanewise}{@code (}{@link VectorOperators#ADD
1403      *    ADD}{@code , s, m)}.
1404      *
1405      * @param e the input scalar
1406      * @param m the mask controlling lane selection
1407      * @return the result of adding each lane of this vector to the scalar
1408      * @see #add(Vector,VectorMask)
1409      * @see #broadcast(byte)
1410      * @see #add(byte)
1411      * @see VectorOperators#ADD
1412      * @see #lanewise(VectorOperators.Binary,Vector)
1413      * @see #lanewise(VectorOperators.Binary,byte)
1414      */
1415     @ForceInline
1416     public final ByteVector add(byte e,
1417                                           VectorMask<Byte> m) {
1418         return lanewise(ADD, e, m);
1419     }
1420 
1421     /**
1422      * {@inheritDoc} <!--workaround-->
1423      * @see #sub(byte)
1424      */
1425     @Override
1426     @ForceInline
1427     public final ByteVector sub(Vector<Byte> v) {
1428         return lanewise(SUB, v);
1429     }
1430 
1431     /**
1432      * Subtracts an input scalar from this vector.
1433      *
1434      * This is a masked lane-wise binary operation which applies
1435      * the primitive subtraction operation ({@code -}) to each lane.
1436      *
1437      * This method is also equivalent to the expression
1438      * {@link #lanewise(VectorOperators.Binary,byte)
1439      *    lanewise}{@code (}{@link VectorOperators#SUB
1440      *    SUB}{@code , e)}.
1441      *
1442      * @param e the input scalar
1443      * @return the result of subtracting the scalar from each lane of this vector
1444      * @see #sub(Vector)
1445      * @see #broadcast(byte)
1446      * @see #sub(byte,VectorMask)
1447      * @see VectorOperators#SUB
1448      * @see #lanewise(VectorOperators.Binary,Vector)
1449      * @see #lanewise(VectorOperators.Binary,byte)
1450      */
1451     @ForceInline
1452     public final ByteVector sub(byte e) {
1453         return lanewise(SUB, e);
1454     }
1455 
1456     /**
1457      * {@inheritDoc} <!--workaround-->
1458      * @see #sub(byte,VectorMask)
1459      */
1460     @Override
1461     @ForceInline
1462     public final ByteVector sub(Vector<Byte> v,
1463                                           VectorMask<Byte> m) {
1464         return lanewise(SUB, v, m);
1465     }
1466 
1467     /**
1468      * Subtracts an input scalar from this vector
1469      * under the control of a mask.
1470      *
1471      * This is a masked lane-wise binary operation which applies
1472      * the primitive subtraction operation ({@code -}) to each lane.
1473      *
1474      * This method is also equivalent to the expression
1475      * {@link #lanewise(VectorOperators.Binary,byte,VectorMask)
1476      *    lanewise}{@code (}{@link VectorOperators#SUB
1477      *    SUB}{@code , s, m)}.
1478      *
1479      * @param e the input scalar
1480      * @param m the mask controlling lane selection
1481      * @return the result of subtracting the scalar from each lane of this vector
1482      * @see #sub(Vector,VectorMask)
1483      * @see #broadcast(byte)
1484      * @see #sub(byte)
1485      * @see VectorOperators#SUB
1486      * @see #lanewise(VectorOperators.Binary,Vector)
1487      * @see #lanewise(VectorOperators.Binary,byte)
1488      */
1489     @ForceInline
1490     public final ByteVector sub(byte e,
1491                                           VectorMask<Byte> m) {
1492         return lanewise(SUB, e, m);
1493     }
1494 
1495     /**
1496      * {@inheritDoc} <!--workaround-->
1497      * @see #mul(byte)
1498      */
1499     @Override
1500     @ForceInline
1501     public final ByteVector mul(Vector<Byte> v) {
1502         return lanewise(MUL, v);
1503     }
1504 
1505     /**
1506      * Multiplies this vector by the broadcast of an input scalar.
1507      *
1508      * This is a lane-wise binary operation which applies
1509      * the primitive multiplication operation ({@code *}) to each lane.
1510      *
1511      * This method is also equivalent to the expression
1512      * {@link #lanewise(VectorOperators.Binary,byte)
1513      *    lanewise}{@code (}{@link VectorOperators#MUL
1514      *    MUL}{@code , e)}.
1515      *
1516      * @param e the input scalar
1517      * @return the result of multiplying this vector by the given scalar
1518      * @see #mul(Vector)
1519      * @see #broadcast(byte)
1520      * @see #mul(byte,VectorMask)
1521      * @see VectorOperators#MUL
1522      * @see #lanewise(VectorOperators.Binary,Vector)
1523      * @see #lanewise(VectorOperators.Binary,byte)
1524      */
1525     @ForceInline
1526     public final ByteVector mul(byte e) {
1527         return lanewise(MUL, e);
1528     }
1529 
1530     /**
1531      * {@inheritDoc} <!--workaround-->
1532      * @see #mul(byte,VectorMask)
1533      */
1534     @Override
1535     @ForceInline
1536     public final ByteVector mul(Vector<Byte> v,
1537                                           VectorMask<Byte> m) {
1538         return lanewise(MUL, v, m);
1539     }
1540 
1541     /**
1542      * Multiplies this vector by the broadcast of an input scalar,
1543      * selecting lane elements controlled by a mask.
1544      *
1545      * This is a masked lane-wise binary operation which applies
1546      * the primitive multiplication operation ({@code *}) to each lane.
1547      *
1548      * This method is also equivalent to the expression
1549      * {@link #lanewise(VectorOperators.Binary,byte,VectorMask)
1550      *    lanewise}{@code (}{@link VectorOperators#MUL
1551      *    MUL}{@code , s, m)}.
1552      *
1553      * @param e the input scalar
1554      * @param m the mask controlling lane selection
1555      * @return the result of muling each lane of this vector to the scalar
1556      * @see #mul(Vector,VectorMask)
1557      * @see #broadcast(byte)
1558      * @see #mul(byte)
1559      * @see VectorOperators#MUL
1560      * @see #lanewise(VectorOperators.Binary,Vector)
1561      * @see #lanewise(VectorOperators.Binary,byte)
1562      */
1563     @ForceInline
1564     public final ByteVector mul(byte e,
1565                                           VectorMask<Byte> m) {
1566         return lanewise(MUL, e, m);
1567     }
1568 
1569     /**
1570      * {@inheritDoc} <!--workaround-->
1571      * @apiNote If there is a zero divisor, {@code
1572      * ArithmeticException} will be thrown.
1573      */
1574     @Override
1575     @ForceInline
1576     public final ByteVector div(Vector<Byte> v) {
1577         return lanewise(DIV, v);
1578     }
1579 
1580     /**
1581      * Divides this vector by the broadcast of an input scalar.
1582      *
1583      * This is a lane-wise binary operation which applies
1584      * the primitive division operation ({@code /}) to each lane.
1585      *
1586      * This method is also equivalent to the expression
1587      * {@link #lanewise(VectorOperators.Binary,byte)
1588      *    lanewise}{@code (}{@link VectorOperators#DIV
1589      *    DIV}{@code , e)}.
1590      *
1591      * @apiNote If there is a zero divisor, {@code
1592      * ArithmeticException} will be thrown.
1593      *
1594      * @param e the input scalar
1595      * @return the result of dividing each lane of this vector by the scalar
1596      * @see #div(Vector)
1597      * @see #broadcast(byte)
1598      * @see #div(byte,VectorMask)
1599      * @see VectorOperators#DIV
1600      * @see #lanewise(VectorOperators.Binary,Vector)
1601      * @see #lanewise(VectorOperators.Binary,byte)
1602      */
1603     @ForceInline
1604     public final ByteVector div(byte e) {
1605         return lanewise(DIV, e);
1606     }
1607 
1608     /**
1609      * {@inheritDoc} <!--workaround-->
1610      * @see #div(byte,VectorMask)
1611      * @apiNote If there is a zero divisor, {@code
1612      * ArithmeticException} will be thrown.
1613      */
1614     @Override
1615     @ForceInline
1616     public final ByteVector div(Vector<Byte> v,
1617                                           VectorMask<Byte> m) {
1618         return lanewise(DIV, v, m);
1619     }
1620 
1621     /**
1622      * Divides this vector by the broadcast of an input scalar,
1623      * selecting lane elements controlled by a mask.
1624      *
1625      * This is a masked lane-wise binary operation which applies
1626      * the primitive division operation ({@code /}) to each lane.
1627      *
1628      * This method is also equivalent to the expression
1629      * {@link #lanewise(VectorOperators.Binary,byte,VectorMask)
1630      *    lanewise}{@code (}{@link VectorOperators#DIV
1631      *    DIV}{@code , s, m)}.
1632      *
1633      * @apiNote If there is a zero divisor, {@code
1634      * ArithmeticException} will be thrown.
1635      *
1636      * @param e the input scalar
1637      * @param m the mask controlling lane selection
1638      * @return the result of dividing each lane of this vector by the scalar
1639      * @see #div(Vector,VectorMask)
1640      * @see #broadcast(byte)
1641      * @see #div(byte)
1642      * @see VectorOperators#DIV
1643      * @see #lanewise(VectorOperators.Binary,Vector)
1644      * @see #lanewise(VectorOperators.Binary,byte)
1645      */
1646     @ForceInline
1647     public final ByteVector div(byte e,
1648                                           VectorMask<Byte> m) {
1649         return lanewise(DIV, e, m);
1650     }
1651 
1652     /// END OF FULL-SERVICE BINARY METHODS
1653 
1654     /// SECOND-TIER BINARY METHODS
1655     //
1656     // There are no masked versions.
1657 
1658     /**
1659      * {@inheritDoc} <!--workaround-->
1660      */
1661     @Override
1662     @ForceInline
1663     public final ByteVector min(Vector<Byte> v) {
1664         return lanewise(MIN, v);
1665     }
1666 
1667     // FIXME:  "broadcast of an input scalar" is really wordy.  Reduce?
1668     /**
1669      * Computes the smaller of this vector and the broadcast of an input scalar.
1670      *
1671      * This is a lane-wise binary operation which applies the
1672      * operation {@code Math.min()} to each pair of
1673      * corresponding lane values.
1674      *
1675      * This method is also equivalent to the expression
1676      * {@link #lanewise(VectorOperators.Binary,byte)
1677      *    lanewise}{@code (}{@link VectorOperators#MIN
1678      *    MIN}{@code , e)}.
1679      *
1680      * @param e the input scalar
1681      * @return the result of multiplying this vector by the given scalar
1682      * @see #min(Vector)
1683      * @see #broadcast(byte)
1684      * @see VectorOperators#MIN
1685      * @see #lanewise(VectorOperators.Binary,byte,VectorMask)
1686      */
1687     @ForceInline
1688     public final ByteVector min(byte e) {
1689         return lanewise(MIN, e);
1690     }
1691 
1692     /**
1693      * {@inheritDoc} <!--workaround-->
1694      */
1695     @Override
1696     @ForceInline
1697     public final ByteVector max(Vector<Byte> v) {
1698         return lanewise(MAX, v);
1699     }
1700 
1701     /**
1702      * Computes the larger of this vector and the broadcast of an input scalar.
1703      *
1704      * This is a lane-wise binary operation which applies the
1705      * operation {@code Math.max()} to each pair of
1706      * corresponding lane values.
1707      *
1708      * This method is also equivalent to the expression
1709      * {@link #lanewise(VectorOperators.Binary,byte)
1710      *    lanewise}{@code (}{@link VectorOperators#MAX
1711      *    MAX}{@code , e)}.
1712      *
1713      * @param e the input scalar
1714      * @return the result of multiplying this vector by the given scalar
1715      * @see #max(Vector)
1716      * @see #broadcast(byte)
1717      * @see VectorOperators#MAX
1718      * @see #lanewise(VectorOperators.Binary,byte,VectorMask)
1719      */
1720     @ForceInline
1721     public final ByteVector max(byte e) {
1722         return lanewise(MAX, e);
1723     }
1724 
1725     // common bitwise operators: and, or, not (with scalar versions)
1726     /**
1727      * Computes the bitwise logical conjunction ({@code &})
1728      * of this vector and a second input vector.
1729      *
1730      * This is a lane-wise binary operation which applies the
1731      * the primitive bitwise "and" operation ({@code &})
1732      * to each pair of corresponding lane values.
1733      *
1734      * This method is also equivalent to the expression
1735      * {@link #lanewise(VectorOperators.Binary,Vector)
1736      *    lanewise}{@code (}{@link VectorOperators#AND
1737      *    AND}{@code , v)}.
1738      *
1739      * <p>
1740      * This is not a full-service named operation like
1741      * {@link #add(Vector) add}.  A masked version of
1742      * this operation is not directly available
1743      * but may be obtained via the masked version of
1744      * {@code lanewise}.
1745      *
1746      * @param v a second input vector
1747      * @return the bitwise {@code &} of this vector and the second input vector
1748      * @see #and(byte)
1749      * @see #or(Vector)
1750      * @see #not()
1751      * @see VectorOperators#AND
1752      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
1753      */
1754     @ForceInline
1755     public final ByteVector and(Vector<Byte> v) {
1756         return lanewise(AND, v);
1757     }
1758 
1759     /**
1760      * Computes the bitwise logical conjunction ({@code &})
1761      * of this vector and a scalar.
1762      *
1763      * This is a lane-wise binary operation which applies the
1764      * the primitive bitwise "and" operation ({@code &})
1765      * to each pair of corresponding lane values.
1766      *
1767      * This method is also equivalent to the expression
1768      * {@link #lanewise(VectorOperators.Binary,Vector)
1769      *    lanewise}{@code (}{@link VectorOperators#AND
1770      *    AND}{@code , e)}.
1771      *
1772      * @param e an input scalar
1773      * @return the bitwise {@code &} of this vector and scalar
1774      * @see #and(Vector)
1775      * @see VectorOperators#AND
1776      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
1777      */
1778     @ForceInline
1779     public final ByteVector and(byte e) {
1780         return lanewise(AND, e);
1781     }
1782 
1783     /**
1784      * Computes the bitwise logical disjunction ({@code |})
1785      * of this vector and a second input vector.
1786      *
1787      * This is a lane-wise binary operation which applies the
1788      * the primitive bitwise "or" operation ({@code |})
1789      * to each pair of corresponding lane values.
1790      *
1791      * This method is also equivalent to the expression
1792      * {@link #lanewise(VectorOperators.Binary,Vector)
1793      *    lanewise}{@code (}{@link VectorOperators#OR
1794      *    AND}{@code , v)}.
1795      *
1796      * <p>
1797      * This is not a full-service named operation like
1798      * {@link #add(Vector) add}.  A masked version of
1799      * this operation is not directly available
1800      * but may be obtained via the masked version of
1801      * {@code lanewise}.
1802      *
1803      * @param v a second input vector
1804      * @return the bitwise {@code |} of this vector and the second input vector
1805      * @see #or(byte)
1806      * @see #and(Vector)
1807      * @see #not()
1808      * @see VectorOperators#OR
1809      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
1810      */
1811     @ForceInline
1812     public final ByteVector or(Vector<Byte> v) {
1813         return lanewise(OR, v);
1814     }
1815 
1816     /**
1817      * Computes the bitwise logical disjunction ({@code |})
1818      * of this vector and a scalar.
1819      *
1820      * This is a lane-wise binary operation which applies the
1821      * the primitive bitwise "or" operation ({@code |})
1822      * to each pair of corresponding lane values.
1823      *
1824      * This method is also equivalent to the expression
1825      * {@link #lanewise(VectorOperators.Binary,Vector)
1826      *    lanewise}{@code (}{@link VectorOperators#OR
1827      *    OR}{@code , e)}.
1828      *
1829      * @param e an input scalar
1830      * @return the bitwise {@code |} of this vector and scalar
1831      * @see #or(Vector)
1832      * @see VectorOperators#OR
1833      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
1834      */
1835     @ForceInline
1836     public final ByteVector or(byte e) {
1837         return lanewise(OR, e);
1838     }
1839 
1840 
1841 
1842     /// UNARY METHODS
1843 
1844     /**
1845      * {@inheritDoc} <!--workaround-->
1846      */
1847     @Override
1848     @ForceInline
1849     public final
1850     ByteVector neg() {
1851         return lanewise(NEG);
1852     }
1853 
1854     /**
1855      * {@inheritDoc} <!--workaround-->
1856      */
1857     @Override
1858     @ForceInline
1859     public final
1860     ByteVector abs() {
1861         return lanewise(ABS);
1862     }
1863 
1864     static int bitCount(byte a) {
1865         return Integer.bitCount((int)a & 0xFF);
1866     }
1867     static int numberOfTrailingZeros(byte a) {
1868         return a != 0 ? Integer.numberOfTrailingZeros(a) : 8;
1869     }
1870     static int numberOfLeadingZeros(byte a) {
1871         return a >= 0 ? Integer.numberOfLeadingZeros(a) - 24 : 0;
1872     }
1873 
    // Reverses the order of the 8 bits of {@code a}.
    static byte reverse(byte a) {
        // 0x00 and 0xFF are fixed points of bit reversal; return early.
        if (a == 0 || a == -1) return a;

        // Exchange the two nibbles (rotateLeft is assumed to be the
        // 8-bit rotation helper defined elsewhere in this file), then
        // swap the bits within each 2-bit pair, then the pairs within
        // each nibble; the three exchanges compose to a full reversal.
        byte b = rotateLeft(a, 4);
        b = (byte) (((b & 0x55) << 1) | ((b & 0xAA) >>> 1));
        b = (byte) (((b & 0x33) << 2) | ((b & 0xCC) >>> 2));
        return b;
    }
1882 
1883     // not (~)
1884     /**
1885      * Computes the bitwise logical complement ({@code ~})
1886      * of this vector.
1887      *
1888      * This is a lane-wise binary operation which applies the
1889      * the primitive bitwise "not" operation ({@code ~})
1890      * to each lane value.
1891      *
1892      * This method is also equivalent to the expression
1893      * {@link #lanewise(VectorOperators.Unary)
1894      *    lanewise}{@code (}{@link VectorOperators#NOT
1895      *    NOT}{@code )}.
1896      *
1897      * <p>
1898      * This is not a full-service named operation like
1899      * {@link #add(Vector) add}.  A masked version of
1900      * this operation is not directly available
1901      * but may be obtained via the masked version of
1902      * {@code lanewise}.
1903      *
1904      * @return the bitwise complement {@code ~} of this vector
1905      * @see #and(Vector)
1906      * @see VectorOperators#NOT
1907      * @see #lanewise(VectorOperators.Unary,VectorMask)
1908      */
1909     @ForceInline
1910     public final ByteVector not() {
1911         return lanewise(NOT);
1912     }
1913 
1914 
1915     /// COMPARISONS
1916 
1917     /**
1918      * {@inheritDoc} <!--workaround-->
1919      */
1920     @Override
1921     @ForceInline
1922     public final
1923     VectorMask<Byte> eq(Vector<Byte> v) {
1924         return compare(EQ, v);
1925     }
1926 
1927     /**
1928      * Tests if this vector is equal to an input scalar.
1929      *
1930      * This is a lane-wise binary test operation which applies
1931      * the primitive equals operation ({@code ==}) to each lane.
1932      * The result is the same as {@code compare(VectorOperators.Comparison.EQ, e)}.
1933      *
1934      * @param e the input scalar
1935      * @return the result mask of testing if this vector
1936      *         is equal to {@code e}
1937      * @see #compare(VectorOperators.Comparison,byte)
1938      */
1939     @ForceInline
1940     public final
1941     VectorMask<Byte> eq(byte e) {
1942         return compare(EQ, e);
1943     }
1944 
1945     /**
1946      * {@inheritDoc} <!--workaround-->
1947      */
1948     @Override
1949     @ForceInline
1950     public final
1951     VectorMask<Byte> lt(Vector<Byte> v) {
1952         return compare(LT, v);
1953     }
1954 
1955     /**
1956      * Tests if this vector is less than an input scalar.
1957      *
1958      * This is a lane-wise binary test operation which applies
1959      * the primitive less than operation ({@code <}) to each lane.
1960      * The result is the same as {@code compare(VectorOperators.LT, e)}.
1961      *
1962      * @param e the input scalar
1963      * @return the mask result of testing if this vector
1964      *         is less than the input scalar
1965      * @see #compare(VectorOperators.Comparison,byte)
1966      */
1967     @ForceInline
1968     public final
1969     VectorMask<Byte> lt(byte e) {
1970         return compare(LT, e);
1971     }
1972 
1973     /**
1974      * {@inheritDoc} <!--workaround-->
1975      */
1976     @Override
1977     public abstract
1978     VectorMask<Byte> test(VectorOperators.Test op);
1979 
1980     /*package-private*/
1981     @ForceInline
1982     final
1983     <M extends VectorMask<Byte>>
1984     M testTemplate(Class<M> maskType, Test op) {
1985         ByteSpecies vsp = vspecies();
1986         if (opKind(op, VO_SPECIAL)) {
1987             VectorMask<Byte> m;
1988             if (op == IS_DEFAULT) {
1989                 m = compare(EQ, (byte) 0);
1990             } else if (op == IS_NEGATIVE) {
1991                 m = compare(LT, (byte) 0);
1992             }
1993             else {
1994                 throw new AssertionError(op);
1995             }
1996             return maskType.cast(m);
1997         }
1998         int opc = opCode(op);
1999         throw new AssertionError(op);
2000     }
2001 
2002     /**
2003      * {@inheritDoc} <!--workaround-->
2004      */
2005     @Override
2006     public abstract
2007     VectorMask<Byte> test(VectorOperators.Test op,
2008                                   VectorMask<Byte> m);
2009 
2010     /*package-private*/
2011     @ForceInline
2012     final
2013     <M extends VectorMask<Byte>>
2014     M testTemplate(Class<M> maskType, Test op, M mask) {
2015         ByteSpecies vsp = vspecies();
2016         mask.check(maskType, this);
2017         if (opKind(op, VO_SPECIAL)) {
2018             VectorMask<Byte> m = mask;
2019             if (op == IS_DEFAULT) {
2020                 m = compare(EQ, (byte) 0, m);
2021             } else if (op == IS_NEGATIVE) {
2022                 m = compare(LT, (byte) 0, m);
2023             }
2024             else {
2025                 throw new AssertionError(op);
2026             }
2027             return maskType.cast(m);
2028         }
2029         int opc = opCode(op);
2030         throw new AssertionError(op);
2031     }
2032 
2033     /**
2034      * {@inheritDoc} <!--workaround-->
2035      */
2036     @Override
2037     public abstract
2038     VectorMask<Byte> compare(VectorOperators.Comparison op, Vector<Byte> v);
2039 
2040     /*package-private*/
2041     @ForceInline
2042     final
2043     <M extends VectorMask<Byte>>
2044     M compareTemplate(Class<M> maskType, Comparison op, Vector<Byte> v) {
2045         ByteVector that = (ByteVector) v;
2046         that.check(this);
2047         int opc = opCode(op);
2048         return VectorSupport.compare(
2049             opc, getClass(), maskType, byte.class, length(),
2050             this, that, null,
2051             (cond, v0, v1, m1) -> {
2052                 AbstractMask<Byte> m
2053                     = v0.bTest(cond, v1, (cond_, i, a, b)
2054                                -> compareWithOp(cond, a, b));
2055                 @SuppressWarnings("unchecked")
2056                 M m2 = (M) m;
2057                 return m2;
2058             });
2059     }
2060 
    /*package-private*/
    @ForceInline
    final
    <M extends VectorMask<Byte>>
    M compareTemplate(Class<M> maskType, Comparison op, Vector<Byte> v, M m) {
        ByteVector that = (ByteVector) v;
        that.check(this);
        m.check(maskType, this);
        int opc = opCode(op);
        return VectorSupport.compare(
            opc, getClass(), maskType, byte.class, length(),
            this, that, m,
            // Scalar fallback: compute the full comparison mask, then
            // intersect it with the incoming selection mask.
            (cond, v0, v1, m1) -> {
                AbstractMask<Byte> cmpM
                    = v0.bTest(cond, v1, (cond_, i, a, b)
                               -> compareWithOp(cond, a, b));
                @SuppressWarnings("unchecked")
                M m2 = (M) cmpM.and(m1);
                return m2;
            });
    }
2082 
    // Scalar evaluation of one comparison: maps a VectorSupport BT_*
    // condition code to the corresponding primitive byte comparison.
    // Unsigned variants delegate to Byte.compareUnsigned.
    @ForceInline
    private static boolean compareWithOp(int cond, byte a, byte b) {
        return switch (cond) {
            case BT_eq -> a == b;
            case BT_ne -> a != b;
            case BT_lt -> a < b;
            case BT_le -> a <= b;
            case BT_gt -> a > b;
            case BT_ge -> a >= b;
            case BT_ult -> Byte.compareUnsigned(a, b) < 0;
            case BT_ule -> Byte.compareUnsigned(a, b) <= 0;
            case BT_ugt -> Byte.compareUnsigned(a, b) > 0;
            case BT_uge -> Byte.compareUnsigned(a, b) >= 0;
            default -> throw new AssertionError();
        };
    }
2099 
2100     /**
2101      * Tests this vector by comparing it with an input scalar,
2102      * according to the given comparison operation.
2103      *
2104      * This is a lane-wise binary test operation which applies
2105      * the comparison operation to each lane.
2106      * <p>
2107      * The result is the same as
2108      * {@code compare(op, broadcast(species(), e))}.
2109      * That is, the scalar may be regarded as broadcast to
2110      * a vector of the same species, and then compared
2111      * against the original vector, using the selected
2112      * comparison operation.
2113      *
2114      * @param op the operation used to compare lane values
2115      * @param e the input scalar
2116      * @return the mask result of testing lane-wise if this vector
2117      *         compares to the input, according to the selected
2118      *         comparison operator
2119      * @see ByteVector#compare(VectorOperators.Comparison,Vector)
2120      * @see #eq(byte)
2121      * @see #lt(byte)
2122      */
2123     public abstract
2124     VectorMask<Byte> compare(Comparison op, byte e);
2125 
2126     /*package-private*/
2127     @ForceInline
2128     final
2129     <M extends VectorMask<Byte>>
2130     M compareTemplate(Class<M> maskType, Comparison op, byte e) {
2131         return compareTemplate(maskType, op, broadcast(e));
2132     }
2133 
2134     /**
2135      * Tests this vector by comparing it with an input scalar,
2136      * according to the given comparison operation,
2137      * in lanes selected by a mask.
2138      *
2139      * This is a masked lane-wise binary test operation which applies
2140      * to each pair of corresponding lane values.
2141      *
2142      * The returned result is equal to the expression
2143      * {@code compare(op,s).and(m)}.
2144      *
2145      * @param op the operation used to compare lane values
2146      * @param e the input scalar
2147      * @param m the mask controlling lane selection
2148      * @return the mask result of testing lane-wise if this vector
2149      *         compares to the input, according to the selected
2150      *         comparison operator,
2151      *         and only in the lanes selected by the mask
2152      * @see ByteVector#compare(VectorOperators.Comparison,Vector,VectorMask)
2153      */
2154     @ForceInline
2155     public final VectorMask<Byte> compare(VectorOperators.Comparison op,
2156                                                byte e,
2157                                                VectorMask<Byte> m) {
2158         return compare(op, broadcast(e), m);
2159     }
2160 
2161     /**
2162      * {@inheritDoc} <!--workaround-->
2163      */
2164     @Override
2165     public abstract
2166     VectorMask<Byte> compare(Comparison op, long e);
2167 
2168     /*package-private*/
2169     @ForceInline
2170     final
2171     <M extends VectorMask<Byte>>
2172     M compareTemplate(Class<M> maskType, Comparison op, long e) {
2173         return compareTemplate(maskType, op, broadcast(e));
2174     }
2175 
2176     /**
2177      * {@inheritDoc} <!--workaround-->
2178      */
2179     @Override
2180     @ForceInline
2181     public final
2182     VectorMask<Byte> compare(Comparison op, long e, VectorMask<Byte> m) {
2183         return compare(op, broadcast(e), m);
2184     }
2185 
2186 
2187 
2188     /**
2189      * {@inheritDoc} <!--workaround-->
2190      */
2191     @Override public abstract
2192     ByteVector blend(Vector<Byte> v, VectorMask<Byte> m);
2193 
2194     /*package-private*/
2195     @ForceInline
2196     final
2197     <M extends VectorMask<Byte>>
2198     ByteVector
2199     blendTemplate(Class<M> maskType, ByteVector v, M m) {
2200         v.check(this);
2201         return VectorSupport.blend(
2202             getClass(), maskType, byte.class, length(),
2203             this, v, m,
2204             (v0, v1, m_) -> v0.bOp(v1, m_, (i, a, b) -> b));
2205     }
2206 
2207     /**
2208      * {@inheritDoc} <!--workaround-->
2209      */
2210     @Override public abstract ByteVector addIndex(int scale);
2211 
2212     /*package-private*/
2213     @ForceInline
2214     final ByteVector addIndexTemplate(int scale) {
2215         ByteSpecies vsp = vspecies();
2216         // make sure VLENGTH*scale doesn't overflow:
2217         vsp.checkScale(scale);
2218         return VectorSupport.indexVector(
2219             getClass(), byte.class, length(),
2220             this, scale, vsp,
2221             (v, scale_, s)
2222             -> {
2223                 // If the platform doesn't support an INDEX
2224                 // instruction directly, load IOTA from memory
2225                 // and multiply.
2226                 ByteVector iota = s.iota();
2227                 byte sc = (byte) scale_;
2228                 return v.add(sc == 1 ? iota : iota.mul(sc));
2229             });
2230     }
2231 
2232     /**
2233      * Replaces selected lanes of this vector with
2234      * a scalar value
2235      * under the control of a mask.
2236      *
2237      * This is a masked lane-wise binary operation which
2238      * selects each lane value from one or the other input.
2239      *
2240      * The returned result is equal to the expression
2241      * {@code blend(broadcast(e),m)}.
2242      *
2243      * @param e the input scalar, containing the replacement lane value
2244      * @param m the mask controlling lane selection of the scalar
2245      * @return the result of blending the lane elements of this vector with
2246      *         the scalar value
2247      */
2248     @ForceInline
2249     public final ByteVector blend(byte e,
2250                                             VectorMask<Byte> m) {
2251         return blend(broadcast(e), m);
2252     }
2253 
2254     /**
2255      * Replaces selected lanes of this vector with
2256      * a scalar value
2257      * under the control of a mask.
2258      *
2259      * This is a masked lane-wise binary operation which
2260      * selects each lane value from one or the other input.
2261      *
2262      * The returned result is equal to the expression
2263      * {@code blend(broadcast(e),m)}.
2264      *
2265      * @param e the input scalar, containing the replacement lane value
2266      * @param m the mask controlling lane selection of the scalar
2267      * @return the result of blending the lane elements of this vector with
2268      *         the scalar value
2269      */
2270     @ForceInline
2271     public final ByteVector blend(long e,
2272                                             VectorMask<Byte> m) {
2273         return blend(broadcast(e), m);
2274     }
2275 
2276     /**
2277      * {@inheritDoc} <!--workaround-->
2278      */
2279     @Override
2280     public abstract
2281     ByteVector slice(int origin, Vector<Byte> v1);
2282 
2283     /*package-private*/
2284     final
2285     @ForceInline
2286     ByteVector sliceTemplate(int origin, Vector<Byte> v1) {
2287         ByteVector that = (ByteVector) v1;
2288         that.check(this);
2289         Objects.checkIndex(origin, length() + 1);
2290         VectorShuffle<Byte> iota = iotaShuffle();
2291         VectorMask<Byte> blendMask = iota.toVector().compare(VectorOperators.LT, (broadcast((byte)(length() - origin))));
2292         iota = iotaShuffle(origin, 1, true);
2293         return that.rearrange(iota).blend(this.rearrange(iota), blendMask);
2294     }
2295 
2296     /**
2297      * {@inheritDoc} <!--workaround-->
2298      */
2299     @Override
2300     @ForceInline
2301     public final
2302     ByteVector slice(int origin,
2303                                Vector<Byte> w,
2304                                VectorMask<Byte> m) {
2305         return broadcast(0).blend(slice(origin, w), m);
2306     }
2307 
2308     /**
2309      * {@inheritDoc} <!--workaround-->
2310      */
2311     @Override
2312     public abstract
2313     ByteVector slice(int origin);
2314 
2315     /*package-private*/
2316     final
2317     @ForceInline
2318     ByteVector sliceTemplate(int origin) {
2319         Objects.checkIndex(origin, length() + 1);
2320         VectorShuffle<Byte> iota = iotaShuffle();
2321         VectorMask<Byte> blendMask = iota.toVector().compare(VectorOperators.LT, (broadcast((byte)(length() - origin))));
2322         iota = iotaShuffle(origin, 1, true);
2323         return vspecies().zero().blend(this.rearrange(iota), blendMask);
2324     }
2325 
2326     /**
2327      * {@inheritDoc} <!--workaround-->
2328      */
2329     @Override
2330     public abstract
2331     ByteVector unslice(int origin, Vector<Byte> w, int part);
2332 
2333     /*package-private*/
2334     final
2335     @ForceInline
2336     ByteVector
2337     unsliceTemplate(int origin, Vector<Byte> w, int part) {
2338         ByteVector that = (ByteVector) w;
2339         that.check(this);
2340         Objects.checkIndex(origin, length() + 1);
2341         VectorShuffle<Byte> iota = iotaShuffle();
2342         VectorMask<Byte> blendMask = iota.toVector().compare((part == 0) ? VectorOperators.GE : VectorOperators.LT,
2343                                                                   (broadcast((byte)(origin))));
2344         iota = iotaShuffle(-origin, 1, true);
2345         return that.blend(this.rearrange(iota), blendMask);
2346     }
2347 
    /*package-private*/
    final
    @ForceInline
    <M extends VectorMask<Byte>>
    ByteVector
    unsliceTemplate(Class<M> maskType, int origin, Vector<Byte> w, int part, M m) {
        ByteVector that = (ByteVector) w;
        that.check(this);
        // Masked unslice is composed from the unmasked primitives:
        // slice w at origin, blend in this vector under the mask, then
        // unslice the combination back into w.
        ByteVector slice = that.sliceTemplate(origin, that);
        slice = slice.blendTemplate(maskType, this, m);
        return slice.unsliceTemplate(origin, w, part);
    }
2360 
2361     /**
2362      * {@inheritDoc} <!--workaround-->
2363      */
2364     @Override
2365     public abstract
2366     ByteVector unslice(int origin, Vector<Byte> w, int part, VectorMask<Byte> m);
2367 
2368     /**
2369      * {@inheritDoc} <!--workaround-->
2370      */
2371     @Override
2372     public abstract
2373     ByteVector unslice(int origin);
2374 
2375     /*package-private*/
2376     final
2377     @ForceInline
2378     ByteVector
2379     unsliceTemplate(int origin) {
2380         Objects.checkIndex(origin, length() + 1);
2381         VectorShuffle<Byte> iota = iotaShuffle();
2382         VectorMask<Byte> blendMask = iota.toVector().compare(VectorOperators.GE,
2383                                                                   (broadcast((byte)(origin))));
2384         iota = iotaShuffle(-origin, 1, true);
2385         return vspecies().zero().blend(this.rearrange(iota), blendMask);
2386     }
2387 
2388     private ArrayIndexOutOfBoundsException
2389     wrongPartForSlice(int part) {
2390         String msg = String.format("bad part number %d for slice operation",
2391                                    part);
2392         return new ArrayIndexOutOfBoundsException(msg);
2393     }
2394 
2395     /**
2396      * {@inheritDoc} <!--workaround-->
2397      */
2398     @Override
2399     public abstract
2400     ByteVector rearrange(VectorShuffle<Byte> m);
2401 
2402     /*package-private*/
2403     @ForceInline
2404     final
2405     <S extends VectorShuffle<Byte>>
2406     ByteVector rearrangeTemplate(Class<S> shuffletype, S shuffle) {
2407         shuffle.checkIndexes();
2408         return VectorSupport.rearrangeOp(
2409             getClass(), shuffletype, null, byte.class, length(),
2410             this, shuffle, null,
2411             (v1, s_, m_) -> v1.uOp((i, a) -> {
2412                 int ei = s_.laneSource(i);
2413                 return v1.lane(ei);
2414             }));
2415     }
2416 
2417     /**
2418      * {@inheritDoc} <!--workaround-->
2419      */
2420     @Override
2421     public abstract
2422     ByteVector rearrange(VectorShuffle<Byte> s,
2423                                    VectorMask<Byte> m);
2424 
2425     /*package-private*/
2426     @ForceInline
2427     final
2428     <S extends VectorShuffle<Byte>, M extends VectorMask<Byte>>
2429     ByteVector rearrangeTemplate(Class<S> shuffletype,
2430                                            Class<M> masktype,
2431                                            S shuffle,
2432                                            M m) {
2433 
2434         m.check(masktype, this);
2435         VectorMask<Byte> valid = shuffle.laneIsValid();
2436         if (m.andNot(valid).anyTrue()) {
2437             shuffle.checkIndexes();
2438             throw new AssertionError();
2439         }
2440         return VectorSupport.rearrangeOp(
2441                    getClass(), shuffletype, masktype, byte.class, length(),
2442                    this, shuffle, m,
2443                    (v1, s_, m_) -> v1.uOp((i, a) -> {
2444                         int ei = s_.laneSource(i);
2445                         return ei < 0  || !m_.laneIsSet(i) ? 0 : v1.lane(ei);
2446                    }));
2447     }
2448 
2449     /**
2450      * {@inheritDoc} <!--workaround-->
2451      */
2452     @Override
2453     public abstract
2454     ByteVector rearrange(VectorShuffle<Byte> s,
2455                                    Vector<Byte> v);
2456 
2457     /*package-private*/
2458     @ForceInline
2459     final
2460     <S extends VectorShuffle<Byte>>
2461     ByteVector rearrangeTemplate(Class<S> shuffletype,
2462                                            S shuffle,
2463                                            ByteVector v) {
2464         VectorMask<Byte> valid = shuffle.laneIsValid();
2465         @SuppressWarnings("unchecked")
2466         S ws = (S) shuffle.wrapIndexes();
2467         ByteVector r0 =
2468             VectorSupport.rearrangeOp(
2469                 getClass(), shuffletype, null, byte.class, length(),
2470                 this, ws, null,
2471                 (v0, s_, m_) -> v0.uOp((i, a) -> {
2472                     int ei = s_.laneSource(i);
2473                     return v0.lane(ei);
2474                 }));
2475         ByteVector r1 =
2476             VectorSupport.rearrangeOp(
2477                 getClass(), shuffletype, null, byte.class, length(),
2478                 v, ws, null,
2479                 (v1, s_, m_) -> v1.uOp((i, a) -> {
2480                     int ei = s_.laneSource(i);
2481                     return v1.lane(ei);
2482                 }));
2483         return r1.blend(r0, valid);
2484     }
2485 
2486     @ForceInline
2487     private final
2488     VectorShuffle<Byte> toShuffle0(ByteSpecies dsp) {
2489         byte[] a = toArray();
2490         int[] sa = new int[a.length];
2491         for (int i = 0; i < a.length; i++) {
2492             sa[i] = (int) a[i];
2493         }
2494         return VectorShuffle.fromArray(dsp, sa, 0);
2495     }
2496 
    /*package-private*/
    // Converts this vector into a shuffle of the requested shuffle class
    // through the VECTOR_OP_CAST intrinsic entry point; toShuffle0 is the
    // scalar fallback used when the intrinsic does not apply.
    @ForceInline
    final
    VectorShuffle<Byte> toShuffleTemplate(Class<?> shuffleType) {
        ByteSpecies vsp = vspecies();
        return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST,
                                     getClass(), byte.class, length(),
                                     shuffleType, byte.class, length(),
                                     this, vsp,
                                     ByteVector::toShuffle0);
    }
2508 
2509     /**
2510      * {@inheritDoc} <!--workaround-->
2511      * @since 19
2512      */
2513     @Override
2514     public abstract
2515     ByteVector compress(VectorMask<Byte> m);
2516 
2517     /*package-private*/
2518     @ForceInline
2519     final
2520     <M extends AbstractMask<Byte>>
2521     ByteVector compressTemplate(Class<M> masktype, M m) {
2522       m.check(masktype, this);
2523       return (ByteVector) VectorSupport.comExpOp(VectorSupport.VECTOR_OP_COMPRESS, getClass(), masktype,
2524                                                    byte.class, length(), this, m,
2525                                                    (v1, m1) -> compressHelper(v1, m1));
2526     }
2527 
2528     /**
2529      * {@inheritDoc} <!--workaround-->
2530      * @since 19
2531      */
2532     @Override
2533     public abstract
2534     ByteVector expand(VectorMask<Byte> m);
2535 
2536     /*package-private*/
2537     @ForceInline
2538     final
2539     <M extends AbstractMask<Byte>>
2540     ByteVector expandTemplate(Class<M> masktype, M m) {
2541       m.check(masktype, this);
2542       return (ByteVector) VectorSupport.comExpOp(VectorSupport.VECTOR_OP_EXPAND, getClass(), masktype,
2543                                                    byte.class, length(), this, m,
2544                                                    (v1, m1) -> expandHelper(v1, m1));
2545     }
2546 
2547 
2548     /**
2549      * {@inheritDoc} <!--workaround-->
2550      */
2551     @Override
2552     public abstract
2553     ByteVector selectFrom(Vector<Byte> v);
2554 
2555     /*package-private*/
2556     @ForceInline
2557     final ByteVector selectFromTemplate(ByteVector v) {
2558         return v.rearrange(this.toShuffle());
2559     }
2560 
2561     /**
2562      * {@inheritDoc} <!--workaround-->
2563      */
2564     @Override
2565     public abstract
2566     ByteVector selectFrom(Vector<Byte> s, VectorMask<Byte> m);
2567 
2568     /*package-private*/
2569     @ForceInline
2570     final ByteVector selectFromTemplate(ByteVector v,
2571                                                   AbstractMask<Byte> m) {
2572         return v.rearrange(this.toShuffle(), m);
2573     }
2574 
2575     /// Ternary operations
2576 
2577     /**
2578      * Blends together the bits of two vectors under
2579      * the control of a third, which supplies mask bits.
2580      *
2581      * This is a lane-wise ternary operation which performs
2582      * a bitwise blending operation {@code (a&~c)|(b&c)}
2583      * to each lane.
2584      *
2585      * This method is also equivalent to the expression
2586      * {@link #lanewise(VectorOperators.Ternary,Vector,Vector)
2587      *    lanewise}{@code (}{@link VectorOperators#BITWISE_BLEND
2588      *    BITWISE_BLEND}{@code , bits, mask)}.
2589      *
2590      * @param bits input bits to blend into the current vector
2591      * @param mask a bitwise mask to enable blending of the input bits
2592      * @return the bitwise blend of the given bits into the current vector,
2593      *         under control of the bitwise mask
2594      * @see #bitwiseBlend(byte,byte)
2595      * @see #bitwiseBlend(byte,Vector)
2596      * @see #bitwiseBlend(Vector,byte)
2597      * @see VectorOperators#BITWISE_BLEND
2598      * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
2599      */
2600     @ForceInline
2601     public final
2602     ByteVector bitwiseBlend(Vector<Byte> bits, Vector<Byte> mask) {
2603         return lanewise(BITWISE_BLEND, bits, mask);
2604     }
2605 
2606     /**
2607      * Blends together the bits of a vector and a scalar under
2608      * the control of another scalar, which supplies mask bits.
2609      *
2610      * This is a lane-wise ternary operation which performs
2611      * a bitwise blending operation {@code (a&~c)|(b&c)}
2612      * to each lane.
2613      *
2614      * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Ternary,byte,byte)
2616      *    lanewise}{@code (}{@link VectorOperators#BITWISE_BLEND
2617      *    BITWISE_BLEND}{@code , bits, mask)}.
2618      *
2619      * @param bits input bits to blend into the current vector
2620      * @param mask a bitwise mask to enable blending of the input bits
2621      * @return the bitwise blend of the given bits into the current vector,
2622      *         under control of the bitwise mask
2623      * @see #bitwiseBlend(Vector,Vector)
2624      * @see VectorOperators#BITWISE_BLEND
2625      * @see #lanewise(VectorOperators.Ternary,byte,byte,VectorMask)
2626      */
2627     @ForceInline
2628     public final
2629     ByteVector bitwiseBlend(byte bits, byte mask) {
2630         return lanewise(BITWISE_BLEND, bits, mask);
2631     }
2632 
2633     /**
2634      * Blends together the bits of a vector and a scalar under
2635      * the control of another vector, which supplies mask bits.
2636      *
2637      * This is a lane-wise ternary operation which performs
2638      * a bitwise blending operation {@code (a&~c)|(b&c)}
2639      * to each lane.
2640      *
2641      * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Ternary,byte,Vector)
2643      *    lanewise}{@code (}{@link VectorOperators#BITWISE_BLEND
2644      *    BITWISE_BLEND}{@code , bits, mask)}.
2645      *
2646      * @param bits input bits to blend into the current vector
2647      * @param mask a bitwise mask to enable blending of the input bits
2648      * @return the bitwise blend of the given bits into the current vector,
2649      *         under control of the bitwise mask
2650      * @see #bitwiseBlend(Vector,Vector)
2651      * @see VectorOperators#BITWISE_BLEND
2652      * @see #lanewise(VectorOperators.Ternary,byte,Vector,VectorMask)
2653      */
2654     @ForceInline
2655     public final
2656     ByteVector bitwiseBlend(byte bits, Vector<Byte> mask) {
2657         return lanewise(BITWISE_BLEND, bits, mask);
2658     }
2659 
2660     /**
2661      * Blends together the bits of two vectors under
2662      * the control of a scalar, which supplies mask bits.
2663      *
2664      * This is a lane-wise ternary operation which performs
2665      * a bitwise blending operation {@code (a&~c)|(b&c)}
2666      * to each lane.
2667      *
2668      * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Ternary,Vector,byte)
2670      *    lanewise}{@code (}{@link VectorOperators#BITWISE_BLEND
2671      *    BITWISE_BLEND}{@code , bits, mask)}.
2672      *
2673      * @param bits input bits to blend into the current vector
2674      * @param mask a bitwise mask to enable blending of the input bits
2675      * @return the bitwise blend of the given bits into the current vector,
2676      *         under control of the bitwise mask
2677      * @see #bitwiseBlend(Vector,Vector)
2678      * @see VectorOperators#BITWISE_BLEND
2679      * @see #lanewise(VectorOperators.Ternary,Vector,byte,VectorMask)
2680      */
2681     @ForceInline
2682     public final
2683     ByteVector bitwiseBlend(Vector<Byte> bits, byte mask) {
2684         return lanewise(BITWISE_BLEND, bits, mask);
2685     }
2686 
2687 
2688     // Type specific horizontal reductions
2689 
2690     /**
2691      * Returns a value accumulated from all the lanes of this vector.
2692      *
2693      * This is an associative cross-lane reduction operation which
2694      * applies the specified operation to all the lane elements.
2695      * <p>
2696      * A few reduction operations do not support arbitrary reordering
2697      * of their operands, yet are included here because of their
2698      * usefulness.
2699      * <ul>
2700      * <li>
2701      * In the case of {@code FIRST_NONZERO}, the reduction returns
2702      * the value from the lowest-numbered non-zero lane.
2703      * <li>
2704      * All other reduction operations are fully commutative and
2705      * associative.  The implementation can choose any order of
2706      * processing, yet it will always produce the same result.
2707      * </ul>
2708      *
2709      * @param op the operation used to combine lane values
2710      * @return the accumulated result
2711      * @throws UnsupportedOperationException if this vector does
2712      *         not support the requested operation
2713      * @see #reduceLanes(VectorOperators.Associative,VectorMask)
2714      * @see #add(Vector)
2715      * @see #mul(Vector)
2716      * @see #min(Vector)
2717      * @see #max(Vector)
2718      * @see #and(Vector)
2719      * @see #or(Vector)
2720      * @see VectorOperators#XOR
2721      * @see VectorOperators#FIRST_NONZERO
2722      */
2723     public abstract byte reduceLanes(VectorOperators.Associative op);
2724 
2725     /**
2726      * Returns a value accumulated from selected lanes of this vector,
2727      * controlled by a mask.
2728      *
2729      * This is an associative cross-lane reduction operation which
2730      * applies the specified operation to the selected lane elements.
2731      * <p>
2732      * If no elements are selected, an operation-specific identity
2733      * value is returned.
2734      * <ul>
2735      * <li>
2736      * If the operation is
2737      *  {@code ADD}, {@code XOR}, {@code OR},
2738      * or {@code FIRST_NONZERO},
2739      * then the identity value is zero, the default {@code byte} value.
2740      * <li>
2741      * If the operation is {@code MUL},
2742      * then the identity value is one.
2743      * <li>
2744      * If the operation is {@code AND},
2745      * then the identity value is minus one (all bits set).
2746      * <li>
2747      * If the operation is {@code MAX},
2748      * then the identity value is {@code Byte.MIN_VALUE}.
2749      * <li>
2750      * If the operation is {@code MIN},
2751      * then the identity value is {@code Byte.MAX_VALUE}.
2752      * </ul>
2753      * <p>
2754      * A few reduction operations do not support arbitrary reordering
2755      * of their operands, yet are included here because of their
2756      * usefulness.
2757      * <ul>
2758      * <li>
2759      * In the case of {@code FIRST_NONZERO}, the reduction returns
2760      * the value from the lowest-numbered non-zero lane.
2761      * <li>
2762      * All other reduction operations are fully commutative and
2763      * associative.  The implementation can choose any order of
2764      * processing, yet it will always produce the same result.
2765      * </ul>
2766      *
2767      * @param op the operation used to combine lane values
2768      * @param m the mask controlling lane selection
2769      * @return the reduced result accumulated from the selected lane values
2770      * @throws UnsupportedOperationException if this vector does
2771      *         not support the requested operation
2772      * @see #reduceLanes(VectorOperators.Associative)
2773      */
    // Implemented by concrete subclasses via the masked reduceLanesTemplate.
    public abstract byte reduceLanes(VectorOperators.Associative op,
                                       VectorMask<Byte> m);
2776 
    /*package-private*/
    // Masked reduction: validates the mask, special-cases FIRST_NONZERO
    // (not handled by the reduction intrinsic), and otherwise dispatches
    // through the intrinsic with a cached scalar fallback.
    @ForceInline
    final
    byte reduceLanesTemplate(VectorOperators.Associative op,
                               Class<? extends VectorMask<Byte>> maskClass,
                               VectorMask<Byte> m) {
        m.check(maskClass, this);
        if (op == FIRST_NONZERO) {
            // FIXME:  The JIT should handle this.
            // Zero the unselected lanes, then reuse the unmasked path.
            ByteVector v = broadcast((byte) 0).blend(this, m);
            return v.reduceLanesTemplate(op);
        }
        int opc = opCode(op);
        return fromBits(VectorSupport.reductionCoerced(
            opc, getClass(), maskClass, byte.class, length(),
            this, m,
            REDUCE_IMPL.find(op, opc, ByteVector::reductionOperations)));
    }
2795 
    /*package-private*/
    // Unmasked reduction: FIRST_NONZERO is computed directly by locating
    // the first lane that compares NE to zero (zero if there is none);
    // all other operators go through the reduction intrinsic with a
    // cached scalar fallback.
    @ForceInline
    final
    byte reduceLanesTemplate(VectorOperators.Associative op) {
        if (op == FIRST_NONZERO) {
            // FIXME:  The JIT should handle this.
            VectorMask<Byte> thisNZ
                = this.viewAsIntegralLanes().compare(NE, (byte) 0);
            int ft = thisNZ.firstTrue();
            return ft < length() ? this.lane(ft) : (byte) 0;
        }
        int opc = opCode(op);
        return fromBits(VectorSupport.reductionCoerced(
            opc, getClass(), null, byte.class, length(),
            this, null,
            REDUCE_IMPL.find(op, opc, ByteVector::reductionOperations)));
    }
2813 
2814     private static final
2815     ImplCache<Associative, ReductionOperation<ByteVector, VectorMask<Byte>>>
2816         REDUCE_IMPL = new ImplCache<>(Associative.class, ByteVector.class);
2817 
2818     private static ReductionOperation<ByteVector, VectorMask<Byte>> reductionOperations(int opc_) {
2819         switch (opc_) {
2820             case VECTOR_OP_ADD: return (v, m) ->
2821                     toBits(v.rOp((byte)0, m, (i, a, b) -> (byte)(a + b)));
2822             case VECTOR_OP_MUL: return (v, m) ->
2823                     toBits(v.rOp((byte)1, m, (i, a, b) -> (byte)(a * b)));
2824             case VECTOR_OP_MIN: return (v, m) ->
2825                     toBits(v.rOp(MAX_OR_INF, m, (i, a, b) -> (byte) Math.min(a, b)));
2826             case VECTOR_OP_MAX: return (v, m) ->
2827                     toBits(v.rOp(MIN_OR_INF, m, (i, a, b) -> (byte) Math.max(a, b)));
2828             case VECTOR_OP_AND: return (v, m) ->
2829                     toBits(v.rOp((byte)-1, m, (i, a, b) -> (byte)(a & b)));
2830             case VECTOR_OP_OR: return (v, m) ->
2831                     toBits(v.rOp((byte)0, m, (i, a, b) -> (byte)(a | b)));
2832             case VECTOR_OP_XOR: return (v, m) ->
2833                     toBits(v.rOp((byte)0, m, (i, a, b) -> (byte)(a ^ b)));
2834             default: return null;
2835         }
2836     }
2837 
2838     private static final byte MIN_OR_INF = Byte.MIN_VALUE;
2839     private static final byte MAX_OR_INF = Byte.MAX_VALUE;
2840 
    // Widened (long) reductions; implemented by concrete subclasses.
    public @Override abstract long reduceLanesToLong(VectorOperators.Associative op);
    public @Override abstract long reduceLanesToLong(VectorOperators.Associative op,
                                                     VectorMask<Byte> m);
2844 
2845     // Type specific accessors
2846 
2847     /**
2848      * Gets the lane element at lane index {@code i}
2849      *
2850      * @param i the lane index
2851      * @return the lane element at lane index {@code i}
2852      * @throws IllegalArgumentException if the index is is out of range
2853      * ({@code < 0 || >= length()})
2854      */
2855     public abstract byte lane(int i);
2856 
2857     /**
2858      * Replaces the lane element of this vector at lane index {@code i} with
2859      * value {@code e}.
2860      *
2861      * This is a cross-lane operation and behaves as if it returns the result
2862      * of blending this vector with an input vector that is the result of
2863      * broadcasting {@code e} and a mask that has only one lane set at lane
2864      * index {@code i}.
2865      *
2866      * @param i the lane index of the lane element to be replaced
2867      * @param e the value to be placed
2868      * @return the result of replacing the lane element of this vector at lane
2869      * index {@code i} with value {@code e}.
2870      * @throws IllegalArgumentException if the index is is out of range
2871      * ({@code < 0 || >= length()})
2872      */
2873     public abstract ByteVector withLane(int i, byte e);
2874 
2875     // Memory load operations
2876 
2877     /**
2878      * Returns an array of type {@code byte[]}
2879      * containing all the lane values.
2880      * The array length is the same as the vector length.
2881      * The array elements are stored in lane order.
2882      * <p>
2883      * This method behaves as if it stores
2884      * this vector into an allocated array
2885      * (using {@link #intoArray(byte[], int) intoArray})
2886      * and returns the array as follows:
2887      * <pre>{@code
2888      *   byte[] a = new byte[this.length()];
2889      *   this.intoArray(a, 0);
2890      *   return a;
2891      * }</pre>
2892      *
2893      * @return an array containing the lane values of this vector
2894      */
2895     @ForceInline
2896     @Override
2897     public final byte[] toArray() {
2898         byte[] a = new byte[vspecies().laneCount()];
2899         intoArray(a, 0);
2900         return a;
2901     }
2902 
    /** {@inheritDoc} <!--workaround-->
     * @implNote
     * When this method is used on vectors
     * of type {@code ByteVector},
     * there will be no loss of precision or range,
     * and so no {@code UnsupportedOperationException} will
     * be thrown.
     */
    @ForceInline
    @Override
    public final int[] toIntArray() {
        byte[] a = toArray();
        int[] res = new int[a.length];
        for (int i = 0; i < a.length; i++) {
            byte e = a[i];
            // Widen each lane through the checked integral conversion.
            res[i] = (int) ByteSpecies.toIntegralChecked(e, true);
        }
        return res;
    }
2922 
    /** {@inheritDoc} <!--workaround-->
     * @implNote
     * When this method is used on vectors
     * of type {@code ByteVector},
     * there will be no loss of precision or range,
     * and so no {@code UnsupportedOperationException} will
     * be thrown.
     */
    @ForceInline
    @Override
    public final long[] toLongArray() {
        byte[] a = toArray();
        long[] res = new long[a.length];
        for (int i = 0; i < a.length; i++) {
            byte e = a[i];
            // Widen each lane through the checked integral conversion.
            res[i] = ByteSpecies.toIntegralChecked(e, false);
        }
        return res;
    }
2942 
    /** {@inheritDoc} <!--workaround-->
     * @implNote
     * When this method is used on vectors
     * of type {@code ByteVector},
     * there will be no loss of precision.
     */
    @ForceInline
    @Override
    public final double[] toDoubleArray() {
        byte[] a = toArray();
        double[] res = new double[a.length];
        for (int i = 0; i < a.length; i++) {
            // byte-to-double conversion is always exact.
            res[i] = (double) a[i];
        }
        return res;
    }
2959 
2960     /**
2961      * Loads a vector from an array of type {@code byte[]}
2962      * starting at an offset.
2963      * For each vector lane, where {@code N} is the vector lane index, the
2964      * array element at index {@code offset + N} is placed into the
2965      * resulting vector at lane index {@code N}.
2966      *
2967      * @param species species of desired vector
2968      * @param a the array
2969      * @param offset the offset into the array
2970      * @return the vector loaded from an array
2971      * @throws IndexOutOfBoundsException
2972      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
2973      *         for any lane {@code N} in the vector
2974      */
    @ForceInline
    public static
    ByteVector fromArray(VectorSpecies<Byte> species,
                                   byte[] a, int offset) {
        // Bounds-check once for the whole vector, then use the
        // intrinsic-backed bulk loader on the species' dummy vector.
        offset = checkFromIndexSize(offset, species.length(), a.length);
        ByteSpecies vsp = (ByteSpecies) species;
        return vsp.dummyVector().fromArray0(a, offset);
    }
2983 
2984     /**
2985      * Loads a vector from an array of type {@code byte[]}
2986      * starting at an offset and using a mask.
2987      * Lanes where the mask is unset are filled with the default
2988      * value of {@code byte} (zero).
2989      * For each vector lane, where {@code N} is the vector lane index,
2990      * if the mask lane at index {@code N} is set then the array element at
2991      * index {@code offset + N} is placed into the resulting vector at lane index
2992      * {@code N}, otherwise the default element value is placed into the
2993      * resulting vector at lane index {@code N}.
2994      *
2995      * @param species species of desired vector
2996      * @param a the array
2997      * @param offset the offset into the array
2998      * @param m the mask controlling lane selection
2999      * @return the vector loaded from an array
3000      * @throws IndexOutOfBoundsException
3001      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
3002      *         for any lane {@code N} in the vector
3003      *         where the mask is set
3004      */
    @ForceInline
    public static
    ByteVector fromArray(VectorSpecies<Byte> species,
                                   byte[] a, int offset,
                                   VectorMask<Byte> m) {
        ByteSpecies vsp = (ByteSpecies) species;
        if (offset >= 0 && offset <= (a.length - species.length())) {
            // Fast path: the whole vector is in range, so the masked
            // intrinsic loader can be used without per-lane checks.
            return vsp.dummyVector().fromArray0(a, offset, m);
        }

        // FIXME: optimize
        // Slow path: check only the lanes the mask selects, then load
        // them one at a time.
        checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
        return vsp.vOp(m, i -> a[offset + i]);
    }
3019 
3020     /**
3021      * Gathers a new vector composed of elements from an array of type
3022      * {@code byte[]},
3023      * using indexes obtained by adding a fixed {@code offset} to a
3024      * series of secondary offsets from an <em>index map</em>.
3025      * The index map is a contiguous sequence of {@code VLENGTH}
3026      * elements in a second array of {@code int}s, starting at a given
3027      * {@code mapOffset}.
3028      * <p>
3029      * For each vector lane, where {@code N} is the vector lane index,
3030      * the lane is loaded from the array
3031      * element {@code a[f(N)]}, where {@code f(N)} is the
3032      * index mapping expression
3033      * {@code offset + indexMap[mapOffset + N]]}.
3034      *
3035      * @param species species of desired vector
3036      * @param a the array
3037      * @param offset the offset into the array, may be negative if relative
3038      * indexes in the index map compensate to produce a value within the
3039      * array bounds
3040      * @param indexMap the index map
3041      * @param mapOffset the offset into the index map
3042      * @return the vector loaded from the indexed elements of the array
3043      * @throws IndexOutOfBoundsException
3044      *         if {@code mapOffset+N < 0}
3045      *         or if {@code mapOffset+N >= indexMap.length},
3046      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
3047      *         is an invalid index into {@code a},
3048      *         for any lane {@code N} in the vector
3049      * @see ByteVector#toIntArray()
3050      */
    @ForceInline
    public static
    ByteVector fromArray(VectorSpecies<Byte> species,
                                   byte[] a, int offset,
                                   int[] indexMap, int mapOffset) {
        // FIXME: optimize
        // Scalar gather: each lane n loads a[offset + indexMap[mapOffset + n]];
        // array indexing supplies the bounds checks.
        ByteSpecies vsp = (ByteSpecies) species;
        return vsp.vOp(n -> a[offset + indexMap[mapOffset + n]]);
    }
3059 
3060     /**
3061      * Gathers a new vector composed of elements from an array of type
3062      * {@code byte[]},
3063      * under the control of a mask, and
3064      * using indexes obtained by adding a fixed {@code offset} to a
3065      * series of secondary offsets from an <em>index map</em>.
3066      * The index map is a contiguous sequence of {@code VLENGTH}
3067      * elements in a second array of {@code int}s, starting at a given
3068      * {@code mapOffset}.
3069      * <p>
3070      * For each vector lane, where {@code N} is the vector lane index,
3071      * if the lane is set in the mask,
3072      * the lane is loaded from the array
3073      * element {@code a[f(N)]}, where {@code f(N)} is the
3074      * index mapping expression
3075      * {@code offset + indexMap[mapOffset + N]]}.
3076      * Unset lanes in the resulting vector are set to zero.
3077      *
3078      * @param species species of desired vector
3079      * @param a the array
3080      * @param offset the offset into the array, may be negative if relative
3081      * indexes in the index map compensate to produce a value within the
3082      * array bounds
3083      * @param indexMap the index map
3084      * @param mapOffset the offset into the index map
3085      * @param m the mask controlling lane selection
3086      * @return the vector loaded from the indexed elements of the array
3087      * @throws IndexOutOfBoundsException
3088      *         if {@code mapOffset+N < 0}
3089      *         or if {@code mapOffset+N >= indexMap.length},
3090      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
3091      *         is an invalid index into {@code a},
3092      *         for any lane {@code N} in the vector
3093      *         where the mask is set
3094      * @see ByteVector#toIntArray()
3095      */
    @ForceInline
    public static
    ByteVector fromArray(VectorSpecies<Byte> species,
                                   byte[] a, int offset,
                                   int[] indexMap, int mapOffset,
                                   VectorMask<Byte> m) {
        // Scalar masked gather: only lanes selected by m are loaded;
        // unset lanes take the default (zero) value.
        ByteSpecies vsp = (ByteSpecies) species;
        return vsp.vOp(m, n -> a[offset + indexMap[mapOffset + n]]);
    }
3105 
3106 
3107     /**
3108      * Loads a vector from an array of type {@code boolean[]}
3109      * starting at an offset.
3110      * For each vector lane, where {@code N} is the vector lane index, the
3111      * array element at index {@code offset + N}
3112      * is first converted to a {@code byte} value and then
3113      * placed into the resulting vector at lane index {@code N}.
3114      * <p>
3115      * A {@code boolean} value is converted to a {@code byte} value by applying the
3116      * expression {@code (byte) (b ? 1 : 0)}, where {@code b} is the {@code boolean} value.
3117      *
3118      * @param species species of desired vector
3119      * @param a the array
3120      * @param offset the offset into the array
3121      * @return the vector loaded from an array
3122      * @throws IndexOutOfBoundsException
3123      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
3124      *         for any lane {@code N} in the vector
3125      */
    @ForceInline
    public static
    ByteVector fromBooleanArray(VectorSpecies<Byte> species,
                                          boolean[] a, int offset) {
        // Bounds-check once for the whole vector, then use the
        // intrinsic-backed boolean loader on the species' dummy vector.
        offset = checkFromIndexSize(offset, species.length(), a.length);
        ByteSpecies vsp = (ByteSpecies) species;
        return vsp.dummyVector().fromBooleanArray0(a, offset);
    }
3134 
3135     /**
3136      * Loads a vector from an array of type {@code boolean[]}
3137      * starting at an offset and using a mask.
3138      * Lanes where the mask is unset are filled with the default
3139      * value of {@code byte} (zero).
3140      * For each vector lane, where {@code N} is the vector lane index,
3141      * if the mask lane at index {@code N} is set then the array element at
3142      * index {@code offset + N}
3143      * is first converted to a {@code byte} value and then
3144      * placed into the resulting vector at lane index
3145      * {@code N}, otherwise the default element value is placed into the
3146      * resulting vector at lane index {@code N}.
3147      * <p>
3148      * A {@code boolean} value is converted to a {@code byte} value by applying the
3149      * expression {@code (byte) (b ? 1 : 0)}, where {@code b} is the {@code boolean} value.
3150      *
3151      * @param species species of desired vector
3152      * @param a the array
3153      * @param offset the offset into the array
3154      * @param m the mask controlling lane selection
3155      * @return the vector loaded from an array
3156      * @throws IndexOutOfBoundsException
3157      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
3158      *         for any lane {@code N} in the vector
3159      *         where the mask is set
3160      */
3161     @ForceInline
3162     public static
3163     ByteVector fromBooleanArray(VectorSpecies<Byte> species,
3164                                           boolean[] a, int offset,
3165                                           VectorMask<Byte> m) {
3166         ByteSpecies vsp = (ByteSpecies) species;
3167         if (offset >= 0 && offset <= (a.length - species.length())) {
3168             ByteVector zero = vsp.zero();
3169             return vsp.dummyVector().fromBooleanArray0(a, offset, m);
3170         }
3171 
3172         // FIXME: optimize
3173         checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
3174         return vsp.vOp(m, i -> (byte) (a[offset + i] ? 1 : 0));
3175     }
3176 
3177     /**
3178      * Gathers a new vector composed of elements from an array of type
3179      * {@code boolean[]},
3180      * using indexes obtained by adding a fixed {@code offset} to a
3181      * series of secondary offsets from an <em>index map</em>.
3182      * The index map is a contiguous sequence of {@code VLENGTH}
3183      * elements in a second array of {@code int}s, starting at a given
3184      * {@code mapOffset}.
3185      * <p>
3186      * For each vector lane, where {@code N} is the vector lane index,
3187      * the lane is loaded from the expression
3188      * {@code (byte) (a[f(N)] ? 1 : 0)}, where {@code f(N)} is the
3189      * index mapping expression
3190      * {@code offset + indexMap[mapOffset + N]]}.
3191      *
3192      * @param species species of desired vector
3193      * @param a the array
3194      * @param offset the offset into the array, may be negative if relative
3195      * indexes in the index map compensate to produce a value within the
3196      * array bounds
3197      * @param indexMap the index map
3198      * @param mapOffset the offset into the index map
3199      * @return the vector loaded from the indexed elements of the array
3200      * @throws IndexOutOfBoundsException
3201      *         if {@code mapOffset+N < 0}
3202      *         or if {@code mapOffset+N >= indexMap.length},
3203      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
3204      *         is an invalid index into {@code a},
3205      *         for any lane {@code N} in the vector
3206      * @see ByteVector#toIntArray()
3207      */
    @ForceInline
    public static
    ByteVector fromBooleanArray(VectorSpecies<Byte> species,
                                          boolean[] a, int offset,
                                          int[] indexMap, int mapOffset) {
        // FIXME: optimize
        // Scalar gather: each lane n loads a[offset + indexMap[mapOffset + n]]
        // and converts true/false to (byte) 1/0.
        ByteSpecies vsp = (ByteSpecies) species;
        return vsp.vOp(n -> (byte) (a[offset + indexMap[mapOffset + n]] ? 1 : 0));
    }
3217 
3218     /**
3219      * Gathers a new vector composed of elements from an array of type
3220      * {@code boolean[]},
3221      * under the control of a mask, and
3222      * using indexes obtained by adding a fixed {@code offset} to a
3223      * series of secondary offsets from an <em>index map</em>.
3224      * The index map is a contiguous sequence of {@code VLENGTH}
3225      * elements in a second array of {@code int}s, starting at a given
3226      * {@code mapOffset}.
3227      * <p>
3228      * For each vector lane, where {@code N} is the vector lane index,
3229      * if the lane is set in the mask,
3230      * the lane is loaded from the expression
3231      * {@code (byte) (a[f(N)] ? 1 : 0)}, where {@code f(N)} is the
3232      * index mapping expression
     * {@code offset + indexMap[mapOffset + N]}.
3234      * Unset lanes in the resulting vector are set to zero.
3235      *
3236      * @param species species of desired vector
3237      * @param a the array
3238      * @param offset the offset into the array, may be negative if relative
3239      * indexes in the index map compensate to produce a value within the
3240      * array bounds
3241      * @param indexMap the index map
3242      * @param mapOffset the offset into the index map
3243      * @param m the mask controlling lane selection
3244      * @return the vector loaded from the indexed elements of the array
3245      * @throws IndexOutOfBoundsException
3246      *         if {@code mapOffset+N < 0}
3247      *         or if {@code mapOffset+N >= indexMap.length},
3248      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
3249      *         is an invalid index into {@code a},
3250      *         for any lane {@code N} in the vector
3251      *         where the mask is set
3252      * @see ByteVector#toIntArray()
3253      */
3254     @ForceInline
3255     public static
3256     ByteVector fromBooleanArray(VectorSpecies<Byte> species,
3257                                           boolean[] a, int offset,
3258                                           int[] indexMap, int mapOffset,
3259                                           VectorMask<Byte> m) {
3260         // FIXME: optimize
3261         ByteSpecies vsp = (ByteSpecies) species;
3262         return vsp.vOp(m, n -> (byte) (a[offset + indexMap[mapOffset + n]] ? 1 : 0));
3263     }
3264 
3265     /**
3266      * Loads a vector from a {@linkplain MemorySegment memory segment}
3267      * starting at an offset into the memory segment.
3268      * Bytes are composed into primitive lane elements according
3269      * to the specified byte order.
3270      * The vector is arranged into lanes according to
3271      * <a href="Vector.html#lane-order">memory ordering</a>.
3272      * <p>
3273      * This method behaves as if it returns the result of calling
3274      * {@link #fromMemorySegment(VectorSpecies,MemorySegment,long,ByteOrder,VectorMask)
3275      * fromMemorySegment()} as follows:
3276      * <pre>{@code
3277      * var m = species.maskAll(true);
3278      * return fromMemorySegment(species, ms, offset, bo, m);
3279      * }</pre>
3280      *
3281      * @param species species of desired vector
3282      * @param ms the memory segment
3283      * @param offset the offset into the memory segment
3284      * @param bo the intended byte order
3285      * @return a vector loaded from the memory segment
3286      * @throws IndexOutOfBoundsException
3287      *         if {@code offset+N*1 < 0}
3288      *         or {@code offset+N*1 >= ms.byteSize()}
3289      *         for any lane {@code N} in the vector
3290      * @throws IllegalArgumentException if the memory segment is a heap segment that is
3291      *         not backed by a {@code byte[]} array.
3292      * @throws IllegalStateException if the memory segment's session is not alive,
3293      *         or if access occurs from a thread other than the thread owning the session.
3294      * @since 19
3295      */
3296     @ForceInline
3297     public static
3298     ByteVector fromMemorySegment(VectorSpecies<Byte> species,
3299                                            MemorySegment ms, long offset,
3300                                            ByteOrder bo) {
3301         offset = checkFromIndexSize(offset, species.vectorByteSize(), ms.byteSize());
3302         ByteSpecies vsp = (ByteSpecies) species;
3303         return vsp.dummyVector().fromMemorySegment0(ms, offset).maybeSwap(bo);
3304     }
3305 
3306     /**
3307      * Loads a vector from a {@linkplain MemorySegment memory segment}
3308      * starting at an offset into the memory segment
3309      * and using a mask.
3310      * Lanes where the mask is unset are filled with the default
3311      * value of {@code byte} (zero).
3312      * Bytes are composed into primitive lane elements according
3313      * to the specified byte order.
3314      * The vector is arranged into lanes according to
3315      * <a href="Vector.html#lane-order">memory ordering</a>.
3316      * <p>
3317      * The following pseudocode illustrates the behavior:
3318      * <pre>{@code
3319      * var slice = ms.asSlice(offset);
3320      * byte[] ar = new byte[species.length()];
3321      * for (int n = 0; n < ar.length; n++) {
3322      *     if (m.laneIsSet(n)) {
     *         ar[n] = slice.getAtIndex(ValueLayout.JAVA_BYTE.withBitAlignment(8), n);
3324      *     }
3325      * }
3326      * ByteVector r = ByteVector.fromArray(species, ar, 0);
3327      * }</pre>
3328      * @implNote
3329      * The byte order argument is ignored.
3330      *
3331      * @param species species of desired vector
3332      * @param ms the memory segment
3333      * @param offset the offset into the memory segment
3334      * @param bo the intended byte order
3335      * @param m the mask controlling lane selection
3336      * @return a vector loaded from the memory segment
3337      * @throws IndexOutOfBoundsException
3338      *         if {@code offset+N*1 < 0}
3339      *         or {@code offset+N*1 >= ms.byteSize()}
3340      *         for any lane {@code N} in the vector
3341      *         where the mask is set
3342      * @throws IllegalArgumentException if the memory segment is a heap segment that is
3343      *         not backed by a {@code byte[]} array.
3344      * @throws IllegalStateException if the memory segment's session is not alive,
3345      *         or if access occurs from a thread other than the thread owning the session.
3346      * @since 19
3347      */
3348     @ForceInline
3349     public static
3350     ByteVector fromMemorySegment(VectorSpecies<Byte> species,
3351                                            MemorySegment ms, long offset,
3352                                            ByteOrder bo,
3353                                            VectorMask<Byte> m) {
3354         ByteSpecies vsp = (ByteSpecies) species;
3355         if (offset >= 0 && offset <= (ms.byteSize() - species.vectorByteSize())) {
3356             return vsp.dummyVector().fromMemorySegment0(ms, offset, m).maybeSwap(bo);
3357         }
3358 
3359         // FIXME: optimize
3360         checkMaskFromIndexSize(offset, vsp, m, 1, ms.byteSize());
3361         return vsp.ldLongOp(ms, offset, m, ByteVector::memorySegmentGet);
3362     }
3363 
3364     // Memory store operations
3365 
3366     /**
3367      * Stores this vector into an array of type {@code byte[]}
3368      * starting at an offset.
3369      * <p>
3370      * For each vector lane, where {@code N} is the vector lane index,
3371      * the lane element at index {@code N} is stored into the array
3372      * element {@code a[offset+N]}.
3373      *
3374      * @param a the array, of type {@code byte[]}
3375      * @param offset the offset into the array
3376      * @throws IndexOutOfBoundsException
3377      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
3378      *         for any lane {@code N} in the vector
3379      */
    @ForceInline
    public final
    void intoArray(byte[] a, int offset) {
        // Check the whole vector span up front; the intrinsic store below is unchecked.
        offset = checkFromIndexSize(offset, length(), a.length);
        ByteSpecies vsp = vspecies();
        VectorSupport.store(
            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
            a, arrayAddress(a, offset),
            this,
            a, offset,
            // Scalar fallback lambda, used when the intrinsic path is not taken.
            (arr, off, v)
            -> v.stOp(arr, (int) off,
                      (arr_, off_, i, e) -> arr_[off_ + i] = e));
    }
3394 
3395     /**
3396      * Stores this vector into an array of type {@code byte[]}
3397      * starting at offset and using a mask.
3398      * <p>
3399      * For each vector lane, where {@code N} is the vector lane index,
3400      * the lane element at index {@code N} is stored into the array
3401      * element {@code a[offset+N]}.
3402      * If the mask lane at {@code N} is unset then the corresponding
3403      * array element {@code a[offset+N]} is left unchanged.
3404      * <p>
3405      * Array range checking is done for lanes where the mask is set.
3406      * Lanes where the mask is unset are not stored and do not need
3407      * to correspond to legitimate elements of {@code a}.
3408      * That is, unset lanes may correspond to array indexes less than
3409      * zero or beyond the end of the array.
3410      *
3411      * @param a the array, of type {@code byte[]}
3412      * @param offset the offset into the array
3413      * @param m the mask controlling lane storage
3414      * @throws IndexOutOfBoundsException
3415      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
3416      *         for any lane {@code N} in the vector
3417      *         where the mask is set
3418      */
3419     @ForceInline
3420     public final
3421     void intoArray(byte[] a, int offset,
3422                    VectorMask<Byte> m) {
3423         if (m.allTrue()) {
3424             intoArray(a, offset);
3425         } else {
3426             ByteSpecies vsp = vspecies();
3427             checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
3428             intoArray0(a, offset, m);
3429         }
3430     }
3431 
3432     /**
3433      * Scatters this vector into an array of type {@code byte[]}
3434      * using indexes obtained by adding a fixed {@code offset} to a
3435      * series of secondary offsets from an <em>index map</em>.
3436      * The index map is a contiguous sequence of {@code VLENGTH}
3437      * elements in a second array of {@code int}s, starting at a given
3438      * {@code mapOffset}.
3439      * <p>
3440      * For each vector lane, where {@code N} is the vector lane index,
3441      * the lane element at index {@code N} is stored into the array
3442      * element {@code a[f(N)]}, where {@code f(N)} is the
3443      * index mapping expression
     * {@code offset + indexMap[mapOffset + N]}.
3445      *
3446      * @param a the array
3447      * @param offset an offset to combine with the index map offsets
3448      * @param indexMap the index map
3449      * @param mapOffset the offset into the index map
3450      * @throws IndexOutOfBoundsException
3451      *         if {@code mapOffset+N < 0}
3452      *         or if {@code mapOffset+N >= indexMap.length},
3453      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
3454      *         is an invalid index into {@code a},
3455      *         for any lane {@code N} in the vector
3456      * @see ByteVector#toIntArray()
3457      */
3458     @ForceInline
3459     public final
3460     void intoArray(byte[] a, int offset,
3461                    int[] indexMap, int mapOffset) {
3462         stOp(a, offset,
3463              (arr, off, i, e) -> {
3464                  int j = indexMap[mapOffset + i];
3465                  arr[off + j] = e;
3466              });
3467     }
3468 
3469     /**
3470      * Scatters this vector into an array of type {@code byte[]},
3471      * under the control of a mask, and
3472      * using indexes obtained by adding a fixed {@code offset} to a
3473      * series of secondary offsets from an <em>index map</em>.
3474      * The index map is a contiguous sequence of {@code VLENGTH}
3475      * elements in a second array of {@code int}s, starting at a given
3476      * {@code mapOffset}.
3477      * <p>
3478      * For each vector lane, where {@code N} is the vector lane index,
3479      * if the mask lane at index {@code N} is set then
3480      * the lane element at index {@code N} is stored into the array
3481      * element {@code a[f(N)]}, where {@code f(N)} is the
3482      * index mapping expression
     * {@code offset + indexMap[mapOffset + N]}.
3484      *
3485      * @param a the array
3486      * @param offset an offset to combine with the index map offsets
3487      * @param indexMap the index map
3488      * @param mapOffset the offset into the index map
3489      * @param m the mask
3490      * @throws IndexOutOfBoundsException
3491      *         if {@code mapOffset+N < 0}
3492      *         or if {@code mapOffset+N >= indexMap.length},
3493      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
3494      *         is an invalid index into {@code a},
3495      *         for any lane {@code N} in the vector
3496      *         where the mask is set
3497      * @see ByteVector#toIntArray()
3498      */
3499     @ForceInline
3500     public final
3501     void intoArray(byte[] a, int offset,
3502                    int[] indexMap, int mapOffset,
3503                    VectorMask<Byte> m) {
3504         stOp(a, offset, m,
3505              (arr, off, i, e) -> {
3506                  int j = indexMap[mapOffset + i];
3507                  arr[off + j] = e;
3508              });
3509     }
3510 
3511 
3512     /**
3513      * Stores this vector into an array of type {@code boolean[]}
3514      * starting at an offset.
3515      * <p>
3516      * For each vector lane, where {@code N} is the vector lane index,
3517      * the lane element at index {@code N}
3518      * is first converted to a {@code boolean} value and then
3519      * stored into the array element {@code a[offset+N]}.
3520      * <p>
3521      * A {@code byte} value is converted to a {@code boolean} value by applying the
3522      * expression {@code (b & 1) != 0} where {@code b} is the byte value.
3523      *
3524      * @param a the array
3525      * @param offset the offset into the array
3526      * @throws IndexOutOfBoundsException
3527      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
3528      *         for any lane {@code N} in the vector
3529      */
    @ForceInline
    public final
    void intoBooleanArray(boolean[] a, int offset) {
        // Check the whole vector span up front; the intrinsic store is unchecked.
        offset = checkFromIndexSize(offset, length(), a.length);
        ByteSpecies vsp = vspecies();
        // Normalize every lane to 0 or 1 so the stored booleans are well-formed.
        ByteVector normalized = this.and((byte) 1);
        VectorSupport.store(
            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
            a, booleanArrayAddress(a, offset),
            normalized,
            a, offset,
            // Scalar fallback lambda, used when the intrinsic path is not taken.
            (arr, off, v)
            -> v.stOp(arr, (int) off,
                      (arr_, off_, i, e) -> arr_[off_ + i] = (e & 1) != 0));
    }
3545 
3546     /**
3547      * Stores this vector into an array of type {@code boolean[]}
3548      * starting at offset and using a mask.
3549      * <p>
3550      * For each vector lane, where {@code N} is the vector lane index,
3551      * the lane element at index {@code N}
3552      * is first converted to a {@code boolean} value and then
3553      * stored into the array element {@code a[offset+N]}.
3554      * If the mask lane at {@code N} is unset then the corresponding
3555      * array element {@code a[offset+N]} is left unchanged.
3556      * <p>
3557      * A {@code byte} value is converted to a {@code boolean} value by applying the
3558      * expression {@code (b & 1) != 0} where {@code b} is the byte value.
3559      * <p>
3560      * Array range checking is done for lanes where the mask is set.
3561      * Lanes where the mask is unset are not stored and do not need
3562      * to correspond to legitimate elements of {@code a}.
3563      * That is, unset lanes may correspond to array indexes less than
3564      * zero or beyond the end of the array.
3565      *
3566      * @param a the array
3567      * @param offset the offset into the array
3568      * @param m the mask controlling lane storage
3569      * @throws IndexOutOfBoundsException
3570      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
3571      *         for any lane {@code N} in the vector
3572      *         where the mask is set
3573      */
3574     @ForceInline
3575     public final
3576     void intoBooleanArray(boolean[] a, int offset,
3577                           VectorMask<Byte> m) {
3578         if (m.allTrue()) {
3579             intoBooleanArray(a, offset);
3580         } else {
3581             ByteSpecies vsp = vspecies();
3582             checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
3583             intoBooleanArray0(a, offset, m);
3584         }
3585     }
3586 
3587     /**
3588      * Scatters this vector into an array of type {@code boolean[]}
3589      * using indexes obtained by adding a fixed {@code offset} to a
3590      * series of secondary offsets from an <em>index map</em>.
3591      * The index map is a contiguous sequence of {@code VLENGTH}
3592      * elements in a second array of {@code int}s, starting at a given
3593      * {@code mapOffset}.
3594      * <p>
3595      * For each vector lane, where {@code N} is the vector lane index,
3596      * the lane element at index {@code N}
3597      * is first converted to a {@code boolean} value and then
3598      * stored into the array
3599      * element {@code a[f(N)]}, where {@code f(N)} is the
3600      * index mapping expression
     * {@code offset + indexMap[mapOffset + N]}.
3602      * <p>
3603      * A {@code byte} value is converted to a {@code boolean} value by applying the
3604      * expression {@code (b & 1) != 0} where {@code b} is the byte value.
3605      *
3606      * @param a the array
3607      * @param offset an offset to combine with the index map offsets
3608      * @param indexMap the index map
3609      * @param mapOffset the offset into the index map
3610      * @throws IndexOutOfBoundsException
3611      *         if {@code mapOffset+N < 0}
3612      *         or if {@code mapOffset+N >= indexMap.length},
3613      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
3614      *         is an invalid index into {@code a},
3615      *         for any lane {@code N} in the vector
3616      * @see ByteVector#toIntArray()
3617      */
3618     @ForceInline
3619     public final
3620     void intoBooleanArray(boolean[] a, int offset,
3621                           int[] indexMap, int mapOffset) {
3622         // FIXME: optimize
3623         stOp(a, offset,
3624              (arr, off, i, e) -> {
3625                  int j = indexMap[mapOffset + i];
3626                  arr[off + j] = (e & 1) != 0;
3627              });
3628     }
3629 
3630     /**
3631      * Scatters this vector into an array of type {@code boolean[]},
3632      * under the control of a mask, and
3633      * using indexes obtained by adding a fixed {@code offset} to a
3634      * series of secondary offsets from an <em>index map</em>.
3635      * The index map is a contiguous sequence of {@code VLENGTH}
3636      * elements in a second array of {@code int}s, starting at a given
3637      * {@code mapOffset}.
3638      * <p>
3639      * For each vector lane, where {@code N} is the vector lane index,
3640      * if the mask lane at index {@code N} is set then
3641      * the lane element at index {@code N}
3642      * is first converted to a {@code boolean} value and then
3643      * stored into the array
3644      * element {@code a[f(N)]}, where {@code f(N)} is the
3645      * index mapping expression
     * {@code offset + indexMap[mapOffset + N]}.
3647      * <p>
3648      * A {@code byte} value is converted to a {@code boolean} value by applying the
3649      * expression {@code (b & 1) != 0} where {@code b} is the byte value.
3650      *
3651      * @param a the array
3652      * @param offset an offset to combine with the index map offsets
3653      * @param indexMap the index map
3654      * @param mapOffset the offset into the index map
3655      * @param m the mask
3656      * @throws IndexOutOfBoundsException
3657      *         if {@code mapOffset+N < 0}
3658      *         or if {@code mapOffset+N >= indexMap.length},
3659      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
3660      *         is an invalid index into {@code a},
3661      *         for any lane {@code N} in the vector
3662      *         where the mask is set
3663      * @see ByteVector#toIntArray()
3664      */
3665     @ForceInline
3666     public final
3667     void intoBooleanArray(boolean[] a, int offset,
3668                           int[] indexMap, int mapOffset,
3669                           VectorMask<Byte> m) {
3670         // FIXME: optimize
3671         stOp(a, offset, m,
3672              (arr, off, i, e) -> {
3673                  int j = indexMap[mapOffset + i];
3674                  arr[off + j] = (e & 1) != 0;
3675              });
3676     }
3677 
3678     /**
3679      * {@inheritDoc} <!--workaround-->
3680      * @since 19
3681      */
3682     @Override
3683     @ForceInline
3684     public final
3685     void intoMemorySegment(MemorySegment ms, long offset,
3686                            ByteOrder bo) {
3687         if (ms.isReadOnly()) {
3688             throw new UnsupportedOperationException("Attempt to write a read-only segment");
3689         }
3690 
3691         offset = checkFromIndexSize(offset, byteSize(), ms.byteSize());
3692         maybeSwap(bo).intoMemorySegment0(ms, offset);
3693     }
3694 
3695     /**
3696      * {@inheritDoc} <!--workaround-->
3697      * @since 19
3698      */
3699     @Override
3700     @ForceInline
3701     public final
3702     void intoMemorySegment(MemorySegment ms, long offset,
3703                            ByteOrder bo,
3704                            VectorMask<Byte> m) {
3705         if (m.allTrue()) {
3706             intoMemorySegment(ms, offset, bo);
3707         } else {
3708             if (ms.isReadOnly()) {
3709                 throw new UnsupportedOperationException("Attempt to write a read-only segment");
3710             }
3711             ByteSpecies vsp = vspecies();
3712             checkMaskFromIndexSize(offset, vsp, m, 1, ms.byteSize());
3713             maybeSwap(bo).intoMemorySegment0(ms, offset, m);
3714         }
3715     }
3716 
3717     // ================================================
3718 
3719     // Low-level memory operations.
3720     //
3721     // Note that all of these operations *must* inline into a context
3722     // where the exact species of the involved vector is a
3723     // compile-time constant.  Otherwise, the intrinsic generation
3724     // will fail and performance will suffer.
3725     //
3726     // In many cases this is achieved by re-deriving a version of the
3727     // method in each concrete subclass (per species).  The re-derived
3728     // method simply calls one of these generic methods, with exact
3729     // parameters for the controlling metadata, which is either a
3730     // typed vector or constant species instance.
3731 
3732     // Unchecked loading operations in native byte order.
3733     // Caller is responsible for applying index checks, masking, and
3734     // byte swapping.
3735 
    /*package-private*/
    abstract
    ByteVector fromArray0(byte[] a, int offset);
    @ForceInline
    final
    ByteVector fromArray0Template(byte[] a, int offset) {
        // Unchecked load in native byte order; the caller has already done
        // bounds checks (see the section comment above).
        ByteSpecies vsp = vspecies();
        return VectorSupport.load(
            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
            a, arrayAddress(a, offset),
            a, offset, vsp,
            // Scalar fallback lambda, used when the intrinsic path is not taken.
            (arr, off, s) -> s.ldOp(arr, (int) off,
                                    (arr_, off_, i) -> arr_[off_ + i]));
    }
3750 
    /*package-private*/
    abstract
    ByteVector fromArray0(byte[] a, int offset, VectorMask<Byte> m);
    @ForceInline
    final
    <M extends VectorMask<Byte>>
    ByteVector fromArray0Template(Class<M> maskClass, byte[] a, int offset, M m) {
        // Masked unchecked load; the mask must belong to this species.
        m.check(species());
        ByteSpecies vsp = vspecies();
        return VectorSupport.loadMasked(
            vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
            a, arrayAddress(a, offset), m,
            a, offset, vsp,
            // Scalar fallback lambda, used when the intrinsic path is not taken.
            (arr, off, s, vm) -> s.ldOp(arr, (int) off, vm,
                                        (arr_, off_, i) -> arr_[off_ + i]));
    }
3767 
3768 
3769 
    /*package-private*/
    abstract
    ByteVector fromBooleanArray0(boolean[] a, int offset);
    @ForceInline
    final
    ByteVector fromBooleanArray0Template(boolean[] a, int offset) {
        // Unchecked boolean load; each element becomes byte 1 (true) or 0 (false).
        ByteSpecies vsp = vspecies();
        return VectorSupport.load(
            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
            a, booleanArrayAddress(a, offset),
            a, offset, vsp,
            // Scalar fallback lambda, used when the intrinsic path is not taken.
            (arr, off, s) -> s.ldOp(arr, (int) off,
                                    (arr_, off_, i) -> (byte) (arr_[off_ + i] ? 1 : 0)));
    }
3784 
    /*package-private*/
    abstract
    ByteVector fromBooleanArray0(boolean[] a, int offset, VectorMask<Byte> m);
    @ForceInline
    final
    <M extends VectorMask<Byte>>
    ByteVector fromBooleanArray0Template(Class<M> maskClass, boolean[] a, int offset, M m) {
        // Masked unchecked boolean load; the mask must belong to this species.
        m.check(species());
        ByteSpecies vsp = vspecies();
        return VectorSupport.loadMasked(
            vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
            a, booleanArrayAddress(a, offset), m,
            a, offset, vsp,
            // Scalar fallback lambda, used when the intrinsic path is not taken.
            (arr, off, s, vm) -> s.ldOp(arr, (int) off, vm,
                                        (arr_, off_, i) -> (byte) (arr_[off_ + i] ? 1 : 0)));
    }
3801 
    abstract
    ByteVector fromMemorySegment0(MemorySegment bb, long offset);
    @ForceInline
    final
    ByteVector fromMemorySegment0Template(MemorySegment ms, long offset) {
        // Unchecked segment load; caller has validated bounds and liveness
        // checks happen inside ScopedMemoryAccess.
        ByteSpecies vsp = vspecies();
        return ScopedMemoryAccess.loadFromMemorySegment(
                vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
                (MemorySegmentProxy) ms, offset, vsp,
                // Scalar fallback lambda, used when the intrinsic path is not taken.
                (msp, off, s) -> {
                    return s.ldLongOp((MemorySegment) msp, off, ByteVector::memorySegmentGet);
                });
    }
3815 
    abstract
    ByteVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Byte> m);
    @ForceInline
    final
    <M extends VectorMask<Byte>>
    ByteVector fromMemorySegment0Template(Class<M> maskClass, MemorySegment ms, long offset, M m) {
        // Masked unchecked segment load; the mask must belong to this species.
        ByteSpecies vsp = vspecies();
        m.check(vsp);
        return ScopedMemoryAccess.loadFromMemorySegmentMasked(
                vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
                (MemorySegmentProxy) ms, offset, m, vsp,
                // Scalar fallback lambda, used when the intrinsic path is not taken.
                (msp, off, s, vm) -> {
                    return s.ldLongOp((MemorySegment) msp, off, vm, ByteVector::memorySegmentGet);
                });
    }
3831 
3832     // Unchecked storing operations in native byte order.
3833     // Caller is responsible for applying index checks, masking, and
3834     // byte swapping.
3835 
    abstract
    void intoArray0(byte[] a, int offset);
    @ForceInline
    final
    void intoArray0Template(byte[] a, int offset) {
        // Unchecked store in native byte order; caller has done bounds checks.
        ByteSpecies vsp = vspecies();
        VectorSupport.store(
            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
            a, arrayAddress(a, offset),
            this, a, offset,
            // Scalar fallback lambda, used when the intrinsic path is not taken.
            (arr, off, v)
            -> v.stOp(arr, (int) off,
                      (arr_, off_, i, e) -> arr_[off_+i] = e));
    }
3850 
    abstract
    void intoArray0(byte[] a, int offset, VectorMask<Byte> m);
    @ForceInline
    final
    <M extends VectorMask<Byte>>
    void intoArray0Template(Class<M> maskClass, byte[] a, int offset, M m) {
        // Masked unchecked store; the mask must belong to this species.
        m.check(species());
        ByteSpecies vsp = vspecies();
        VectorSupport.storeMasked(
            vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
            a, arrayAddress(a, offset),
            this, m, a, offset,
            // Scalar fallback lambda, used when the intrinsic path is not taken.
            (arr, off, v, vm)
            -> v.stOp(arr, (int) off, vm,
                      (arr_, off_, i, e) -> arr_[off_ + i] = e));
    }
3867 
3868 
    abstract
    void intoBooleanArray0(boolean[] a, int offset, VectorMask<Byte> m);
    @ForceInline
    final
    <M extends VectorMask<Byte>>
    void intoBooleanArray0Template(Class<M> maskClass, boolean[] a, int offset, M m) {
        // Masked unchecked boolean store; the mask must belong to this species.
        m.check(species());
        ByteSpecies vsp = vspecies();
        // Normalize lanes to 0 or 1 so the stored booleans are well-formed.
        ByteVector normalized = this.and((byte) 1);
        VectorSupport.storeMasked(
            vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
            a, booleanArrayAddress(a, offset),
            normalized, m, a, offset,
            // Scalar fallback lambda, used when the intrinsic path is not taken.
            (arr, off, v, vm)
            -> v.stOp(arr, (int) off, vm,
                      (arr_, off_, i, e) -> arr_[off_ + i] = (e & 1) != 0));
    }
3886 
    @ForceInline
    final
    void intoMemorySegment0(MemorySegment ms, long offset) {
        // Unchecked segment store; caller has validated bounds, and session
        // liveness checks happen inside ScopedMemoryAccess.
        ByteSpecies vsp = vspecies();
        ScopedMemoryAccess.storeIntoMemorySegment(
                vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
                this,
                (MemorySegmentProxy) ms, offset,
                // Scalar fallback lambda, used when the intrinsic path is not taken.
                (msp, off, v) -> {
                    v.stLongOp((MemorySegment) msp, off, ByteVector::memorySegmentSet);
                });
    }
3899 
    abstract
    void intoMemorySegment0(MemorySegment bb, long offset, VectorMask<Byte> m);
    @ForceInline
    final
    <M extends VectorMask<Byte>>
    void intoMemorySegment0Template(Class<M> maskClass, MemorySegment ms, long offset, M m) {
        // Masked unchecked segment store; the mask must belong to this species.
        ByteSpecies vsp = vspecies();
        m.check(vsp);
        ScopedMemoryAccess.storeIntoMemorySegmentMasked(
                vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
                this, m,
                (MemorySegmentProxy) ms, offset,
                // Scalar fallback lambda, used when the intrinsic path is not taken.
                (msp, off, v, vm) -> {
                    v.stLongOp((MemorySegment) msp, off, vm, ByteVector::memorySegmentSet);
                });
    }
3916 
3917 
3918     // End of low-level memory operations.
3919 
3920     private static
3921     void checkMaskFromIndexSize(int offset,
3922                                 ByteSpecies vsp,
3923                                 VectorMask<Byte> m,
3924                                 int scale,
3925                                 int limit) {
3926         ((AbstractMask<Byte>)m)
3927             .checkIndexByLane(offset, limit, vsp.iota(), scale);
3928     }
3929 
3930     private static
3931     void checkMaskFromIndexSize(long offset,
3932                                 ByteSpecies vsp,
3933                                 VectorMask<Byte> m,
3934                                 int scale,
3935                                 long limit) {
3936         ((AbstractMask<Byte>)m)
3937             .checkIndexByLane(offset, limit, vsp.iota(), scale);
3938     }
3939 
3940     @ForceInline
3941     private void conditionalStoreNYI(int offset,
3942                                      ByteSpecies vsp,
3943                                      VectorMask<Byte> m,
3944                                      int scale,
3945                                      int limit) {
3946         if (offset < 0 || offset + vsp.laneCount() * scale > limit) {
3947             String msg =
3948                 String.format("unimplemented: store @%d in [0..%d), %s in %s",
3949                               offset, limit, m, vsp);
3950             throw new AssertionError(msg);
3951         }
3952     }
3953 
    /*package-private*/
    @Override
    @ForceInline
    final
    ByteVector maybeSwap(ByteOrder bo) {
        // Single-byte lanes have no internal byte order, so any requested
        // swap is the identity.
        return this;
    }
3961 
    // log2 of the per-element scale of a byte[] (i.e. 31 - nlz(scale) for a
    // power-of-two scale); converts a lane index into a byte offset.
    static final int ARRAY_SHIFT =
        31 - Integer.numberOfLeadingZeros(Unsafe.ARRAY_BYTE_INDEX_SCALE);
    // Offset of the first byte[] element, for Unsafe-based addressing.
    static final long ARRAY_BASE =
        Unsafe.ARRAY_BYTE_BASE_OFFSET;
3966 
3967     @ForceInline
3968     static long arrayAddress(byte[] a, int index) {
3969         return ARRAY_BASE + (((long)index) << ARRAY_SHIFT);
3970     }
3971 
3972 
    // log2 of the per-element scale of a boolean[]; see ARRAY_SHIFT above.
    static final int ARRAY_BOOLEAN_SHIFT =
            31 - Integer.numberOfLeadingZeros(Unsafe.ARRAY_BOOLEAN_INDEX_SCALE);
    // Offset of the first boolean[] element, for Unsafe-based addressing.
    static final long ARRAY_BOOLEAN_BASE =
            Unsafe.ARRAY_BOOLEAN_BASE_OFFSET;
3977 
3978     @ForceInline
3979     static long booleanArrayAddress(boolean[] a, int index) {
3980         return ARRAY_BOOLEAN_BASE + (((long)index) << ARRAY_BOOLEAN_SHIFT);
3981     }
3982 
3983     @ForceInline
3984     static long byteArrayAddress(byte[] a, int index) {
3985         return Unsafe.ARRAY_BYTE_BASE_OFFSET + index;
3986     }
3987 
3988     // ================================================
3989 
3990     /// Reinterpreting view methods:
3991     //   lanewise reinterpret: viewAsXVector()
3992     //   keep shape, redraw lanes: reinterpretAsEs()
3993 
3994     /**
3995      * {@inheritDoc} <!--workaround-->
3996      */
3997     @ForceInline
3998     @Override
3999     public final ByteVector reinterpretAsBytes() {
4000         return this;
4001     }
4002 
4003     /**
4004      * {@inheritDoc} <!--workaround-->
4005      */
4006     @ForceInline
4007     @Override
4008     public final ByteVector viewAsIntegralLanes() {
4009         return this;
4010     }
4011 
4012     /**
4013      * {@inheritDoc} <!--workaround-->
4014      *
4015      * @implNote This method always throws
4016      * {@code UnsupportedOperationException}, because there is no floating
4017      * point type of the same size as {@code byte}.  The return type
4018      * of this method is arbitrarily designated as
4019      * {@code Vector<?>}.  Future versions of this API may change the return
4020      * type if additional floating point types become available.
4021      */
4022     @ForceInline
4023     @Override
4024     public final
4025     Vector<?>
4026     viewAsFloatingLanes() {
4027         LaneType flt = LaneType.BYTE.asFloating();
4028         // asFloating() will throw UnsupportedOperationException for the unsupported type byte
4029         throw new AssertionError("Cannot reach here");
4030     }
4031 
4032     // ================================================
4033 
4034     /// Object methods: toString, equals, hashCode
4035     //
4036     // Object methods are defined as if via Arrays.toString, etc.,
4037     // is applied to the array of elements.  Two equal vectors
4038     // are required to have equal species and equal lane values.
4039 
4040     /**
4041      * Returns a string representation of this vector, of the form
4042      * {@code "[0,1,2...]"}, reporting the lane values of this vector,
4043      * in lane order.
4044      *
4045      * The string is produced as if by a call to {@link
4046      * java.util.Arrays#toString(byte[]) Arrays.toString()},
4047      * as appropriate to the {@code byte} array returned by
4048      * {@link #toArray this.toArray()}.
4049      *
4050      * @return a string of the form {@code "[0,1,2...]"}
4051      * reporting the lane values of this vector
4052      */
4053     @Override
4054     @ForceInline
4055     public final
4056     String toString() {
4057         // now that toArray is strongly typed, we can define this
4058         return Arrays.toString(toArray());
4059     }
4060 
4061     /**
4062      * {@inheritDoc} <!--workaround-->
4063      */
4064     @Override
4065     @ForceInline
4066     public final
4067     boolean equals(Object obj) {
4068         if (obj instanceof Vector) {
4069             Vector<?> that = (Vector<?>) obj;
4070             if (this.species().equals(that.species())) {
4071                 return this.eq(that.check(this.species())).allTrue();
4072             }
4073         }
4074         return false;
4075     }
4076 
4077     /**
4078      * {@inheritDoc} <!--workaround-->
4079      */
4080     @Override
4081     @ForceInline
4082     public final
4083     int hashCode() {
4084         // now that toArray is strongly typed, we can define this
4085         return Objects.hash(species(), Arrays.hashCode(toArray()));
4086     }
4087 
4088     // ================================================
4089 
4090     // Species
4091 
4092     /**
4093      * Class representing {@link ByteVector}'s of the same {@link VectorShape VectorShape}.
4094      */
4095     /*package-private*/
4096     static final class ByteSpecies extends AbstractSpecies<Byte> {
4097         private ByteSpecies(VectorShape shape,
4098                 Class<? extends ByteVector> vectorType,
4099                 Class<? extends AbstractMask<Byte>> maskType,
4100                 Function<Object, ByteVector> vectorFactory) {
4101             super(shape, LaneType.of(byte.class),
4102                   vectorType, maskType,
4103                   vectorFactory);
4104             assert(this.elementSize() == Byte.SIZE);
4105         }
4106 
4107         // Specializing overrides:
4108 
4109         @Override
4110         @ForceInline
4111         public final Class<Byte> elementType() {
4112             return byte.class;
4113         }
4114 
4115         @Override
4116         @ForceInline
4117         final Class<Byte> genericElementType() {
4118             return Byte.class;
4119         }
4120 
4121         @SuppressWarnings("unchecked")
4122         @Override
4123         @ForceInline
4124         public final Class<? extends ByteVector> vectorType() {
4125             return (Class<? extends ByteVector>) vectorType;
4126         }
4127 
4128         @Override
4129         @ForceInline
4130         public final long checkValue(long e) {
4131             longToElementBits(e);  // only for exception
4132             return e;
4133         }
4134 
4135         /*package-private*/
4136         @Override
4137         @ForceInline
4138         final ByteVector broadcastBits(long bits) {
4139             return (ByteVector)
4140                 VectorSupport.fromBitsCoerced(
4141                     vectorType, byte.class, laneCount,
4142                     bits, MODE_BROADCAST, this,
4143                     (bits_, s_) -> s_.rvOp(i -> bits_));
4144         }
4145 
4146         /*package-private*/
4147         @ForceInline
4148         final ByteVector broadcast(byte e) {
4149             return broadcastBits(toBits(e));
4150         }
4151 
4152         @Override
4153         @ForceInline
4154         public final ByteVector broadcast(long e) {
4155             return broadcastBits(longToElementBits(e));
4156         }
4157 
4158         /*package-private*/
4159         final @Override
4160         @ForceInline
4161         long longToElementBits(long value) {
4162             // Do the conversion, and then test it for failure.
4163             byte e = (byte) value;
4164             if ((long) e != value) {
4165                 throw badElementBits(value, e);
4166             }
4167             return toBits(e);
4168         }
4169 
4170         /*package-private*/
4171         @ForceInline
4172         static long toIntegralChecked(byte e, boolean convertToInt) {
4173             long value = convertToInt ? (int) e : (long) e;
4174             if ((byte) value != e) {
4175                 throw badArrayBits(e, convertToInt, value);
4176             }
4177             return value;
4178         }
4179 
4180         /* this non-public one is for internal conversions */
4181         @Override
4182         @ForceInline
4183         final ByteVector fromIntValues(int[] values) {
4184             VectorIntrinsics.requireLength(values.length, laneCount);
4185             byte[] va = new byte[laneCount()];
4186             for (int i = 0; i < va.length; i++) {
4187                 int lv = values[i];
4188                 byte v = (byte) lv;
4189                 va[i] = v;
4190                 if ((int)v != lv) {
4191                     throw badElementBits(lv, v);
4192                 }
4193             }
4194             return dummyVector().fromArray0(va, 0);
4195         }
4196 
4197         // Virtual constructors
4198 
4199         @ForceInline
4200         @Override final
4201         public ByteVector fromArray(Object a, int offset) {
4202             // User entry point:  Be careful with inputs.
4203             return ByteVector
4204                 .fromArray(this, (byte[]) a, offset);
4205         }
4206 
4207         @ForceInline
4208         @Override final
4209         ByteVector dummyVector() {
4210             return (ByteVector) super.dummyVector();
4211         }
4212 
4213         /*package-private*/
4214         final @Override
4215         @ForceInline
4216         ByteVector rvOp(RVOp f) {
4217             byte[] res = new byte[laneCount()];
4218             for (int i = 0; i < res.length; i++) {
4219                 byte bits = (byte) f.apply(i);
4220                 res[i] = fromBits(bits);
4221             }
4222             return dummyVector().vectorFactory(res);
4223         }
4224 
4225         ByteVector vOp(FVOp f) {
4226             byte[] res = new byte[laneCount()];
4227             for (int i = 0; i < res.length; i++) {
4228                 res[i] = f.apply(i);
4229             }
4230             return dummyVector().vectorFactory(res);
4231         }
4232 
4233         ByteVector vOp(VectorMask<Byte> m, FVOp f) {
4234             byte[] res = new byte[laneCount()];
4235             boolean[] mbits = ((AbstractMask<Byte>)m).getBits();
4236             for (int i = 0; i < res.length; i++) {
4237                 if (mbits[i]) {
4238                     res[i] = f.apply(i);
4239                 }
4240             }
4241             return dummyVector().vectorFactory(res);
4242         }
4243 
4244         /*package-private*/
4245         @ForceInline
4246         <M> ByteVector ldOp(M memory, int offset,
4247                                       FLdOp<M> f) {
4248             return dummyVector().ldOp(memory, offset, f);
4249         }
4250 
4251         /*package-private*/
4252         @ForceInline
4253         <M> ByteVector ldOp(M memory, int offset,
4254                                       VectorMask<Byte> m,
4255                                       FLdOp<M> f) {
4256             return dummyVector().ldOp(memory, offset, m, f);
4257         }
4258 
4259         /*package-private*/
4260         @ForceInline
4261         ByteVector ldLongOp(MemorySegment memory, long offset,
4262                                       FLdLongOp f) {
4263             return dummyVector().ldLongOp(memory, offset, f);
4264         }
4265 
4266         /*package-private*/
4267         @ForceInline
4268         ByteVector ldLongOp(MemorySegment memory, long offset,
4269                                       VectorMask<Byte> m,
4270                                       FLdLongOp f) {
4271             return dummyVector().ldLongOp(memory, offset, m, f);
4272         }
4273 
4274         /*package-private*/
4275         @ForceInline
4276         <M> void stOp(M memory, int offset, FStOp<M> f) {
4277             dummyVector().stOp(memory, offset, f);
4278         }
4279 
4280         /*package-private*/
4281         @ForceInline
4282         <M> void stOp(M memory, int offset,
4283                       AbstractMask<Byte> m,
4284                       FStOp<M> f) {
4285             dummyVector().stOp(memory, offset, m, f);
4286         }
4287 
4288         /*package-private*/
4289         @ForceInline
4290         void stLongOp(MemorySegment memory, long offset, FStLongOp f) {
4291             dummyVector().stLongOp(memory, offset, f);
4292         }
4293 
4294         /*package-private*/
4295         @ForceInline
4296         void stLongOp(MemorySegment memory, long offset,
4297                       AbstractMask<Byte> m,
4298                       FStLongOp f) {
4299             dummyVector().stLongOp(memory, offset, m, f);
4300         }
4301 
4302         // N.B. Make sure these constant vectors and
4303         // masks load up correctly into registers.
4304         //
4305         // Also, see if we can avoid all that switching.
4306         // Could we cache both vectors and both masks in
4307         // this species object?
4308 
4309         // Zero and iota vector access
4310         @Override
4311         @ForceInline
4312         public final ByteVector zero() {
4313             if ((Class<?>) vectorType() == ByteMaxVector.class)
4314                 return ByteMaxVector.ZERO;
4315             switch (vectorBitSize()) {
4316                 case 64: return Byte64Vector.ZERO;
4317                 case 128: return Byte128Vector.ZERO;
4318                 case 256: return Byte256Vector.ZERO;
4319                 case 512: return Byte512Vector.ZERO;
4320             }
4321             throw new AssertionError();
4322         }
4323 
4324         @Override
4325         @ForceInline
4326         public final ByteVector iota() {
4327             if ((Class<?>) vectorType() == ByteMaxVector.class)
4328                 return ByteMaxVector.IOTA;
4329             switch (vectorBitSize()) {
4330                 case 64: return Byte64Vector.IOTA;
4331                 case 128: return Byte128Vector.IOTA;
4332                 case 256: return Byte256Vector.IOTA;
4333                 case 512: return Byte512Vector.IOTA;
4334             }
4335             throw new AssertionError();
4336         }
4337 
4338         // Mask access
4339         @Override
4340         @ForceInline
4341         public final VectorMask<Byte> maskAll(boolean bit) {
4342             if ((Class<?>) vectorType() == ByteMaxVector.class)
4343                 return ByteMaxVector.ByteMaxMask.maskAll(bit);
4344             switch (vectorBitSize()) {
4345                 case 64: return Byte64Vector.Byte64Mask.maskAll(bit);
4346                 case 128: return Byte128Vector.Byte128Mask.maskAll(bit);
4347                 case 256: return Byte256Vector.Byte256Mask.maskAll(bit);
4348                 case 512: return Byte512Vector.Byte512Mask.maskAll(bit);
4349             }
4350             throw new AssertionError();
4351         }
4352     }
4353 
4354     /**
4355      * Finds a species for an element type of {@code byte} and shape.
4356      *
4357      * @param s the shape
4358      * @return a species for an element type of {@code byte} and shape
4359      * @throws IllegalArgumentException if no such species exists for the shape
4360      */
4361     static ByteSpecies species(VectorShape s) {
4362         Objects.requireNonNull(s);
4363         switch (s.switchKey) {
4364             case VectorShape.SK_64_BIT: return (ByteSpecies) SPECIES_64;
4365             case VectorShape.SK_128_BIT: return (ByteSpecies) SPECIES_128;
4366             case VectorShape.SK_256_BIT: return (ByteSpecies) SPECIES_256;
4367             case VectorShape.SK_512_BIT: return (ByteSpecies) SPECIES_512;
4368             case VectorShape.SK_Max_BIT: return (ByteSpecies) SPECIES_MAX;
4369             default: throw new IllegalArgumentException("Bad shape: " + s);
4370         }
4371     }
4372 
    // Cached species singletons, one per supported shape.  These are the
    // canonical instances returned by species(VectorShape) above.

    /** Species representing {@link ByteVector}s of {@link VectorShape#S_64_BIT VectorShape.S_64_BIT}. */
    public static final VectorSpecies<Byte> SPECIES_64
        = new ByteSpecies(VectorShape.S_64_BIT,
                            Byte64Vector.class,
                            Byte64Vector.Byte64Mask.class,
                            Byte64Vector::new);

    /** Species representing {@link ByteVector}s of {@link VectorShape#S_128_BIT VectorShape.S_128_BIT}. */
    public static final VectorSpecies<Byte> SPECIES_128
        = new ByteSpecies(VectorShape.S_128_BIT,
                            Byte128Vector.class,
                            Byte128Vector.Byte128Mask.class,
                            Byte128Vector::new);

    /** Species representing {@link ByteVector}s of {@link VectorShape#S_256_BIT VectorShape.S_256_BIT}. */
    public static final VectorSpecies<Byte> SPECIES_256
        = new ByteSpecies(VectorShape.S_256_BIT,
                            Byte256Vector.class,
                            Byte256Vector.Byte256Mask.class,
                            Byte256Vector::new);

    /** Species representing {@link ByteVector}s of {@link VectorShape#S_512_BIT VectorShape.S_512_BIT}. */
    public static final VectorSpecies<Byte> SPECIES_512
        = new ByteSpecies(VectorShape.S_512_BIT,
                            Byte512Vector.class,
                            Byte512Vector.Byte512Mask.class,
                            Byte512Vector::new);

    /** Species representing {@link ByteVector}s of {@link VectorShape#S_Max_BIT VectorShape.S_Max_BIT}. */
    public static final VectorSpecies<Byte> SPECIES_MAX
        = new ByteSpecies(VectorShape.S_Max_BIT,
                            ByteMaxVector.class,
                            ByteMaxVector.ByteMaxMask.class,
                            ByteMaxVector::new);

    /**
     * Preferred species for {@link ByteVector}s.
     * A preferred species is a species of maximal bit-size for the platform.
     */
    public static final VectorSpecies<Byte> SPECIES_PREFERRED
        = (ByteSpecies) VectorSpecies.ofPreferred(byte.class);
4414 }