1 /*
   2  * Copyright (c) 2017, 2022, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 package jdk.incubator.vector;
  26 
  27 import java.nio.ByteOrder;
  28 import java.util.Arrays;
  29 import java.util.Objects;
  30 import java.util.function.Function;
  31 
  32 import jdk.incubator.foreign.MemorySegment;
  33 import jdk.incubator.foreign.ValueLayout;
  34 import jdk.internal.access.foreign.MemorySegmentProxy;
  35 import jdk.internal.misc.ScopedMemoryAccess;
  36 import jdk.internal.misc.Unsafe;
  37 import jdk.internal.vm.annotation.ForceInline;
  38 import jdk.internal.vm.vector.VectorSupport;
  39 
  40 import static jdk.internal.vm.vector.VectorSupport.*;
  41 import static jdk.incubator.vector.VectorIntrinsics.*;
  42 
  43 import static jdk.incubator.vector.VectorOperators.*;
  44 
  45 // -- This file was mechanically generated: Do not edit! -- //
  46 
  47 /**
  48  * A specialized {@link Vector} representing an ordered immutable sequence of
  49  * {@code short} values.
  50  */
  51 @SuppressWarnings("cast")  // warning: redundant cast
  52 public abstract class ShortVector extends AbstractVector<Short> {
  53 
    /**
     * Constructs a vector over the given backing array.
     * The array is adopted, not copied; it must not be aliased elsewhere.
     */
    ShortVector(short[] vec) {
        super(vec);
    }

    // Operator kinds never applicable to short lanes:
    // floating-point-only operations are forbidden here.
    static final int FORBID_OPCODE_KIND = VO_ONLYFP;

    // Byte-aligned (alignment 8 bits) short layout for MemorySegment lane access.
    static final ValueLayout.OfShort ELEMENT_LAYOUT = ValueLayout.JAVA_SHORT.withBitAlignment(8);
  61 
  62     @ForceInline
  63     static int opCode(Operator op) {
  64         return VectorOperators.opCode(op, VO_OPCODE_VALID, FORBID_OPCODE_KIND);
  65     }
  66     @ForceInline
  67     static int opCode(Operator op, int requireKind) {
  68         requireKind |= VO_OPCODE_VALID;
  69         return VectorOperators.opCode(op, requireKind, FORBID_OPCODE_KIND);
  70     }
    /** Tests whether {@code op} carries the given operator-kind bit(s). */
    @ForceInline
    static boolean opKind(Operator op, int bit) {
        return VectorOperators.opKind(op, bit);
    }
  75 
  76     // Virtualized factories and operators,
  77     // coded with portable definitions.
  78     // These are all @ForceInline in case
  79     // they need to be used performantly.
  80     // The various shape-specific subclasses
  81     // also specialize them by wrapping
  82     // them in a call like this:
  83     //    return (Byte128Vector)
  84     //       super.bOp((Byte128Vector) o);
  85     // The purpose of that is to forcibly inline
  86     // the generic definition from this file
  87     // into a sharply type- and size-specific
  88     // wrapper in the subclass file, so that
  89     // the JIT can specialize the code.
  90     // The code is only inlined and expanded
  91     // if it gets hot.  Think of it as a cheap
  92     // and lazy version of C++ templates.
  93 
    // Virtualized getter

    /*package-private*/
    // Returns the backing lane array; callers must not mutate or leak it.
    abstract short[] vec();
  98 
    // Virtualized constructors

    /**
     * Build a vector directly using my own constructor.
     * It is an error if the array is aliased elsewhere.
     *
     * @param vec the backing array (must be freshly allocated)
     * @return a new vector of this species wrapping {@code vec}
     */
    /*package-private*/
    abstract ShortVector vectorFactory(short[] vec);
 107 
    /**
     * Build a mask directly using my species.
     * It is an error if the array is aliased elsewhere.
     *
     * @param bits the backing boolean array (must be freshly allocated)
     * @return a new mask of this vector's species wrapping {@code bits}
     */
    /*package-private*/
    @ForceInline
    final
    AbstractMask<Short> maskFactory(boolean[] bits) {
        return vspecies().maskFactory(bits);
    }
 118 
    // Constant loader (takes dummy as vector arg)
    /** Per-lane generator: produces the value of lane {@code i}. */
    interface FVOp {
        short apply(int i);
    }
 123 
 124     /*package-private*/
 125     @ForceInline
 126     final
 127     ShortVector vOp(FVOp f) {
 128         short[] res = new short[length()];
 129         for (int i = 0; i < res.length; i++) {
 130             res[i] = f.apply(i);
 131         }
 132         return vectorFactory(res);
 133     }
 134 
 135     @ForceInline
 136     final
 137     ShortVector vOp(VectorMask<Short> m, FVOp f) {
 138         short[] res = new short[length()];
 139         boolean[] mbits = ((AbstractMask<Short>)m).getBits();
 140         for (int i = 0; i < res.length; i++) {
 141             if (mbits[i]) {
 142                 res[i] = f.apply(i);
 143             }
 144         }
 145         return vectorFactory(res);
 146     }
 147 
    // Unary operator

    /*package-private*/
    /** Per-lane unary function: maps lane {@code i}'s value {@code a}. */
    interface FUnOp {
        short apply(int i, short a);
    }

    /*package-private*/
    // Shape-specialized in subclasses; see uOpTemplate.
    abstract
    ShortVector uOp(FUnOp f);
 158     @ForceInline
 159     final
 160     ShortVector uOpTemplate(FUnOp f) {
 161         short[] vec = vec();
 162         short[] res = new short[length()];
 163         for (int i = 0; i < res.length; i++) {
 164             res[i] = f.apply(i, vec[i]);
 165         }
 166         return vectorFactory(res);
 167     }
 168 
    /*package-private*/
    // Masked unary; shape-specialized in subclasses via uOpTemplate.
    abstract
    ShortVector uOp(VectorMask<Short> m,
                             FUnOp f);
 173     @ForceInline
 174     final
 175     ShortVector uOpTemplate(VectorMask<Short> m,
 176                                      FUnOp f) {
 177         if (m == null) {
 178             return uOpTemplate(f);
 179         }
 180         short[] vec = vec();
 181         short[] res = new short[length()];
 182         boolean[] mbits = ((AbstractMask<Short>)m).getBits();
 183         for (int i = 0; i < res.length; i++) {
 184             res[i] = mbits[i] ? f.apply(i, vec[i]) : vec[i];
 185         }
 186         return vectorFactory(res);
 187     }
 188 
    // Binary operator

    /*package-private*/
    /** Per-lane binary function over lane values {@code a} and {@code b}. */
    interface FBinOp {
        short apply(int i, short a, short b);
    }

    /*package-private*/
    // Shape-specialized in subclasses; see bOpTemplate.
    abstract
    ShortVector bOp(Vector<Short> o,
                             FBinOp f);
 200     @ForceInline
 201     final
 202     ShortVector bOpTemplate(Vector<Short> o,
 203                                      FBinOp f) {
 204         short[] res = new short[length()];
 205         short[] vec1 = this.vec();
 206         short[] vec2 = ((ShortVector)o).vec();
 207         for (int i = 0; i < res.length; i++) {
 208             res[i] = f.apply(i, vec1[i], vec2[i]);
 209         }
 210         return vectorFactory(res);
 211     }
 212 
    /*package-private*/
    // Masked binary; shape-specialized in subclasses via bOpTemplate.
    abstract
    ShortVector bOp(Vector<Short> o,
                             VectorMask<Short> m,
                             FBinOp f);
 218     @ForceInline
 219     final
 220     ShortVector bOpTemplate(Vector<Short> o,
 221                                      VectorMask<Short> m,
 222                                      FBinOp f) {
 223         if (m == null) {
 224             return bOpTemplate(o, f);
 225         }
 226         short[] res = new short[length()];
 227         short[] vec1 = this.vec();
 228         short[] vec2 = ((ShortVector)o).vec();
 229         boolean[] mbits = ((AbstractMask<Short>)m).getBits();
 230         for (int i = 0; i < res.length; i++) {
 231             res[i] = mbits[i] ? f.apply(i, vec1[i], vec2[i]) : vec1[i];
 232         }
 233         return vectorFactory(res);
 234     }
 235 
    // Ternary operator

    /*package-private*/
    /** Per-lane ternary function over lane values {@code a}, {@code b}, {@code c}. */
    interface FTriOp {
        short apply(int i, short a, short b, short c);
    }

    /*package-private*/
    // Shape-specialized in subclasses; see tOpTemplate.
    abstract
    ShortVector tOp(Vector<Short> o1,
                             Vector<Short> o2,
                             FTriOp f);
 248     @ForceInline
 249     final
 250     ShortVector tOpTemplate(Vector<Short> o1,
 251                                      Vector<Short> o2,
 252                                      FTriOp f) {
 253         short[] res = new short[length()];
 254         short[] vec1 = this.vec();
 255         short[] vec2 = ((ShortVector)o1).vec();
 256         short[] vec3 = ((ShortVector)o2).vec();
 257         for (int i = 0; i < res.length; i++) {
 258             res[i] = f.apply(i, vec1[i], vec2[i], vec3[i]);
 259         }
 260         return vectorFactory(res);
 261     }
 262 
    /*package-private*/
    // Masked ternary; shape-specialized in subclasses via tOpTemplate.
    abstract
    ShortVector tOp(Vector<Short> o1,
                             Vector<Short> o2,
                             VectorMask<Short> m,
                             FTriOp f);
 269     @ForceInline
 270     final
 271     ShortVector tOpTemplate(Vector<Short> o1,
 272                                      Vector<Short> o2,
 273                                      VectorMask<Short> m,
 274                                      FTriOp f) {
 275         if (m == null) {
 276             return tOpTemplate(o1, o2, f);
 277         }
 278         short[] res = new short[length()];
 279         short[] vec1 = this.vec();
 280         short[] vec2 = ((ShortVector)o1).vec();
 281         short[] vec3 = ((ShortVector)o2).vec();
 282         boolean[] mbits = ((AbstractMask<Short>)m).getBits();
 283         for (int i = 0; i < res.length; i++) {
 284             res[i] = mbits[i] ? f.apply(i, vec1[i], vec2[i], vec3[i]) : vec1[i];
 285         }
 286         return vectorFactory(res);
 287     }
 288 
    // Reduction operator

    /*package-private*/
    // Cross-lane reduction with identity v; shape-specialized in subclasses.
    abstract
    short rOp(short v, VectorMask<Short> m, FBinOp f);
 294 
 295     @ForceInline
 296     final
 297     short rOpTemplate(short v, VectorMask<Short> m, FBinOp f) {
 298         if (m == null) {
 299             return rOpTemplate(v, f);
 300         }
 301         short[] vec = vec();
 302         boolean[] mbits = ((AbstractMask<Short>)m).getBits();
 303         for (int i = 0; i < vec.length; i++) {
 304             v = mbits[i] ? f.apply(i, v, vec[i]) : v;
 305         }
 306         return v;
 307     }
 308 
 309     @ForceInline
 310     final
 311     short rOpTemplate(short v, FBinOp f) {
 312         short[] vec = vec();
 313         for (int i = 0; i < vec.length; i++) {
 314             v = f.apply(i, v, vec[i]);
 315         }
 316         return v;
 317     }
 318 
    // Memory reference

    /*package-private*/
    /** Lane loader from an arbitrary memory carrier {@code M}. */
    interface FLdOp<M> {
        short apply(M memory, int offset, int i);
    }
 325 
 326     /*package-private*/
 327     @ForceInline
 328     final
 329     <M> ShortVector ldOp(M memory, int offset,
 330                                   FLdOp<M> f) {
 331         //dummy; no vec = vec();
 332         short[] res = new short[length()];
 333         for (int i = 0; i < res.length; i++) {
 334             res[i] = f.apply(memory, offset, i);
 335         }
 336         return vectorFactory(res);
 337     }
 338 
 339     /*package-private*/
 340     @ForceInline
 341     final
 342     <M> ShortVector ldOp(M memory, int offset,
 343                                   VectorMask<Short> m,
 344                                   FLdOp<M> f) {
 345         //short[] vec = vec();
 346         short[] res = new short[length()];
 347         boolean[] mbits = ((AbstractMask<Short>)m).getBits();
 348         for (int i = 0; i < res.length; i++) {
 349             if (mbits[i]) {
 350                 res[i] = f.apply(memory, offset, i);
 351             }
 352         }
 353         return vectorFactory(res);
 354     }
 355 
    /*package-private*/
    /** Lane loader from a {@link MemorySegment} with a long byte offset. */
    interface FLdLongOp {
        short apply(MemorySegment memory, long offset, int i);
    }
 360 
 361     /*package-private*/
 362     @ForceInline
 363     final
 364     ShortVector ldLongOp(MemorySegment memory, long offset,
 365                                   FLdLongOp f) {
 366         //dummy; no vec = vec();
 367         short[] res = new short[length()];
 368         for (int i = 0; i < res.length; i++) {
 369             res[i] = f.apply(memory, offset, i);
 370         }
 371         return vectorFactory(res);
 372     }
 373 
 374     /*package-private*/
 375     @ForceInline
 376     final
 377     ShortVector ldLongOp(MemorySegment memory, long offset,
 378                                   VectorMask<Short> m,
 379                                   FLdLongOp f) {
 380         //short[] vec = vec();
 381         short[] res = new short[length()];
 382         boolean[] mbits = ((AbstractMask<Short>)m).getBits();
 383         for (int i = 0; i < res.length; i++) {
 384             if (mbits[i]) {
 385                 res[i] = f.apply(memory, offset, i);
 386             }
 387         }
 388         return vectorFactory(res);
 389     }
 390 
 391     static short memorySegmentGet(MemorySegment ms, long o, int i) {
 392         return ms.get(ELEMENT_LAYOUT, o + i * 2L);
 393     }
 394 
    /** Lane storer into an arbitrary memory carrier {@code M}. */
    interface FStOp<M> {
        void apply(M memory, int offset, int i, short a);
    }
 398 
 399     /*package-private*/
 400     @ForceInline
 401     final
 402     <M> void stOp(M memory, int offset,
 403                   FStOp<M> f) {
 404         short[] vec = vec();
 405         for (int i = 0; i < vec.length; i++) {
 406             f.apply(memory, offset, i, vec[i]);
 407         }
 408     }
 409 
 410     /*package-private*/
 411     @ForceInline
 412     final
 413     <M> void stOp(M memory, int offset,
 414                   VectorMask<Short> m,
 415                   FStOp<M> f) {
 416         short[] vec = vec();
 417         boolean[] mbits = ((AbstractMask<Short>)m).getBits();
 418         for (int i = 0; i < vec.length; i++) {
 419             if (mbits[i]) {
 420                 f.apply(memory, offset, i, vec[i]);
 421             }
 422         }
 423     }
 424 
    /** Lane storer into a {@link MemorySegment} with a long byte offset. */
    interface FStLongOp {
        void apply(MemorySegment memory, long offset, int i, short a);
    }
 428 
 429     /*package-private*/
 430     @ForceInline
 431     final
 432     void stLongOp(MemorySegment memory, long offset,
 433                   FStLongOp f) {
 434         short[] vec = vec();
 435         for (int i = 0; i < vec.length; i++) {
 436             f.apply(memory, offset, i, vec[i]);
 437         }
 438     }
 439 
 440     /*package-private*/
 441     @ForceInline
 442     final
 443     void stLongOp(MemorySegment memory, long offset,
 444                   VectorMask<Short> m,
 445                   FStLongOp f) {
 446         short[] vec = vec();
 447         boolean[] mbits = ((AbstractMask<Short>)m).getBits();
 448         for (int i = 0; i < vec.length; i++) {
 449             if (mbits[i]) {
 450                 f.apply(memory, offset, i, vec[i]);
 451             }
 452         }
 453     }
 454 
 455     static void memorySegmentSet(MemorySegment ms, long o, int i, short e) {
 456         ms.set(ELEMENT_LAYOUT, o + i * 2L, e);
 457     }
 458 
    // Binary test

    /*package-private*/
    /** Per-lane comparison of {@code a} and {@code b} under condition {@code cond}. */
    interface FBinTest {
        boolean apply(int cond, int i, short a, short b);
    }
 465 
 466     /*package-private*/
 467     @ForceInline
 468     final
 469     AbstractMask<Short> bTest(int cond,
 470                                   Vector<Short> o,
 471                                   FBinTest f) {
 472         short[] vec1 = vec();
 473         short[] vec2 = ((ShortVector)o).vec();
 474         boolean[] bits = new boolean[length()];
 475         for (int i = 0; i < length(); i++){
 476             bits[i] = f.apply(cond, i, vec1[i], vec2[i]);
 477         }
 478         return maskFactory(bits);
 479     }
 480 
 481     /*package-private*/
 482     @ForceInline
 483     static short rotateLeft(short a, int n) {
 484         return (short)(((((short)a) & Short.toUnsignedInt((short)-1)) << (n & Short.SIZE-1)) | ((((short)a) & Short.toUnsignedInt((short)-1)) >>> (Short.SIZE - (n & Short.SIZE-1))));
 485     }
 486 
 487     /*package-private*/
 488     @ForceInline
 489     static short rotateRight(short a, int n) {
 490         return (short)(((((short)a) & Short.toUnsignedInt((short)-1)) >>> (n & Short.SIZE-1)) | ((((short)a) & Short.toUnsignedInt((short)-1)) << (Short.SIZE - (n & Short.SIZE-1))));
 491     }
 492 
    /*package-private*/
    // Covariant narrowing of the species accessor to the short-typed species.
    @Override
    abstract ShortSpecies vspecies();

    /*package-private*/
    // Widens a short lane value to the 64-bit broadcast format (sign-extended).
    @ForceInline
    static long toBits(short e) {
        return  e;
    }

    /*package-private*/
    // Narrows a 64-bit broadcast value back to a short lane value.
    @ForceInline
    static short fromBits(long bits) {
        return ((short)bits);
    }
 508 
 509     static ShortVector expandHelper(Vector<Short> v, VectorMask<Short> m) {
 510         VectorSpecies<Short> vsp = m.vectorSpecies();
 511         ShortVector r  = (ShortVector) vsp.zero();
 512         ShortVector vi = (ShortVector) v;
 513         if (m.allTrue()) {
 514             return vi;
 515         }
 516         for (int i = 0, j = 0; i < vsp.length(); i++) {
 517             if (m.laneIsSet(i)) {
 518                 r = r.withLane(i, vi.lane(j++));
 519             }
 520         }
 521         return r;
 522     }
 523 
 524     static ShortVector compressHelper(Vector<Short> v, VectorMask<Short> m) {
 525         VectorSpecies<Short> vsp = m.vectorSpecies();
 526         ShortVector r  = (ShortVector) vsp.zero();
 527         ShortVector vi = (ShortVector) v;
 528         if (m.allTrue()) {
 529             return vi;
 530         }
 531         for (int i = 0, j = 0; i < vsp.length(); i++) {
 532             if (m.laneIsSet(i)) {
 533                 r = r.withLane(j++, vi.lane(i));
 534             }
 535         }
 536         return r;
 537     }
 538 
 539     // Static factories (other than memory operations)
 540 
 541     // Note: A surprising behavior in javadoc
 542     // sometimes makes a lone /** {@inheritDoc} */
 543     // comment drop the method altogether,
 544     // apparently if the method mentions an
 545     // parameter or return type of Vector<Short>
 546     // instead of Vector<E> as originally specified.
 547     // Adding an empty HTML fragment appears to
 548     // nudge javadoc into providing the desired
 549     // inherited documentation.  We use the HTML
 550     // comment <!--workaround--> for this.
 551 
    /**
     * Returns a vector of the given species
     * where all lane elements are set to
     * zero, the default primitive value.
     *
     * @param species species of the desired zero vector
     * @return a zero vector
     */
    @ForceInline
    public static ShortVector zero(VectorSpecies<Short> species) {
        ShortSpecies vsp = (ShortSpecies) species;
        // Intrinsic broadcast of the bit pattern 0; the trailing lambda is
        // the portable fallback used when the intrinsic does not apply.
        return VectorSupport.fromBitsCoerced(vsp.vectorType(), short.class, species.length(),
                                0, MODE_BROADCAST, vsp,
                                ((bits_, s_) -> s_.rvOp(i -> bits_)));
    }
 567 
    /**
     * Returns a vector of the same species as this one
     * where all lane elements are set to
     * the primitive value {@code e}.
     *
     * The contents of the current vector are discarded;
     * only the species is relevant to this operation.
     *
     * <p> This method returns the value of this expression:
     * {@code ShortVector.broadcast(this.species(), e)}.
     *
     * @apiNote
     * Unlike the similar method named {@code broadcast()}
     * in the supertype {@code Vector}, this method does not
     * need to validate its argument, and cannot throw
     * {@code IllegalArgumentException}.  This method is
     * therefore preferable to the supertype method.
     *
     * @param e the value to broadcast
     * @return a vector where all lane elements are set to
     *         the primitive value {@code e}
     * @see #broadcast(VectorSpecies,long)
     * @see Vector#broadcast(long)
     * @see VectorSpecies#broadcast(long)
     */
    // Implemented by shape-specific subclasses via broadcastTemplate(short).
    public abstract ShortVector broadcast(short e);
 594 
 595     /**
 596      * Returns a vector of the given species
 597      * where all lane elements are set to
 598      * the primitive value {@code e}.
 599      *
 600      * @param species species of the desired vector
 601      * @param e the value to broadcast
 602      * @return a vector where all lane elements are set to
 603      *         the primitive value {@code e}
 604      * @see #broadcast(long)
 605      * @see Vector#broadcast(long)
 606      * @see VectorSpecies#broadcast(long)
 607      */
 608     @ForceInline
 609     public static ShortVector broadcast(VectorSpecies<Short> species, short e) {
 610         ShortSpecies vsp = (ShortSpecies) species;
 611         return vsp.broadcast(e);
 612     }
 613 
 614     /*package-private*/
 615     @ForceInline
 616     final ShortVector broadcastTemplate(short e) {
 617         ShortSpecies vsp = vspecies();
 618         return vsp.broadcast(e);
 619     }
 620 
    /**
     * {@inheritDoc} <!--workaround-->
     * @apiNote
     * When working with vector subtypes like {@code ShortVector},
     * {@linkplain #broadcast(short) the more strongly typed method}
     * is typically selected.  It can be explicitly selected
     * using a cast: {@code v.broadcast((short)e)}.
     * The two expressions will produce numerically identical results.
     */
    // Implemented by shape-specific subclasses via broadcastTemplate(long).
    @Override
    public abstract ShortVector broadcast(long e);
 632 
 633     /**
 634      * Returns a vector of the given species
 635      * where all lane elements are set to
 636      * the primitive value {@code e}.
 637      *
 638      * The {@code long} value must be accurately representable
 639      * by the {@code ETYPE} of the vector species, so that
 640      * {@code e==(long)(ETYPE)e}.
 641      *
 642      * @param species species of the desired vector
 643      * @param e the value to broadcast
 644      * @return a vector where all lane elements are set to
 645      *         the primitive value {@code e}
 646      * @throws IllegalArgumentException
 647      *         if the given {@code long} value cannot
 648      *         be represented by the vector's {@code ETYPE}
 649      * @see #broadcast(VectorSpecies,short)
 650      * @see VectorSpecies#checkValue(long)
 651      */
 652     @ForceInline
 653     public static ShortVector broadcast(VectorSpecies<Short> species, long e) {
 654         ShortSpecies vsp = (ShortSpecies) species;
 655         return vsp.broadcast(e);
 656     }
 657 
    /*package-private*/
    // Portable implementation: delegate the long-valued broadcast to the species.
    @ForceInline
    final ShortVector broadcastTemplate(long e) {
        return vspecies().broadcast(e);
    }
 663 
    // Unary lanewise support

    /**
     * {@inheritDoc} <!--workaround-->
     */
    // Implemented by shape-specific subclasses via lanewiseTemplate(Unary).
    public abstract
    ShortVector lanewise(VectorOperators.Unary op);
 671 
    /** Portable template for the unmasked unary lanewise operation. */
    @ForceInline
    final
    ShortVector lanewiseTemplate(VectorOperators.Unary op) {
        if (opKind(op, VO_SPECIAL)) {
            if (op == ZOMO) {
                // ZOMO: zero lanes stay 0, nonzero lanes become -1.
                return blend(broadcast(-1), compare(NE, 0));
            }
            if (op == NOT) {
                // NOT is rewritten as XOR with all-ones.
                return broadcast(-1).lanewise(XOR, this);
            }
        }
        int opc = opCode(op);
        return VectorSupport.unaryOp(
            opc, getClass(), null, short.class, length(),
            this, null,
            UN_IMPL.find(op, opc, ShortVector::unaryOperations));
    }
 689 
    /**
     * {@inheritDoc} <!--workaround-->
     */
    // Implemented by shape-specific subclasses via the masked lanewiseTemplate.
    @Override
    public abstract
    ShortVector lanewise(VectorOperators.Unary op,
                                  VectorMask<Short> m);
    /** Portable template for the masked unary lanewise operation. */
    @ForceInline
    final
    ShortVector lanewiseTemplate(VectorOperators.Unary op,
                                          Class<? extends VectorMask<Short>> maskClass,
                                          VectorMask<Short> m) {
        m.check(maskClass, this);
        if (opKind(op, VO_SPECIAL)) {
            if (op == ZOMO) {
                // ZOMO under mask: zero lanes stay 0, nonzero set lanes become -1.
                return blend(broadcast(-1), compare(NE, 0, m));
            }
            if (op == NOT) {
                // NOT is rewritten as XOR with all-ones, applied under the mask.
                return lanewise(XOR, broadcast(-1), m);
            }
        }
        int opc = opCode(op);
        return VectorSupport.unaryOp(
            opc, getClass(), maskClass, short.class, length(),
            this, m,
            UN_IMPL.find(op, opc, ShortVector::unaryOperations));
    }
 717 
    // Cache of scalar fallback implementations for unary ops, keyed by opcode.
    private static final
    ImplCache<Unary, UnaryOperation<ShortVector, VectorMask<Short>>>
        UN_IMPL = new ImplCache<>(Unary.class, ShortVector.class);
 721 
    // Supplies the scalar fallback for each supported unary opcode,
    // or null when the opcode has no fallback for short lanes.
    private static UnaryOperation<ShortVector, VectorMask<Short>> unaryOperations(int opc_) {
        switch (opc_) {
            case VECTOR_OP_NEG: return (v0, m) ->
                    v0.uOp(m, (i, a) -> (short) -a);
            case VECTOR_OP_ABS: return (v0, m) ->
                    v0.uOp(m, (i, a) -> (short) Math.abs(a));
            case VECTOR_OP_BIT_COUNT: return (v0, m) ->
                    v0.uOp(m, (i, a) -> (short) bitCount(a));
            case VECTOR_OP_TZ_COUNT: return (v0, m) ->
                    v0.uOp(m, (i, a) -> (short) numberOfTrailingZeros(a));
            case VECTOR_OP_LZ_COUNT: return (v0, m) ->
                    v0.uOp(m, (i, a) -> (short) numberOfLeadingZeros(a));
            case VECTOR_OP_REVERSE: return (v0, m) ->
                    v0.uOp(m, (i, a) -> reverse(a));
            case VECTOR_OP_REVERSE_BYTES: return (v0, m) ->
                    v0.uOp(m, (i, a) -> (short) Short.reverseBytes(a));
            default: return null;
        }
    }
 741 
    // Binary lanewise support

    /**
     * {@inheritDoc} <!--workaround-->
     * @see #lanewise(VectorOperators.Binary,short)
     * @see #lanewise(VectorOperators.Binary,short,VectorMask)
     */
    // Implemented by shape-specific subclasses via lanewiseTemplate(Binary, Vector).
    @Override
    public abstract
    ShortVector lanewise(VectorOperators.Binary op,
                                  Vector<Short> v);
    /**
     * Portable template for the unmasked binary lanewise operation.
     * Special operators are first rewritten into JIT-supported forms.
     */
    @ForceInline
    final
    ShortVector lanewiseTemplate(VectorOperators.Binary op,
                                          Vector<Short> v) {
        ShortVector that = (ShortVector) v;
        that.check(this);

        if (opKind(op, VO_SPECIAL  | VO_SHIFT)) {
            if (op == FIRST_NONZERO) {
                // Rewrite as OR after zeroing that's lanes wherever this is nonzero.
                // FIXME: Support this in the JIT.
                VectorMask<Short> thisNZ
                    = this.viewAsIntegralLanes().compare(NE, (short) 0);
                that = that.blend((short) 0, thisNZ.cast(vspecies()));
                op = OR_UNCHECKED;
            }
            if (opKind(op, VO_SHIFT)) {
                // As per shift specification for Java, mask the shift count.
                // This allows the JIT to ignore some ISA details.
                that = that.lanewise(AND, SHIFT_MASK);
            }
            if (op == AND_NOT) {
                // Rewrite as AND with the complement of the second operand.
                // FIXME: Support this in the JIT.
                that = that.lanewise(NOT);
                op = AND;
            } else if (op == DIV) {
                // Pre-check for division by zero before entering the intrinsic.
                VectorMask<Short> eqz = that.eq((short) 0);
                if (eqz.anyTrue()) {
                    throw that.divZeroException();
                }
            }
        }

        int opc = opCode(op);
        return VectorSupport.binaryOp(
            opc, getClass(), null, short.class, length(),
            this, that, null,
            BIN_IMPL.find(op, opc, ShortVector::binaryOperations));
    }
 791 
    /**
     * {@inheritDoc} <!--workaround-->
     * @see #lanewise(VectorOperators.Binary,short,VectorMask)
     */
    // Implemented by shape-specific subclasses via the masked lanewiseTemplate.
    @Override
    public abstract
    ShortVector lanewise(VectorOperators.Binary op,
                                  Vector<Short> v,
                                  VectorMask<Short> m);
    /**
     * Portable template for the masked binary lanewise operation.
     * Special operators are first rewritten into JIT-supported forms.
     */
    @ForceInline
    final
    ShortVector lanewiseTemplate(VectorOperators.Binary op,
                                          Class<? extends VectorMask<Short>> maskClass,
                                          Vector<Short> v, VectorMask<Short> m) {
        ShortVector that = (ShortVector) v;
        that.check(this);
        m.check(maskClass, this);

        if (opKind(op, VO_SPECIAL  | VO_SHIFT)) {
            if (op == FIRST_NONZERO) {
                // Rewrite as OR after zeroing that's lanes wherever this is nonzero.
                // FIXME: Support this in the JIT.
                VectorMask<Short> thisNZ
                    = this.viewAsIntegralLanes().compare(NE, (short) 0);
                that = that.blend((short) 0, thisNZ.cast(vspecies()));
                op = OR_UNCHECKED;
            }
            if (opKind(op, VO_SHIFT)) {
                // As per shift specification for Java, mask the shift count.
                // This allows the JIT to ignore some ISA details.
                that = that.lanewise(AND, SHIFT_MASK);
            }
            if (op == AND_NOT) {
                // Rewrite as AND with the complement of the second operand.
                // FIXME: Support this in the JIT.
                that = that.lanewise(NOT);
                op = AND;
            } else if (op == DIV) {
                // Throw only if a zero divisor lands in a lane selected by m.
                VectorMask<Short> eqz = that.eq((short)0);
                if (eqz.and(m).anyTrue()) {
                    throw that.divZeroException();
                }
                // suppress div/0 exceptions in unset lanes
                that = that.lanewise(NOT, eqz);
            }
        }

        int opc = opCode(op);
        return VectorSupport.binaryOp(
            opc, getClass(), maskClass, short.class, length(),
            this, that, m,
            BIN_IMPL.find(op, opc, ShortVector::binaryOperations));
    }
 843 
    // Cache of scalar fallback implementations for binary ops, keyed by opcode.
    private static final
    ImplCache<Binary, BinaryOperation<ShortVector, VectorMask<Short>>>
        BIN_IMPL = new ImplCache<>(Binary.class, ShortVector.class);
 847 
    // Supplies the scalar fallback for each supported binary opcode,
    // or null when the opcode has no fallback for short lanes.
    private static BinaryOperation<ShortVector, VectorMask<Short>> binaryOperations(int opc_) {
        switch (opc_) {
            case VECTOR_OP_ADD: return (v0, v1, vm) ->
                    v0.bOp(v1, vm, (i, a, b) -> (short)(a + b));
            case VECTOR_OP_SUB: return (v0, v1, vm) ->
                    v0.bOp(v1, vm, (i, a, b) -> (short)(a - b));
            case VECTOR_OP_MUL: return (v0, v1, vm) ->
                    v0.bOp(v1, vm, (i, a, b) -> (short)(a * b));
            case VECTOR_OP_DIV: return (v0, v1, vm) ->
                    v0.bOp(v1, vm, (i, a, b) -> (short)(a / b));
            case VECTOR_OP_MAX: return (v0, v1, vm) ->
                    v0.bOp(v1, vm, (i, a, b) -> (short)Math.max(a, b));
            case VECTOR_OP_MIN: return (v0, v1, vm) ->
                    v0.bOp(v1, vm, (i, a, b) -> (short)Math.min(a, b));
            case VECTOR_OP_AND: return (v0, v1, vm) ->
                    v0.bOp(v1, vm, (i, a, b) -> (short)(a & b));
            case VECTOR_OP_OR: return (v0, v1, vm) ->
                    v0.bOp(v1, vm, (i, a, b) -> (short)(a | b));
            case VECTOR_OP_XOR: return (v0, v1, vm) ->
                    v0.bOp(v1, vm, (i, a, b) -> (short)(a ^ b));
            case VECTOR_OP_LSHIFT: return (v0, v1, vm) ->
                    v0.bOp(v1, vm, (i, a, n) -> (short)(a << n));
            case VECTOR_OP_RSHIFT: return (v0, v1, vm) ->
                    v0.bOp(v1, vm, (i, a, n) -> (short)(a >> n));
            case VECTOR_OP_URSHIFT: return (v0, v1, vm) ->
                    // Mask first so the unsigned shift sees a zero-extended value.
                    v0.bOp(v1, vm, (i, a, n) -> (short)((a & LSHR_SETUP_MASK) >>> n));
            case VECTOR_OP_LROTATE: return (v0, v1, vm) ->
                    v0.bOp(v1, vm, (i, a, n) -> rotateLeft(a, (int)n));
            case VECTOR_OP_RROTATE: return (v0, v1, vm) ->
                    v0.bOp(v1, vm, (i, a, n) -> rotateRight(a, (int)n));
            default: return null;
        }
    }
 881 
 882     // FIXME: Maybe all of the public final methods in this file (the
 883     // simple ones that just call lanewise) should be pushed down to
 884     // the X-VectorBits template.  They can't optimize properly at
 885     // this level, and must rely on inlining.  Does it work?
 886     // (If it works, of course keep the code here.)
 887 
 888     /**
 889      * Combines the lane values of this vector
 890      * with the value of a broadcast scalar.
 891      *
 892      * This is a lane-wise binary operation which applies
 893      * the selected operation to each lane.
 894      * The return value will be equal to this expression:
 895      * {@code this.lanewise(op, this.broadcast(e))}.
 896      *
 897      * @param op the operation used to process lane values
 898      * @param e the input scalar
 899      * @return the result of applying the operation lane-wise
 900      *         to the two input vectors
 901      * @throws UnsupportedOperationException if this vector does
 902      *         not support the requested operation
 903      * @see #lanewise(VectorOperators.Binary,Vector)
 904      * @see #lanewise(VectorOperators.Binary,short,VectorMask)
 905      */
 906     @ForceInline
 907     public final
 908     ShortVector lanewise(VectorOperators.Binary op,
 909                                   short e) {
 910         if (opKind(op, VO_SHIFT) && (short)(int)e == e) {
 911             return lanewiseShift(op, (int) e);
 912         }
 913         if (op == AND_NOT) {
 914             op = AND; e = (short) ~e;
 915         }
 916         return lanewise(op, broadcast(e));
 917     }
 918 
 919     /**
 920      * Combines the lane values of this vector
 921      * with the value of a broadcast scalar,
 922      * with selection of lane elements controlled by a mask.
 923      *
 924      * This is a masked lane-wise binary operation which applies
 925      * the selected operation to each lane.
 926      * The return value will be equal to this expression:
 927      * {@code this.lanewise(op, this.broadcast(e), m)}.
 928      *
 929      * @param op the operation used to process lane values
 930      * @param e the input scalar
 931      * @param m the mask controlling lane selection
 932      * @return the result of applying the operation lane-wise
 933      *         to the input vector and the scalar
 934      * @throws UnsupportedOperationException if this vector does
 935      *         not support the requested operation
 936      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
 937      * @see #lanewise(VectorOperators.Binary,short)
 938      */
 939     @ForceInline
 940     public final
 941     ShortVector lanewise(VectorOperators.Binary op,
 942                                   short e,
 943                                   VectorMask<Short> m) {
 944         if (opKind(op, VO_SHIFT) && (short)(int)e == e) {
 945             return lanewiseShift(op, (int) e, m);
 946         }
 947         if (op == AND_NOT) {
 948             op = AND; e = (short) ~e;
 949         }
 950         return lanewise(op, broadcast(e), m);
 951     }
 952 
 953     /**
 954      * {@inheritDoc} <!--workaround-->
 955      * @apiNote
 956      * When working with vector subtypes like {@code ShortVector},
 957      * {@linkplain #lanewise(VectorOperators.Binary,short)
 958      * the more strongly typed method}
 959      * is typically selected.  It can be explicitly selected
 960      * using a cast: {@code v.lanewise(op,(short)e)}.
 961      * The two expressions will produce numerically identical results.
 962      */
 963     @ForceInline
 964     public final
 965     ShortVector lanewise(VectorOperators.Binary op,
 966                                   long e) {
 967         short e1 = (short) e;
 968         if ((long)e1 != e
 969             // allow shift ops to clip down their int parameters
 970             && !(opKind(op, VO_SHIFT) && (int)e1 == e)) {
 971             vspecies().checkValue(e);  // for exception
 972         }
 973         return lanewise(op, e1);
 974     }
 975 
 976     /**
 977      * {@inheritDoc} <!--workaround-->
 978      * @apiNote
 979      * When working with vector subtypes like {@code ShortVector},
 980      * {@linkplain #lanewise(VectorOperators.Binary,short,VectorMask)
 981      * the more strongly typed method}
 982      * is typically selected.  It can be explicitly selected
 983      * using a cast: {@code v.lanewise(op,(short)e,m)}.
 984      * The two expressions will produce numerically identical results.
 985      */
 986     @ForceInline
 987     public final
 988     ShortVector lanewise(VectorOperators.Binary op,
 989                                   long e, VectorMask<Short> m) {
 990         short e1 = (short) e;
 991         if ((long)e1 != e
 992             // allow shift ops to clip down their int parameters
 993             && !(opKind(op, VO_SHIFT) && (int)e1 == e)) {
 994             vspecies().checkValue(e);  // for exception
 995         }
 996         return lanewise(op, e1, m);
 997     }
 998 
    /*package-private*/
    // Fast path for shift ops with an int count; avoids broadcasting the
    // count into a vector.  Implemented per-shape via lanewiseShiftTemplate.
    abstract ShortVector
    lanewiseShift(VectorOperators.Binary op, int e);
1002 
    /*package-private*/
    // Unmasked shift-by-scalar: masks the count per the Java shift spec and
    // hands off to the VectorSupport.broadcastInt intrinsic (the null
    // arguments mean "no mask"), with a cached Java fallback.
    @ForceInline
    final ShortVector
    lanewiseShiftTemplate(VectorOperators.Binary op, int e) {
        // Special handling for these.  FIXME: Refactor?
        assert(opKind(op, VO_SHIFT));
        // As per shift specification for Java, mask the shift count.
        e &= SHIFT_MASK;
        int opc = opCode(op);
        return VectorSupport.broadcastInt(
            opc, getClass(), null, short.class, length(),
            this, e, null,
            BIN_INT_IMPL.find(op, opc, ShortVector::broadcastIntOperations));
    }
1017 
    /*package-private*/
    // Masked variant of the shift-by-scalar fast path; implemented per-shape
    // via the masked lanewiseShiftTemplate.
    abstract ShortVector
    lanewiseShift(VectorOperators.Binary op, int e, VectorMask<Short> m);
1021 
    /*package-private*/
    // Masked shift-by-scalar: validates the mask's class and species, masks
    // the count per the Java shift spec, then dispatches to the
    // VectorSupport.broadcastInt intrinsic with a cached Java fallback.
    @ForceInline
    final ShortVector
    lanewiseShiftTemplate(VectorOperators.Binary op,
                          Class<? extends VectorMask<Short>> maskClass,
                          int e, VectorMask<Short> m) {
        m.check(maskClass, this);
        assert(opKind(op, VO_SHIFT));
        // As per shift specification for Java, mask the shift count.
        e &= SHIFT_MASK;
        int opc = opCode(op);
        return VectorSupport.broadcastInt(
            opc, getClass(), maskClass, short.class, length(),
            this, e, m,
            BIN_INT_IMPL.find(op, opc, ShortVector::broadcastIntOperations));
    }
1038 
    // Cache of Java-fallback implementations for shift-by-scalar operators,
    // keyed by opcode; consulted when broadcastInt is not intrinsified.
    private static final
    ImplCache<Binary,VectorBroadcastIntOp<ShortVector, VectorMask<Short>>> BIN_INT_IMPL
        = new ImplCache<>(Binary.class, ShortVector.class);
1042 
1043     private static VectorBroadcastIntOp<ShortVector, VectorMask<Short>> broadcastIntOperations(int opc_) {
1044         switch (opc_) {
1045             case VECTOR_OP_LSHIFT: return (v, n, m) ->
1046                     v.uOp(m, (i, a) -> (short)(a << n));
1047             case VECTOR_OP_RSHIFT: return (v, n, m) ->
1048                     v.uOp(m, (i, a) -> (short)(a >> n));
1049             case VECTOR_OP_URSHIFT: return (v, n, m) ->
1050                     v.uOp(m, (i, a) -> (short)((a & LSHR_SETUP_MASK) >>> n));
1051             case VECTOR_OP_LROTATE: return (v, n, m) ->
1052                     v.uOp(m, (i, a) -> rotateLeft(a, (int)n));
1053             case VECTOR_OP_RROTATE: return (v, n, m) ->
1054                     v.uOp(m, (i, a) -> rotateRight(a, (int)n));
1055             default: return null;
1056         }
1057     }
1058 
    // As per shift specification for Java, mask the shift count.
    // We mask 0X3F (long), 0X1F (int), 0x0F (short), 0x7 (byte).
    // The latter two maskings go beyond the JLS, but seem reasonable
    // since our lane types are first-class types, not just dressed
    // up ints.
    private static final int SHIFT_MASK = (Short.SIZE - 1);
    // Also simulate >>> on sub-word variables with a mask.
    // (0xFFFF zero-extends the short before the int-width >>> applies.)
    private static final int LSHR_SETUP_MASK = ((1 << Short.SIZE) - 1);
1067 
1068     // Ternary lanewise support
1069 
1070     // Ternary operators come in eight variations:
1071     //   lanewise(op, [broadcast(e1)|v1], [broadcast(e2)|v2])
1072     //   lanewise(op, [broadcast(e1)|v1], [broadcast(e2)|v2], mask)
1073 
1074     // It is annoying to support all of these variations of masking
1075     // and broadcast, but it would be more surprising not to continue
1076     // the obvious pattern started by unary and binary.
1077 
1078    /**
1079      * {@inheritDoc} <!--workaround-->
1080      * @see #lanewise(VectorOperators.Ternary,short,short,VectorMask)
1081      * @see #lanewise(VectorOperators.Ternary,Vector,short,VectorMask)
1082      * @see #lanewise(VectorOperators.Ternary,short,Vector,VectorMask)
1083      * @see #lanewise(VectorOperators.Ternary,short,short)
1084      * @see #lanewise(VectorOperators.Ternary,Vector,short)
1085      * @see #lanewise(VectorOperators.Ternary,short,Vector)
1086      */
1087     @Override
1088     public abstract
1089     ShortVector lanewise(VectorOperators.Ternary op,
1090                                                   Vector<Short> v1,
1091                                                   Vector<Short> v2);
    // Shared implementation of the unmasked ternary lanewise operation.
    // Checks species agreement, then dispatches to the VectorSupport
    // intrinsic (the null arguments mean "no mask").
    @ForceInline
    final
    ShortVector lanewiseTemplate(VectorOperators.Ternary op,
                                          Vector<Short> v1,
                                          Vector<Short> v2) {
        ShortVector that = (ShortVector) v1;
        ShortVector tother = (ShortVector) v2;
        // It's a word: https://www.dictionary.com/browse/tother
        // See also Chapter 11 of Dickens, Our Mutual Friend:
        // "Totherest Governor," replied Mr Riderhood...
        that.check(this);
        tother.check(this);
        if (op == BITWISE_BLEND) {
            // FIXME: Support this in the JIT.
            // bitwiseBlend(a,b,c) == a ^ ((a ^ b) & c), built from XOR/AND.
            that = this.lanewise(XOR, that).lanewise(AND, tother);
            return this.lanewise(XOR, that);
        }
        int opc = opCode(op);
        return VectorSupport.ternaryOp(
            opc, getClass(), null, short.class, length(),
            this, that, tother, null,
            TERN_IMPL.find(op, opc, ShortVector::ternaryOperations));
    }
1115 
1116     /**
1117      * {@inheritDoc} <!--workaround-->
1118      * @see #lanewise(VectorOperators.Ternary,short,short,VectorMask)
1119      * @see #lanewise(VectorOperators.Ternary,Vector,short,VectorMask)
1120      * @see #lanewise(VectorOperators.Ternary,short,Vector,VectorMask)
1121      */
1122     @Override
1123     public abstract
1124     ShortVector lanewise(VectorOperators.Ternary op,
1125                                   Vector<Short> v1,
1126                                   Vector<Short> v2,
1127                                   VectorMask<Short> m);
    // Shared implementation of the masked ternary lanewise operation.
    // Checks species agreement for both operands and validates the mask,
    // then dispatches to the VectorSupport intrinsic.
    @ForceInline
    final
    ShortVector lanewiseTemplate(VectorOperators.Ternary op,
                                          Class<? extends VectorMask<Short>> maskClass,
                                          Vector<Short> v1,
                                          Vector<Short> v2,
                                          VectorMask<Short> m) {
        ShortVector that = (ShortVector) v1;
        ShortVector tother = (ShortVector) v2;
        // It's a word: https://www.dictionary.com/browse/tother
        // See also Chapter 11 of Dickens, Our Mutual Friend:
        // "Totherest Governor," replied Mr Riderhood...
        that.check(this);
        tother.check(this);
        m.check(maskClass, this);

        if (op == BITWISE_BLEND) {
            // FIXME: Support this in the JIT.
            // bitwiseBlend(a,b,c) == a ^ ((a ^ b) & c); only the final XOR
            // is masked, so unset lanes keep their original values.
            that = this.lanewise(XOR, that).lanewise(AND, tother);
            return this.lanewise(XOR, that, m);
        }
        int opc = opCode(op);
        return VectorSupport.ternaryOp(
            opc, getClass(), maskClass, short.class, length(),
            this, that, tother, m,
            TERN_IMPL.find(op, opc, ShortVector::ternaryOperations));
    }
1155 
    // Cache of Java-fallback implementations for ternary operators, keyed by
    // opcode; consulted when VectorSupport.ternaryOp is not intrinsified.
    private static final
    ImplCache<Ternary, TernaryOperation<ShortVector, VectorMask<Short>>>
        TERN_IMPL = new ImplCache<>(Ternary.class, ShortVector.class);
1159 
1160     private static TernaryOperation<ShortVector, VectorMask<Short>> ternaryOperations(int opc_) {
1161         switch (opc_) {
1162             default: return null;
1163         }
1164     }
1165 
1166     /**
1167      * Combines the lane values of this vector
1168      * with the values of two broadcast scalars.
1169      *
1170      * This is a lane-wise ternary operation which applies
1171      * the selected operation to each lane.
1172      * The return value will be equal to this expression:
1173      * {@code this.lanewise(op, this.broadcast(e1), this.broadcast(e2))}.
1174      *
1175      * @param op the operation used to combine lane values
1176      * @param e1 the first input scalar
1177      * @param e2 the second input scalar
1178      * @return the result of applying the operation lane-wise
1179      *         to the input vector and the scalars
1180      * @throws UnsupportedOperationException if this vector does
1181      *         not support the requested operation
1182      * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
1183      * @see #lanewise(VectorOperators.Ternary,short,short,VectorMask)
1184      */
1185     @ForceInline
1186     public final
1187     ShortVector lanewise(VectorOperators.Ternary op, //(op,e1,e2)
1188                                   short e1,
1189                                   short e2) {
1190         return lanewise(op, broadcast(e1), broadcast(e2));
1191     }
1192 
1193     /**
1194      * Combines the lane values of this vector
1195      * with the values of two broadcast scalars,
1196      * with selection of lane elements controlled by a mask.
1197      *
1198      * This is a masked lane-wise ternary operation which applies
1199      * the selected operation to each lane.
1200      * The return value will be equal to this expression:
1201      * {@code this.lanewise(op, this.broadcast(e1), this.broadcast(e2), m)}.
1202      *
1203      * @param op the operation used to combine lane values
1204      * @param e1 the first input scalar
1205      * @param e2 the second input scalar
1206      * @param m the mask controlling lane selection
1207      * @return the result of applying the operation lane-wise
1208      *         to the input vector and the scalars
1209      * @throws UnsupportedOperationException if this vector does
1210      *         not support the requested operation
1211      * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
1212      * @see #lanewise(VectorOperators.Ternary,short,short)
1213      */
1214     @ForceInline
1215     public final
1216     ShortVector lanewise(VectorOperators.Ternary op, //(op,e1,e2,m)
1217                                   short e1,
1218                                   short e2,
1219                                   VectorMask<Short> m) {
1220         return lanewise(op, broadcast(e1), broadcast(e2), m);
1221     }
1222 
1223     /**
1224      * Combines the lane values of this vector
1225      * with the values of another vector and a broadcast scalar.
1226      *
1227      * This is a lane-wise ternary operation which applies
1228      * the selected operation to each lane.
1229      * The return value will be equal to this expression:
1230      * {@code this.lanewise(op, v1, this.broadcast(e2))}.
1231      *
1232      * @param op the operation used to combine lane values
1233      * @param v1 the other input vector
1234      * @param e2 the input scalar
1235      * @return the result of applying the operation lane-wise
1236      *         to the input vectors and the scalar
1237      * @throws UnsupportedOperationException if this vector does
1238      *         not support the requested operation
1239      * @see #lanewise(VectorOperators.Ternary,short,short)
1240      * @see #lanewise(VectorOperators.Ternary,Vector,short,VectorMask)
1241      */
1242     @ForceInline
1243     public final
1244     ShortVector lanewise(VectorOperators.Ternary op, //(op,v1,e2)
1245                                   Vector<Short> v1,
1246                                   short e2) {
1247         return lanewise(op, v1, broadcast(e2));
1248     }
1249 
1250     /**
1251      * Combines the lane values of this vector
1252      * with the values of another vector and a broadcast scalar,
1253      * with selection of lane elements controlled by a mask.
1254      *
1255      * This is a masked lane-wise ternary operation which applies
1256      * the selected operation to each lane.
1257      * The return value will be equal to this expression:
1258      * {@code this.lanewise(op, v1, this.broadcast(e2), m)}.
1259      *
1260      * @param op the operation used to combine lane values
1261      * @param v1 the other input vector
1262      * @param e2 the input scalar
1263      * @param m the mask controlling lane selection
1264      * @return the result of applying the operation lane-wise
1265      *         to the input vectors and the scalar
1266      * @throws UnsupportedOperationException if this vector does
1267      *         not support the requested operation
1268      * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
1269      * @see #lanewise(VectorOperators.Ternary,short,short,VectorMask)
1270      * @see #lanewise(VectorOperators.Ternary,Vector,short)
1271      */
1272     @ForceInline
1273     public final
1274     ShortVector lanewise(VectorOperators.Ternary op, //(op,v1,e2,m)
1275                                   Vector<Short> v1,
1276                                   short e2,
1277                                   VectorMask<Short> m) {
1278         return lanewise(op, v1, broadcast(e2), m);
1279     }
1280 
1281     /**
1282      * Combines the lane values of this vector
1283      * with the values of another vector and a broadcast scalar.
1284      *
1285      * This is a lane-wise ternary operation which applies
1286      * the selected operation to each lane.
1287      * The return value will be equal to this expression:
1288      * {@code this.lanewise(op, this.broadcast(e1), v2)}.
1289      *
1290      * @param op the operation used to combine lane values
1291      * @param e1 the input scalar
1292      * @param v2 the other input vector
1293      * @return the result of applying the operation lane-wise
1294      *         to the input vectors and the scalar
1295      * @throws UnsupportedOperationException if this vector does
1296      *         not support the requested operation
1297      * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
1298      * @see #lanewise(VectorOperators.Ternary,short,Vector,VectorMask)
1299      */
1300     @ForceInline
1301     public final
1302     ShortVector lanewise(VectorOperators.Ternary op, //(op,e1,v2)
1303                                   short e1,
1304                                   Vector<Short> v2) {
1305         return lanewise(op, broadcast(e1), v2);
1306     }
1307 
1308     /**
1309      * Combines the lane values of this vector
1310      * with the values of another vector and a broadcast scalar,
1311      * with selection of lane elements controlled by a mask.
1312      *
1313      * This is a masked lane-wise ternary operation which applies
1314      * the selected operation to each lane.
1315      * The return value will be equal to this expression:
1316      * {@code this.lanewise(op, this.broadcast(e1), v2, m)}.
1317      *
1318      * @param op the operation used to combine lane values
1319      * @param e1 the input scalar
1320      * @param v2 the other input vector
1321      * @param m the mask controlling lane selection
1322      * @return the result of applying the operation lane-wise
1323      *         to the input vectors and the scalar
1324      * @throws UnsupportedOperationException if this vector does
1325      *         not support the requested operation
1326      * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
1327      * @see #lanewise(VectorOperators.Ternary,short,Vector)
1328      */
1329     @ForceInline
1330     public final
1331     ShortVector lanewise(VectorOperators.Ternary op, //(op,e1,v2,m)
1332                                   short e1,
1333                                   Vector<Short> v2,
1334                                   VectorMask<Short> m) {
1335         return lanewise(op, broadcast(e1), v2, m);
1336     }
1337 
1338     // (Thus endeth the Great and Mighty Ternary Ogdoad.)
1339     // https://en.wikipedia.org/wiki/Ogdoad
1340 
1341     /// FULL-SERVICE BINARY METHODS: ADD, SUB, MUL, DIV
1342     //
1343     // These include masked and non-masked versions.
1344     // This subclass adds broadcast (masked or not).
1345 
1346     /**
1347      * {@inheritDoc} <!--workaround-->
1348      * @see #add(short)
1349      */
1350     @Override
1351     @ForceInline
1352     public final ShortVector add(Vector<Short> v) {
1353         return lanewise(ADD, v);
1354     }
1355 
1356     /**
1357      * Adds this vector to the broadcast of an input scalar.
1358      *
1359      * This is a lane-wise binary operation which applies
1360      * the primitive addition operation ({@code +}) to each lane.
1361      *
1362      * This method is also equivalent to the expression
1363      * {@link #lanewise(VectorOperators.Binary,short)
1364      *    lanewise}{@code (}{@link VectorOperators#ADD
1365      *    ADD}{@code , e)}.
1366      *
1367      * @param e the input scalar
1368      * @return the result of adding each lane of this vector to the scalar
1369      * @see #add(Vector)
1370      * @see #broadcast(short)
1371      * @see #add(short,VectorMask)
1372      * @see VectorOperators#ADD
1373      * @see #lanewise(VectorOperators.Binary,Vector)
1374      * @see #lanewise(VectorOperators.Binary,short)
1375      */
1376     @ForceInline
1377     public final
1378     ShortVector add(short e) {
1379         return lanewise(ADD, e);
1380     }
1381 
1382     /**
1383      * {@inheritDoc} <!--workaround-->
1384      * @see #add(short,VectorMask)
1385      */
1386     @Override
1387     @ForceInline
1388     public final ShortVector add(Vector<Short> v,
1389                                           VectorMask<Short> m) {
1390         return lanewise(ADD, v, m);
1391     }
1392 
1393     /**
1394      * Adds this vector to the broadcast of an input scalar,
1395      * selecting lane elements controlled by a mask.
1396      *
1397      * This is a masked lane-wise binary operation which applies
1398      * the primitive addition operation ({@code +}) to each lane.
1399      *
1400      * This method is also equivalent to the expression
1401      * {@link #lanewise(VectorOperators.Binary,short,VectorMask)
1402      *    lanewise}{@code (}{@link VectorOperators#ADD
1403      *    ADD}{@code , s, m)}.
1404      *
1405      * @param e the input scalar
1406      * @param m the mask controlling lane selection
1407      * @return the result of adding each lane of this vector to the scalar
1408      * @see #add(Vector,VectorMask)
1409      * @see #broadcast(short)
1410      * @see #add(short)
1411      * @see VectorOperators#ADD
1412      * @see #lanewise(VectorOperators.Binary,Vector)
1413      * @see #lanewise(VectorOperators.Binary,short)
1414      */
1415     @ForceInline
1416     public final ShortVector add(short e,
1417                                           VectorMask<Short> m) {
1418         return lanewise(ADD, e, m);
1419     }
1420 
1421     /**
1422      * {@inheritDoc} <!--workaround-->
1423      * @see #sub(short)
1424      */
1425     @Override
1426     @ForceInline
1427     public final ShortVector sub(Vector<Short> v) {
1428         return lanewise(SUB, v);
1429     }
1430 
1431     /**
1432      * Subtracts an input scalar from this vector.
1433      *
1434      * This is a masked lane-wise binary operation which applies
1435      * the primitive subtraction operation ({@code -}) to each lane.
1436      *
1437      * This method is also equivalent to the expression
1438      * {@link #lanewise(VectorOperators.Binary,short)
1439      *    lanewise}{@code (}{@link VectorOperators#SUB
1440      *    SUB}{@code , e)}.
1441      *
1442      * @param e the input scalar
1443      * @return the result of subtracting the scalar from each lane of this vector
1444      * @see #sub(Vector)
1445      * @see #broadcast(short)
1446      * @see #sub(short,VectorMask)
1447      * @see VectorOperators#SUB
1448      * @see #lanewise(VectorOperators.Binary,Vector)
1449      * @see #lanewise(VectorOperators.Binary,short)
1450      */
1451     @ForceInline
1452     public final ShortVector sub(short e) {
1453         return lanewise(SUB, e);
1454     }
1455 
1456     /**
1457      * {@inheritDoc} <!--workaround-->
1458      * @see #sub(short,VectorMask)
1459      */
1460     @Override
1461     @ForceInline
1462     public final ShortVector sub(Vector<Short> v,
1463                                           VectorMask<Short> m) {
1464         return lanewise(SUB, v, m);
1465     }
1466 
1467     /**
1468      * Subtracts an input scalar from this vector
1469      * under the control of a mask.
1470      *
1471      * This is a masked lane-wise binary operation which applies
1472      * the primitive subtraction operation ({@code -}) to each lane.
1473      *
1474      * This method is also equivalent to the expression
1475      * {@link #lanewise(VectorOperators.Binary,short,VectorMask)
1476      *    lanewise}{@code (}{@link VectorOperators#SUB
1477      *    SUB}{@code , s, m)}.
1478      *
1479      * @param e the input scalar
1480      * @param m the mask controlling lane selection
1481      * @return the result of subtracting the scalar from each lane of this vector
1482      * @see #sub(Vector,VectorMask)
1483      * @see #broadcast(short)
1484      * @see #sub(short)
1485      * @see VectorOperators#SUB
1486      * @see #lanewise(VectorOperators.Binary,Vector)
1487      * @see #lanewise(VectorOperators.Binary,short)
1488      */
1489     @ForceInline
1490     public final ShortVector sub(short e,
1491                                           VectorMask<Short> m) {
1492         return lanewise(SUB, e, m);
1493     }
1494 
1495     /**
1496      * {@inheritDoc} <!--workaround-->
1497      * @see #mul(short)
1498      */
1499     @Override
1500     @ForceInline
1501     public final ShortVector mul(Vector<Short> v) {
1502         return lanewise(MUL, v);
1503     }
1504 
1505     /**
1506      * Multiplies this vector by the broadcast of an input scalar.
1507      *
1508      * This is a lane-wise binary operation which applies
1509      * the primitive multiplication operation ({@code *}) to each lane.
1510      *
1511      * This method is also equivalent to the expression
1512      * {@link #lanewise(VectorOperators.Binary,short)
1513      *    lanewise}{@code (}{@link VectorOperators#MUL
1514      *    MUL}{@code , e)}.
1515      *
1516      * @param e the input scalar
1517      * @return the result of multiplying this vector by the given scalar
1518      * @see #mul(Vector)
1519      * @see #broadcast(short)
1520      * @see #mul(short,VectorMask)
1521      * @see VectorOperators#MUL
1522      * @see #lanewise(VectorOperators.Binary,Vector)
1523      * @see #lanewise(VectorOperators.Binary,short)
1524      */
1525     @ForceInline
1526     public final ShortVector mul(short e) {
1527         return lanewise(MUL, e);
1528     }
1529 
1530     /**
1531      * {@inheritDoc} <!--workaround-->
1532      * @see #mul(short,VectorMask)
1533      */
1534     @Override
1535     @ForceInline
1536     public final ShortVector mul(Vector<Short> v,
1537                                           VectorMask<Short> m) {
1538         return lanewise(MUL, v, m);
1539     }
1540 
1541     /**
1542      * Multiplies this vector by the broadcast of an input scalar,
1543      * selecting lane elements controlled by a mask.
1544      *
1545      * This is a masked lane-wise binary operation which applies
1546      * the primitive multiplication operation ({@code *}) to each lane.
1547      *
1548      * This method is also equivalent to the expression
1549      * {@link #lanewise(VectorOperators.Binary,short,VectorMask)
1550      *    lanewise}{@code (}{@link VectorOperators#MUL
1551      *    MUL}{@code , s, m)}.
1552      *
1553      * @param e the input scalar
1554      * @param m the mask controlling lane selection
1555      * @return the result of muling each lane of this vector to the scalar
1556      * @see #mul(Vector,VectorMask)
1557      * @see #broadcast(short)
1558      * @see #mul(short)
1559      * @see VectorOperators#MUL
1560      * @see #lanewise(VectorOperators.Binary,Vector)
1561      * @see #lanewise(VectorOperators.Binary,short)
1562      */
1563     @ForceInline
1564     public final ShortVector mul(short e,
1565                                           VectorMask<Short> m) {
1566         return lanewise(MUL, e, m);
1567     }
1568 
1569     /**
1570      * {@inheritDoc} <!--workaround-->
1571      * @apiNote If there is a zero divisor, {@code
1572      * ArithmeticException} will be thrown.
1573      */
1574     @Override
1575     @ForceInline
1576     public final ShortVector div(Vector<Short> v) {
1577         return lanewise(DIV, v);
1578     }
1579 
1580     /**
1581      * Divides this vector by the broadcast of an input scalar.
1582      *
1583      * This is a lane-wise binary operation which applies
1584      * the primitive division operation ({@code /}) to each lane.
1585      *
1586      * This method is also equivalent to the expression
1587      * {@link #lanewise(VectorOperators.Binary,short)
1588      *    lanewise}{@code (}{@link VectorOperators#DIV
1589      *    DIV}{@code , e)}.
1590      *
1591      * @apiNote If there is a zero divisor, {@code
1592      * ArithmeticException} will be thrown.
1593      *
1594      * @param e the input scalar
1595      * @return the result of dividing each lane of this vector by the scalar
1596      * @see #div(Vector)
1597      * @see #broadcast(short)
1598      * @see #div(short,VectorMask)
1599      * @see VectorOperators#DIV
1600      * @see #lanewise(VectorOperators.Binary,Vector)
1601      * @see #lanewise(VectorOperators.Binary,short)
1602      */
1603     @ForceInline
1604     public final ShortVector div(short e) {
1605         return lanewise(DIV, e);
1606     }
1607 
1608     /**
1609      * {@inheritDoc} <!--workaround-->
1610      * @see #div(short,VectorMask)
1611      * @apiNote If there is a zero divisor, {@code
1612      * ArithmeticException} will be thrown.
1613      */
1614     @Override
1615     @ForceInline
1616     public final ShortVector div(Vector<Short> v,
1617                                           VectorMask<Short> m) {
1618         return lanewise(DIV, v, m);
1619     }
1620 
1621     /**
1622      * Divides this vector by the broadcast of an input scalar,
1623      * selecting lane elements controlled by a mask.
1624      *
1625      * This is a masked lane-wise binary operation which applies
1626      * the primitive division operation ({@code /}) to each lane.
1627      *
1628      * This method is also equivalent to the expression
1629      * {@link #lanewise(VectorOperators.Binary,short,VectorMask)
1630      *    lanewise}{@code (}{@link VectorOperators#DIV
1631      *    DIV}{@code , s, m)}.
1632      *
1633      * @apiNote If there is a zero divisor, {@code
1634      * ArithmeticException} will be thrown.
1635      *
1636      * @param e the input scalar
1637      * @param m the mask controlling lane selection
1638      * @return the result of dividing each lane of this vector by the scalar
1639      * @see #div(Vector,VectorMask)
1640      * @see #broadcast(short)
1641      * @see #div(short)
1642      * @see VectorOperators#DIV
1643      * @see #lanewise(VectorOperators.Binary,Vector)
1644      * @see #lanewise(VectorOperators.Binary,short)
1645      */
1646     @ForceInline
1647     public final ShortVector div(short e,
1648                                           VectorMask<Short> m) {
1649         return lanewise(DIV, e, m);
1650     }
1651 
1652     /// END OF FULL-SERVICE BINARY METHODS
1653 
1654     /// SECOND-TIER BINARY METHODS
1655     //
1656     // There are no masked versions.
1657 
1658     /**
1659      * {@inheritDoc} <!--workaround-->
1660      */
1661     @Override
1662     @ForceInline
1663     public final ShortVector min(Vector<Short> v) {
1664         return lanewise(MIN, v);
1665     }
1666 
1667     // FIXME:  "broadcast of an input scalar" is really wordy.  Reduce?
1668     /**
1669      * Computes the smaller of this vector and the broadcast of an input scalar.
1670      *
1671      * This is a lane-wise binary operation which applies the
1672      * operation {@code Math.min()} to each pair of
1673      * corresponding lane values.
1674      *
1675      * This method is also equivalent to the expression
1676      * {@link #lanewise(VectorOperators.Binary,short)
1677      *    lanewise}{@code (}{@link VectorOperators#MIN
1678      *    MIN}{@code , e)}.
1679      *
1680      * @param e the input scalar
1681      * @return the result of multiplying this vector by the given scalar
1682      * @see #min(Vector)
1683      * @see #broadcast(short)
1684      * @see VectorOperators#MIN
1685      * @see #lanewise(VectorOperators.Binary,short,VectorMask)
1686      */
1687     @ForceInline
1688     public final ShortVector min(short e) {
1689         return lanewise(MIN, e);
1690     }
1691 
1692     /**
1693      * {@inheritDoc} <!--workaround-->
1694      */
1695     @Override
1696     @ForceInline
1697     public final ShortVector max(Vector<Short> v) {
1698         return lanewise(MAX, v);
1699     }
1700 
1701     /**
1702      * Computes the larger of this vector and the broadcast of an input scalar.
1703      *
1704      * This is a lane-wise binary operation which applies the
1705      * operation {@code Math.max()} to each pair of
1706      * corresponding lane values.
1707      *
1708      * This method is also equivalent to the expression
1709      * {@link #lanewise(VectorOperators.Binary,short)
1710      *    lanewise}{@code (}{@link VectorOperators#MAX
1711      *    MAX}{@code , e)}.
1712      *
1713      * @param e the input scalar
1714      * @return the result of multiplying this vector by the given scalar
1715      * @see #max(Vector)
1716      * @see #broadcast(short)
1717      * @see VectorOperators#MAX
1718      * @see #lanewise(VectorOperators.Binary,short,VectorMask)
1719      */
1720     @ForceInline
1721     public final ShortVector max(short e) {
1722         return lanewise(MAX, e);
1723     }
1724 
1725     // common bitwise operators: and, or, not (with scalar versions)
1726     /**
1727      * Computes the bitwise logical conjunction ({@code &})
1728      * of this vector and a second input vector.
1729      *
1730      * This is a lane-wise binary operation which applies the
1731      * the primitive bitwise "and" operation ({@code &})
1732      * to each pair of corresponding lane values.
1733      *
1734      * This method is also equivalent to the expression
1735      * {@link #lanewise(VectorOperators.Binary,Vector)
1736      *    lanewise}{@code (}{@link VectorOperators#AND
1737      *    AND}{@code , v)}.
1738      *
1739      * <p>
1740      * This is not a full-service named operation like
1741      * {@link #add(Vector) add}.  A masked version of
1742      * this operation is not directly available
1743      * but may be obtained via the masked version of
1744      * {@code lanewise}.
1745      *
1746      * @param v a second input vector
1747      * @return the bitwise {@code &} of this vector and the second input vector
1748      * @see #and(short)
1749      * @see #or(Vector)
1750      * @see #not()
1751      * @see VectorOperators#AND
1752      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
1753      */
1754     @ForceInline
1755     public final ShortVector and(Vector<Short> v) {
1756         return lanewise(AND, v);
1757     }
1758 
1759     /**
1760      * Computes the bitwise logical conjunction ({@code &})
1761      * of this vector and a scalar.
1762      *
1763      * This is a lane-wise binary operation which applies the
1764      * the primitive bitwise "and" operation ({@code &})
1765      * to each pair of corresponding lane values.
1766      *
1767      * This method is also equivalent to the expression
1768      * {@link #lanewise(VectorOperators.Binary,Vector)
1769      *    lanewise}{@code (}{@link VectorOperators#AND
1770      *    AND}{@code , e)}.
1771      *
1772      * @param e an input scalar
1773      * @return the bitwise {@code &} of this vector and scalar
1774      * @see #and(Vector)
1775      * @see VectorOperators#AND
1776      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
1777      */
1778     @ForceInline
1779     public final ShortVector and(short e) {
1780         return lanewise(AND, e);
1781     }
1782 
1783     /**
1784      * Computes the bitwise logical disjunction ({@code |})
1785      * of this vector and a second input vector.
1786      *
1787      * This is a lane-wise binary operation which applies the
1788      * the primitive bitwise "or" operation ({@code |})
1789      * to each pair of corresponding lane values.
1790      *
1791      * This method is also equivalent to the expression
1792      * {@link #lanewise(VectorOperators.Binary,Vector)
1793      *    lanewise}{@code (}{@link VectorOperators#OR
1794      *    AND}{@code , v)}.
1795      *
1796      * <p>
1797      * This is not a full-service named operation like
1798      * {@link #add(Vector) add}.  A masked version of
1799      * this operation is not directly available
1800      * but may be obtained via the masked version of
1801      * {@code lanewise}.
1802      *
1803      * @param v a second input vector
1804      * @return the bitwise {@code |} of this vector and the second input vector
1805      * @see #or(short)
1806      * @see #and(Vector)
1807      * @see #not()
1808      * @see VectorOperators#OR
1809      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
1810      */
1811     @ForceInline
1812     public final ShortVector or(Vector<Short> v) {
1813         return lanewise(OR, v);
1814     }
1815 
1816     /**
1817      * Computes the bitwise logical disjunction ({@code |})
1818      * of this vector and a scalar.
1819      *
1820      * This is a lane-wise binary operation which applies the
1821      * the primitive bitwise "or" operation ({@code |})
1822      * to each pair of corresponding lane values.
1823      *
1824      * This method is also equivalent to the expression
1825      * {@link #lanewise(VectorOperators.Binary,Vector)
1826      *    lanewise}{@code (}{@link VectorOperators#OR
1827      *    OR}{@code , e)}.
1828      *
1829      * @param e an input scalar
1830      * @return the bitwise {@code |} of this vector and scalar
1831      * @see #or(Vector)
1832      * @see VectorOperators#OR
1833      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
1834      */
1835     @ForceInline
1836     public final ShortVector or(short e) {
1837         return lanewise(OR, e);
1838     }
1839 
1840 
1841 
1842     /// UNARY METHODS
1843 
1844     /**
1845      * {@inheritDoc} <!--workaround-->
1846      */
1847     @Override
1848     @ForceInline
1849     public final
1850     ShortVector neg() {
1851         return lanewise(NEG);
1852     }
1853 
1854     /**
1855      * {@inheritDoc} <!--workaround-->
1856      */
1857     @Override
1858     @ForceInline
1859     public final
1860     ShortVector abs() {
1861         return lanewise(ABS);
1862     }
1863 
1864     static int bitCount(short a) {
1865         return Integer.bitCount((int)a & 0xFFFF);
1866     }
1867     static int numberOfTrailingZeros(short a) {
1868         return a != 0 ? Integer.numberOfTrailingZeros(a) : 16;
1869     }
1870     static int numberOfLeadingZeros(short a) {
1871         return a >= 0 ? Integer.numberOfLeadingZeros(a) - 16 : 0;
1872     }
1873 
1874     static short reverse(short a) {
1875         if (a == 0 || a == -1) return a;
1876 
1877         short b = rotateLeft(a, 8);
1878         b = (short) (((b & 0x5555) << 1) | ((b & 0xAAAA) >>> 1));
1879         b = (short) (((b & 0x3333) << 2) | ((b & 0xCCCC) >>> 2));
1880         b = (short) (((b & 0x0F0F) << 4) | ((b & 0xF0F0) >>> 4));
1881         return b;
1882     }
1883 
    // not (~)
    /**
     * Computes the bitwise logical complement ({@code ~})
     * of this vector.
     *
     * This is a lane-wise unary operation which applies
     * the primitive bitwise "not" operation ({@code ~})
     * to each lane value.
     *
     * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Unary)
     *    lanewise}{@code (}{@link VectorOperators#NOT
     *    NOT}{@code )}.
     *
     * <p>
     * This is not a full-service named operation like
     * {@link #add(Vector) add}.  A masked version of
     * this operation is not directly available
     * but may be obtained via the masked version of
     * {@code lanewise}.
     *
     * @return the bitwise complement {@code ~} of this vector
     * @see #and(Vector)
     * @see VectorOperators#NOT
     * @see #lanewise(VectorOperators.Unary,VectorMask)
     */
    @ForceInline
    public final ShortVector not() {
        return lanewise(NOT);
    }
1914 
1915 
1916     /// COMPARISONS
1917 
1918     /**
1919      * {@inheritDoc} <!--workaround-->
1920      */
1921     @Override
1922     @ForceInline
1923     public final
1924     VectorMask<Short> eq(Vector<Short> v) {
1925         return compare(EQ, v);
1926     }
1927 
1928     /**
1929      * Tests if this vector is equal to an input scalar.
1930      *
1931      * This is a lane-wise binary test operation which applies
1932      * the primitive equals operation ({@code ==}) to each lane.
1933      * The result is the same as {@code compare(VectorOperators.Comparison.EQ, e)}.
1934      *
1935      * @param e the input scalar
1936      * @return the result mask of testing if this vector
1937      *         is equal to {@code e}
1938      * @see #compare(VectorOperators.Comparison,short)
1939      */
1940     @ForceInline
1941     public final
1942     VectorMask<Short> eq(short e) {
1943         return compare(EQ, e);
1944     }
1945 
1946     /**
1947      * {@inheritDoc} <!--workaround-->
1948      */
1949     @Override
1950     @ForceInline
1951     public final
1952     VectorMask<Short> lt(Vector<Short> v) {
1953         return compare(LT, v);
1954     }
1955 
1956     /**
1957      * Tests if this vector is less than an input scalar.
1958      *
1959      * This is a lane-wise binary test operation which applies
1960      * the primitive less than operation ({@code <}) to each lane.
1961      * The result is the same as {@code compare(VectorOperators.LT, e)}.
1962      *
1963      * @param e the input scalar
1964      * @return the mask result of testing if this vector
1965      *         is less than the input scalar
1966      * @see #compare(VectorOperators.Comparison,short)
1967      */
1968     @ForceInline
1969     public final
1970     VectorMask<Short> lt(short e) {
1971         return compare(LT, e);
1972     }
1973 
1974     /**
1975      * {@inheritDoc} <!--workaround-->
1976      */
1977     @Override
1978     public abstract
1979     VectorMask<Short> test(VectorOperators.Test op);
1980 
1981     /*package-private*/
1982     @ForceInline
1983     final
1984     <M extends VectorMask<Short>>
1985     M testTemplate(Class<M> maskType, Test op) {
1986         ShortSpecies vsp = vspecies();
1987         if (opKind(op, VO_SPECIAL)) {
1988             VectorMask<Short> m;
1989             if (op == IS_DEFAULT) {
1990                 m = compare(EQ, (short) 0);
1991             } else if (op == IS_NEGATIVE) {
1992                 m = compare(LT, (short) 0);
1993             }
1994             else {
1995                 throw new AssertionError(op);
1996             }
1997             return maskType.cast(m);
1998         }
1999         int opc = opCode(op);
2000         throw new AssertionError(op);
2001     }
2002 
2003     /**
2004      * {@inheritDoc} <!--workaround-->
2005      */
2006     @Override
2007     public abstract
2008     VectorMask<Short> test(VectorOperators.Test op,
2009                                   VectorMask<Short> m);
2010 
2011     /*package-private*/
2012     @ForceInline
2013     final
2014     <M extends VectorMask<Short>>
2015     M testTemplate(Class<M> maskType, Test op, M mask) {
2016         ShortSpecies vsp = vspecies();
2017         mask.check(maskType, this);
2018         if (opKind(op, VO_SPECIAL)) {
2019             VectorMask<Short> m = mask;
2020             if (op == IS_DEFAULT) {
2021                 m = compare(EQ, (short) 0, m);
2022             } else if (op == IS_NEGATIVE) {
2023                 m = compare(LT, (short) 0, m);
2024             }
2025             else {
2026                 throw new AssertionError(op);
2027             }
2028             return maskType.cast(m);
2029         }
2030         int opc = opCode(op);
2031         throw new AssertionError(op);
2032     }
2033 
2034     /**
2035      * {@inheritDoc} <!--workaround-->
2036      */
2037     @Override
2038     public abstract
2039     VectorMask<Short> compare(VectorOperators.Comparison op, Vector<Short> v);
2040 
2041     /*package-private*/
2042     @ForceInline
2043     final
2044     <M extends VectorMask<Short>>
2045     M compareTemplate(Class<M> maskType, Comparison op, Vector<Short> v) {
2046         ShortVector that = (ShortVector) v;
2047         that.check(this);
2048         int opc = opCode(op);
2049         return VectorSupport.compare(
2050             opc, getClass(), maskType, short.class, length(),
2051             this, that, null,
2052             (cond, v0, v1, m1) -> {
2053                 AbstractMask<Short> m
2054                     = v0.bTest(cond, v1, (cond_, i, a, b)
2055                                -> compareWithOp(cond, a, b));
2056                 @SuppressWarnings("unchecked")
2057                 M m2 = (M) m;
2058                 return m2;
2059             });
2060     }
2061 
2062     /*package-private*/
2063     @ForceInline
2064     final
2065     <M extends VectorMask<Short>>
2066     M compareTemplate(Class<M> maskType, Comparison op, Vector<Short> v, M m) {
2067         ShortVector that = (ShortVector) v;
2068         that.check(this);
2069         m.check(maskType, this);
2070         int opc = opCode(op);
2071         return VectorSupport.compare(
2072             opc, getClass(), maskType, short.class, length(),
2073             this, that, m,
2074             (cond, v0, v1, m1) -> {
2075                 AbstractMask<Short> cmpM
2076                     = v0.bTest(cond, v1, (cond_, i, a, b)
2077                                -> compareWithOp(cond, a, b));
2078                 @SuppressWarnings("unchecked")
2079                 M m2 = (M) cmpM.and(m1);
2080                 return m2;
2081             });
2082     }
2083 
2084     @ForceInline
2085     private static boolean compareWithOp(int cond, short a, short b) {
2086         return switch (cond) {
2087             case BT_eq -> a == b;
2088             case BT_ne -> a != b;
2089             case BT_lt -> a < b;
2090             case BT_le -> a <= b;
2091             case BT_gt -> a > b;
2092             case BT_ge -> a >= b;
2093             case BT_ult -> Short.compareUnsigned(a, b) < 0;
2094             case BT_ule -> Short.compareUnsigned(a, b) <= 0;
2095             case BT_ugt -> Short.compareUnsigned(a, b) > 0;
2096             case BT_uge -> Short.compareUnsigned(a, b) >= 0;
2097             default -> throw new AssertionError();
2098         };
2099     }
2100 
2101     /**
2102      * Tests this vector by comparing it with an input scalar,
2103      * according to the given comparison operation.
2104      *
2105      * This is a lane-wise binary test operation which applies
2106      * the comparison operation to each lane.
2107      * <p>
2108      * The result is the same as
2109      * {@code compare(op, broadcast(species(), e))}.
2110      * That is, the scalar may be regarded as broadcast to
2111      * a vector of the same species, and then compared
2112      * against the original vector, using the selected
2113      * comparison operation.
2114      *
2115      * @param op the operation used to compare lane values
2116      * @param e the input scalar
2117      * @return the mask result of testing lane-wise if this vector
2118      *         compares to the input, according to the selected
2119      *         comparison operator
2120      * @see ShortVector#compare(VectorOperators.Comparison,Vector)
2121      * @see #eq(short)
2122      * @see #lt(short)
2123      */
2124     public abstract
2125     VectorMask<Short> compare(Comparison op, short e);
2126 
2127     /*package-private*/
2128     @ForceInline
2129     final
2130     <M extends VectorMask<Short>>
2131     M compareTemplate(Class<M> maskType, Comparison op, short e) {
2132         return compareTemplate(maskType, op, broadcast(e));
2133     }
2134 
2135     /**
2136      * Tests this vector by comparing it with an input scalar,
2137      * according to the given comparison operation,
2138      * in lanes selected by a mask.
2139      *
2140      * This is a masked lane-wise binary test operation which applies
2141      * to each pair of corresponding lane values.
2142      *
2143      * The returned result is equal to the expression
2144      * {@code compare(op,s).and(m)}.
2145      *
2146      * @param op the operation used to compare lane values
2147      * @param e the input scalar
2148      * @param m the mask controlling lane selection
2149      * @return the mask result of testing lane-wise if this vector
2150      *         compares to the input, according to the selected
2151      *         comparison operator,
2152      *         and only in the lanes selected by the mask
2153      * @see ShortVector#compare(VectorOperators.Comparison,Vector,VectorMask)
2154      */
2155     @ForceInline
2156     public final VectorMask<Short> compare(VectorOperators.Comparison op,
2157                                                short e,
2158                                                VectorMask<Short> m) {
2159         return compare(op, broadcast(e), m);
2160     }
2161 
2162     /**
2163      * {@inheritDoc} <!--workaround-->
2164      */
2165     @Override
2166     public abstract
2167     VectorMask<Short> compare(Comparison op, long e);
2168 
2169     /*package-private*/
2170     @ForceInline
2171     final
2172     <M extends VectorMask<Short>>
2173     M compareTemplate(Class<M> maskType, Comparison op, long e) {
2174         return compareTemplate(maskType, op, broadcast(e));
2175     }
2176 
2177     /**
2178      * {@inheritDoc} <!--workaround-->
2179      */
2180     @Override
2181     @ForceInline
2182     public final
2183     VectorMask<Short> compare(Comparison op, long e, VectorMask<Short> m) {
2184         return compare(op, broadcast(e), m);
2185     }
2186 
2187 
2188 
2189     /**
2190      * {@inheritDoc} <!--workaround-->
2191      */
2192     @Override public abstract
2193     ShortVector blend(Vector<Short> v, VectorMask<Short> m);
2194 
2195     /*package-private*/
2196     @ForceInline
2197     final
2198     <M extends VectorMask<Short>>
2199     ShortVector
2200     blendTemplate(Class<M> maskType, ShortVector v, M m) {
2201         v.check(this);
2202         return VectorSupport.blend(
2203             getClass(), maskType, short.class, length(),
2204             this, v, m,
2205             (v0, v1, m_) -> v0.bOp(v1, m_, (i, a, b) -> b));
2206     }
2207 
2208     /**
2209      * {@inheritDoc} <!--workaround-->
2210      */
2211     @Override public abstract ShortVector addIndex(int scale);
2212 
2213     /*package-private*/
2214     @ForceInline
2215     final ShortVector addIndexTemplate(int scale) {
2216         ShortSpecies vsp = vspecies();
2217         // make sure VLENGTH*scale doesn't overflow:
2218         vsp.checkScale(scale);
2219         return VectorSupport.indexVector(
2220             getClass(), short.class, length(),
2221             this, scale, vsp,
2222             (v, scale_, s)
2223             -> {
2224                 // If the platform doesn't support an INDEX
2225                 // instruction directly, load IOTA from memory
2226                 // and multiply.
2227                 ShortVector iota = s.iota();
2228                 short sc = (short) scale_;
2229                 return v.add(sc == 1 ? iota : iota.mul(sc));
2230             });
2231     }
2232 
2233     /**
2234      * Replaces selected lanes of this vector with
2235      * a scalar value
2236      * under the control of a mask.
2237      *
2238      * This is a masked lane-wise binary operation which
2239      * selects each lane value from one or the other input.
2240      *
2241      * The returned result is equal to the expression
2242      * {@code blend(broadcast(e),m)}.
2243      *
2244      * @param e the input scalar, containing the replacement lane value
2245      * @param m the mask controlling lane selection of the scalar
2246      * @return the result of blending the lane elements of this vector with
2247      *         the scalar value
2248      */
2249     @ForceInline
2250     public final ShortVector blend(short e,
2251                                             VectorMask<Short> m) {
2252         return blend(broadcast(e), m);
2253     }
2254 
2255     /**
2256      * Replaces selected lanes of this vector with
2257      * a scalar value
2258      * under the control of a mask.
2259      *
2260      * This is a masked lane-wise binary operation which
2261      * selects each lane value from one or the other input.
2262      *
2263      * The returned result is equal to the expression
2264      * {@code blend(broadcast(e),m)}.
2265      *
2266      * @param e the input scalar, containing the replacement lane value
2267      * @param m the mask controlling lane selection of the scalar
2268      * @return the result of blending the lane elements of this vector with
2269      *         the scalar value
2270      */
2271     @ForceInline
2272     public final ShortVector blend(long e,
2273                                             VectorMask<Short> m) {
2274         return blend(broadcast(e), m);
2275     }
2276 
2277     /**
2278      * {@inheritDoc} <!--workaround-->
2279      */
2280     @Override
2281     public abstract
2282     ShortVector slice(int origin, Vector<Short> v1);
2283 
2284     /*package-private*/
2285     final
2286     @ForceInline
2287     ShortVector sliceTemplate(int origin, Vector<Short> v1) {
2288         ShortVector that = (ShortVector) v1;
2289         that.check(this);
2290         Objects.checkIndex(origin, length() + 1);
2291         VectorShuffle<Short> iota = iotaShuffle();
2292         VectorMask<Short> blendMask = iota.toVector().compare(VectorOperators.LT, (broadcast((short)(length() - origin))));
2293         iota = iotaShuffle(origin, 1, true);
2294         return that.rearrange(iota).blend(this.rearrange(iota), blendMask);
2295     }
2296 
2297     /**
2298      * {@inheritDoc} <!--workaround-->
2299      */
2300     @Override
2301     @ForceInline
2302     public final
2303     ShortVector slice(int origin,
2304                                Vector<Short> w,
2305                                VectorMask<Short> m) {
2306         return broadcast(0).blend(slice(origin, w), m);
2307     }
2308 
2309     /**
2310      * {@inheritDoc} <!--workaround-->
2311      */
2312     @Override
2313     public abstract
2314     ShortVector slice(int origin);
2315 
2316     /*package-private*/
2317     final
2318     @ForceInline
2319     ShortVector sliceTemplate(int origin) {
2320         Objects.checkIndex(origin, length() + 1);
2321         VectorShuffle<Short> iota = iotaShuffle();
2322         VectorMask<Short> blendMask = iota.toVector().compare(VectorOperators.LT, (broadcast((short)(length() - origin))));
2323         iota = iotaShuffle(origin, 1, true);
2324         return vspecies().zero().blend(this.rearrange(iota), blendMask);
2325     }
2326 
2327     /**
2328      * {@inheritDoc} <!--workaround-->
2329      */
2330     @Override
2331     public abstract
2332     ShortVector unslice(int origin, Vector<Short> w, int part);
2333 
2334     /*package-private*/
2335     final
2336     @ForceInline
2337     ShortVector
2338     unsliceTemplate(int origin, Vector<Short> w, int part) {
2339         ShortVector that = (ShortVector) w;
2340         that.check(this);
2341         Objects.checkIndex(origin, length() + 1);
2342         VectorShuffle<Short> iota = iotaShuffle();
2343         VectorMask<Short> blendMask = iota.toVector().compare((part == 0) ? VectorOperators.GE : VectorOperators.LT,
2344                                                                   (broadcast((short)(origin))));
2345         iota = iotaShuffle(-origin, 1, true);
2346         return that.blend(this.rearrange(iota), blendMask);
2347     }
2348 
2349     /*package-private*/
2350     final
2351     @ForceInline
2352     <M extends VectorMask<Short>>
2353     ShortVector
2354     unsliceTemplate(Class<M> maskType, int origin, Vector<Short> w, int part, M m) {
2355         ShortVector that = (ShortVector) w;
2356         that.check(this);
2357         ShortVector slice = that.sliceTemplate(origin, that);
2358         slice = slice.blendTemplate(maskType, this, m);
2359         return slice.unsliceTemplate(origin, w, part);
2360     }
2361 
2362     /**
2363      * {@inheritDoc} <!--workaround-->
2364      */
2365     @Override
2366     public abstract
2367     ShortVector unslice(int origin, Vector<Short> w, int part, VectorMask<Short> m);
2368 
2369     /**
2370      * {@inheritDoc} <!--workaround-->
2371      */
2372     @Override
2373     public abstract
2374     ShortVector unslice(int origin);
2375 
2376     /*package-private*/
2377     final
2378     @ForceInline
2379     ShortVector
2380     unsliceTemplate(int origin) {
2381         Objects.checkIndex(origin, length() + 1);
2382         VectorShuffle<Short> iota = iotaShuffle();
2383         VectorMask<Short> blendMask = iota.toVector().compare(VectorOperators.GE,
2384                                                                   (broadcast((short)(origin))));
2385         iota = iotaShuffle(-origin, 1, true);
2386         return vspecies().zero().blend(this.rearrange(iota), blendMask);
2387     }
2388 
2389     private ArrayIndexOutOfBoundsException
2390     wrongPartForSlice(int part) {
2391         String msg = String.format("bad part number %d for slice operation",
2392                                    part);
2393         return new ArrayIndexOutOfBoundsException(msg);
2394     }
2395 
2396     /**
2397      * {@inheritDoc} <!--workaround-->
2398      */
2399     @Override
2400     public abstract
2401     ShortVector rearrange(VectorShuffle<Short> m);
2402 
2403     /*package-private*/
2404     @ForceInline
2405     final
2406     <S extends VectorShuffle<Short>>
2407     ShortVector rearrangeTemplate(Class<S> shuffletype, S shuffle) {
2408         shuffle.checkIndexes();
2409         return VectorSupport.rearrangeOp(
2410             getClass(), shuffletype, null, short.class, length(),
2411             this, shuffle, null,
2412             (v1, s_, m_) -> v1.uOp((i, a) -> {
2413                 int ei = s_.laneSource(i);
2414                 return v1.lane(ei);
2415             }));
2416     }
2417 
2418     /**
2419      * {@inheritDoc} <!--workaround-->
2420      */
2421     @Override
2422     public abstract
2423     ShortVector rearrange(VectorShuffle<Short> s,
2424                                    VectorMask<Short> m);
2425 
2426     /*package-private*/
2427     @ForceInline
2428     final
2429     <S extends VectorShuffle<Short>, M extends VectorMask<Short>>
2430     ShortVector rearrangeTemplate(Class<S> shuffletype,
2431                                            Class<M> masktype,
2432                                            S shuffle,
2433                                            M m) {
2434 
2435         m.check(masktype, this);
2436         VectorMask<Short> valid = shuffle.laneIsValid();
2437         if (m.andNot(valid).anyTrue()) {
2438             shuffle.checkIndexes();
2439             throw new AssertionError();
2440         }
2441         return VectorSupport.rearrangeOp(
2442                    getClass(), shuffletype, masktype, short.class, length(),
2443                    this, shuffle, m,
2444                    (v1, s_, m_) -> v1.uOp((i, a) -> {
2445                         int ei = s_.laneSource(i);
2446                         return ei < 0  || !m_.laneIsSet(i) ? 0 : v1.lane(ei);
2447                    }));
2448     }
2449 
2450     /**
2451      * {@inheritDoc} <!--workaround-->
2452      */
2453     @Override
2454     public abstract
2455     ShortVector rearrange(VectorShuffle<Short> s,
2456                                    Vector<Short> v);
2457 
2458     /*package-private*/
2459     @ForceInline
2460     final
2461     <S extends VectorShuffle<Short>>
2462     ShortVector rearrangeTemplate(Class<S> shuffletype,
2463                                            S shuffle,
2464                                            ShortVector v) {
2465         VectorMask<Short> valid = shuffle.laneIsValid();
2466         @SuppressWarnings("unchecked")
2467         S ws = (S) shuffle.wrapIndexes();
2468         ShortVector r0 =
2469             VectorSupport.rearrangeOp(
2470                 getClass(), shuffletype, null, short.class, length(),
2471                 this, ws, null,
2472                 (v0, s_, m_) -> v0.uOp((i, a) -> {
2473                     int ei = s_.laneSource(i);
2474                     return v0.lane(ei);
2475                 }));
2476         ShortVector r1 =
2477             VectorSupport.rearrangeOp(
2478                 getClass(), shuffletype, null, short.class, length(),
2479                 v, ws, null,
2480                 (v1, s_, m_) -> v1.uOp((i, a) -> {
2481                     int ei = s_.laneSource(i);
2482                     return v1.lane(ei);
2483                 }));
2484         return r1.blend(r0, valid);
2485     }
2486 
2487     @ForceInline
2488     private final
2489     VectorShuffle<Short> toShuffle0(ShortSpecies dsp) {
2490         short[] a = toArray();
2491         int[] sa = new int[a.length];
2492         for (int i = 0; i < a.length; i++) {
2493             sa[i] = (int) a[i];
2494         }
2495         return VectorShuffle.fromArray(dsp, sa, 0);
2496     }
2497 
    /*package-private*/
    @ForceInline
    final
    VectorShuffle<Short> toShuffleTemplate(Class<?> shuffleType) {
        ShortSpecies vsp = vspecies();
        // Intrinsic cast from vector lanes to shuffle indexes; note the
        // byte.class element type for the shuffle side of the conversion.
        // toShuffle0 is the scalar fallback.
        return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST,
                                     getClass(), short.class, length(),
                                     shuffleType, byte.class, length(),
                                     this, vsp,
                                     ShortVector::toShuffle0);
    }
2509 
2510     /**
2511      * {@inheritDoc} <!--workaround-->
2512      * @since 19
2513      */
2514     @Override
2515     public abstract
2516     ShortVector compress(VectorMask<Short> m);
2517 
2518     /*package-private*/
2519     @ForceInline
2520     final
2521     <M extends AbstractMask<Short>>
2522     ShortVector compressTemplate(Class<M> masktype, M m) {
2523       m.check(masktype, this);
2524       return (ShortVector) VectorSupport.comExpOp(VectorSupport.VECTOR_OP_COMPRESS, getClass(), masktype,
2525                                                    short.class, length(), this, m,
2526                                                    (v1, m1) -> compressHelper(v1, m1));
2527     }
2528 
2529     /**
2530      * {@inheritDoc} <!--workaround-->
2531      * @since 19
2532      */
2533     @Override
2534     public abstract
2535     ShortVector expand(VectorMask<Short> m);
2536 
2537     /*package-private*/
2538     @ForceInline
2539     final
2540     <M extends AbstractMask<Short>>
2541     ShortVector expandTemplate(Class<M> masktype, M m) {
2542       m.check(masktype, this);
2543       return (ShortVector) VectorSupport.comExpOp(VectorSupport.VECTOR_OP_EXPAND, getClass(), masktype,
2544                                                    short.class, length(), this, m,
2545                                                    (v1, m1) -> expandHelper(v1, m1));
2546     }
2547 
2548 
2549     /**
2550      * {@inheritDoc} <!--workaround-->
2551      */
2552     @Override
2553     public abstract
2554     ShortVector selectFrom(Vector<Short> v);
2555 
2556     /*package-private*/
2557     @ForceInline
2558     final ShortVector selectFromTemplate(ShortVector v) {
2559         return v.rearrange(this.toShuffle());
2560     }
2561 
2562     /**
2563      * {@inheritDoc} <!--workaround-->
2564      */
2565     @Override
2566     public abstract
2567     ShortVector selectFrom(Vector<Short> s, VectorMask<Short> m);
2568 
2569     /*package-private*/
2570     @ForceInline
2571     final ShortVector selectFromTemplate(ShortVector v,
2572                                                   AbstractMask<Short> m) {
2573         return v.rearrange(this.toShuffle(), m);
2574     }
2575 
    /// Ternary operations

    /**
     * Blends together the bits of two vectors under
     * the control of a third, which supplies mask bits.
     *
     * This is a lane-wise ternary operation which performs
     * a bitwise blending operation {@code (a&~c)|(b&c)}
     * to each lane.
     *
     * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Ternary,Vector,Vector)
     *    lanewise}{@code (}{@link VectorOperators#BITWISE_BLEND
     *    BITWISE_BLEND}{@code , bits, mask)}.
     *
     * @param bits input bits to blend into the current vector
     * @param mask a bitwise mask to enable blending of the input bits
     * @return the bitwise blend of the given bits into the current vector,
     *         under control of the bitwise mask
     * @see #bitwiseBlend(short,short)
     * @see #bitwiseBlend(short,Vector)
     * @see #bitwiseBlend(Vector,short)
     * @see VectorOperators#BITWISE_BLEND
     * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
     */
    @ForceInline
    public final
    ShortVector bitwiseBlend(Vector<Short> bits, Vector<Short> mask) {
        return lanewise(BITWISE_BLEND, bits, mask);
    }

    /**
     * Blends together the bits of a vector and a scalar under
     * the control of another scalar, which supplies mask bits.
     *
     * This is a lane-wise ternary operation which performs
     * a bitwise blending operation {@code (a&~c)|(b&c)}
     * to each lane.
     *
     * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Ternary,short,short)
     *    lanewise}{@code (}{@link VectorOperators#BITWISE_BLEND
     *    BITWISE_BLEND}{@code , bits, mask)}.
     *
     * @param bits input bits to blend into the current vector
     * @param mask a bitwise mask to enable blending of the input bits
     * @return the bitwise blend of the given bits into the current vector,
     *         under control of the bitwise mask
     * @see #bitwiseBlend(Vector,Vector)
     * @see VectorOperators#BITWISE_BLEND
     * @see #lanewise(VectorOperators.Ternary,short,short,VectorMask)
     */
    @ForceInline
    public final
    ShortVector bitwiseBlend(short bits, short mask) {
        return lanewise(BITWISE_BLEND, bits, mask);
    }

    /**
     * Blends together the bits of a vector and a scalar under
     * the control of another vector, which supplies mask bits.
     *
     * This is a lane-wise ternary operation which performs
     * a bitwise blending operation {@code (a&~c)|(b&c)}
     * to each lane.
     *
     * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Ternary,short,Vector)
     *    lanewise}{@code (}{@link VectorOperators#BITWISE_BLEND
     *    BITWISE_BLEND}{@code , bits, mask)}.
     *
     * @param bits input bits to blend into the current vector
     * @param mask a bitwise mask to enable blending of the input bits
     * @return the bitwise blend of the given bits into the current vector,
     *         under control of the bitwise mask
     * @see #bitwiseBlend(Vector,Vector)
     * @see VectorOperators#BITWISE_BLEND
     * @see #lanewise(VectorOperators.Ternary,short,Vector,VectorMask)
     */
    @ForceInline
    public final
    ShortVector bitwiseBlend(short bits, Vector<Short> mask) {
        return lanewise(BITWISE_BLEND, bits, mask);
    }

    /**
     * Blends together the bits of two vectors under
     * the control of a scalar, which supplies mask bits.
     *
     * This is a lane-wise ternary operation which performs
     * a bitwise blending operation {@code (a&~c)|(b&c)}
     * to each lane.
     *
     * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Ternary,Vector,short)
     *    lanewise}{@code (}{@link VectorOperators#BITWISE_BLEND
     *    BITWISE_BLEND}{@code , bits, mask)}.
     *
     * @param bits input bits to blend into the current vector
     * @param mask a bitwise mask to enable blending of the input bits
     * @return the bitwise blend of the given bits into the current vector,
     *         under control of the bitwise mask
     * @see #bitwiseBlend(Vector,Vector)
     * @see VectorOperators#BITWISE_BLEND
     * @see #lanewise(VectorOperators.Ternary,Vector,short,VectorMask)
     */
    @ForceInline
    public final
    ShortVector bitwiseBlend(Vector<Short> bits, short mask) {
        return lanewise(BITWISE_BLEND, bits, mask);
    }
2687 
2688 
    // Type specific horizontal reductions

    /**
     * Returns a value accumulated from all the lanes of this vector.
     *
     * This is an associative cross-lane reduction operation which
     * applies the specified operation to all the lane elements.
     * <p>
     * A few reduction operations do not support arbitrary reordering
     * of their operands, yet are included here because of their
     * usefulness.
     * <ul>
     * <li>
     * In the case of {@code FIRST_NONZERO}, the reduction returns
     * the value from the lowest-numbered non-zero lane.
     * <li>
     * All other reduction operations are fully commutative and
     * associative.  The implementation can choose any order of
     * processing, yet it will always produce the same result.
     * </ul>
     *
     * @param op the operation used to combine lane values
     * @return the accumulated result
     * @throws UnsupportedOperationException if this vector does
     *         not support the requested operation
     * @see #reduceLanes(VectorOperators.Associative,VectorMask)
     * @see #add(Vector)
     * @see #mul(Vector)
     * @see #min(Vector)
     * @see #max(Vector)
     * @see #and(Vector)
     * @see #or(Vector)
     * @see VectorOperators#XOR
     * @see VectorOperators#FIRST_NONZERO
     */
    public abstract short reduceLanes(VectorOperators.Associative op);

    /**
     * Returns a value accumulated from selected lanes of this vector,
     * controlled by a mask.
     *
     * This is an associative cross-lane reduction operation which
     * applies the specified operation to the selected lane elements.
     * <p>
     * If no elements are selected, an operation-specific identity
     * value is returned.
     * <ul>
     * <li>
     * If the operation is
     *  {@code ADD}, {@code XOR}, {@code OR},
     * or {@code FIRST_NONZERO},
     * then the identity value is zero, the default {@code short} value.
     * <li>
     * If the operation is {@code MUL},
     * then the identity value is one.
     * <li>
     * If the operation is {@code AND},
     * then the identity value is minus one (all bits set).
     * <li>
     * If the operation is {@code MAX},
     * then the identity value is {@code Short.MIN_VALUE}.
     * <li>
     * If the operation is {@code MIN},
     * then the identity value is {@code Short.MAX_VALUE}.
     * </ul>
     * <p>
     * A few reduction operations do not support arbitrary reordering
     * of their operands, yet are included here because of their
     * usefulness.
     * <ul>
     * <li>
     * In the case of {@code FIRST_NONZERO}, the reduction returns
     * the value from the lowest-numbered non-zero lane.
     * <li>
     * All other reduction operations are fully commutative and
     * associative.  The implementation can choose any order of
     * processing, yet it will always produce the same result.
     * </ul>
     *
     * @param op the operation used to combine lane values
     * @param m the mask controlling lane selection
     * @return the reduced result accumulated from the selected lane values
     * @throws UnsupportedOperationException if this vector does
     *         not support the requested operation
     * @see #reduceLanes(VectorOperators.Associative)
     */
    public abstract short reduceLanes(VectorOperators.Associative op,
                                       VectorMask<Short> m);

    /*package-private*/
    @ForceInline
    final
    short reduceLanesTemplate(VectorOperators.Associative op,
                               Class<? extends VectorMask<Short>> maskClass,
                               VectorMask<Short> m) {
        m.check(maskClass, this);
        if (op == FIRST_NONZERO) {
            // FIXME:  The JIT should handle this.
            // Zero out unselected lanes, then delegate to the unmasked
            // reduction: zero is FIRST_NONZERO's identity, so the result
            // is the first non-zero among the selected lanes.
            ShortVector v = broadcast((short) 0).blend(this, m);
            return v.reduceLanesTemplate(op);
        }
        int opc = opCode(op);
        // Intrinsic candidate; REDUCE_IMPL caches the scalar fallback
        // lambda per operator.
        return fromBits(VectorSupport.reductionCoerced(
            opc, getClass(), maskClass, short.class, length(),
            this, m,
            REDUCE_IMPL.find(op, opc, ShortVector::reductionOperations)));
    }

    /*package-private*/
    @ForceInline
    final
    short reduceLanesTemplate(VectorOperators.Associative op) {
        if (op == FIRST_NONZERO) {
            // FIXME:  The JIT should handle this.
            // Compare lanes against zero and return the first lane that
            // tested non-zero, or zero if none did.
            VectorMask<Short> thisNZ
                = this.viewAsIntegralLanes().compare(NE, (short) 0);
            int ft = thisNZ.firstTrue();
            return ft < length() ? this.lane(ft) : (short) 0;
        }
        int opc = opCode(op);
        // Intrinsic candidate; null mask means reduce over all lanes.
        return fromBits(VectorSupport.reductionCoerced(
            opc, getClass(), null, short.class, length(),
            this, null,
            REDUCE_IMPL.find(op, opc, ShortVector::reductionOperations)));
    }
2814 
    // Per-operator cache of scalar reduction fallbacks; avoids
    // re-creating the fallback lambda on every reduction call.
    private static final
    ImplCache<Associative, ReductionOperation<ShortVector, VectorMask<Short>>>
        REDUCE_IMPL = new ImplCache<>(Associative.class, ShortVector.class);
2818 
2819     private static ReductionOperation<ShortVector, VectorMask<Short>> reductionOperations(int opc_) {
2820         switch (opc_) {
2821             case VECTOR_OP_ADD: return (v, m) ->
2822                     toBits(v.rOp((short)0, m, (i, a, b) -> (short)(a + b)));
2823             case VECTOR_OP_MUL: return (v, m) ->
2824                     toBits(v.rOp((short)1, m, (i, a, b) -> (short)(a * b)));
2825             case VECTOR_OP_MIN: return (v, m) ->
2826                     toBits(v.rOp(MAX_OR_INF, m, (i, a, b) -> (short) Math.min(a, b)));
2827             case VECTOR_OP_MAX: return (v, m) ->
2828                     toBits(v.rOp(MIN_OR_INF, m, (i, a, b) -> (short) Math.max(a, b)));
2829             case VECTOR_OP_AND: return (v, m) ->
2830                     toBits(v.rOp((short)-1, m, (i, a, b) -> (short)(a & b)));
2831             case VECTOR_OP_OR: return (v, m) ->
2832                     toBits(v.rOp((short)0, m, (i, a, b) -> (short)(a | b)));
2833             case VECTOR_OP_XOR: return (v, m) ->
2834                     toBits(v.rOp((short)0, m, (i, a, b) -> (short)(a ^ b)));
2835             default: return null;
2836         }
2837     }
2838 
    // Reduction identities for MAX/MIN.  Shorts have no infinities, so
    // the extreme finite values play that role.
    private static final short MIN_OR_INF = Short.MIN_VALUE;
    private static final short MAX_OR_INF = Short.MAX_VALUE;

    public @Override abstract long reduceLanesToLong(VectorOperators.Associative op);
    public @Override abstract long reduceLanesToLong(VectorOperators.Associative op,
                                                     VectorMask<Short> m);
2845 
    // Type specific accessors

    /**
     * Gets the lane element at lane index {@code i}.
     *
     * @param i the lane index
     * @return the lane element at lane index {@code i}
     * @throws IllegalArgumentException if the index is out of range
     * ({@code < 0 || >= length()})
     */
    public abstract short lane(int i);

    /**
     * Replaces the lane element of this vector at lane index {@code i} with
     * value {@code e}.
     *
     * This is a cross-lane operation and behaves as if it returns the result
     * of blending this vector with an input vector that is the result of
     * broadcasting {@code e} and a mask that has only one lane set at lane
     * index {@code i}.
     *
     * @param i the lane index of the lane element to be replaced
     * @param e the value to be placed
     * @return the result of replacing the lane element of this vector at lane
     * index {@code i} with value {@code e}.
     * @throws IllegalArgumentException if the index is out of range
     * ({@code < 0 || >= length()})
     */
    public abstract ShortVector withLane(int i, short e);
2875 
    // Memory load operations

    /**
     * Returns an array of type {@code short[]}
     * containing all the lane values.
     * The array length is the same as the vector length.
     * The array elements are stored in lane order.
     * <p>
     * This method behaves as if it stores
     * this vector into an allocated array
     * (using {@link #intoArray(short[], int) intoArray})
     * and returns the array as follows:
     * <pre>{@code
     *   short[] a = new short[this.length()];
     *   this.intoArray(a, 0);
     *   return a;
     * }</pre>
     *
     * @return an array containing the lane values of this vector
     */
    @ForceInline
    @Override
    public final short[] toArray() {
        short[] a = new short[vspecies().laneCount()];
        intoArray(a, 0);
        return a;
    }

    /** {@inheritDoc} <!--workaround-->
     * @implNote
     * When this method is used on vectors
     * of type {@code ShortVector},
     * there will be no loss of precision or range,
     * and so no {@code UnsupportedOperationException} will
     * be thrown.
     */
    @ForceInline
    @Override
    public final int[] toIntArray() {
        short[] a = toArray();
        int[] res = new int[a.length];
        for (int i = 0; i < a.length; i++) {
            short e = a[i];
            // toIntegralChecked cannot fail here: every short fits in int.
            res[i] = (int) ShortSpecies.toIntegralChecked(e, true);
        }
        return res;
    }

    /** {@inheritDoc} <!--workaround-->
     * @implNote
     * When this method is used on vectors
     * of type {@code ShortVector},
     * there will be no loss of precision or range,
     * and so no {@code UnsupportedOperationException} will
     * be thrown.
     */
    @ForceInline
    @Override
    public final long[] toLongArray() {
        short[] a = toArray();
        long[] res = new long[a.length];
        for (int i = 0; i < a.length; i++) {
            short e = a[i];
            // toIntegralChecked cannot fail here: every short fits in long.
            res[i] = ShortSpecies.toIntegralChecked(e, false);
        }
        return res;
    }

    /** {@inheritDoc} <!--workaround-->
     * @implNote
     * When this method is used on vectors
     * of type {@code ShortVector},
     * there will be no loss of precision.
     */
    @ForceInline
    @Override
    public final double[] toDoubleArray() {
        short[] a = toArray();
        double[] res = new double[a.length];
        for (int i = 0; i < a.length; i++) {
            // Widening short -> double is always exact.
            res[i] = (double) a[i];
        }
        return res;
    }
2960 
2961     /**
2962      * Loads a vector from an array of type {@code short[]}
2963      * starting at an offset.
2964      * For each vector lane, where {@code N} is the vector lane index, the
2965      * array element at index {@code offset + N} is placed into the
2966      * resulting vector at lane index {@code N}.
2967      *
2968      * @param species species of desired vector
2969      * @param a the array
2970      * @param offset the offset into the array
2971      * @return the vector loaded from an array
2972      * @throws IndexOutOfBoundsException
2973      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
2974      *         for any lane {@code N} in the vector
2975      */
2976     @ForceInline
2977     public static
2978     ShortVector fromArray(VectorSpecies<Short> species,
2979                                    short[] a, int offset) {
2980         offset = checkFromIndexSize(offset, species.length(), a.length);
2981         ShortSpecies vsp = (ShortSpecies) species;
2982         return vsp.dummyVector().fromArray0(a, offset);
2983     }
2984 
2985     /**
2986      * Loads a vector from an array of type {@code short[]}
2987      * starting at an offset and using a mask.
2988      * Lanes where the mask is unset are filled with the default
2989      * value of {@code short} (zero).
2990      * For each vector lane, where {@code N} is the vector lane index,
2991      * if the mask lane at index {@code N} is set then the array element at
2992      * index {@code offset + N} is placed into the resulting vector at lane index
2993      * {@code N}, otherwise the default element value is placed into the
2994      * resulting vector at lane index {@code N}.
2995      *
2996      * @param species species of desired vector
2997      * @param a the array
2998      * @param offset the offset into the array
2999      * @param m the mask controlling lane selection
3000      * @return the vector loaded from an array
3001      * @throws IndexOutOfBoundsException
3002      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
3003      *         for any lane {@code N} in the vector
3004      *         where the mask is set
3005      */
3006     @ForceInline
3007     public static
3008     ShortVector fromArray(VectorSpecies<Short> species,
3009                                    short[] a, int offset,
3010                                    VectorMask<Short> m) {
3011         ShortSpecies vsp = (ShortSpecies) species;
3012         if (offset >= 0 && offset <= (a.length - species.length())) {
3013             return vsp.dummyVector().fromArray0(a, offset, m);
3014         }
3015 
3016         // FIXME: optimize
3017         checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
3018         return vsp.vOp(m, i -> a[offset + i]);
3019     }
3020 
3021     /**
3022      * Gathers a new vector composed of elements from an array of type
3023      * {@code short[]},
3024      * using indexes obtained by adding a fixed {@code offset} to a
3025      * series of secondary offsets from an <em>index map</em>.
3026      * The index map is a contiguous sequence of {@code VLENGTH}
3027      * elements in a second array of {@code int}s, starting at a given
3028      * {@code mapOffset}.
3029      * <p>
3030      * For each vector lane, where {@code N} is the vector lane index,
3031      * the lane is loaded from the array
3032      * element {@code a[f(N)]}, where {@code f(N)} is the
3033      * index mapping expression
3034      * {@code offset + indexMap[mapOffset + N]]}.
3035      *
3036      * @param species species of desired vector
3037      * @param a the array
3038      * @param offset the offset into the array, may be negative if relative
3039      * indexes in the index map compensate to produce a value within the
3040      * array bounds
3041      * @param indexMap the index map
3042      * @param mapOffset the offset into the index map
3043      * @return the vector loaded from the indexed elements of the array
3044      * @throws IndexOutOfBoundsException
3045      *         if {@code mapOffset+N < 0}
3046      *         or if {@code mapOffset+N >= indexMap.length},
3047      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
3048      *         is an invalid index into {@code a},
3049      *         for any lane {@code N} in the vector
3050      * @see ShortVector#toIntArray()
3051      */
3052     @ForceInline
3053     public static
3054     ShortVector fromArray(VectorSpecies<Short> species,
3055                                    short[] a, int offset,
3056                                    int[] indexMap, int mapOffset) {
3057         ShortSpecies vsp = (ShortSpecies) species;
3058         return vsp.vOp(n -> a[offset + indexMap[mapOffset + n]]);
3059     }
3060 
3061     /**
3062      * Gathers a new vector composed of elements from an array of type
3063      * {@code short[]},
3064      * under the control of a mask, and
3065      * using indexes obtained by adding a fixed {@code offset} to a
3066      * series of secondary offsets from an <em>index map</em>.
3067      * The index map is a contiguous sequence of {@code VLENGTH}
3068      * elements in a second array of {@code int}s, starting at a given
3069      * {@code mapOffset}.
3070      * <p>
3071      * For each vector lane, where {@code N} is the vector lane index,
3072      * if the lane is set in the mask,
3073      * the lane is loaded from the array
3074      * element {@code a[f(N)]}, where {@code f(N)} is the
3075      * index mapping expression
3076      * {@code offset + indexMap[mapOffset + N]]}.
3077      * Unset lanes in the resulting vector are set to zero.
3078      *
3079      * @param species species of desired vector
3080      * @param a the array
3081      * @param offset the offset into the array, may be negative if relative
3082      * indexes in the index map compensate to produce a value within the
3083      * array bounds
3084      * @param indexMap the index map
3085      * @param mapOffset the offset into the index map
3086      * @param m the mask controlling lane selection
3087      * @return the vector loaded from the indexed elements of the array
3088      * @throws IndexOutOfBoundsException
3089      *         if {@code mapOffset+N < 0}
3090      *         or if {@code mapOffset+N >= indexMap.length},
3091      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
3092      *         is an invalid index into {@code a},
3093      *         for any lane {@code N} in the vector
3094      *         where the mask is set
3095      * @see ShortVector#toIntArray()
3096      */
3097     @ForceInline
3098     public static
3099     ShortVector fromArray(VectorSpecies<Short> species,
3100                                    short[] a, int offset,
3101                                    int[] indexMap, int mapOffset,
3102                                    VectorMask<Short> m) {
3103         ShortSpecies vsp = (ShortSpecies) species;
3104         return vsp.vOp(m, n -> a[offset + indexMap[mapOffset + n]]);
3105     }
3106 
3107     /**
3108      * Loads a vector from an array of type {@code char[]}
3109      * starting at an offset.
3110      * For each vector lane, where {@code N} is the vector lane index, the
3111      * array element at index {@code offset + N}
3112      * is first cast to a {@code short} value and then
3113      * placed into the resulting vector at lane index {@code N}.
3114      *
3115      * @param species species of desired vector
3116      * @param a the array
3117      * @param offset the offset into the array
3118      * @return the vector loaded from an array
3119      * @throws IndexOutOfBoundsException
3120      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
3121      *         for any lane {@code N} in the vector
3122      */
3123     @ForceInline
3124     public static
3125     ShortVector fromCharArray(VectorSpecies<Short> species,
3126                                        char[] a, int offset) {
3127         offset = checkFromIndexSize(offset, species.length(), a.length);
3128         ShortSpecies vsp = (ShortSpecies) species;
3129         return vsp.dummyVector().fromCharArray0(a, offset);
3130     }
3131 
3132     /**
3133      * Loads a vector from an array of type {@code char[]}
3134      * starting at an offset and using a mask.
3135      * Lanes where the mask is unset are filled with the default
3136      * value of {@code short} (zero).
3137      * For each vector lane, where {@code N} is the vector lane index,
3138      * if the mask lane at index {@code N} is set then the array element at
3139      * index {@code offset + N}
3140      * is first cast to a {@code short} value and then
3141      * placed into the resulting vector at lane index
3142      * {@code N}, otherwise the default element value is placed into the
3143      * resulting vector at lane index {@code N}.
3144      *
3145      * @param species species of desired vector
3146      * @param a the array
3147      * @param offset the offset into the array
3148      * @param m the mask controlling lane selection
3149      * @return the vector loaded from an array
3150      * @throws IndexOutOfBoundsException
3151      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
3152      *         for any lane {@code N} in the vector
3153      *         where the mask is set
3154      */
3155     @ForceInline
3156     public static
3157     ShortVector fromCharArray(VectorSpecies<Short> species,
3158                                        char[] a, int offset,
3159                                        VectorMask<Short> m) {
3160         ShortSpecies vsp = (ShortSpecies) species;
3161         if (offset >= 0 && offset <= (a.length - species.length())) {
3162             return vsp.dummyVector().fromCharArray0(a, offset, m);
3163         }
3164 
3165         // FIXME: optimize
3166         checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
3167         return vsp.vOp(m, i -> (short) a[offset + i]);
3168     }
3169 
3170     /**
3171      * Gathers a new vector composed of elements from an array of type
3172      * {@code char[]},
3173      * using indexes obtained by adding a fixed {@code offset} to a
3174      * series of secondary offsets from an <em>index map</em>.
3175      * The index map is a contiguous sequence of {@code VLENGTH}
3176      * elements in a second array of {@code int}s, starting at a given
3177      * {@code mapOffset}.
3178      * <p>
3179      * For each vector lane, where {@code N} is the vector lane index,
3180      * the lane is loaded from the expression
3181      * {@code (short) a[f(N)]}, where {@code f(N)} is the
3182      * index mapping expression
3183      * {@code offset + indexMap[mapOffset + N]]}.
3184      *
3185      * @param species species of desired vector
3186      * @param a the array
3187      * @param offset the offset into the array, may be negative if relative
3188      * indexes in the index map compensate to produce a value within the
3189      * array bounds
3190      * @param indexMap the index map
3191      * @param mapOffset the offset into the index map
3192      * @return the vector loaded from the indexed elements of the array
3193      * @throws IndexOutOfBoundsException
3194      *         if {@code mapOffset+N < 0}
3195      *         or if {@code mapOffset+N >= indexMap.length},
3196      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
3197      *         is an invalid index into {@code a},
3198      *         for any lane {@code N} in the vector
3199      * @see ShortVector#toIntArray()
3200      */
3201     @ForceInline
3202     public static
3203     ShortVector fromCharArray(VectorSpecies<Short> species,
3204                                        char[] a, int offset,
3205                                        int[] indexMap, int mapOffset) {
3206         // FIXME: optimize
3207         ShortSpecies vsp = (ShortSpecies) species;
3208         return vsp.vOp(n -> (short) a[offset + indexMap[mapOffset + n]]);
3209     }
3210 
3211     /**
3212      * Gathers a new vector composed of elements from an array of type
3213      * {@code char[]},
3214      * under the control of a mask, and
3215      * using indexes obtained by adding a fixed {@code offset} to a
3216      * series of secondary offsets from an <em>index map</em>.
3217      * The index map is a contiguous sequence of {@code VLENGTH}
3218      * elements in a second array of {@code int}s, starting at a given
3219      * {@code mapOffset}.
3220      * <p>
3221      * For each vector lane, where {@code N} is the vector lane index,
3222      * if the lane is set in the mask,
3223      * the lane is loaded from the expression
3224      * {@code (short) a[f(N)]}, where {@code f(N)} is the
3225      * index mapping expression
3226      * {@code offset + indexMap[mapOffset + N]]}.
3227      * Unset lanes in the resulting vector are set to zero.
3228      *
3229      * @param species species of desired vector
3230      * @param a the array
3231      * @param offset the offset into the array, may be negative if relative
3232      * indexes in the index map compensate to produce a value within the
3233      * array bounds
3234      * @param indexMap the index map
3235      * @param mapOffset the offset into the index map
3236      * @param m the mask controlling lane selection
3237      * @return the vector loaded from the indexed elements of the array
3238      * @throws IndexOutOfBoundsException
3239      *         if {@code mapOffset+N < 0}
3240      *         or if {@code mapOffset+N >= indexMap.length},
3241      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
3242      *         is an invalid index into {@code a},
3243      *         for any lane {@code N} in the vector
3244      *         where the mask is set
3245      * @see ShortVector#toIntArray()
3246      */
3247     @ForceInline
3248     public static
3249     ShortVector fromCharArray(VectorSpecies<Short> species,
3250                                        char[] a, int offset,
3251                                        int[] indexMap, int mapOffset,
3252                                        VectorMask<Short> m) {
3253         // FIXME: optimize
3254         ShortSpecies vsp = (ShortSpecies) species;
3255         return vsp.vOp(m, n -> (short) a[offset + indexMap[mapOffset + n]]);
3256     }
3257 
3258 
3259     /**
3260      * Loads a vector from a {@linkplain MemorySegment memory segment}
3261      * starting at an offset into the memory segment.
3262      * Bytes are composed into primitive lane elements according
3263      * to the specified byte order.
3264      * The vector is arranged into lanes according to
3265      * <a href="Vector.html#lane-order">memory ordering</a>.
3266      * <p>
3267      * This method behaves as if it returns the result of calling
3268      * {@link #fromMemorySegment(VectorSpecies,MemorySegment,long,ByteOrder,VectorMask)
3269      * fromMemorySegment()} as follows:
3270      * <pre>{@code
3271      * var m = species.maskAll(true);
3272      * return fromMemorySegment(species, ms, offset, bo, m);
3273      * }</pre>
3274      *
3275      * @param species species of desired vector
3276      * @param ms the memory segment
3277      * @param offset the offset into the memory segment
3278      * @param bo the intended byte order
3279      * @return a vector loaded from the memory segment
3280      * @throws IndexOutOfBoundsException
3281      *         if {@code offset+N*2 < 0}
3282      *         or {@code offset+N*2 >= ms.byteSize()}
3283      *         for any lane {@code N} in the vector
3284      * @throws IllegalArgumentException if the memory segment is a heap segment that is
3285      *         not backed by a {@code byte[]} array.
3286      * @throws IllegalStateException if the memory segment's session is not alive,
3287      *         or if access occurs from a thread other than the thread owning the session.
3288      * @since 19
3289      */
3290     @ForceInline
3291     public static
3292     ShortVector fromMemorySegment(VectorSpecies<Short> species,
3293                                            MemorySegment ms, long offset,
3294                                            ByteOrder bo) {
3295         offset = checkFromIndexSize(offset, species.vectorByteSize(), ms.byteSize());
3296         ShortSpecies vsp = (ShortSpecies) species;
3297         return vsp.dummyVector().fromMemorySegment0(ms, offset).maybeSwap(bo);
3298     }
3299 
3300     /**
3301      * Loads a vector from a {@linkplain MemorySegment memory segment}
3302      * starting at an offset into the memory segment
3303      * and using a mask.
3304      * Lanes where the mask is unset are filled with the default
3305      * value of {@code short} (zero).
3306      * Bytes are composed into primitive lane elements according
3307      * to the specified byte order.
3308      * The vector is arranged into lanes according to
3309      * <a href="Vector.html#lane-order">memory ordering</a>.
3310      * <p>
3311      * The following pseudocode illustrates the behavior:
3312      * <pre>{@code
3313      * var slice = ms.asSlice(offset);
3314      * short[] ar = new short[species.length()];
3315      * for (int n = 0; n < ar.length; n++) {
3316      *     if (m.laneIsSet(n)) {
3317      *         ar[n] = slice.getAtIndex(ValuaLayout.JAVA_SHORT.withBitAlignment(8), n);
3318      *     }
3319      * }
3320      * ShortVector r = ShortVector.fromArray(species, ar, 0);
3321      * }</pre>
3322      * @implNote
3323      * This operation is likely to be more efficient if
3324      * the specified byte order is the same as
3325      * {@linkplain ByteOrder#nativeOrder()
3326      * the platform native order},
3327      * since this method will not need to reorder
3328      * the bytes of lane values.
3329      *
3330      * @param species species of desired vector
3331      * @param ms the memory segment
3332      * @param offset the offset into the memory segment
3333      * @param bo the intended byte order
3334      * @param m the mask controlling lane selection
3335      * @return a vector loaded from the memory segment
3336      * @throws IndexOutOfBoundsException
3337      *         if {@code offset+N*2 < 0}
3338      *         or {@code offset+N*2 >= ms.byteSize()}
3339      *         for any lane {@code N} in the vector
3340      *         where the mask is set
3341      * @throws IllegalArgumentException if the memory segment is a heap segment that is
3342      *         not backed by a {@code byte[]} array.
3343      * @throws IllegalStateException if the memory segment's session is not alive,
3344      *         or if access occurs from a thread other than the thread owning the session.
3345      * @since 19
3346      */
3347     @ForceInline
3348     public static
3349     ShortVector fromMemorySegment(VectorSpecies<Short> species,
3350                                            MemorySegment ms, long offset,
3351                                            ByteOrder bo,
3352                                            VectorMask<Short> m) {
3353         ShortSpecies vsp = (ShortSpecies) species;
3354         if (offset >= 0 && offset <= (ms.byteSize() - species.vectorByteSize())) {
3355             return vsp.dummyVector().fromMemorySegment0(ms, offset, m).maybeSwap(bo);
3356         }
3357 
3358         // FIXME: optimize
3359         checkMaskFromIndexSize(offset, vsp, m, 2, ms.byteSize());
3360         return vsp.ldLongOp(ms, offset, m, ShortVector::memorySegmentGet);
3361     }
3362 
3363     // Memory store operations
3364 
3365     /**
3366      * Stores this vector into an array of type {@code short[]}
3367      * starting at an offset.
3368      * <p>
3369      * For each vector lane, where {@code N} is the vector lane index,
3370      * the lane element at index {@code N} is stored into the array
3371      * element {@code a[offset+N]}.
3372      *
3373      * @param a the array, of type {@code short[]}
3374      * @param offset the offset into the array
3375      * @throws IndexOutOfBoundsException
3376      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
3377      *         for any lane {@code N} in the vector
3378      */
3379     @ForceInline
3380     public final
3381     void intoArray(short[] a, int offset) {
3382         offset = checkFromIndexSize(offset, length(), a.length);
3383         ShortSpecies vsp = vspecies();
3384         VectorSupport.store(
3385             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3386             a, arrayAddress(a, offset),
3387             this,
3388             a, offset,
3389             (arr, off, v)
3390             -> v.stOp(arr, (int) off,
3391                       (arr_, off_, i, e) -> arr_[off_ + i] = e));
3392     }
3393 
3394     /**
3395      * Stores this vector into an array of type {@code short[]}
3396      * starting at offset and using a mask.
3397      * <p>
3398      * For each vector lane, where {@code N} is the vector lane index,
3399      * the lane element at index {@code N} is stored into the array
3400      * element {@code a[offset+N]}.
3401      * If the mask lane at {@code N} is unset then the corresponding
3402      * array element {@code a[offset+N]} is left unchanged.
3403      * <p>
3404      * Array range checking is done for lanes where the mask is set.
3405      * Lanes where the mask is unset are not stored and do not need
3406      * to correspond to legitimate elements of {@code a}.
3407      * That is, unset lanes may correspond to array indexes less than
3408      * zero or beyond the end of the array.
3409      *
3410      * @param a the array, of type {@code short[]}
3411      * @param offset the offset into the array
3412      * @param m the mask controlling lane storage
3413      * @throws IndexOutOfBoundsException
3414      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
3415      *         for any lane {@code N} in the vector
3416      *         where the mask is set
3417      */
3418     @ForceInline
3419     public final
3420     void intoArray(short[] a, int offset,
3421                    VectorMask<Short> m) {
3422         if (m.allTrue()) {
3423             intoArray(a, offset);
3424         } else {
3425             ShortSpecies vsp = vspecies();
3426             checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
3427             intoArray0(a, offset, m);
3428         }
3429     }
3430 
3431     /**
3432      * Scatters this vector into an array of type {@code short[]}
3433      * using indexes obtained by adding a fixed {@code offset} to a
3434      * series of secondary offsets from an <em>index map</em>.
3435      * The index map is a contiguous sequence of {@code VLENGTH}
3436      * elements in a second array of {@code int}s, starting at a given
3437      * {@code mapOffset}.
3438      * <p>
3439      * For each vector lane, where {@code N} is the vector lane index,
3440      * the lane element at index {@code N} is stored into the array
3441      * element {@code a[f(N)]}, where {@code f(N)} is the
3442      * index mapping expression
3443      * {@code offset + indexMap[mapOffset + N]]}.
3444      *
3445      * @param a the array
3446      * @param offset an offset to combine with the index map offsets
3447      * @param indexMap the index map
3448      * @param mapOffset the offset into the index map
3449      * @throws IndexOutOfBoundsException
3450      *         if {@code mapOffset+N < 0}
3451      *         or if {@code mapOffset+N >= indexMap.length},
3452      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
3453      *         is an invalid index into {@code a},
3454      *         for any lane {@code N} in the vector
3455      * @see ShortVector#toIntArray()
3456      */
3457     @ForceInline
3458     public final
3459     void intoArray(short[] a, int offset,
3460                    int[] indexMap, int mapOffset) {
3461         stOp(a, offset,
3462              (arr, off, i, e) -> {
3463                  int j = indexMap[mapOffset + i];
3464                  arr[off + j] = e;
3465              });
3466     }
3467 
3468     /**
3469      * Scatters this vector into an array of type {@code short[]},
3470      * under the control of a mask, and
3471      * using indexes obtained by adding a fixed {@code offset} to a
3472      * series of secondary offsets from an <em>index map</em>.
3473      * The index map is a contiguous sequence of {@code VLENGTH}
3474      * elements in a second array of {@code int}s, starting at a given
3475      * {@code mapOffset}.
3476      * <p>
3477      * For each vector lane, where {@code N} is the vector lane index,
3478      * if the mask lane at index {@code N} is set then
3479      * the lane element at index {@code N} is stored into the array
3480      * element {@code a[f(N)]}, where {@code f(N)} is the
3481      * index mapping expression
3482      * {@code offset + indexMap[mapOffset + N]]}.
3483      *
3484      * @param a the array
3485      * @param offset an offset to combine with the index map offsets
3486      * @param indexMap the index map
3487      * @param mapOffset the offset into the index map
3488      * @param m the mask
3489      * @throws IndexOutOfBoundsException
3490      *         if {@code mapOffset+N < 0}
3491      *         or if {@code mapOffset+N >= indexMap.length},
3492      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
3493      *         is an invalid index into {@code a},
3494      *         for any lane {@code N} in the vector
3495      *         where the mask is set
3496      * @see ShortVector#toIntArray()
3497      */
3498     @ForceInline
3499     public final
3500     void intoArray(short[] a, int offset,
3501                    int[] indexMap, int mapOffset,
3502                    VectorMask<Short> m) {
3503         stOp(a, offset, m,
3504              (arr, off, i, e) -> {
3505                  int j = indexMap[mapOffset + i];
3506                  arr[off + j] = e;
3507              });
3508     }
3509 
3510     /**
3511      * Stores this vector into an array of type {@code char[]}
3512      * starting at an offset.
3513      * <p>
3514      * For each vector lane, where {@code N} is the vector lane index,
3515      * the lane element at index {@code N}
3516      * is first cast to a {@code char} value and then
3517      * stored into the array element {@code a[offset+N]}.
3518      *
3519      * @param a the array, of type {@code char[]}
3520      * @param offset the offset into the array
3521      * @throws IndexOutOfBoundsException
3522      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
3523      *         for any lane {@code N} in the vector
3524      */
3525     @ForceInline
3526     public final
3527     void intoCharArray(char[] a, int offset) {
3528         offset = checkFromIndexSize(offset, length(), a.length);
3529         ShortSpecies vsp = vspecies();
3530         VectorSupport.store(
3531             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3532             a, charArrayAddress(a, offset),
3533             this,
3534             a, offset,
3535             (arr, off, v)
3536             -> v.stOp(arr, (int) off,
3537                       (arr_, off_, i, e) -> arr_[off_ + i] = (char) e));
3538     }
3539 
3540     /**
3541      * Stores this vector into an array of type {@code char[]}
3542      * starting at offset and using a mask.
3543      * <p>
3544      * For each vector lane, where {@code N} is the vector lane index,
3545      * the lane element at index {@code N}
3546      * is first cast to a {@code char} value and then
3547      * stored into the array element {@code a[offset+N]}.
3548      * If the mask lane at {@code N} is unset then the corresponding
3549      * array element {@code a[offset+N]} is left unchanged.
3550      * <p>
3551      * Array range checking is done for lanes where the mask is set.
3552      * Lanes where the mask is unset are not stored and do not need
3553      * to correspond to legitimate elements of {@code a}.
3554      * That is, unset lanes may correspond to array indexes less than
3555      * zero or beyond the end of the array.
3556      *
3557      * @param a the array, of type {@code char[]}
3558      * @param offset the offset into the array
3559      * @param m the mask controlling lane storage
3560      * @throws IndexOutOfBoundsException
3561      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
3562      *         for any lane {@code N} in the vector
3563      *         where the mask is set
3564      */
3565     @ForceInline
3566     public final
3567     void intoCharArray(char[] a, int offset,
3568                        VectorMask<Short> m) {
3569         if (m.allTrue()) {
3570             intoCharArray(a, offset);
3571         } else {
3572             ShortSpecies vsp = vspecies();
3573             checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
3574             intoCharArray0(a, offset, m);
3575         }
3576     }
3577 
3578     /**
3579      * Scatters this vector into an array of type {@code char[]}
3580      * using indexes obtained by adding a fixed {@code offset} to a
3581      * series of secondary offsets from an <em>index map</em>.
3582      * The index map is a contiguous sequence of {@code VLENGTH}
3583      * elements in a second array of {@code int}s, starting at a given
3584      * {@code mapOffset}.
3585      * <p>
3586      * For each vector lane, where {@code N} is the vector lane index,
3587      * the lane element at index {@code N}
3588      * is first cast to a {@code char} value and then
3589      * stored into the array
3590      * element {@code a[f(N)]}, where {@code f(N)} is the
3591      * index mapping expression
3592      * {@code offset + indexMap[mapOffset + N]]}.
3593      *
3594      * @param a the array
3595      * @param offset an offset to combine with the index map offsets
3596      * @param indexMap the index map
3597      * @param mapOffset the offset into the index map
3598      * @throws IndexOutOfBoundsException
3599      *         if {@code mapOffset+N < 0}
3600      *         or if {@code mapOffset+N >= indexMap.length},
3601      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
3602      *         is an invalid index into {@code a},
3603      *         for any lane {@code N} in the vector
3604      * @see ShortVector#toIntArray()
3605      */
3606     @ForceInline
3607     public final
3608     void intoCharArray(char[] a, int offset,
3609                        int[] indexMap, int mapOffset) {
3610         // FIXME: optimize
3611         stOp(a, offset,
3612              (arr, off, i, e) -> {
3613                  int j = indexMap[mapOffset + i];
3614                  arr[off + j] = (char) e;
3615              });
3616     }
3617 
3618     /**
3619      * Scatters this vector into an array of type {@code char[]},
3620      * under the control of a mask, and
3621      * using indexes obtained by adding a fixed {@code offset} to a
3622      * series of secondary offsets from an <em>index map</em>.
3623      * The index map is a contiguous sequence of {@code VLENGTH}
3624      * elements in a second array of {@code int}s, starting at a given
3625      * {@code mapOffset}.
3626      * <p>
3627      * For each vector lane, where {@code N} is the vector lane index,
3628      * if the mask lane at index {@code N} is set then
3629      * the lane element at index {@code N}
3630      * is first cast to a {@code char} value and then
3631      * stored into the array
3632      * element {@code a[f(N)]}, where {@code f(N)} is the
3633      * index mapping expression
3634      * {@code offset + indexMap[mapOffset + N]]}.
3635      *
3636      * @param a the array
3637      * @param offset an offset to combine with the index map offsets
3638      * @param indexMap the index map
3639      * @param mapOffset the offset into the index map
3640      * @param m the mask
3641      * @throws IndexOutOfBoundsException
3642      *         if {@code mapOffset+N < 0}
3643      *         or if {@code mapOffset+N >= indexMap.length},
3644      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
3645      *         is an invalid index into {@code a},
3646      *         for any lane {@code N} in the vector
3647      *         where the mask is set
3648      * @see ShortVector#toIntArray()
3649      */
3650     @ForceInline
3651     public final
3652     void intoCharArray(char[] a, int offset,
3653                        int[] indexMap, int mapOffset,
3654                        VectorMask<Short> m) {
3655         // FIXME: optimize
3656         stOp(a, offset, m,
3657              (arr, off, i, e) -> {
3658                  int j = indexMap[mapOffset + i];
3659                  arr[off + j] = (char) e;
3660              });
3661     }
3662 
3663 
3664     /**
3665      * {@inheritDoc} <!--workaround-->
3666      * @since 19
3667      */
3668     @Override
3669     @ForceInline
3670     public final
3671     void intoMemorySegment(MemorySegment ms, long offset,
3672                            ByteOrder bo) {
3673         if (ms.isReadOnly()) {
3674             throw new UnsupportedOperationException("Attempt to write a read-only segment");
3675         }
3676 
3677         offset = checkFromIndexSize(offset, byteSize(), ms.byteSize());
3678         maybeSwap(bo).intoMemorySegment0(ms, offset);
3679     }
3680 
3681     /**
3682      * {@inheritDoc} <!--workaround-->
3683      * @since 19
3684      */
3685     @Override
3686     @ForceInline
3687     public final
3688     void intoMemorySegment(MemorySegment ms, long offset,
3689                            ByteOrder bo,
3690                            VectorMask<Short> m) {
3691         if (m.allTrue()) {
3692             intoMemorySegment(ms, offset, bo);
3693         } else {
3694             if (ms.isReadOnly()) {
3695                 throw new UnsupportedOperationException("Attempt to write a read-only segment");
3696             }
3697             ShortSpecies vsp = vspecies();
3698             checkMaskFromIndexSize(offset, vsp, m, 2, ms.byteSize());
3699             maybeSwap(bo).intoMemorySegment0(ms, offset, m);
3700         }
3701     }
3702 
3703     // ================================================
3704 
3705     // Low-level memory operations.
3706     //
3707     // Note that all of these operations *must* inline into a context
3708     // where the exact species of the involved vector is a
3709     // compile-time constant.  Otherwise, the intrinsic generation
3710     // will fail and performance will suffer.
3711     //
3712     // In many cases this is achieved by re-deriving a version of the
3713     // method in each concrete subclass (per species).  The re-derived
3714     // method simply calls one of these generic methods, with exact
3715     // parameters for the controlling metadata, which is either a
3716     // typed vector or constant species instance.
3717 
3718     // Unchecked loading operations in native byte order.
3719     // Caller is responsible for applying index checks, masking, and
3720     // byte swapping.
3721 
    /*package-private*/
    abstract
    ShortVector fromArray0(short[] a, int offset);
    @ForceInline
    final
    // Unchecked, unmasked load in native byte order; caller has already
    // range-checked.  The lambda is the scalar fallback when the
    // intrinsic is not applied.
    ShortVector fromArray0Template(short[] a, int offset) {
        ShortSpecies vsp = vspecies();
        return VectorSupport.load(
            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
            a, arrayAddress(a, offset),
            a, offset, vsp,
            (arr, off, s) -> s.ldOp(arr, (int) off,
                                    (arr_, off_, i) -> arr_[off_ + i]));
    }
3736 
    /*package-private*/
    abstract
    ShortVector fromArray0(short[] a, int offset, VectorMask<Short> m);
    @ForceInline
    final
    <M extends VectorMask<Short>>
    // Masked variant of the unchecked load; unset lanes read as zero.
    ShortVector fromArray0Template(Class<M> maskClass, short[] a, int offset, M m) {
        m.check(species());
        ShortSpecies vsp = vspecies();
        return VectorSupport.loadMasked(
            vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
            a, arrayAddress(a, offset), m,
            a, offset, vsp,
            (arr, off, s, vm) -> s.ldOp(arr, (int) off, vm,
                                        (arr_, off_, i) -> arr_[off_ + i]));
    }
3753 
3754 
    /*package-private*/
    abstract
    ShortVector fromCharArray0(char[] a, int offset);
    @ForceInline
    final
    // Unchecked load from char[]; chars are reinterpreted as short lanes
    // (same 16-bit layout) via the cast in the scalar fallback.
    ShortVector fromCharArray0Template(char[] a, int offset) {
        ShortSpecies vsp = vspecies();
        return VectorSupport.load(
            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
            a, charArrayAddress(a, offset),
            a, offset, vsp,
            (arr, off, s) -> s.ldOp(arr, (int) off,
                                    (arr_, off_, i) -> (short) arr_[off_ + i]));
    }
3769 
    /*package-private*/
    abstract
    ShortVector fromCharArray0(char[] a, int offset, VectorMask<Short> m);
    @ForceInline
    final
    <M extends VectorMask<Short>>
    // Masked variant of the unchecked char[] load; unset lanes are zero.
    ShortVector fromCharArray0Template(Class<M> maskClass, char[] a, int offset, M m) {
        m.check(species());
        ShortSpecies vsp = vspecies();
        return VectorSupport.loadMasked(
                vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
                a, charArrayAddress(a, offset), m,
                a, offset, vsp,
                (arr, off, s, vm) -> s.ldOp(arr, (int) off, vm,
                                            (arr_, off_, i) -> (short) arr_[off_ + i]));
    }
3786 
3787 
    abstract
    ShortVector fromMemorySegment0(MemorySegment bb, long offset);
    @ForceInline
    final
    // Unchecked, unmasked segment load in native byte order; liveness and
    // confinement of the segment's session are enforced by
    // ScopedMemoryAccess.
    ShortVector fromMemorySegment0Template(MemorySegment ms, long offset) {
        ShortSpecies vsp = vspecies();
        return ScopedMemoryAccess.loadFromMemorySegment(
                vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
                (MemorySegmentProxy) ms, offset, vsp,
                (msp, off, s) -> {
                    return s.ldLongOp((MemorySegment) msp, off, ShortVector::memorySegmentGet);
                });
    }
3801 
    abstract
    ShortVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Short> m);
    @ForceInline
    final
    <M extends VectorMask<Short>>
    // Masked variant of the unchecked segment load; unset lanes are zero.
    ShortVector fromMemorySegment0Template(Class<M> maskClass, MemorySegment ms, long offset, M m) {
        ShortSpecies vsp = vspecies();
        m.check(vsp);
        return ScopedMemoryAccess.loadFromMemorySegmentMasked(
                vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
                (MemorySegmentProxy) ms, offset, m, vsp,
                (msp, off, s, vm) -> {
                    return s.ldLongOp((MemorySegment) msp, off, vm, ShortVector::memorySegmentGet);
                });
    }
3817 
3818     // Unchecked storing operations in native byte order.
3819     // Caller is responsible for applying index checks, masking, and
3820     // byte swapping.
3821 
    abstract
    void intoArray0(short[] a, int offset);
    @ForceInline
    final
    // Unchecked, unmasked store in native byte order; caller has already
    // range-checked.  The lambda is the scalar fallback.
    void intoArray0Template(short[] a, int offset) {
        ShortSpecies vsp = vspecies();
        VectorSupport.store(
            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
            a, arrayAddress(a, offset),
            this, a, offset,
            (arr, off, v)
            -> v.stOp(arr, (int) off,
                      (arr_, off_, i, e) -> arr_[off_+i] = e));
    }
3836 
    abstract
    void intoArray0(short[] a, int offset, VectorMask<Short> m);
    @ForceInline
    final
    <M extends VectorMask<Short>>
    // Masked variant of the unchecked store; unset lanes leave the array
    // untouched.
    void intoArray0Template(Class<M> maskClass, short[] a, int offset, M m) {
        m.check(species());
        ShortSpecies vsp = vspecies();
        VectorSupport.storeMasked(
            vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
            a, arrayAddress(a, offset),
            this, m, a, offset,
            (arr, off, v, vm)
            -> v.stOp(arr, (int) off, vm,
                      (arr_, off_, i, e) -> arr_[off_ + i] = e));
    }
3853 
3854 
3855 
    @ForceInline
    final
    // Unchecked, unmasked segment store in native byte order; session
    // liveness/confinement is enforced by ScopedMemoryAccess.
    void intoMemorySegment0(MemorySegment ms, long offset) {
        ShortSpecies vsp = vspecies();
        ScopedMemoryAccess.storeIntoMemorySegment(
                vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
                this,
                (MemorySegmentProxy) ms, offset,
                (msp, off, v) -> {
                    v.stLongOp((MemorySegment) msp, off, ShortVector::memorySegmentSet);
                });
    }
3868 
    abstract
    void intoMemorySegment0(MemorySegment bb, long offset, VectorMask<Short> m);
    @ForceInline
    final
    <M extends VectorMask<Short>>
    // Masked variant of the unchecked segment store; unset lanes leave
    // the segment's bytes untouched.
    void intoMemorySegment0Template(Class<M> maskClass, MemorySegment ms, long offset, M m) {
        ShortSpecies vsp = vspecies();
        m.check(vsp);
        ScopedMemoryAccess.storeIntoMemorySegmentMasked(
                vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
                this, m,
                (MemorySegmentProxy) ms, offset,
                (msp, off, v, vm) -> {
                    v.stLongOp((MemorySegment) msp, off, vm, ShortVector::memorySegmentSet);
                });
    }
3885 
    /*package-private*/
    abstract
    void intoCharArray0(char[] a, int offset, VectorMask<Short> m);
    @ForceInline
    final
    <M extends VectorMask<Short>>
    // Masked unchecked store into char[]; short lanes are reinterpreted
    // as chars (same 16-bit layout) via the cast in the scalar fallback.
    void intoCharArray0Template(Class<M> maskClass, char[] a, int offset, M m) {
        m.check(species());
        ShortSpecies vsp = vspecies();
        VectorSupport.storeMasked(
            vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
            a, charArrayAddress(a, offset),
            this, m, a, offset,
            (arr, off, v, vm)
            -> v.stOp(arr, (int) off, vm,
                      (arr_, off_, i, e) -> arr_[off_ + i] = (char) e));
    }
3903 
3904     // End of low-level memory operations.
3905 
3906     private static
3907     void checkMaskFromIndexSize(int offset,
3908                                 ShortSpecies vsp,
3909                                 VectorMask<Short> m,
3910                                 int scale,
3911                                 int limit) {
3912         ((AbstractMask<Short>)m)
3913             .checkIndexByLane(offset, limit, vsp.iota(), scale);
3914     }
3915 
3916     private static
3917     void checkMaskFromIndexSize(long offset,
3918                                 ShortSpecies vsp,
3919                                 VectorMask<Short> m,
3920                                 int scale,
3921                                 long limit) {
3922         ((AbstractMask<Short>)m)
3923             .checkIndexByLane(offset, limit, vsp.iota(), scale);
3924     }
3925 
3926     @ForceInline
3927     private void conditionalStoreNYI(int offset,
3928                                      ShortSpecies vsp,
3929                                      VectorMask<Short> m,
3930                                      int scale,
3931                                      int limit) {
3932         if (offset < 0 || offset + vsp.laneCount() * scale > limit) {
3933             String msg =
3934                 String.format("unimplemented: store @%d in [0..%d), %s in %s",
3935                               offset, limit, m, vsp);
3936             throw new AssertionError(msg);
3937         }
3938     }
3939 
3940     /*package-private*/
3941     @Override
3942     @ForceInline
3943     final
3944     ShortVector maybeSwap(ByteOrder bo) {
3945         if (bo != NATIVE_ENDIAN) {
3946             return this.reinterpretAsBytes()
3947                 .rearrange(swapBytesShuffle())
3948                 .reinterpretAsShorts();
3949         }
3950         return this;
3951     }
3952 
    // log2 of the byte stride between short[] elements
    // (Unsafe.ARRAY_SHORT_INDEX_SCALE is 2, so this is normally 1).
    static final int ARRAY_SHIFT =
        31 - Integer.numberOfLeadingZeros(Unsafe.ARRAY_SHORT_INDEX_SCALE);
    // Byte offset of element 0 inside a short[] object header.
    static final long ARRAY_BASE =
        Unsafe.ARRAY_SHORT_BASE_OFFSET;

    // Raw in-object byte address of a[index], for Unsafe-based and
    // intrinsic memory access.  The index is widened before shifting
    // to avoid 32-bit overflow.
    @ForceInline
    static long arrayAddress(short[] a, int index) {
        return ARRAY_BASE + (((long)index) << ARRAY_SHIFT);
    }
3962 
    // log2 of the byte stride between char[] elements
    // (Unsafe.ARRAY_CHAR_INDEX_SCALE is 2, so this is normally 1).
    static final int ARRAY_CHAR_SHIFT =
            31 - Integer.numberOfLeadingZeros(Unsafe.ARRAY_CHAR_INDEX_SCALE);
    // Byte offset of element 0 inside a char[] object header.
    static final long ARRAY_CHAR_BASE =
            Unsafe.ARRAY_CHAR_BASE_OFFSET;

    // Raw in-object byte address of a[index] in a char[]; mirrors
    // arrayAddress(short[],int) since char and short lanes are both 16 bits.
    @ForceInline
    static long charArrayAddress(char[] a, int index) {
        return ARRAY_CHAR_BASE + (((long)index) << ARRAY_CHAR_SHIFT);
    }
3972 
3973 
    // Raw in-object byte address of a[index] in a byte[]
    // (element scale is 1, so no shift is needed).
    @ForceInline
    static long byteArrayAddress(byte[] a, int index) {
        return Unsafe.ARRAY_BYTE_BASE_OFFSET + index;
    }
3978 
3979     // ================================================
3980 
3981     /// Reinterpreting view methods:
3982     //   lanewise reinterpret: viewAsXVector()
3983     //   keep shape, redraw lanes: reinterpretAsEs()
3984 
3985     /**
3986      * {@inheritDoc} <!--workaround-->
3987      */
3988     @ForceInline
3989     @Override
3990     public final ByteVector reinterpretAsBytes() {
3991          // Going to ByteVector, pay close attention to byte order.
3992          assert(REGISTER_ENDIAN == ByteOrder.LITTLE_ENDIAN);
3993          return asByteVectorRaw();
3994          //return asByteVectorRaw().rearrange(swapBytesShuffle());
3995     }
3996 
3997     /**
3998      * {@inheritDoc} <!--workaround-->
3999      */
4000     @ForceInline
4001     @Override
4002     public final ShortVector viewAsIntegralLanes() {
4003         return this;
4004     }
4005 
4006     /**
4007      * {@inheritDoc} <!--workaround-->
4008      *
4009      * @implNote This method always throws
4010      * {@code UnsupportedOperationException}, because there is no floating
4011      * point type of the same size as {@code short}.  The return type
4012      * of this method is arbitrarily designated as
4013      * {@code Vector<?>}.  Future versions of this API may change the return
4014      * type if additional floating point types become available.
4015      */
4016     @ForceInline
4017     @Override
4018     public final
4019     Vector<?>
4020     viewAsFloatingLanes() {
4021         LaneType flt = LaneType.SHORT.asFloating();
4022         // asFloating() will throw UnsupportedOperationException for the unsupported type short
4023         throw new AssertionError("Cannot reach here");
4024     }
4025 
4026     // ================================================
4027 
4028     /// Object methods: toString, equals, hashCode
4029     //
4030     // Object methods are defined as if via Arrays.toString, etc.,
4031     // is applied to the array of elements.  Two equal vectors
4032     // are required to have equal species and equal lane values.
4033 
4034     /**
4035      * Returns a string representation of this vector, of the form
4036      * {@code "[0,1,2...]"}, reporting the lane values of this vector,
4037      * in lane order.
4038      *
4039      * The string is produced as if by a call to {@link
4040      * java.util.Arrays#toString(short[]) Arrays.toString()},
4041      * as appropriate to the {@code short} array returned by
4042      * {@link #toArray this.toArray()}.
4043      *
4044      * @return a string of the form {@code "[0,1,2...]"}
4045      * reporting the lane values of this vector
4046      */
4047     @Override
4048     @ForceInline
4049     public final
4050     String toString() {
4051         // now that toArray is strongly typed, we can define this
4052         return Arrays.toString(toArray());
4053     }
4054 
4055     /**
4056      * {@inheritDoc} <!--workaround-->
4057      */
4058     @Override
4059     @ForceInline
4060     public final
4061     boolean equals(Object obj) {
4062         if (obj instanceof Vector) {
4063             Vector<?> that = (Vector<?>) obj;
4064             if (this.species().equals(that.species())) {
4065                 return this.eq(that.check(this.species())).allTrue();
4066             }
4067         }
4068         return false;
4069     }
4070 
4071     /**
4072      * {@inheritDoc} <!--workaround-->
4073      */
4074     @Override
4075     @ForceInline
4076     public final
4077     int hashCode() {
4078         // now that toArray is strongly typed, we can define this
4079         return Objects.hash(species(), Arrays.hashCode(toArray()));
4080     }
4081 
4082     // ================================================
4083 
4084     // Species
4085 
4086     /**
4087      * Class representing {@link ShortVector}'s of the same {@link VectorShape VectorShape}.
4088      */
4089     /*package-private*/
4090     static final class ShortSpecies extends AbstractSpecies<Short> {
4091         private ShortSpecies(VectorShape shape,
4092                 Class<? extends ShortVector> vectorType,
4093                 Class<? extends AbstractMask<Short>> maskType,
4094                 Function<Object, ShortVector> vectorFactory) {
4095             super(shape, LaneType.of(short.class),
4096                   vectorType, maskType,
4097                   vectorFactory);
4098             assert(this.elementSize() == Short.SIZE);
4099         }
4100 
4101         // Specializing overrides:
4102 
4103         @Override
4104         @ForceInline
4105         public final Class<Short> elementType() {
4106             return short.class;
4107         }
4108 
4109         @Override
4110         @ForceInline
4111         final Class<Short> genericElementType() {
4112             return Short.class;
4113         }
4114 
4115         @SuppressWarnings("unchecked")
4116         @Override
4117         @ForceInline
4118         public final Class<? extends ShortVector> vectorType() {
4119             return (Class<? extends ShortVector>) vectorType;
4120         }
4121 
4122         @Override
4123         @ForceInline
4124         public final long checkValue(long e) {
4125             longToElementBits(e);  // only for exception
4126             return e;
4127         }
4128 
4129         /*package-private*/
4130         @Override
4131         @ForceInline
4132         final ShortVector broadcastBits(long bits) {
4133             return (ShortVector)
4134                 VectorSupport.fromBitsCoerced(
4135                     vectorType, short.class, laneCount,
4136                     bits, MODE_BROADCAST, this,
4137                     (bits_, s_) -> s_.rvOp(i -> bits_));
4138         }
4139 
4140         /*package-private*/
4141         @ForceInline
4142         final ShortVector broadcast(short e) {
4143             return broadcastBits(toBits(e));
4144         }
4145 
4146         @Override
4147         @ForceInline
4148         public final ShortVector broadcast(long e) {
4149             return broadcastBits(longToElementBits(e));
4150         }
4151 
4152         /*package-private*/
4153         final @Override
4154         @ForceInline
4155         long longToElementBits(long value) {
4156             // Do the conversion, and then test it for failure.
4157             short e = (short) value;
4158             if ((long) e != value) {
4159                 throw badElementBits(value, e);
4160             }
4161             return toBits(e);
4162         }
4163 
4164         /*package-private*/
4165         @ForceInline
4166         static long toIntegralChecked(short e, boolean convertToInt) {
4167             long value = convertToInt ? (int) e : (long) e;
4168             if ((short) value != e) {
4169                 throw badArrayBits(e, convertToInt, value);
4170             }
4171             return value;
4172         }
4173 
4174         /* this non-public one is for internal conversions */
4175         @Override
4176         @ForceInline
4177         final ShortVector fromIntValues(int[] values) {
4178             VectorIntrinsics.requireLength(values.length, laneCount);
4179             short[] va = new short[laneCount()];
4180             for (int i = 0; i < va.length; i++) {
4181                 int lv = values[i];
4182                 short v = (short) lv;
4183                 va[i] = v;
4184                 if ((int)v != lv) {
4185                     throw badElementBits(lv, v);
4186                 }
4187             }
4188             return dummyVector().fromArray0(va, 0);
4189         }
4190 
4191         // Virtual constructors
4192 
4193         @ForceInline
4194         @Override final
4195         public ShortVector fromArray(Object a, int offset) {
4196             // User entry point:  Be careful with inputs.
4197             return ShortVector
4198                 .fromArray(this, (short[]) a, offset);
4199         }
4200 
4201         @ForceInline
4202         @Override final
4203         ShortVector dummyVector() {
4204             return (ShortVector) super.dummyVector();
4205         }
4206 
4207         /*package-private*/
4208         final @Override
4209         @ForceInline
4210         ShortVector rvOp(RVOp f) {
4211             short[] res = new short[laneCount()];
4212             for (int i = 0; i < res.length; i++) {
4213                 short bits = (short) f.apply(i);
4214                 res[i] = fromBits(bits);
4215             }
4216             return dummyVector().vectorFactory(res);
4217         }
4218 
4219         ShortVector vOp(FVOp f) {
4220             short[] res = new short[laneCount()];
4221             for (int i = 0; i < res.length; i++) {
4222                 res[i] = f.apply(i);
4223             }
4224             return dummyVector().vectorFactory(res);
4225         }
4226 
4227         ShortVector vOp(VectorMask<Short> m, FVOp f) {
4228             short[] res = new short[laneCount()];
4229             boolean[] mbits = ((AbstractMask<Short>)m).getBits();
4230             for (int i = 0; i < res.length; i++) {
4231                 if (mbits[i]) {
4232                     res[i] = f.apply(i);
4233                 }
4234             }
4235             return dummyVector().vectorFactory(res);
4236         }
4237 
4238         /*package-private*/
4239         @ForceInline
4240         <M> ShortVector ldOp(M memory, int offset,
4241                                       FLdOp<M> f) {
4242             return dummyVector().ldOp(memory, offset, f);
4243         }
4244 
4245         /*package-private*/
4246         @ForceInline
4247         <M> ShortVector ldOp(M memory, int offset,
4248                                       VectorMask<Short> m,
4249                                       FLdOp<M> f) {
4250             return dummyVector().ldOp(memory, offset, m, f);
4251         }
4252 
4253         /*package-private*/
4254         @ForceInline
4255         ShortVector ldLongOp(MemorySegment memory, long offset,
4256                                       FLdLongOp f) {
4257             return dummyVector().ldLongOp(memory, offset, f);
4258         }
4259 
4260         /*package-private*/
4261         @ForceInline
4262         ShortVector ldLongOp(MemorySegment memory, long offset,
4263                                       VectorMask<Short> m,
4264                                       FLdLongOp f) {
4265             return dummyVector().ldLongOp(memory, offset, m, f);
4266         }
4267 
4268         /*package-private*/
4269         @ForceInline
4270         <M> void stOp(M memory, int offset, FStOp<M> f) {
4271             dummyVector().stOp(memory, offset, f);
4272         }
4273 
4274         /*package-private*/
4275         @ForceInline
4276         <M> void stOp(M memory, int offset,
4277                       AbstractMask<Short> m,
4278                       FStOp<M> f) {
4279             dummyVector().stOp(memory, offset, m, f);
4280         }
4281 
4282         /*package-private*/
4283         @ForceInline
4284         void stLongOp(MemorySegment memory, long offset, FStLongOp f) {
4285             dummyVector().stLongOp(memory, offset, f);
4286         }
4287 
4288         /*package-private*/
4289         @ForceInline
4290         void stLongOp(MemorySegment memory, long offset,
4291                       AbstractMask<Short> m,
4292                       FStLongOp f) {
4293             dummyVector().stLongOp(memory, offset, m, f);
4294         }
4295 
4296         // N.B. Make sure these constant vectors and
4297         // masks load up correctly into registers.
4298         //
4299         // Also, see if we can avoid all that switching.
4300         // Could we cache both vectors and both masks in
4301         // this species object?
4302 
4303         // Zero and iota vector access
4304         @Override
4305         @ForceInline
4306         public final ShortVector zero() {
4307             if ((Class<?>) vectorType() == ShortMaxVector.class)
4308                 return ShortMaxVector.ZERO;
4309             switch (vectorBitSize()) {
4310                 case 64: return Short64Vector.ZERO;
4311                 case 128: return Short128Vector.ZERO;
4312                 case 256: return Short256Vector.ZERO;
4313                 case 512: return Short512Vector.ZERO;
4314             }
4315             throw new AssertionError();
4316         }
4317 
4318         @Override
4319         @ForceInline
4320         public final ShortVector iota() {
4321             if ((Class<?>) vectorType() == ShortMaxVector.class)
4322                 return ShortMaxVector.IOTA;
4323             switch (vectorBitSize()) {
4324                 case 64: return Short64Vector.IOTA;
4325                 case 128: return Short128Vector.IOTA;
4326                 case 256: return Short256Vector.IOTA;
4327                 case 512: return Short512Vector.IOTA;
4328             }
4329             throw new AssertionError();
4330         }
4331 
4332         // Mask access
4333         @Override
4334         @ForceInline
4335         public final VectorMask<Short> maskAll(boolean bit) {
4336             if ((Class<?>) vectorType() == ShortMaxVector.class)
4337                 return ShortMaxVector.ShortMaxMask.maskAll(bit);
4338             switch (vectorBitSize()) {
4339                 case 64: return Short64Vector.Short64Mask.maskAll(bit);
4340                 case 128: return Short128Vector.Short128Mask.maskAll(bit);
4341                 case 256: return Short256Vector.Short256Mask.maskAll(bit);
4342                 case 512: return Short512Vector.Short512Mask.maskAll(bit);
4343             }
4344             throw new AssertionError();
4345         }
4346     }
4347 
4348     /**
4349      * Finds a species for an element type of {@code short} and shape.
4350      *
4351      * @param s the shape
4352      * @return a species for an element type of {@code short} and shape
4353      * @throws IllegalArgumentException if no such species exists for the shape
4354      */
4355     static ShortSpecies species(VectorShape s) {
4356         Objects.requireNonNull(s);
4357         switch (s.switchKey) {
4358             case VectorShape.SK_64_BIT: return (ShortSpecies) SPECIES_64;
4359             case VectorShape.SK_128_BIT: return (ShortSpecies) SPECIES_128;
4360             case VectorShape.SK_256_BIT: return (ShortSpecies) SPECIES_256;
4361             case VectorShape.SK_512_BIT: return (ShortSpecies) SPECIES_512;
4362             case VectorShape.SK_Max_BIT: return (ShortSpecies) SPECIES_MAX;
4363             default: throw new IllegalArgumentException("Bad shape: " + s);
4364         }
4365     }
4366 
    // Cached species singletons, one per supported shape; species(VectorShape)
    // above resolves shapes to these instances.

    /** Species representing {@link ShortVector}s of {@link VectorShape#S_64_BIT VectorShape.S_64_BIT}. */
    public static final VectorSpecies<Short> SPECIES_64
        = new ShortSpecies(VectorShape.S_64_BIT,
                            Short64Vector.class,
                            Short64Vector.Short64Mask.class,
                            Short64Vector::new);

    /** Species representing {@link ShortVector}s of {@link VectorShape#S_128_BIT VectorShape.S_128_BIT}. */
    public static final VectorSpecies<Short> SPECIES_128
        = new ShortSpecies(VectorShape.S_128_BIT,
                            Short128Vector.class,
                            Short128Vector.Short128Mask.class,
                            Short128Vector::new);

    /** Species representing {@link ShortVector}s of {@link VectorShape#S_256_BIT VectorShape.S_256_BIT}. */
    public static final VectorSpecies<Short> SPECIES_256
        = new ShortSpecies(VectorShape.S_256_BIT,
                            Short256Vector.class,
                            Short256Vector.Short256Mask.class,
                            Short256Vector::new);

    /** Species representing {@link ShortVector}s of {@link VectorShape#S_512_BIT VectorShape.S_512_BIT}. */
    public static final VectorSpecies<Short> SPECIES_512
        = new ShortSpecies(VectorShape.S_512_BIT,
                            Short512Vector.class,
                            Short512Vector.Short512Mask.class,
                            Short512Vector::new);

    /** Species representing {@link ShortVector}s of {@link VectorShape#S_Max_BIT VectorShape.S_Max_BIT}. */
    public static final VectorSpecies<Short> SPECIES_MAX
        = new ShortSpecies(VectorShape.S_Max_BIT,
                            ShortMaxVector.class,
                            ShortMaxVector.ShortMaxMask.class,
                            ShortMaxVector::new);

    /**
     * Preferred species for {@link ShortVector}s.
     * A preferred species is a species of maximal bit-size for the platform.
     */
    public static final VectorSpecies<Short> SPECIES_PREFERRED
        = (ShortSpecies) VectorSpecies.ofPreferred(short.class);
4408 }