1 /*
   2  * Copyright (c) 2017, 2022, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 package jdk.incubator.vector;
  26 
  27 import java.nio.ByteBuffer;
  28 import java.nio.ByteOrder;
  29 import java.nio.ReadOnlyBufferException;
  30 import java.util.Arrays;
  31 import java.util.Objects;
  32 import java.util.function.Function;
  33 import java.util.function.UnaryOperator;
  34 
  35 import jdk.internal.misc.ScopedMemoryAccess;
  36 import jdk.internal.misc.Unsafe;
  37 import jdk.internal.vm.annotation.ForceInline;
  38 import jdk.internal.vm.vector.VectorSupport;
  39 
  40 import static jdk.internal.vm.vector.VectorSupport.*;
  41 import static jdk.incubator.vector.VectorIntrinsics.*;
  42 
  43 import static jdk.incubator.vector.VectorOperators.*;
  44 
  45 // -- This file was mechanically generated: Do not edit! -- //
  46 
  47 /**
  48  * A specialized {@link Vector} representing an ordered immutable sequence of
  49  * {@code short} values.
  50  */
  51 @SuppressWarnings("cast")  // warning: redundant cast
  52 public abstract class ShortVector extends AbstractVector<Short> {
  53 
  54     ShortVector(short[] vec) {
  55         super(vec);
  56     }
  57 
  58     static final int FORBID_OPCODE_KIND = VO_ONLYFP;
  59 
  60     @ForceInline
  61     static int opCode(Operator op) {
  62         return VectorOperators.opCode(op, VO_OPCODE_VALID, FORBID_OPCODE_KIND);
  63     }
  64     @ForceInline
  65     static int opCode(Operator op, int requireKind) {
  66         requireKind |= VO_OPCODE_VALID;
  67         return VectorOperators.opCode(op, requireKind, FORBID_OPCODE_KIND);
  68     }
  69     @ForceInline
  70     static boolean opKind(Operator op, int bit) {
  71         return VectorOperators.opKind(op, bit);
  72     }
  73 
  74     // Virtualized factories and operators,
  75     // coded with portable definitions.
  76     // These are all @ForceInline in case
  77     // they need to be used on performance-critical paths.
  78     // The various shape-specific subclasses
  79     // also specialize them by wrapping
  80     // them in a call like this:
  81     //    return (Short128Vector)
  82     //       super.bOp((Short128Vector) o);
  83     // The purpose of that is to forcibly inline
  84     // the generic definition from this file
  85     // into a sharply type- and size-specific
  86     // wrapper in the subclass file, so that
  87     // the JIT can specialize the code.
  88     // The code is only inlined and expanded
  89     // if it gets hot.  Think of it as a cheap
  90     // and lazy version of C++ templates.
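         // As a concrete sketch (illustrative; the actual wrapper lives in a
         // sibling generated file such as Short128Vector.java), the unary hook
         // is overridden roughly like this:
         //
         //    @ForceInline
         //    final ShortVector uOp(FUnOp f) {
         //        return (Short128Vector) super.uOpTemplate(f);
         //    }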
  91 
  92     // Virtualized getter
  93 
  94     /*package-private*/
  95     abstract short[] vec();
  96 
  97     // Virtualized constructors
  98 
  99     /**
 100      * Build a vector directly using my own constructor.
 101      * It is an error if the array is aliased elsewhere.
 102      */
 103     /*package-private*/
 104     abstract ShortVector vectorFactory(short[] vec);
 105 
 106     /**
 107      * Build a mask directly using my species.
 108      * It is an error if the array is aliased elsewhere.
 109      */
 110     /*package-private*/
 111     @ForceInline
 112     final
 113     AbstractMask<Short> maskFactory(boolean[] bits) {
 114         return vspecies().maskFactory(bits);
 115     }
 116 
 117     // Constant loader (takes dummy as vector arg)
 118     interface FVOp {
 119         short apply(int i);
 120     }
 121 
 122     /*package-private*/
 123     @ForceInline
 124     final
 125     ShortVector vOp(FVOp f) {
 126         short[] res = new short[length()];
 127         for (int i = 0; i < res.length; i++) {
 128             res[i] = f.apply(i);
 129         }
 130         return vectorFactory(res);
 131     }
 132 
 133     @ForceInline
 134     final
 135     ShortVector vOp(VectorMask<Short> m, FVOp f) {
 136         short[] res = new short[length()];
 137         boolean[] mbits = ((AbstractMask<Short>)m).getBits();
 138         for (int i = 0; i < res.length; i++) {
 139             if (mbits[i]) {
 140                 res[i] = f.apply(i);
 141             }
 142         }
 143         return vectorFactory(res);
 144     }
 145 
 146     // Unary operator
 147 
 148     /*package-private*/
 149     interface FUnOp {
 150         short apply(int i, short a);
 151     }
 152 
 153     /*package-private*/
 154     abstract
 155     ShortVector uOp(FUnOp f);
 156     @ForceInline
 157     final
 158     ShortVector uOpTemplate(FUnOp f) {
 159         short[] vec = vec();
 160         short[] res = new short[length()];
 161         for (int i = 0; i < res.length; i++) {
 162             res[i] = f.apply(i, vec[i]);
 163         }
 164         return vectorFactory(res);
 165     }
 166 
 167     /*package-private*/
 168     abstract
 169     ShortVector uOp(VectorMask<Short> m,
 170                              FUnOp f);
 171     @ForceInline
 172     final
 173     ShortVector uOpTemplate(VectorMask<Short> m,
 174                                      FUnOp f) {
 175         if (m == null) {
 176             return uOpTemplate(f);
 177         }
 178         short[] vec = vec();
 179         short[] res = new short[length()];
 180         boolean[] mbits = ((AbstractMask<Short>)m).getBits();
 181         for (int i = 0; i < res.length; i++) {
 182             res[i] = mbits[i] ? f.apply(i, vec[i]) : vec[i];
 183         }
 184         return vectorFactory(res);
 185     }
 186 
 187     // Binary operator
 188 
 189     /*package-private*/
 190     interface FBinOp {
 191         short apply(int i, short a, short b);
 192     }
 193 
 194     /*package-private*/
 195     abstract
 196     ShortVector bOp(Vector<Short> o,
 197                              FBinOp f);
 198     @ForceInline
 199     final
 200     ShortVector bOpTemplate(Vector<Short> o,
 201                                      FBinOp f) {
 202         short[] res = new short[length()];
 203         short[] vec1 = this.vec();
 204         short[] vec2 = ((ShortVector)o).vec();
 205         for (int i = 0; i < res.length; i++) {
 206             res[i] = f.apply(i, vec1[i], vec2[i]);
 207         }
 208         return vectorFactory(res);
 209     }
 210 
 211     /*package-private*/
 212     abstract
 213     ShortVector bOp(Vector<Short> o,
 214                              VectorMask<Short> m,
 215                              FBinOp f);
 216     @ForceInline
 217     final
 218     ShortVector bOpTemplate(Vector<Short> o,
 219                                      VectorMask<Short> m,
 220                                      FBinOp f) {
 221         if (m == null) {
 222             return bOpTemplate(o, f);
 223         }
 224         short[] res = new short[length()];
 225         short[] vec1 = this.vec();
 226         short[] vec2 = ((ShortVector)o).vec();
 227         boolean[] mbits = ((AbstractMask<Short>)m).getBits();
 228         for (int i = 0; i < res.length; i++) {
 229             res[i] = mbits[i] ? f.apply(i, vec1[i], vec2[i]) : vec1[i];
 230         }
 231         return vectorFactory(res);
 232     }
 233 
 234     // Ternary operator
 235 
 236     /*package-private*/
 237     interface FTriOp {
 238         short apply(int i, short a, short b, short c);
 239     }
 240 
 241     /*package-private*/
 242     abstract
 243     ShortVector tOp(Vector<Short> o1,
 244                              Vector<Short> o2,
 245                              FTriOp f);
 246     @ForceInline
 247     final
 248     ShortVector tOpTemplate(Vector<Short> o1,
 249                                      Vector<Short> o2,
 250                                      FTriOp f) {
 251         short[] res = new short[length()];
 252         short[] vec1 = this.vec();
 253         short[] vec2 = ((ShortVector)o1).vec();
 254         short[] vec3 = ((ShortVector)o2).vec();
 255         for (int i = 0; i < res.length; i++) {
 256             res[i] = f.apply(i, vec1[i], vec2[i], vec3[i]);
 257         }
 258         return vectorFactory(res);
 259     }
 260 
 261     /*package-private*/
 262     abstract
 263     ShortVector tOp(Vector<Short> o1,
 264                              Vector<Short> o2,
 265                              VectorMask<Short> m,
 266                              FTriOp f);
 267     @ForceInline
 268     final
 269     ShortVector tOpTemplate(Vector<Short> o1,
 270                                      Vector<Short> o2,
 271                                      VectorMask<Short> m,
 272                                      FTriOp f) {
 273         if (m == null) {
 274             return tOpTemplate(o1, o2, f);
 275         }
 276         short[] res = new short[length()];
 277         short[] vec1 = this.vec();
 278         short[] vec2 = ((ShortVector)o1).vec();
 279         short[] vec3 = ((ShortVector)o2).vec();
 280         boolean[] mbits = ((AbstractMask<Short>)m).getBits();
 281         for (int i = 0; i < res.length; i++) {
 282             res[i] = mbits[i] ? f.apply(i, vec1[i], vec2[i], vec3[i]) : vec1[i];
 283         }
 284         return vectorFactory(res);
 285     }
 286 
 287     // Reduction operator
 288 
 289     /*package-private*/
 290     abstract
 291     short rOp(short v, VectorMask<Short> m, FBinOp f);
 292 
 293     @ForceInline
 294     final
 295     short rOpTemplate(short v, VectorMask<Short> m, FBinOp f) {
 296         if (m == null) {
 297             return rOpTemplate(v, f);
 298         }
 299         short[] vec = vec();
 300         boolean[] mbits = ((AbstractMask<Short>)m).getBits();
 301         for (int i = 0; i < vec.length; i++) {
 302             v = mbits[i] ? f.apply(i, v, vec[i]) : v;
 303         }
 304         return v;
 305     }
 306 
 307     @ForceInline
 308     final
 309     short rOpTemplate(short v, FBinOp f) {
 310         short[] vec = vec();
 311         for (int i = 0; i < vec.length; i++) {
 312             v = f.apply(i, v, vec[i]);
 313         }
 314         return v;
 315     }
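         // Illustrative fold: for a vector holding {1, 2, 3, 4}, an ADD
         // reduction via rOpTemplate((short) 0, (i, acc, a) -> (short) (acc + a))
         // accumulates left to right and yields 10, with ordinary short
         // wrap-around on overflow.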
 316 
 317     // Memory reference
 318 
 319     /*package-private*/
 320     interface FLdOp<M> {
 321         short apply(M memory, int offset, int i);
 322     }
 323 
 324     /*package-private*/
 325     @ForceInline
 326     final
 327     <M> ShortVector ldOp(M memory, int offset,
 328                                   FLdOp<M> f) {
 329         //dummy; no vec = vec();
 330         short[] res = new short[length()];
 331         for (int i = 0; i < res.length; i++) {
 332             res[i] = f.apply(memory, offset, i);
 333         }
 334         return vectorFactory(res);
 335     }
 336 
 337     /*package-private*/
 338     @ForceInline
 339     final
 340     <M> ShortVector ldOp(M memory, int offset,
 341                                   VectorMask<Short> m,
 342                                   FLdOp<M> f) {
 343         //short[] vec = vec();
 344         short[] res = new short[length()];
 345         boolean[] mbits = ((AbstractMask<Short>)m).getBits();
 346         for (int i = 0; i < res.length; i++) {
 347             if (mbits[i]) {
 348                 res[i] = f.apply(memory, offset, i);
 349             }
 350         }
 351         return vectorFactory(res);
 352     }
 353 
 354     interface FStOp<M> {
 355         void apply(M memory, int offset, int i, short a);
 356     }
 357 
 358     /*package-private*/
 359     @ForceInline
 360     final
 361     <M> void stOp(M memory, int offset,
 362                   FStOp<M> f) {
 363         short[] vec = vec();
 364         for (int i = 0; i < vec.length; i++) {
 365             f.apply(memory, offset, i, vec[i]);
 366         }
 367     }
 368 
 369     /*package-private*/
 370     @ForceInline
 371     final
 372     <M> void stOp(M memory, int offset,
 373                   VectorMask<Short> m,
 374                   FStOp<M> f) {
 375         short[] vec = vec();
 376         boolean[] mbits = ((AbstractMask<Short>)m).getBits();
 377         for (int i = 0; i < vec.length; i++) {
 378             if (mbits[i]) {
 379                 f.apply(memory, offset, i, vec[i]);
 380             }
 381         }
 382     }
 383 
 384     // Binary test
 385 
 386     /*package-private*/
 387     interface FBinTest {
 388         boolean apply(int cond, int i, short a, short b);
 389     }
 390 
 391     /*package-private*/
 392     @ForceInline
 393     final
 394     AbstractMask<Short> bTest(int cond,
 395                                   Vector<Short> o,
 396                                   FBinTest f) {
 397         short[] vec1 = vec();
 398         short[] vec2 = ((ShortVector)o).vec();
 399         boolean[] bits = new boolean[length()];
 400         for (int i = 0; i < length(); i++){
 401             bits[i] = f.apply(cond, i, vec1[i], vec2[i]);
 402         }
 403         return maskFactory(bits);
 404     }
 405 
 406     /*package-private*/
 407     @ForceInline
 408     static short rotateLeft(short a, int n) {
 409         return (short)(((((short)a) & Short.toUnsignedInt((short)-1)) << (n & Short.SIZE-1)) | ((((short)a) & Short.toUnsignedInt((short)-1)) >>> (Short.SIZE - (n & Short.SIZE-1))));
 410     }
 411 
 412     /*package-private*/
 413     @ForceInline
 414     static short rotateRight(short a, int n) {
 415         return (short)(((((short)a) & Short.toUnsignedInt((short)-1)) >>> (n & Short.SIZE-1)) | ((((short)a) & Short.toUnsignedInt((short)-1)) << (Short.SIZE - (n & Short.SIZE-1))));
 416     }
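         // Worked example (illustrative): rotateLeft((short) 0x1234, 4) yields
         // (short) 0x2341, and because the count is reduced mod Short.SIZE a
         // rotation by 20 behaves exactly like a rotation by 4.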
 417 
 418     /*package-private*/
 419     @Override
 420     abstract ShortSpecies vspecies();
 421 
 422     /*package-private*/
 423     @ForceInline
 424     static long toBits(short e) {
 425         return  e;
 426     }
 427 
 428     /*package-private*/
 429     @ForceInline
 430     static short fromBits(long bits) {
 431         return ((short)bits);
 432     }
 433 
 434     // Static factories (other than memory operations)
 435 
 436     // Note: A surprising behavior in javadoc
 437     // sometimes makes a lone /** {@inheritDoc} */
 438     // comment drop the method altogether,
 439     // apparently if the method mentions a
 440     // parameter or return type of Vector<Short>
 441     // instead of Vector<E> as originally specified.
 442     // Adding an empty HTML fragment appears to
 443     // nudge javadoc into providing the desired
 444     // inherited documentation.  We use the HTML
 445     // comment <!--workaround--> for this.
 446 
 447     /**
 448      * Returns a vector of the given species
 449      * where all lane elements are set to
 450      * zero, the default primitive value.
 451      *
 452      * @param species species of the desired zero vector
 453      * @return a zero vector
 454      */
 455     @ForceInline
 456     public static ShortVector zero(VectorSpecies<Short> species) {
 457         ShortSpecies vsp = (ShortSpecies) species;
 458         return VectorSupport.fromBitsCoerced(vsp.vectorType(), short.class, species.length(),
 459                                 0, MODE_BROADCAST, vsp,
 460                                 ((bits_, s_) -> s_.rvOp(i -> bits_)));
 461     }
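         // Illustrative use, assuming one of the public species constants
         // (such as SPECIES_128) that this class exposes:
         //
         //    ShortVector z = ShortVector.zero(ShortVector.SPECIES_128);
         //    // every lane of z is (short) 0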
 462 
 463     /**
 464      * Returns a vector of the same species as this one
 465      * where all lane elements are set to
 466      * the primitive value {@code e}.
 467      *
 468      * The contents of the current vector are discarded;
 469      * only the species is relevant to this operation.
 470      *
 471      * <p> This method returns the value of this expression:
 472      * {@code ShortVector.broadcast(this.species(), e)}.
 473      *
 474      * @apiNote
 475      * Unlike the similar method named {@code broadcast()}
 476      * in the supertype {@code Vector}, this method does not
 477      * need to validate its argument, and cannot throw
 478      * {@code IllegalArgumentException}.  This method is
 479      * therefore preferable to the supertype method.
 480      *
 481      * @param e the value to broadcast
 482      * @return a vector where all lane elements are set to
 483      *         the primitive value {@code e}
 484      * @see #broadcast(VectorSpecies,long)
 485      * @see Vector#broadcast(long)
 486      * @see VectorSpecies#broadcast(long)
 487      */
 488     public abstract ShortVector broadcast(short e);
 489 
 490     /**
 491      * Returns a vector of the given species
 492      * where all lane elements are set to
 493      * the primitive value {@code e}.
 494      *
 495      * @param species species of the desired vector
 496      * @param e the value to broadcast
 497      * @return a vector where all lane elements are set to
 498      *         the primitive value {@code e}
 499      * @see #broadcast(long)
 500      * @see Vector#broadcast(long)
 501      * @see VectorSpecies#broadcast(long)
 502      */
 503     @ForceInline
 504     public static ShortVector broadcast(VectorSpecies<Short> species, short e) {
 505         ShortSpecies vsp = (ShortSpecies) species;
 506         return vsp.broadcast(e);
 507     }
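         // Illustrative use (SPECIES_128 as above is only an example species):
         //
         //    ShortVector ones = ShortVector.broadcast(ShortVector.SPECIES_128, (short) 1);
         //    ShortVector twos = ones.broadcast((short) 2);  // same species, new value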
 508 
 509     /*package-private*/
 510     @ForceInline
 511     final ShortVector broadcastTemplate(short e) {
 512         ShortSpecies vsp = vspecies();
 513         return vsp.broadcast(e);
 514     }
 515 
 516     /**
 517      * {@inheritDoc} <!--workaround-->
 518      * @apiNote
 519      * When working with vector subtypes like {@code ShortVector},
 520      * {@linkplain #broadcast(short) the more strongly typed method}
 521      * is typically selected.  It can be explicitly selected
 522      * using a cast: {@code v.broadcast((short)e)}.
 523      * The two expressions will produce numerically identical results.
 524      */
 525     @Override
 526     public abstract ShortVector broadcast(long e);
 527 
 528     /**
 529      * Returns a vector of the given species
 530      * where all lane elements are set to
 531      * the primitive value {@code e}.
 532      *
 533      * The {@code long} value must be accurately representable
 534      * by the {@code ETYPE} of the vector species, so that
 535      * {@code e==(long)(ETYPE)e}.
 536      *
 537      * @param species species of the desired vector
 538      * @param e the value to broadcast
 539      * @return a vector where all lane elements are set to
 540      *         the primitive value {@code e}
 541      * @throws IllegalArgumentException
 542      *         if the given {@code long} value cannot
 543      *         be represented by the vector's {@code ETYPE}
 544      * @see #broadcast(VectorSpecies,short)
 545      * @see VectorSpecies#checkValue(long)
 546      */
 547     @ForceInline
 548     public static ShortVector broadcast(VectorSpecies<Short> species, long e) {
 549         ShortSpecies vsp = (ShortSpecies) species;
 550         return vsp.broadcast(e);
 551     }
 552 
 553     /*package-private*/
 554     @ForceInline
 555     final ShortVector broadcastTemplate(long e) {
 556         return vspecies().broadcast(e);
 557     }
 558 
 559     // Unary lanewise support
 560 
 561     /**
 562      * {@inheritDoc} <!--workaround-->
 563      */
 564     public abstract
 565     ShortVector lanewise(VectorOperators.Unary op);
 566 
 567     @ForceInline
 568     final
 569     ShortVector lanewiseTemplate(VectorOperators.Unary op) {
 570         if (opKind(op, VO_SPECIAL)) {
 571             if (op == ZOMO) {
 572                 return blend(broadcast(-1), compare(NE, 0));
 573             }
 574             if (op == NOT) {
 575                 return broadcast(-1).lanewise(XOR, this);
 576             }
 577         }
 578         int opc = opCode(op);
 579         return VectorSupport.unaryOp(
 580             opc, getClass(), null, short.class, length(),
 581             this, null,
 582             UN_IMPL.find(op, opc, ShortVector::unaryOperations));
 583     }
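         // The rewrites above express the "special" unary ops through simpler
         // primitives: NOT is XOR with an all-ones vector, and ZOMO
         // (zero-or-minus-one) maps each lane to 0 if it was 0 and to -1
         // otherwise, e.g. {0, 5, -3} becomes {0, -1, -1}.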
 584 
 585     /**
 586      * {@inheritDoc} <!--workaround-->
 587      */
 588     @Override
 589     public abstract
 590     ShortVector lanewise(VectorOperators.Unary op,
 591                                   VectorMask<Short> m);
 592     @ForceInline
 593     final
 594     ShortVector lanewiseTemplate(VectorOperators.Unary op,
 595                                           Class<? extends VectorMask<Short>> maskClass,
 596                                           VectorMask<Short> m) {
 597         m.check(maskClass, this);
 598         if (opKind(op, VO_SPECIAL)) {
 599             if (op == ZOMO) {
 600                 return blend(broadcast(-1), compare(NE, 0, m));
 601             }
 602             if (op == NOT) {
 603                 return lanewise(XOR, broadcast(-1), m);
 604             }
 605         }
 606         int opc = opCode(op);
 607         return VectorSupport.unaryOp(
 608             opc, getClass(), maskClass, short.class, length(),
 609             this, m,
 610             UN_IMPL.find(op, opc, ShortVector::unaryOperations));
 611     }
 612 
 613     private static final
 614     ImplCache<Unary, UnaryOperation<ShortVector, VectorMask<Short>>>
 615         UN_IMPL = new ImplCache<>(Unary.class, ShortVector.class);
 616 
 617     private static UnaryOperation<ShortVector, VectorMask<Short>> unaryOperations(int opc_) {
 618         switch (opc_) {
 619             case VECTOR_OP_NEG: return (v0, m) ->
 620                     v0.uOp(m, (i, a) -> (short) -a);
 621             case VECTOR_OP_ABS: return (v0, m) ->
 622                     v0.uOp(m, (i, a) -> (short) Math.abs(a));
 623             default: return null;
 624         }
 625     }
 626 
 627     // Binary lanewise support
 628 
 629     /**
 630      * {@inheritDoc} <!--workaround-->
 631      * @see #lanewise(VectorOperators.Binary,short)
 632      * @see #lanewise(VectorOperators.Binary,short,VectorMask)
 633      */
 634     @Override
 635     public abstract
 636     ShortVector lanewise(VectorOperators.Binary op,
 637                                   Vector<Short> v);
 638     @ForceInline
 639     final
 640     ShortVector lanewiseTemplate(VectorOperators.Binary op,
 641                                           Vector<Short> v) {
 642         ShortVector that = (ShortVector) v;
 643         that.check(this);
 644 
 645         if (opKind(op, VO_SPECIAL | VO_SHIFT)) {
 646             if (op == FIRST_NONZERO) {
 647                 // FIXME: Support this in the JIT.
 648                 VectorMask<Short> thisNZ
 649                     = this.viewAsIntegralLanes().compare(NE, (short) 0);
 650                 that = that.blend((short) 0, thisNZ.cast(vspecies()));
 651                 op = OR_UNCHECKED;
 652             }
 653             if (opKind(op, VO_SHIFT)) {
 654                 // As per shift specification for Java, mask the shift count.
 655                 // This allows the JIT to ignore some ISA details.
 656                 that = that.lanewise(AND, SHIFT_MASK);
 657             }
 658             if (op == AND_NOT) {
 659                 // FIXME: Support this in the JIT.
 660                 that = that.lanewise(NOT);
 661                 op = AND;
 662             } else if (op == DIV) {
 663                 VectorMask<Short> eqz = that.eq((short) 0);
 664                 if (eqz.anyTrue()) {
 665                     throw that.divZeroException();
 666                 }
 667             }
 668         }
 669 
 670         int opc = opCode(op);
 671         return VectorSupport.binaryOp(
 672             opc, getClass(), null, short.class, length(),
 673             this, that, null,
 674             BIN_IMPL.find(op, opc, ShortVector::binaryOperations));
 675     }
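         // Illustrative expansions of the special cases handled above:
         //
         //    a.lanewise(AND_NOT, b)        // computed as a AND (NOT b)
         //    a.lanewise(FIRST_NONZERO, b)  // a's lane if nonzero, else b's lane,
         //                                  // e.g. {0, 7} vs {9, 8} gives {9, 7}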
 676 
 677     /**
 678      * {@inheritDoc} <!--workaround-->
 679      * @see #lanewise(VectorOperators.Binary,short,VectorMask)
 680      */
 681     @Override
 682     public abstract
 683     ShortVector lanewise(VectorOperators.Binary op,
 684                                   Vector<Short> v,
 685                                   VectorMask<Short> m);
 686     @ForceInline
 687     final
 688     ShortVector lanewiseTemplate(VectorOperators.Binary op,
 689                                           Class<? extends VectorMask<Short>> maskClass,
 690                                           Vector<Short> v, VectorMask<Short> m) {
 691         ShortVector that = (ShortVector) v;
 692         that.check(this);
 693         m.check(maskClass, this);
 694 
 695         if (opKind(op, VO_SPECIAL | VO_SHIFT)) {
 696             if (op == FIRST_NONZERO) {
 697                 // FIXME: Support this in the JIT.
 698                 VectorMask<Short> thisNZ
 699                     = this.viewAsIntegralLanes().compare(NE, (short) 0);
 700                 that = that.blend((short) 0, thisNZ.cast(vspecies()));
 701                 op = OR_UNCHECKED;
 702             }
 703             if (opKind(op, VO_SHIFT)) {
 704                 // As per shift specification for Java, mask the shift count.
 705                 // This allows the JIT to ignore some ISA details.
 706                 that = that.lanewise(AND, SHIFT_MASK);
 707             }
 708             if (op == AND_NOT) {
 709                 // FIXME: Support this in the JIT.
 710                 that = that.lanewise(NOT);
 711                 op = AND;
 712             } else if (op == DIV) {
 713                 VectorMask<Short> eqz = that.eq((short)0);
 714                 if (eqz.and(m).anyTrue()) {
 715                     throw that.divZeroException();
 716                 }
 717                 // suppress div/0 exceptions in unset lanes
 718                 that = that.lanewise(NOT, eqz);
 719             }
 720         }
 721 
 722         int opc = opCode(op);
 723         return VectorSupport.binaryOp(
 724             opc, getClass(), maskClass, short.class, length(),
 725             this, that, m,
 726             BIN_IMPL.find(op, opc, ShortVector::binaryOperations));
 727     }
 728 
 729     private static final
 730     ImplCache<Binary, BinaryOperation<ShortVector, VectorMask<Short>>>
 731         BIN_IMPL = new ImplCache<>(Binary.class, ShortVector.class);
 732 
 733     private static BinaryOperation<ShortVector, VectorMask<Short>> binaryOperations(int opc_) {
 734         switch (opc_) {
 735             case VECTOR_OP_ADD: return (v0, v1, vm) ->
 736                     v0.bOp(v1, vm, (i, a, b) -> (short)(a + b));
 737             case VECTOR_OP_SUB: return (v0, v1, vm) ->
 738                     v0.bOp(v1, vm, (i, a, b) -> (short)(a - b));
 739             case VECTOR_OP_MUL: return (v0, v1, vm) ->
 740                     v0.bOp(v1, vm, (i, a, b) -> (short)(a * b));
 741             case VECTOR_OP_DIV: return (v0, v1, vm) ->
 742                     v0.bOp(v1, vm, (i, a, b) -> (short)(a / b));
 743             case VECTOR_OP_MAX: return (v0, v1, vm) ->
 744                     v0.bOp(v1, vm, (i, a, b) -> (short)Math.max(a, b));
 745             case VECTOR_OP_MIN: return (v0, v1, vm) ->
 746                     v0.bOp(v1, vm, (i, a, b) -> (short)Math.min(a, b));
 747             case VECTOR_OP_AND: return (v0, v1, vm) ->
 748                     v0.bOp(v1, vm, (i, a, b) -> (short)(a & b));
 749             case VECTOR_OP_OR: return (v0, v1, vm) ->
 750                     v0.bOp(v1, vm, (i, a, b) -> (short)(a | b));
 751             case VECTOR_OP_XOR: return (v0, v1, vm) ->
 752                     v0.bOp(v1, vm, (i, a, b) -> (short)(a ^ b));
 753             case VECTOR_OP_LSHIFT: return (v0, v1, vm) ->
 754                     v0.bOp(v1, vm, (i, a, n) -> (short)(a << n));
 755             case VECTOR_OP_RSHIFT: return (v0, v1, vm) ->
 756                     v0.bOp(v1, vm, (i, a, n) -> (short)(a >> n));
 757             case VECTOR_OP_URSHIFT: return (v0, v1, vm) ->
 758                     v0.bOp(v1, vm, (i, a, n) -> (short)((a & LSHR_SETUP_MASK) >>> n));
 759             case VECTOR_OP_LROTATE: return (v0, v1, vm) ->
 760                     v0.bOp(v1, vm, (i, a, n) -> rotateLeft(a, (int)n));
 761             case VECTOR_OP_RROTATE: return (v0, v1, vm) ->
 762                     v0.bOp(v1, vm, (i, a, n) -> rotateRight(a, (int)n));
 763             default: return null;
 764         }
 765     }
 766 
 767     // FIXME: Maybe all of the public final methods in this file (the
 768     // simple ones that just call lanewise) should be pushed down to
 769     // the X-VectorBits template.  They can't optimize properly at
 770     // this level, and must rely on inlining.  Does it work?
 771     // (If it works, of course keep the code here.)
 772 
 773     /**
 774      * Combines the lane values of this vector
 775      * with the value of a broadcast scalar.
 776      *
 777      * This is a lane-wise binary operation which applies
 778      * the selected operation to each lane.
 779      * The return value will be equal to this expression:
 780      * {@code this.lanewise(op, this.broadcast(e))}.
 781      *
 782      * @param op the operation used to process lane values
 783      * @param e the input scalar
 784      * @return the result of applying the operation lane-wise
 785      *         to the two input vectors
 786      * @throws UnsupportedOperationException if this vector does
 787      *         not support the requested operation
 788      * @see #lanewise(VectorOperators.Binary,Vector)
 789      * @see #lanewise(VectorOperators.Binary,short,VectorMask)
 790      */
 791     @ForceInline
 792     public final
 793     ShortVector lanewise(VectorOperators.Binary op,
 794                                   short e) {
 795         if (opKind(op, VO_SHIFT) && (short)(int)e == e) {
 796             return lanewiseShift(op, (int) e);
 797         }
 798         if (op == AND_NOT) {
 799             op = AND; e = (short) ~e;
 800         }
 801         return lanewise(op, broadcast(e));
 802     }
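         // Illustrative: v.lanewise(AND_NOT, (short) 0x00FF) is rewritten above
         // to v.lanewise(AND, (short) 0xFF00), and an in-range shift count such
         // as in v.lanewise(LSHL, (short) 3) is routed through the int-based
         // lanewiseShift path instead of being broadcast as a vector.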
 803 
 804     /**
 805      * Combines the lane values of this vector
 806      * with the value of a broadcast scalar,
 807      * with selection of lane elements controlled by a mask.
 808      *
 809      * This is a masked lane-wise binary operation which applies
 810      * the selected operation to each lane.
 811      * The return value will be equal to this expression:
 812      * {@code this.lanewise(op, this.broadcast(e), m)}.
 813      *
 814      * @param op the operation used to process lane values
 815      * @param e the input scalar
 816      * @param m the mask controlling lane selection
 817      * @return the result of applying the operation lane-wise
 818      *         to the input vector and the scalar
 819      * @throws UnsupportedOperationException if this vector does
 820      *         not support the requested operation
 821      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
 822      * @see #lanewise(VectorOperators.Binary,short)
 823      */
 824     @ForceInline
 825     public final
 826     ShortVector lanewise(VectorOperators.Binary op,
 827                                   short e,
 828                                   VectorMask<Short> m) {
 829         if (opKind(op, VO_SHIFT) && (short)(int)e == e) {
 830             return lanewiseShift(op, (int) e, m);
 831         }
 832         if (op == AND_NOT) {
 833             op = AND; e = (short) ~e;
 834         }
 835         return lanewise(op, broadcast(e), m);
 836     }
 837 
 838     /**
 839      * {@inheritDoc} <!--workaround-->
 840      * @apiNote
 841      * When working with vector subtypes like {@code ShortVector},
 842      * {@linkplain #lanewise(VectorOperators.Binary,short)
 843      * the more strongly typed method}
 844      * is typically selected.  It can be explicitly selected
 845      * using a cast: {@code v.lanewise(op,(short)e)}.
 846      * The two expressions will produce numerically identical results.
 847      */
 848     @ForceInline
 849     public final
 850     ShortVector lanewise(VectorOperators.Binary op,
 851                                   long e) {
 852         short e1 = (short) e;
 853         if ((long)e1 != e
 854             // allow shift ops to clip down their int parameters
 855             && !(opKind(op, VO_SHIFT) && (int)e1 == e)) {
 856             vspecies().checkValue(e);  // for exception
 857         }
 858         return lanewise(op, e1);
 859     }
 860 
 861     /**
 862      * {@inheritDoc} <!--workaround-->
 863      * @apiNote
 864      * When working with vector subtypes like {@code ShortVector},
 865      * {@linkplain #lanewise(VectorOperators.Binary,short,VectorMask)
 866      * the more strongly typed method}
 867      * is typically selected.  It can be explicitly selected
 868      * using a cast: {@code v.lanewise(op,(short)e,m)}.
 869      * The two expressions will produce numerically identical results.
 870      */
 871     @ForceInline
 872     public final
 873     ShortVector lanewise(VectorOperators.Binary op,
 874                                   long e, VectorMask<Short> m) {
 875         short e1 = (short) e;
 876         if ((long)e1 != e
 877             // allow shift ops to clip down their int parameters
 878             && !(opKind(op, VO_SHIFT) && (int)e1 == e)) {
 879             vspecies().checkValue(e);  // for exception
 880         }
 881         return lanewise(op, e1, m);
 882     }
 883 
 884     /*package-private*/
 885     abstract ShortVector
 886     lanewiseShift(VectorOperators.Binary op, int e);
 887 
 888     /*package-private*/
 889     @ForceInline
 890     final ShortVector
 891     lanewiseShiftTemplate(VectorOperators.Binary op, int e) {
 892         // Special handling for these.  FIXME: Refactor?
 893         assert(opKind(op, VO_SHIFT));
 894         // As per shift specification for Java, mask the shift count.
 895         e &= SHIFT_MASK;
 896         int opc = opCode(op);
 897         return VectorSupport.broadcastInt(
 898             opc, getClass(), null, short.class, length(),
 899             this, e, null,
 900             BIN_INT_IMPL.find(op, opc, ShortVector::broadcastIntOperations));
 901     }
 902 
 903     /*package-private*/
 904     abstract ShortVector
 905     lanewiseShift(VectorOperators.Binary op, int e, VectorMask<Short> m);
 906 
 907     /*package-private*/
 908     @ForceInline
 909     final ShortVector
 910     lanewiseShiftTemplate(VectorOperators.Binary op,
 911                           Class<? extends VectorMask<Short>> maskClass,
 912                           int e, VectorMask<Short> m) {
 913         m.check(maskClass, this);
 914         assert(opKind(op, VO_SHIFT));
 915         // As per shift specification for Java, mask the shift count.
 916         e &= SHIFT_MASK;
 917         int opc = opCode(op);
 918         return VectorSupport.broadcastInt(
 919             opc, getClass(), maskClass, short.class, length(),
 920             this, e, m,
 921             BIN_INT_IMPL.find(op, opc, ShortVector::broadcastIntOperations));
 922     }
 923 
 924     private static final
 925     ImplCache<Binary,VectorBroadcastIntOp<ShortVector, VectorMask<Short>>> BIN_INT_IMPL
 926         = new ImplCache<>(Binary.class, ShortVector.class);
 927 
 928     private static VectorBroadcastIntOp<ShortVector, VectorMask<Short>> broadcastIntOperations(int opc_) {
 929         switch (opc_) {
 930             case VECTOR_OP_LSHIFT: return (v, n, m) ->
 931                     v.uOp(m, (i, a) -> (short)(a << n));
 932             case VECTOR_OP_RSHIFT: return (v, n, m) ->
 933                     v.uOp(m, (i, a) -> (short)(a >> n));
 934             case VECTOR_OP_URSHIFT: return (v, n, m) ->
 935                     v.uOp(m, (i, a) -> (short)((a & LSHR_SETUP_MASK) >>> n));
 936             case VECTOR_OP_LROTATE: return (v, n, m) ->
 937                     v.uOp(m, (i, a) -> rotateLeft(a, (int)n));
 938             case VECTOR_OP_RROTATE: return (v, n, m) ->
 939                     v.uOp(m, (i, a) -> rotateRight(a, (int)n));
 940             default: return null;
 941         }
 942     }
 943 
 944     // As per shift specification for Java, mask the shift count.
 945     // We mask 0x3F (long), 0x1F (int), 0x0F (short), 0x07 (byte).
 946     // The latter two maskings go beyond the JLS, but seem reasonable
 947     // since our lane types are first-class types, not just dressed
 948     // up ints.
 949     private static final int SHIFT_MASK = (Short.SIZE - 1);
 950     // Also simulate >>> on sub-word variables with a mask.
 951     private static final int LSHR_SETUP_MASK = ((1 << Short.SIZE) - 1);
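         // Example (illustrative): for shorts SHIFT_MASK is 15, so a requested
         // shift by 17 behaves like a shift by 1; LSHR_SETUP_MASK is 0xFFFF, so
         // (a & LSHR_SETUP_MASK) >>> n acts as a true 16-bit unsigned shift even
         // though the arithmetic is carried out on ints.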
 952 
 953     // Ternary lanewise support
 954 
 955     // Ternary operators come in eight variations:
 956     //   lanewise(op, [broadcast(e1)|v1], [broadcast(e2)|v2])
 957     //   lanewise(op, [broadcast(e1)|v1], [broadcast(e2)|v2], mask)
 958 
 959     // It is annoying to support all of these variations of masking
 960     // and broadcast, but it would be more surprising not to continue
 961     // the obvious pattern started by unary and binary.
 962 
 963     /**
 964      * {@inheritDoc} <!--workaround-->
 965      * @see #lanewise(VectorOperators.Ternary,short,short,VectorMask)
 966      * @see #lanewise(VectorOperators.Ternary,Vector,short,VectorMask)
 967      * @see #lanewise(VectorOperators.Ternary,short,Vector,VectorMask)
 968      * @see #lanewise(VectorOperators.Ternary,short,short)
 969      * @see #lanewise(VectorOperators.Ternary,Vector,short)
 970      * @see #lanewise(VectorOperators.Ternary,short,Vector)
 971      */
 972     @Override
 973     public abstract
 974     ShortVector lanewise(VectorOperators.Ternary op,
 975                                                   Vector<Short> v1,
 976                                                   Vector<Short> v2);
 977     @ForceInline
 978     final
 979     ShortVector lanewiseTemplate(VectorOperators.Ternary op,
 980                                           Vector<Short> v1,
 981                                           Vector<Short> v2) {
 982         ShortVector that = (ShortVector) v1;
 983         ShortVector tother = (ShortVector) v2;
 984         // It's a word: https://www.dictionary.com/browse/tother
 985         // See also Chapter 11 of Dickens, Our Mutual Friend:
 986         // "Totherest Governor," replied Mr Riderhood...
 987         that.check(this);
 988         tother.check(this);
 989         if (op == BITWISE_BLEND) {
 990             // FIXME: Support this in the JIT.
 991             that = this.lanewise(XOR, that).lanewise(AND, tother);
 992             return this.lanewise(XOR, that);
 993         }
 994         int opc = opCode(op);
 995         return VectorSupport.ternaryOp(
 996             opc, getClass(), null, short.class, length(),
 997             this, that, tother, null,
 998             TERN_IMPL.find(op, opc, ShortVector::ternaryOperations));
 999     }
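         // Illustrative identity for the rewrite above: BITWISE_BLEND(a, b, c)
         // is a ^ ((a ^ b) & c), which selects each bit from b where c has a
         // 1 bit and from a where c has a 0 bit.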
1000 
1001     /**
1002      * {@inheritDoc} <!--workaround-->
1003      * @see #lanewise(VectorOperators.Ternary,short,short,VectorMask)
1004      * @see #lanewise(VectorOperators.Ternary,Vector,short,VectorMask)
1005      * @see #lanewise(VectorOperators.Ternary,short,Vector,VectorMask)
1006      */
1007     @Override
1008     public abstract
1009     ShortVector lanewise(VectorOperators.Ternary op,
1010                                   Vector<Short> v1,
1011                                   Vector<Short> v2,
1012                                   VectorMask<Short> m);
1013     @ForceInline
1014     final
1015     ShortVector lanewiseTemplate(VectorOperators.Ternary op,
1016                                           Class<? extends VectorMask<Short>> maskClass,
1017                                           Vector<Short> v1,
1018                                           Vector<Short> v2,
1019                                           VectorMask<Short> m) {
1020         ShortVector that = (ShortVector) v1;
1021         ShortVector tother = (ShortVector) v2;
1022         // It's a word: https://www.dictionary.com/browse/tother
1023         // See also Chapter 11 of Dickens, Our Mutual Friend:
1024         // "Totherest Governor," replied Mr Riderhood...
1025         that.check(this);
1026         tother.check(this);
1027         m.check(maskClass, this);
1028 
1029         if (op == BITWISE_BLEND) {
1030             // FIXME: Support this in the JIT.
1031             that = this.lanewise(XOR, that).lanewise(AND, tother);
1032             return this.lanewise(XOR, that, m);
1033         }
1034         int opc = opCode(op);
1035         return VectorSupport.ternaryOp(
1036             opc, getClass(), maskClass, short.class, length(),
1037             this, that, tother, m,
1038             TERN_IMPL.find(op, opc, ShortVector::ternaryOperations));
1039     }
1040 
1041     private static final
1042     ImplCache<Ternary, TernaryOperation<ShortVector, VectorMask<Short>>>
1043         TERN_IMPL = new ImplCache<>(Ternary.class, ShortVector.class);
1044 
1045     private static TernaryOperation<ShortVector, VectorMask<Short>> ternaryOperations(int opc_) {
1046         switch (opc_) {
1047             default: return null;
1048         }
1049     }
1050 
1051     /**
1052      * Combines the lane values of this vector
1053      * with the values of two broadcast scalars.
1054      *
1055      * This is a lane-wise ternary operation which applies
1056      * the selected operation to each lane.
1057      * The return value will be equal to this expression:
1058      * {@code this.lanewise(op, this.broadcast(e1), this.broadcast(e2))}.
1059      *
1060      * @param op the operation used to combine lane values
1061      * @param e1 the first input scalar
1062      * @param e2 the second input scalar
1063      * @return the result of applying the operation lane-wise
1064      *         to the input vector and the scalars
1065      * @throws UnsupportedOperationException if this vector does
1066      *         not support the requested operation
1067      * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
1068      * @see #lanewise(VectorOperators.Ternary,short,short,VectorMask)
1069      */
1070     @ForceInline
1071     public final
1072     ShortVector lanewise(VectorOperators.Ternary op, //(op,e1,e2)
1073                                   short e1,
1074                                   short e2) {
1075         return lanewise(op, broadcast(e1), broadcast(e2));
1076     }
1077 
1078     /**
1079      * Combines the lane values of this vector
1080      * with the values of two broadcast scalars,
1081      * with selection of lane elements controlled by a mask.
1082      *
1083      * This is a masked lane-wise ternary operation which applies
1084      * the selected operation to each lane.
1085      * The return value will be equal to this expression:
1086      * {@code this.lanewise(op, this.broadcast(e1), this.broadcast(e2), m)}.
1087      *
1088      * @param op the operation used to combine lane values
1089      * @param e1 the first input scalar
1090      * @param e2 the second input scalar
1091      * @param m the mask controlling lane selection
1092      * @return the result of applying the operation lane-wise
1093      *         to the input vector and the scalars
1094      * @throws UnsupportedOperationException if this vector does
1095      *         not support the requested operation
1096      * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
1097      * @see #lanewise(VectorOperators.Ternary,short,short)
1098      */
1099     @ForceInline
1100     public final
1101     ShortVector lanewise(VectorOperators.Ternary op, //(op,e1,e2,m)
1102                                   short e1,
1103                                   short e2,
1104                                   VectorMask<Short> m) {
1105         return lanewise(op, broadcast(e1), broadcast(e2), m);
1106     }
1107 
1108     /**
1109      * Combines the lane values of this vector
1110      * with the values of another vector and a broadcast scalar.
1111      *
1112      * This is a lane-wise ternary operation which applies
1113      * the selected operation to each lane.
1114      * The return value will be equal to this expression:
1115      * {@code this.lanewise(op, v1, this.broadcast(e2))}.
1116      *
1117      * @param op the operation used to combine lane values
1118      * @param v1 the other input vector
1119      * @param e2 the input scalar
1120      * @return the result of applying the operation lane-wise
1121      *         to the input vectors and the scalar
1122      * @throws UnsupportedOperationException if this vector does
1123      *         not support the requested operation
1124      * @see #lanewise(VectorOperators.Ternary,short,short)
1125      * @see #lanewise(VectorOperators.Ternary,Vector,short,VectorMask)
1126      */
1127     @ForceInline
1128     public final
1129     ShortVector lanewise(VectorOperators.Ternary op, //(op,v1,e2)
1130                                   Vector<Short> v1,
1131                                   short e2) {
1132         return lanewise(op, v1, broadcast(e2));
1133     }
1134 
1135     /**
1136      * Combines the lane values of this vector
1137      * with the values of another vector and a broadcast scalar,
1138      * with selection of lane elements controlled by a mask.
1139      *
1140      * This is a masked lane-wise ternary operation which applies
1141      * the selected operation to each lane.
1142      * The return value will be equal to this expression:
1143      * {@code this.lanewise(op, v1, this.broadcast(e2), m)}.
1144      *
1145      * @param op the operation used to combine lane values
1146      * @param v1 the other input vector
1147      * @param e2 the input scalar
1148      * @param m the mask controlling lane selection
1149      * @return the result of applying the operation lane-wise
1150      *         to the input vectors and the scalar
1151      * @throws UnsupportedOperationException if this vector does
1152      *         not support the requested operation
1153      * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
1154      * @see #lanewise(VectorOperators.Ternary,short,short,VectorMask)
1155      * @see #lanewise(VectorOperators.Ternary,Vector,short)
1156      */
1157     @ForceInline
1158     public final
1159     ShortVector lanewise(VectorOperators.Ternary op, //(op,v1,e2,m)
1160                                   Vector<Short> v1,
1161                                   short e2,
1162                                   VectorMask<Short> m) {
1163         return lanewise(op, v1, broadcast(e2), m);
1164     }
1165 
1166     /**
1167      * Combines the lane values of this vector
1168      * with the values of another vector and a broadcast scalar.
1169      *
1170      * This is a lane-wise ternary operation which applies
1171      * the selected operation to each lane.
1172      * The return value will be equal to this expression:
1173      * {@code this.lanewise(op, this.broadcast(e1), v2)}.
1174      *
1175      * @param op the operation used to combine lane values
1176      * @param e1 the input scalar
1177      * @param v2 the other input vector
1178      * @return the result of applying the operation lane-wise
1179      *         to the input vectors and the scalar
1180      * @throws UnsupportedOperationException if this vector does
1181      *         not support the requested operation
1182      * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
1183      * @see #lanewise(VectorOperators.Ternary,short,Vector,VectorMask)
1184      */
1185     @ForceInline
1186     public final
1187     ShortVector lanewise(VectorOperators.Ternary op, //(op,e1,v2)
1188                                   short e1,
1189                                   Vector<Short> v2) {
1190         return lanewise(op, broadcast(e1), v2);
1191     }
1192 
1193     /**
1194      * Combines the lane values of this vector
1195      * with the values of another vector and a broadcast scalar,
1196      * with selection of lane elements controlled by a mask.
1197      *
1198      * This is a masked lane-wise ternary operation which applies
1199      * the selected operation to each lane.
1200      * The return value will be equal to this expression:
1201      * {@code this.lanewise(op, this.broadcast(e1), v2, m)}.
1202      *
1203      * @param op the operation used to combine lane values
1204      * @param e1 the input scalar
1205      * @param v2 the other input vector
1206      * @param m the mask controlling lane selection
1207      * @return the result of applying the operation lane-wise
1208      *         to the input vectors and the scalar
1209      * @throws UnsupportedOperationException if this vector does
1210      *         not support the requested operation
1211      * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
1212      * @see #lanewise(VectorOperators.Ternary,short,Vector)
1213      */
1214     @ForceInline
1215     public final
1216     ShortVector lanewise(VectorOperators.Ternary op, //(op,e1,v2,m)
1217                                   short e1,
1218                                   Vector<Short> v2,
1219                                   VectorMask<Short> m) {
1220         return lanewise(op, broadcast(e1), v2, m);
1221     }
1222 
1223     // (Thus endeth the Great and Mighty Ternary Ogdoad.)
1224     // https://en.wikipedia.org/wiki/Ogdoad
1225 
1226     /// FULL-SERVICE BINARY METHODS: ADD, SUB, MUL, DIV
1227     //
1228     // These include masked and non-masked versions.
1229     // This subclass adds broadcast (masked or not).
1230 
1231     /**
1232      * {@inheritDoc} <!--workaround-->
1233      * @see #add(short)
1234      */
1235     @Override
1236     @ForceInline
1237     public final ShortVector add(Vector<Short> v) {
1238         return lanewise(ADD, v);
1239     }
1240 
1241     /**
1242      * Adds this vector to the broadcast of an input scalar.
1243      *
1244      * This is a lane-wise binary operation which applies
1245      * the primitive addition operation ({@code +}) to each lane.
1246      *
1247      * This method is also equivalent to the expression
1248      * {@link #lanewise(VectorOperators.Binary,short)
1249      *    lanewise}{@code (}{@link VectorOperators#ADD
1250      *    ADD}{@code , e)}.
1251      *
1252      * @param e the input scalar
1253      * @return the result of adding each lane of this vector to the scalar
1254      * @see #add(Vector)
1255      * @see #broadcast(short)
1256      * @see #add(short,VectorMask)
1257      * @see VectorOperators#ADD
1258      * @see #lanewise(VectorOperators.Binary,Vector)
1259      * @see #lanewise(VectorOperators.Binary,short)
1260      */
1261     @ForceInline
1262     public final
1263     ShortVector add(short e) {
1264         return lanewise(ADD, e);
1265     }
1266 
1267     /**
1268      * {@inheritDoc} <!--workaround-->
1269      * @see #add(short,VectorMask)
1270      */
1271     @Override
1272     @ForceInline
1273     public final ShortVector add(Vector<Short> v,
1274                                           VectorMask<Short> m) {
1275         return lanewise(ADD, v, m);
1276     }
1277 
1278     /**
1279      * Adds this vector to the broadcast of an input scalar,
1280      * selecting lane elements controlled by a mask.
1281      *
1282      * This is a masked lane-wise binary operation which applies
1283      * the primitive addition operation ({@code +}) to each lane.
1284      *
1285      * This method is also equivalent to the expression
1286      * {@link #lanewise(VectorOperators.Binary,short,VectorMask)
1287      *    lanewise}{@code (}{@link VectorOperators#ADD
1288      *    ADD}{@code , e, m)}.
1289      *
1290      * @param e the input scalar
1291      * @param m the mask controlling lane selection
1292      * @return the result of adding each lane of this vector to the scalar
1293      * @see #add(Vector,VectorMask)
1294      * @see #broadcast(short)
1295      * @see #add(short)
1296      * @see VectorOperators#ADD
1297      * @see #lanewise(VectorOperators.Binary,Vector)
1298      * @see #lanewise(VectorOperators.Binary,short)
1299      */
1300     @ForceInline
1301     public final ShortVector add(short e,
1302                                           VectorMask<Short> m) {
1303         return lanewise(ADD, e, m);
1304     }
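         // Illustrative masked use: with a mask m, v.add((short) 1, m) increments
         // only the lanes selected by m; unselected lanes keep v's original value,
         // for example {4, 9} with m = {true, false} becomes {5, 9}.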
1305 
1306     /**
1307      * {@inheritDoc} <!--workaround-->
1308      * @see #sub(short)
1309      */
1310     @Override
1311     @ForceInline
1312     public final ShortVector sub(Vector<Short> v) {
1313         return lanewise(SUB, v);
1314     }
1315 
1316     /**
1317      * Subtracts an input scalar from this vector.
1318      *
1319      * This is a lane-wise binary operation which applies
1320      * the primitive subtraction operation ({@code -}) to each lane.
1321      *
1322      * This method is also equivalent to the expression
1323      * {@link #lanewise(VectorOperators.Binary,short)
1324      *    lanewise}{@code (}{@link VectorOperators#SUB
1325      *    SUB}{@code , e)}.
1326      *
1327      * @param e the input scalar
1328      * @return the result of subtracting the scalar from each lane of this vector
1329      * @see #sub(Vector)
1330      * @see #broadcast(short)
1331      * @see #sub(short,VectorMask)
1332      * @see VectorOperators#SUB
1333      * @see #lanewise(VectorOperators.Binary,Vector)
1334      * @see #lanewise(VectorOperators.Binary,short)
1335      */
1336     @ForceInline
1337     public final ShortVector sub(short e) {
1338         return lanewise(SUB, e);
1339     }
1340 
1341     /**
1342      * {@inheritDoc} <!--workaround-->
1343      * @see #sub(short,VectorMask)
1344      */
1345     @Override
1346     @ForceInline
1347     public final ShortVector sub(Vector<Short> v,
1348                                           VectorMask<Short> m) {
1349         return lanewise(SUB, v, m);
1350     }
1351 
1352     /**
1353      * Subtracts an input scalar from this vector
1354      * under the control of a mask.
1355      *
1356      * This is a masked lane-wise binary operation which applies
1357      * the primitive subtraction operation ({@code -}) to each lane.
1358      *
1359      * This method is also equivalent to the expression
1360      * {@link #lanewise(VectorOperators.Binary,short,VectorMask)
1361      *    lanewise}{@code (}{@link VectorOperators#SUB
1362      *    SUB}{@code , e, m)}.
1363      *
1364      * @param e the input scalar
1365      * @param m the mask controlling lane selection
1366      * @return the result of subtracting the scalar from each lane of this vector
1367      * @see #sub(Vector,VectorMask)
1368      * @see #broadcast(short)
1369      * @see #sub(short)
1370      * @see VectorOperators#SUB
1371      * @see #lanewise(VectorOperators.Binary,Vector)
1372      * @see #lanewise(VectorOperators.Binary,short)
1373      */
1374     @ForceInline
1375     public final ShortVector sub(short e,
1376                                           VectorMask<Short> m) {
1377         return lanewise(SUB, e, m);
1378     }
1379 
1380     /**
1381      * {@inheritDoc} <!--workaround-->
1382      * @see #mul(short)
1383      */
1384     @Override
1385     @ForceInline
1386     public final ShortVector mul(Vector<Short> v) {
1387         return lanewise(MUL, v);
1388     }
1389 
1390     /**
1391      * Multiplies this vector by the broadcast of an input scalar.
1392      *
1393      * This is a lane-wise binary operation which applies
1394      * the primitive multiplication operation ({@code *}) to each lane.
1395      *
1396      * This method is also equivalent to the expression
1397      * {@link #lanewise(VectorOperators.Binary,short)
1398      *    lanewise}{@code (}{@link VectorOperators#MUL
1399      *    MUL}{@code , e)}.
1400      *
1401      * @param e the input scalar
1402      * @return the result of multiplying this vector by the given scalar
1403      * @see #mul(Vector)
1404      * @see #broadcast(short)
1405      * @see #mul(short,VectorMask)
1406      * @see VectorOperators#MUL
1407      * @see #lanewise(VectorOperators.Binary,Vector)
1408      * @see #lanewise(VectorOperators.Binary,short)
1409      */
1410     @ForceInline
1411     public final ShortVector mul(short e) {
1412         return lanewise(MUL, e);
1413     }
1414 
1415     /**
1416      * {@inheritDoc} <!--workaround-->
1417      * @see #mul(short,VectorMask)
1418      */
1419     @Override
1420     @ForceInline
1421     public final ShortVector mul(Vector<Short> v,
1422                                           VectorMask<Short> m) {
1423         return lanewise(MUL, v, m);
1424     }
1425 
1426     /**
1427      * Multiplies this vector by the broadcast of an input scalar,
1428      * selecting lane elements controlled by a mask.
1429      *
1430      * This is a masked lane-wise binary operation which applies
1431      * the primitive multiplication operation ({@code *}) to each lane.
1432      *
1433      * This method is also equivalent to the expression
1434      * {@link #lanewise(VectorOperators.Binary,short,VectorMask)
1435      *    lanewise}{@code (}{@link VectorOperators#MUL
1436      *    MUL}{@code , e, m)}.
1437      *
1438      * @param e the input scalar
1439      * @param m the mask controlling lane selection
1440      * @return the result of multiplying this vector by the given scalar
1441      * @see #mul(Vector,VectorMask)
1442      * @see #broadcast(short)
1443      * @see #mul(short)
1444      * @see VectorOperators#MUL
1445      * @see #lanewise(VectorOperators.Binary,Vector)
1446      * @see #lanewise(VectorOperators.Binary,short)
1447      */
1448     @ForceInline
1449     public final ShortVector mul(short e,
1450                                           VectorMask<Short> m) {
1451         return lanewise(MUL, e, m);
1452     }
1453 
1454     /**
1455      * {@inheritDoc} <!--workaround-->
1456      * @apiNote If there is a zero divisor, {@code
1457      * ArithmeticException} will be thrown.
1458      */
1459     @Override
1460     @ForceInline
1461     public final ShortVector div(Vector<Short> v) {
1462         return lanewise(DIV, v);
1463     }
1464 
1465     /**
1466      * Divides this vector by the broadcast of an input scalar.
1467      *
1468      * This is a lane-wise binary operation which applies
1469      * the primitive division operation ({@code /}) to each lane.
1470      *
1471      * This method is also equivalent to the expression
1472      * {@link #lanewise(VectorOperators.Binary,short)
1473      *    lanewise}{@code (}{@link VectorOperators#DIV
1474      *    DIV}{@code , e)}.
1475      *
1476      * @apiNote If there is a zero divisor, {@code
1477      * ArithmeticException} will be thrown.
1478      *
1479      * @param e the input scalar
1480      * @return the result of dividing each lane of this vector by the scalar
1481      * @see #div(Vector)
1482      * @see #broadcast(short)
1483      * @see #div(short,VectorMask)
1484      * @see VectorOperators#DIV
1485      * @see #lanewise(VectorOperators.Binary,Vector)
1486      * @see #lanewise(VectorOperators.Binary,short)
1487      */
1488     @ForceInline
1489     public final ShortVector div(short e) {
1490         return lanewise(DIV, e);
1491     }
1492 
1493     /**
1494      * {@inheritDoc} <!--workaround-->
1495      * @see #div(short,VectorMask)
1496      * @apiNote If there is a zero divisor, {@code
1497      * ArithmeticException} will be thrown.
1498      */
1499     @Override
1500     @ForceInline
1501     public final ShortVector div(Vector<Short> v,
1502                                           VectorMask<Short> m) {
1503         return lanewise(DIV, v, m);
1504     }
1505 
1506     /**
1507      * Divides this vector by the broadcast of an input scalar,
1508      * selecting lane elements controlled by a mask.
1509      *
1510      * This is a masked lane-wise binary operation which applies
1511      * the primitive division operation ({@code /}) to each lane.
1512      *
1513      * This method is also equivalent to the expression
1514      * {@link #lanewise(VectorOperators.Binary,short,VectorMask)
1515      *    lanewise}{@code (}{@link VectorOperators#DIV
1516      *    DIV}{@code , e, m)}.
1517      *
1518      * @apiNote If there is a zero divisor, {@code
1519      * ArithmeticException} will be thrown.
1520      *
1521      * @param e the input scalar
1522      * @param m the mask controlling lane selection
1523      * @return the result of dividing each lane of this vector by the scalar
1524      * @see #div(Vector,VectorMask)
1525      * @see #broadcast(short)
1526      * @see #div(short)
1527      * @see VectorOperators#DIV
1528      * @see #lanewise(VectorOperators.Binary,Vector)
1529      * @see #lanewise(VectorOperators.Binary,short)
1530      */
1531     @ForceInline
1532     public final ShortVector div(short e,
1533                                           VectorMask<Short> m) {
1534         return lanewise(DIV, e, m);
1535     }
1536 
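    // Example (illustrative sketch, not generated code): guarding DIV against
    // zero divisors, as suggested by the @apiNote above.  "va" and "vb" are
    // assumed ShortVector values; lanes with a zero divisor are left
    // unchanged because the mask excludes them.
    //
    //   VectorMask<Short> nonZero = vb.compare(VectorOperators.NE, (short) 0);
    //   ShortVector quotient = va.div(vb, nonZero);
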
1537     /// END OF FULL-SERVICE BINARY METHODS
1538 
1539     /// SECOND-TIER BINARY METHODS
1540     //
1541     // There are no masked versions.
1542 
1543     /**
1544      * {@inheritDoc} <!--workaround-->
1545      */
1546     @Override
1547     @ForceInline
1548     public final ShortVector min(Vector<Short> v) {
1549         return lanewise(MIN, v);
1550     }
1551 
1552     // FIXME:  "broadcast of an input scalar" is really wordy.  Reduce?
1553     /**
1554      * Computes the smaller of this vector and the broadcast of an input scalar.
1555      *
1556      * This is a lane-wise binary operation which applies the
1557      * operation {@code Math.min()} to each pair of
1558      * corresponding lane values.
1559      *
1560      * This method is also equivalent to the expression
1561      * {@link #lanewise(VectorOperators.Binary,short)
1562      *    lanewise}{@code (}{@link VectorOperators#MIN
1563      *    MIN}{@code , e)}.
1564      *
1565      * @param e the input scalar
1566      * @return the result of computing the smaller of each lane of this vector and the scalar
1567      * @see #min(Vector)
1568      * @see #broadcast(short)
1569      * @see VectorOperators#MIN
1570      * @see #lanewise(VectorOperators.Binary,short,VectorMask)
1571      */
1572     @ForceInline
1573     public final ShortVector min(short e) {
1574         return lanewise(MIN, e);
1575     }
1576 
1577     /**
1578      * {@inheritDoc} <!--workaround-->
1579      */
1580     @Override
1581     @ForceInline
1582     public final ShortVector max(Vector<Short> v) {
1583         return lanewise(MAX, v);
1584     }
1585 
1586     /**
1587      * Computes the larger of this vector and the broadcast of an input scalar.
1588      *
1589      * This is a lane-wise binary operation which applies the
1590      * operation {@code Math.max()} to each pair of
1591      * corresponding lane values.
1592      *
1593      * This method is also equivalent to the expression
1594      * {@link #lanewise(VectorOperators.Binary,short)
1595      *    lanewise}{@code (}{@link VectorOperators#MAX
1596      *    MAX}{@code , e)}.
1597      *
1598      * @param e the input scalar
1599      * @return the result of computing the larger of each lane of this vector and the scalar
1600      * @see #max(Vector)
1601      * @see #broadcast(short)
1602      * @see VectorOperators#MAX
1603      * @see #lanewise(VectorOperators.Binary,short,VectorMask)
1604      */
1605     @ForceInline
1606     public final ShortVector max(short e) {
1607         return lanewise(MAX, e);
1608     }
1609 
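    // Example (illustrative sketch, not generated code): clamping each lane of
    // an assumed vector "va" to the range [0, 100] by chaining the scalar
    // forms of MAX and MIN.
    //
    //   ShortVector clamped = va.max((short) 0).min((short) 100);
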
1610     // common bitwise operators: and, or, not (with scalar versions)
1611     /**
1612      * Computes the bitwise logical conjunction ({@code &})
1613      * of this vector and a second input vector.
1614      *
1615      * This is a lane-wise binary operation which applies
1616      * the primitive bitwise "and" operation ({@code &})
1617      * to each pair of corresponding lane values.
1618      *
1619      * This method is also equivalent to the expression
1620      * {@link #lanewise(VectorOperators.Binary,Vector)
1621      *    lanewise}{@code (}{@link VectorOperators#AND
1622      *    AND}{@code , v)}.
1623      *
1624      * <p>
1625      * This is not a full-service named operation like
1626      * {@link #add(Vector) add}.  A masked version of
1627      * this operation is not directly available
1628      * but may be obtained via the masked version of
1629      * {@code lanewise}.
1630      *
1631      * @param v a second input vector
1632      * @return the bitwise {@code &} of this vector and the second input vector
1633      * @see #and(short)
1634      * @see #or(Vector)
1635      * @see #not()
1636      * @see VectorOperators#AND
1637      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
1638      */
1639     @ForceInline
1640     public final ShortVector and(Vector<Short> v) {
1641         return lanewise(AND, v);
1642     }
1643 
1644     /**
1645      * Computes the bitwise logical conjunction ({@code &})
1646      * of this vector and a scalar.
1647      *
1648      * This is a lane-wise binary operation which applies
1649      * the primitive bitwise "and" operation ({@code &})
1650      * to each pair of corresponding lane values.
1651      *
1652      * This method is also equivalent to the expression
1653      * {@link #lanewise(VectorOperators.Binary,Vector)
1654      *    lanewise}{@code (}{@link VectorOperators#AND
1655      *    AND}{@code , e)}.
1656      *
1657      * @param e an input scalar
1658      * @return the bitwise {@code &} of this vector and scalar
1659      * @see #and(Vector)
1660      * @see VectorOperators#AND
1661      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
1662      */
1663     @ForceInline
1664     public final ShortVector and(short e) {
1665         return lanewise(AND, e);
1666     }
1667 
1668     /**
1669      * Computes the bitwise logical disjunction ({@code |})
1670      * of this vector and a second input vector.
1671      *
1672      * This is a lane-wise binary operation which applies
1673      * the primitive bitwise "or" operation ({@code |})
1674      * to each pair of corresponding lane values.
1675      *
1676      * This method is also equivalent to the expression
1677      * {@link #lanewise(VectorOperators.Binary,Vector)
1678      *    lanewise}{@code (}{@link VectorOperators#OR
1679      *    OR}{@code , v)}.
1680      *
1681      * <p>
1682      * This is not a full-service named operation like
1683      * {@link #add(Vector) add}.  A masked version of
1684      * this operation is not directly available
1685      * but may be obtained via the masked version of
1686      * {@code lanewise}.
1687      *
1688      * @param v a second input vector
1689      * @return the bitwise {@code |} of this vector and the second input vector
1690      * @see #or(short)
1691      * @see #and(Vector)
1692      * @see #not()
1693      * @see VectorOperators#OR
1694      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
1695      */
1696     @ForceInline
1697     public final ShortVector or(Vector<Short> v) {
1698         return lanewise(OR, v);
1699     }
1700 
1701     /**
1702      * Computes the bitwise logical disjunction ({@code |})
1703      * of this vector and a scalar.
1704      *
1705      * This is a lane-wise binary operation which applies
1706      * the primitive bitwise "or" operation ({@code |})
1707      * to each pair of corresponding lane values.
1708      *
1709      * This method is also equivalent to the expression
1710      * {@link #lanewise(VectorOperators.Binary,Vector)
1711      *    lanewise}{@code (}{@link VectorOperators#OR
1712      *    OR}{@code , e)}.
1713      *
1714      * @param e an input scalar
1715      * @return the bitwise {@code |} of this vector and scalar
1716      * @see #or(Vector)
1717      * @see VectorOperators#OR
1718      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
1719      */
1720     @ForceInline
1721     public final ShortVector or(short e) {
1722         return lanewise(OR, e);
1723     }
1724 
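    // Example (illustrative sketch, not generated code): bit-field extraction
    // and flag setting with the scalar AND/OR forms on an assumed vector "va".
    // A masked variant is available through lanewise(AND, e, m) as noted above.
    //
    //   ShortVector lowByte = va.and((short) 0xFF);    // keep the low 8 bits
    //   ShortVector flagged = va.or((short) 0x0100);   // set bit 8 in every lane
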
1725 
1726 
1727     /// UNARY METHODS
1728 
1729     /**
1730      * {@inheritDoc} <!--workaround-->
1731      */
1732     @Override
1733     @ForceInline
1734     public final
1735     ShortVector neg() {
1736         return lanewise(NEG);
1737     }
1738 
1739     /**
1740      * {@inheritDoc} <!--workaround-->
1741      */
1742     @Override
1743     @ForceInline
1744     public final
1745     ShortVector abs() {
1746         return lanewise(ABS);
1747     }
1748 
1749     // not (~)
1750     /**
1751      * Computes the bitwise logical complement ({@code ~})
1752      * of this vector.
1753      *
1754      * This is a lane-wise unary operation which applies
1755      * the primitive bitwise "not" operation ({@code ~})
1756      * to each lane value.
1757      *
1758      * This method is also equivalent to the expression
1759      * {@link #lanewise(VectorOperators.Unary)
1760      *    lanewise}{@code (}{@link VectorOperators#NOT
1761      *    NOT}{@code )}.
1762      *
1763      * <p>
1764      * This is not a full-service named operation like
1765      * {@link #add(Vector) add}.  A masked version of
1766      * this operation is not directly available
1767      * but may be obtained via the masked version of
1768      * {@code lanewise}.
1769      *
1770      * @return the bitwise complement {@code ~} of this vector
1771      * @see #and(Vector)
1772      * @see VectorOperators#NOT
1773      * @see #lanewise(VectorOperators.Unary,VectorMask)
1774      */
1775     @ForceInline
1776     public final ShortVector not() {
1777         return lanewise(NOT);
1778     }
1779 
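    // Example (illustrative sketch, not generated code): since NOT has no
    // masked named method, the masked form of lanewise supplies it.  "va" and
    // "m" are assumed caller values.
    //
    //   ShortVector vr = va.lanewise(VectorOperators.NOT, m);  // complement only where m is set
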
1780 
1781     /// COMPARISONS
1782 
1783     /**
1784      * {@inheritDoc} <!--workaround-->
1785      */
1786     @Override
1787     @ForceInline
1788     public final
1789     VectorMask<Short> eq(Vector<Short> v) {
1790         return compare(EQ, v);
1791     }
1792 
1793     /**
1794      * Tests if this vector is equal to an input scalar.
1795      *
1796      * This is a lane-wise binary test operation which applies
1797      * the primitive equals operation ({@code ==}) to each lane.
1798      * The result is the same as {@code compare(VectorOperators.EQ, e)}.
1799      *
1800      * @param e the input scalar
1801      * @return the result mask of testing if this vector
1802      *         is equal to {@code e}
1803      * @see #compare(VectorOperators.Comparison,short)
1804      */
1805     @ForceInline
1806     public final
1807     VectorMask<Short> eq(short e) {
1808         return compare(EQ, e);
1809     }
1810 
1811     /**
1812      * {@inheritDoc} <!--workaround-->
1813      */
1814     @Override
1815     @ForceInline
1816     public final
1817     VectorMask<Short> lt(Vector<Short> v) {
1818         return compare(LT, v);
1819     }
1820 
1821     /**
1822      * Tests if this vector is less than an input scalar.
1823      *
1824      * This is a lane-wise binary test operation which applies
1825      * the primitive less than operation ({@code <}) to each lane.
1826      * The result is the same as {@code compare(VectorOperators.LT, e)}.
1827      *
1828      * @param e the input scalar
1829      * @return the mask result of testing if this vector
1830      *         is less than the input scalar
1831      * @see #compare(VectorOperators.Comparison,short)
1832      */
1833     @ForceInline
1834     public final
1835     VectorMask<Short> lt(short e) {
1836         return compare(LT, e);
1837     }
1838 
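    // Example (illustrative sketch, not generated code): building masks with
    // the scalar comparison shortcuts and then acting on them.  "va" is an
    // assumed ShortVector.
    //
    //   VectorMask<Short> zeros    = va.eq((short) 0);
    //   VectorMask<Short> negative = va.lt((short) 0);
    //   ShortVector vr = va.blend((short) -1, zeros);  // replace zero lanes with -1
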
1839     /**
1840      * {@inheritDoc} <!--workaround-->
1841      */
1842     @Override
1843     public abstract
1844     VectorMask<Short> test(VectorOperators.Test op);
1845 
1846     /*package-private*/
1847     @ForceInline
1848     final
1849     <M extends VectorMask<Short>>
1850     M testTemplate(Class<M> maskType, Test op) {
1851         ShortSpecies vsp = vspecies();
1852         if (opKind(op, VO_SPECIAL)) {
1853             VectorMask<Short> m;
1854             if (op == IS_DEFAULT) {
1855                 m = compare(EQ, (short) 0);
1856             } else if (op == IS_NEGATIVE) {
1857                 m = compare(LT, (short) 0);
1858             }
1859             else {
1860                 throw new AssertionError(op);
1861             }
1862             return maskType.cast(m);
1863         }
1864         int opc = opCode(op);
1865         throw new AssertionError(op);
1866     }
1867 
1868     /**
1869      * {@inheritDoc} <!--workaround-->
1870      */
1871     @Override
1872     public abstract
1873     VectorMask<Short> test(VectorOperators.Test op,
1874                                   VectorMask<Short> m);
1875 
1876     /*package-private*/
1877     @ForceInline
1878     final
1879     <M extends VectorMask<Short>>
1880     M testTemplate(Class<M> maskType, Test op, M mask) {
1881         ShortSpecies vsp = vspecies();
1882         mask.check(maskType, this);
1883         if (opKind(op, VO_SPECIAL)) {
1884             VectorMask<Short> m = mask;
1885             if (op == IS_DEFAULT) {
1886                 m = compare(EQ, (short) 0, m);
1887             } else if (op == IS_NEGATIVE) {
1888                 m = compare(LT, (short) 0, m);
1889             }
1890             else {
1891                 throw new AssertionError(op);
1892             }
1893             return maskType.cast(m);
1894         }
1895         int opc = opCode(op);
1896         throw new AssertionError(op);
1897     }
1898 
1899     /**
1900      * {@inheritDoc} <!--workaround-->
1901      */
1902     @Override
1903     public abstract
1904     VectorMask<Short> compare(VectorOperators.Comparison op, Vector<Short> v);
1905 
1906     /*package-private*/
1907     @ForceInline
1908     final
1909     <M extends VectorMask<Short>>
1910     M compareTemplate(Class<M> maskType, Comparison op, Vector<Short> v) {
1911         ShortVector that = (ShortVector) v;
1912         that.check(this);
1913         int opc = opCode(op);
1914         return VectorSupport.compare(
1915             opc, getClass(), maskType, short.class, length(),
1916             this, that, null,
1917             (cond, v0, v1, m1) -> {
1918                 AbstractMask<Short> m
1919                     = v0.bTest(cond, v1, (cond_, i, a, b)
1920                                -> compareWithOp(cond, a, b));
1921                 @SuppressWarnings("unchecked")
1922                 M m2 = (M) m;
1923                 return m2;
1924             });
1925     }
1926 
1927     /*package-private*/
1928     @ForceInline
1929     final
1930     <M extends VectorMask<Short>>
1931     M compareTemplate(Class<M> maskType, Comparison op, Vector<Short> v, M m) {
1932         ShortVector that = (ShortVector) v;
1933         that.check(this);
1934         m.check(maskType, this);
1935         int opc = opCode(op);
1936         return VectorSupport.compare(
1937             opc, getClass(), maskType, short.class, length(),
1938             this, that, m,
1939             (cond, v0, v1, m1) -> {
1940                 AbstractMask<Short> cmpM
1941                     = v0.bTest(cond, v1, (cond_, i, a, b)
1942                                -> compareWithOp(cond, a, b));
1943                 @SuppressWarnings("unchecked")
1944                 M m2 = (M) cmpM.and(m1);
1945                 return m2;
1946             });
1947     }
1948 
1949     @ForceInline
1950     private static boolean compareWithOp(int cond, short a, short b) {
1951         return switch (cond) {
1952             case BT_eq -> a == b;
1953             case BT_ne -> a != b;
1954             case BT_lt -> a < b;
1955             case BT_le -> a <= b;
1956             case BT_gt -> a > b;
1957             case BT_ge -> a >= b;
1958             case BT_ult -> Short.compareUnsigned(a, b) < 0;
1959             case BT_ule -> Short.compareUnsigned(a, b) <= 0;
1960             case BT_ugt -> Short.compareUnsigned(a, b) > 0;
1961             case BT_uge -> Short.compareUnsigned(a, b) >= 0;
1962             default -> throw new AssertionError();
1963         };
1964     }
1965 
1966     /**
1967      * Tests this vector by comparing it with an input scalar,
1968      * according to the given comparison operation.
1969      *
1970      * This is a lane-wise binary test operation which applies
1971      * the comparison operation to each lane.
1972      * <p>
1973      * The result is the same as
1974      * {@code compare(op, broadcast(species(), e))}.
1975      * That is, the scalar may be regarded as broadcast to
1976      * a vector of the same species, and then compared
1977      * against the original vector, using the selected
1978      * comparison operation.
1979      *
1980      * @param op the operation used to compare lane values
1981      * @param e the input scalar
1982      * @return the mask result of testing lane-wise if this vector
1983      *         compares to the input, according to the selected
1984      *         comparison operator
1985      * @see ShortVector#compare(VectorOperators.Comparison,Vector)
1986      * @see #eq(short)
1987      * @see #lt(short)
1988      */
1989     public abstract
1990     VectorMask<Short> compare(Comparison op, short e);
1991 
1992     /*package-private*/
1993     @ForceInline
1994     final
1995     <M extends VectorMask<Short>>
1996     M compareTemplate(Class<M> maskType, Comparison op, short e) {
1997         return compareTemplate(maskType, op, broadcast(e));
1998     }
1999 
2000     /**
2001      * Tests this vector by comparing it with an input scalar,
2002      * according to the given comparison operation,
2003      * in lanes selected by a mask.
2004      *
2005      * This is a masked lane-wise binary test operation which applies
2006      * the comparison operation to each pair of corresponding lane values.
2007      *
2008      * The returned result is equal to the expression
2009      * {@code compare(op,e).and(m)}.
2010      *
2011      * @param op the operation used to compare lane values
2012      * @param e the input scalar
2013      * @param m the mask controlling lane selection
2014      * @return the mask result of testing lane-wise if this vector
2015      *         compares to the input, according to the selected
2016      *         comparison operator,
2017      *         and only in the lanes selected by the mask
2018      * @see ShortVector#compare(VectorOperators.Comparison,Vector,VectorMask)
2019      */
2020     @ForceInline
2021     public final VectorMask<Short> compare(VectorOperators.Comparison op,
2022                                                short e,
2023                                                VectorMask<Short> m) {
2024         return compare(op, broadcast(e), m);
2025     }
2026 
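    // Example (illustrative sketch, not generated code): the masked scalar
    // comparison is the unmasked comparison intersected with the mask, so the
    // two masks below are equal.  "va" and "m" are assumed caller values.
    //
    //   VectorMask<Short> m1 = va.compare(VectorOperators.LT, (short) 8, m);
    //   VectorMask<Short> m2 = va.compare(VectorOperators.LT, (short) 8).and(m);
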
2027     /**
2028      * {@inheritDoc} <!--workaround-->
2029      */
2030     @Override
2031     public abstract
2032     VectorMask<Short> compare(Comparison op, long e);
2033 
2034     /*package-private*/
2035     @ForceInline
2036     final
2037     <M extends VectorMask<Short>>
2038     M compareTemplate(Class<M> maskType, Comparison op, long e) {
2039         return compareTemplate(maskType, op, broadcast(e));
2040     }
2041 
2042     /**
2043      * {@inheritDoc} <!--workaround-->
2044      */
2045     @Override
2046     @ForceInline
2047     public final
2048     VectorMask<Short> compare(Comparison op, long e, VectorMask<Short> m) {
2049         return compare(op, broadcast(e), m);
2050     }
2051 
2052 
2053 
2054     /**
2055      * {@inheritDoc} <!--workaround-->
2056      */
2057     @Override public abstract
2058     ShortVector blend(Vector<Short> v, VectorMask<Short> m);
2059 
2060     /*package-private*/
2061     @ForceInline
2062     final
2063     <M extends VectorMask<Short>>
2064     ShortVector
2065     blendTemplate(Class<M> maskType, ShortVector v, M m) {
2066         v.check(this);
2067         return VectorSupport.blend(
2068             getClass(), maskType, short.class, length(),
2069             this, v, m,
2070             (v0, v1, m_) -> v0.bOp(v1, m_, (i, a, b) -> b));
2071     }
2072 
2073     /**
2074      * {@inheritDoc} <!--workaround-->
2075      */
2076     @Override public abstract ShortVector addIndex(int scale);
2077 
2078     /*package-private*/
2079     @ForceInline
2080     final ShortVector addIndexTemplate(int scale) {
2081         ShortSpecies vsp = vspecies();
2082         // make sure VLENGTH*scale doesn't overflow:
2083         vsp.checkScale(scale);
2084         return VectorSupport.indexVector(
2085             getClass(), short.class, length(),
2086             this, scale, vsp,
2087             (v, scale_, s)
2088             -> {
2089                 // If the platform doesn't support an INDEX
2090                 // instruction directly, load IOTA from memory
2091                 // and multiply.
2092                 ShortVector iota = s.iota();
2093                 short sc = (short) scale_;
2094                 return v.add(sc == 1 ? iota : iota.mul(sc));
2095             });
2096     }
2097 
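    // Example (illustrative sketch, not generated code): addIndex(scale) adds
    // i*scale to lane i, so a zero vector with scale 1 yields the iota
    // sequence.  "S" is an assumed species constant.
    //
    //   ShortVector iotaV = ShortVector.zero(S).addIndex(1);  // 0, 1, 2, ...
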
2098     /**
2099      * Replaces selected lanes of this vector with
2100      * a scalar value
2101      * under the control of a mask.
2102      *
2103      * This is a masked lane-wise binary operation which
2104      * selects each lane value from one or the other input.
2105      *
2106      * The returned result is equal to the expression
2107      * {@code blend(broadcast(e),m)}.
2108      *
2109      * @param e the input scalar, containing the replacement lane value
2110      * @param m the mask controlling lane selection of the scalar
2111      * @return the result of blending the lane elements of this vector with
2112      *         the scalar value
2113      */
2114     @ForceInline
2115     public final ShortVector blend(short e,
2116                                             VectorMask<Short> m) {
2117         return blend(broadcast(e), m);
2118     }
2119 
2120     /**
2121      * Replaces selected lanes of this vector with
2122      * a scalar value
2123      * under the control of a mask.
2124      *
2125      * This is a masked lane-wise binary operation which
2126      * selects each lane value from one or the other input.
2127      *
2128      * The returned result is equal to the expression
2129      * {@code blend(broadcast(e),m)}.
2130      *
2131      * @param e the input scalar, containing the replacement lane value
2132      * @param m the mask controlling lane selection of the scalar
2133      * @return the result of blending the lane elements of this vector with
2134      *         the scalar value
2135      */
2136     @ForceInline
2137     public final ShortVector blend(long e,
2138                                             VectorMask<Short> m) {
2139         return blend(broadcast(e), m);
2140     }
2141 
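    // Example (illustrative sketch, not generated code): the scalar blend
    // forms are shorthand for blending with a broadcast vector, so the two
    // results below are equal.  "va" and "m" are assumed caller values.
    //
    //   ShortVector r1 = va.blend((short) 0, m);
    //   ShortVector r2 = va.blend(va.broadcast((short) 0), m);
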
2142     /**
2143      * {@inheritDoc} <!--workaround-->
2144      */
2145     @Override
2146     public abstract
2147     ShortVector slice(int origin, Vector<Short> v1);
2148 
2149     /*package-private*/
2150     final
2151     @ForceInline
2152     ShortVector sliceTemplate(int origin, Vector<Short> v1) {
2153         ShortVector that = (ShortVector) v1;
2154         that.check(this);
2155         Objects.checkIndex(origin, length() + 1);
2156         VectorShuffle<Short> iota = iotaShuffle();
2157         VectorMask<Short> blendMask = iota.toVector().compare(VectorOperators.LT, (broadcast((short)(length() - origin))));
2158         iota = iotaShuffle(origin, 1, true);
2159         return that.rearrange(iota).blend(this.rearrange(iota), blendMask);
2160     }
2161 
2162     /**
2163      * {@inheritDoc} <!--workaround-->
2164      */
2165     @Override
2166     @ForceInline
2167     public final
2168     ShortVector slice(int origin,
2169                                Vector<Short> w,
2170                                VectorMask<Short> m) {
2171         return broadcast(0).blend(slice(origin, w), m);
2172     }
2173 
2174     /**
2175      * {@inheritDoc} <!--workaround-->
2176      */
2177     @Override
2178     public abstract
2179     ShortVector slice(int origin);
2180 
2181     /*package-private*/
2182     final
2183     @ForceInline
2184     ShortVector sliceTemplate(int origin) {
2185         Objects.checkIndex(origin, length() + 1);
2186         VectorShuffle<Short> iota = iotaShuffle();
2187         VectorMask<Short> blendMask = iota.toVector().compare(VectorOperators.LT, (broadcast((short)(length() - origin))));
2188         iota = iotaShuffle(origin, 1, true);
2189         return vspecies().zero().blend(this.rearrange(iota), blendMask);
2190     }
2191 
2192     /**
2193      * {@inheritDoc} <!--workaround-->
2194      */
2195     @Override
2196     public abstract
2197     ShortVector unslice(int origin, Vector<Short> w, int part);
2198 
2199     /*package-private*/
2200     final
2201     @ForceInline
2202     ShortVector
2203     unsliceTemplate(int origin, Vector<Short> w, int part) {
2204         ShortVector that = (ShortVector) w;
2205         that.check(this);
2206         Objects.checkIndex(origin, length() + 1);
2207         VectorShuffle<Short> iota = iotaShuffle();
2208         VectorMask<Short> blendMask = iota.toVector().compare((part == 0) ? VectorOperators.GE : VectorOperators.LT,
2209                                                                   (broadcast((short)(origin))));
2210         iota = iotaShuffle(-origin, 1, true);
2211         return that.blend(this.rearrange(iota), blendMask);
2212     }
2213 
2214     /*package-private*/
2215     final
2216     @ForceInline
2217     <M extends VectorMask<Short>>
2218     ShortVector
2219     unsliceTemplate(Class<M> maskType, int origin, Vector<Short> w, int part, M m) {
2220         ShortVector that = (ShortVector) w;
2221         that.check(this);
2222         ShortVector slice = that.sliceTemplate(origin, that);
2223         slice = slice.blendTemplate(maskType, this, m);
2224         return slice.unsliceTemplate(origin, w, part);
2225     }
2226 
2227     /**
2228      * {@inheritDoc} <!--workaround-->
2229      */
2230     @Override
2231     public abstract
2232     ShortVector unslice(int origin, Vector<Short> w, int part, VectorMask<Short> m);
2233 
2234     /**
2235      * {@inheritDoc} <!--workaround-->
2236      */
2237     @Override
2238     public abstract
2239     ShortVector unslice(int origin);
2240 
2241     /*package-private*/
2242     final
2243     @ForceInline
2244     ShortVector
2245     unsliceTemplate(int origin) {
2246         Objects.checkIndex(origin, length() + 1);
2247         VectorShuffle<Short> iota = iotaShuffle();
2248         VectorMask<Short> blendMask = iota.toVector().compare(VectorOperators.GE,
2249                                                                   (broadcast((short)(origin))));
2250         iota = iotaShuffle(-origin, 1, true);
2251         return vspecies().zero().blend(this.rearrange(iota), blendMask);
2252     }
2253 
2254     private ArrayIndexOutOfBoundsException
2255     wrongPartForSlice(int part) {
2256         String msg = String.format("bad part number %d for slice operation",
2257                                    part);
2258         return new ArrayIndexOutOfBoundsException(msg);
2259     }
2260 
2261     /**
2262      * {@inheritDoc} <!--workaround-->
2263      */
2264     @Override
2265     public abstract
2266     ShortVector rearrange(VectorShuffle<Short> m);
2267 
2268     /*package-private*/
2269     @ForceInline
2270     final
2271     <S extends VectorShuffle<Short>>
2272     ShortVector rearrangeTemplate(Class<S> shuffletype, S shuffle) {
2273         shuffle.checkIndexes();
2274         return VectorSupport.rearrangeOp(
2275             getClass(), shuffletype, null, short.class, length(),
2276             this, shuffle, null,
2277             (v1, s_, m_) -> v1.uOp((i, a) -> {
2278                 int ei = s_.laneSource(i);
2279                 return v1.lane(ei);
2280             }));
2281     }
2282 
2283     /**
2284      * {@inheritDoc} <!--workaround-->
2285      */
2286     @Override
2287     public abstract
2288     ShortVector rearrange(VectorShuffle<Short> s,
2289                                    VectorMask<Short> m);
2290 
2291     /*package-private*/
2292     @ForceInline
2293     final
2294     <S extends VectorShuffle<Short>, M extends VectorMask<Short>>
2295     ShortVector rearrangeTemplate(Class<S> shuffletype,
2296                                            Class<M> masktype,
2297                                            S shuffle,
2298                                            M m) {
2299 
2300         m.check(masktype, this);
2301         VectorMask<Short> valid = shuffle.laneIsValid();
2302         if (m.andNot(valid).anyTrue()) {
2303             shuffle.checkIndexes();
2304             throw new AssertionError();
2305         }
2306         return VectorSupport.rearrangeOp(
2307                    getClass(), shuffletype, masktype, short.class, length(),
2308                    this, shuffle, m,
2309                    (v1, s_, m_) -> v1.uOp((i, a) -> {
2310                         int ei = s_.laneSource(i);
2311                         return ei < 0  || !m_.laneIsSet(i) ? 0 : v1.lane(ei);
2312                    }));
2313     }
2314 
2315     /**
2316      * {@inheritDoc} <!--workaround-->
2317      */
2318     @Override
2319     public abstract
2320     ShortVector rearrange(VectorShuffle<Short> s,
2321                                    Vector<Short> v);
2322 
2323     /*package-private*/
2324     @ForceInline
2325     final
2326     <S extends VectorShuffle<Short>>
2327     ShortVector rearrangeTemplate(Class<S> shuffletype,
2328                                            S shuffle,
2329                                            ShortVector v) {
2330         VectorMask<Short> valid = shuffle.laneIsValid();
2331         @SuppressWarnings("unchecked")
2332         S ws = (S) shuffle.wrapIndexes();
2333         ShortVector r0 =
2334             VectorSupport.rearrangeOp(
2335                 getClass(), shuffletype, null, short.class, length(),
2336                 this, ws, null,
2337                 (v0, s_, m_) -> v0.uOp((i, a) -> {
2338                     int ei = s_.laneSource(i);
2339                     return v0.lane(ei);
2340                 }));
2341         ShortVector r1 =
2342             VectorSupport.rearrangeOp(
2343                 getClass(), shuffletype, null, short.class, length(),
2344                 v, ws, null,
2345                 (v1, s_, m_) -> v1.uOp((i, a) -> {
2346                     int ei = s_.laneSource(i);
2347                     return v1.lane(ei);
2348                 }));
2349         return r1.blend(r0, valid);
2350     }
2351 
2352     @ForceInline
2353     private final
2354     VectorShuffle<Short> toShuffle0(ShortSpecies dsp) {
2355         short[] a = toArray();
2356         int[] sa = new int[a.length];
2357         for (int i = 0; i < a.length; i++) {
2358             sa[i] = (int) a[i];
2359         }
2360         return VectorShuffle.fromArray(dsp, sa, 0);
2361     }
2362 
2363     /*package-private*/
2364     @ForceInline
2365     final
2366     VectorShuffle<Short> toShuffleTemplate(Class<?> shuffleType) {
2367         ShortSpecies vsp = vspecies();
2368         return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST,
2369                                      getClass(), short.class, length(),
2370                                      shuffleType, byte.class, length(),
2371                                      this, vsp,
2372                                      ShortVector::toShuffle0);
2373     }
2374 
2375     /**
2376      * {@inheritDoc} <!--workaround-->
2377      */
2378     @Override
2379     public abstract
2380     ShortVector selectFrom(Vector<Short> v);
2381 
2382     /*package-private*/
2383     @ForceInline
2384     final ShortVector selectFromTemplate(ShortVector v) {
2385         return v.rearrange(this.toShuffle());
2386     }
2387 
2388     /**
2389      * {@inheritDoc} <!--workaround-->
2390      */
2391     @Override
2392     public abstract
2393     ShortVector selectFrom(Vector<Short> s, VectorMask<Short> m);
2394 
2395     /*package-private*/
2396     @ForceInline
2397     final ShortVector selectFromTemplate(ShortVector v,
2398                                                   AbstractMask<Short> m) {
2399         return v.rearrange(this.toShuffle(), m);
2400     }
2401 
2402     /// Ternary operations
2403 
2404     /**
2405      * Blends together the bits of two vectors under
2406      * the control of a third, which supplies mask bits.
2407      *
2408      * This is a lane-wise ternary operation which performs
2409      * a bitwise blending operation {@code (a&~c)|(b&c)}
2410      * to each lane.
2411      *
2412      * This method is also equivalent to the expression
2413      * {@link #lanewise(VectorOperators.Ternary,Vector,Vector)
2414      *    lanewise}{@code (}{@link VectorOperators#BITWISE_BLEND
2415      *    BITWISE_BLEND}{@code , bits, mask)}.
2416      *
2417      * @param bits input bits to blend into the current vector
2418      * @param mask a bitwise mask to enable blending of the input bits
2419      * @return the bitwise blend of the given bits into the current vector,
2420      *         under control of the bitwise mask
2421      * @see #bitwiseBlend(short,short)
2422      * @see #bitwiseBlend(short,Vector)
2423      * @see #bitwiseBlend(Vector,short)
2424      * @see VectorOperators#BITWISE_BLEND
2425      * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
2426      */
2427     @ForceInline
2428     public final
2429     ShortVector bitwiseBlend(Vector<Short> bits, Vector<Short> mask) {
2430         return lanewise(BITWISE_BLEND, bits, mask);
2431     }
2432 
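    // Example (illustrative sketch, not generated code): BITWISE_BLEND takes
    // each result bit from "vb" where the corresponding bit of "vm" is 1 and
    // from this vector otherwise, i.e. (a & ~c) | (b & c) per lane.  "va",
    // "vb", and "vm" are assumed ShortVector values.
    //
    //   ShortVector merged = va.bitwiseBlend(vb, vm);
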
2433     /**
2434      * Blends together the bits of a vector and a scalar under
2435      * the control of another scalar, which supplies mask bits.
2436      *
2437      * This is a lane-wise ternary operation which performs
2438      * a bitwise blending operation {@code (a&~c)|(b&c)}
2439      * to each lane.
2440      *
2441      * This method is also equivalent to the expression
2442      * {@link #lanewise(VectorOperators.Ternary,Vector,Vector)
2443      *    lanewise}{@code (}{@link VectorOperators#BITWISE_BLEND
2444      *    BITWISE_BLEND}{@code , bits, mask)}.
2445      *
2446      * @param bits input bits to blend into the current vector
2447      * @param mask a bitwise mask to enable blending of the input bits
2448      * @return the bitwise blend of the given bits into the current vector,
2449      *         under control of the bitwise mask
2450      * @see #bitwiseBlend(Vector,Vector)
2451      * @see VectorOperators#BITWISE_BLEND
2452      * @see #lanewise(VectorOperators.Ternary,short,short,VectorMask)
2453      */
2454     @ForceInline
2455     public final
2456     ShortVector bitwiseBlend(short bits, short mask) {
2457         return lanewise(BITWISE_BLEND, bits, mask);
2458     }
2459 
2460     /**
2461      * Blends together the bits of a vector and a scalar under
2462      * the control of another vector, which supplies mask bits.
2463      *
2464      * This is a lane-wise ternary operation which performs
2465      * a bitwise blending operation {@code (a&~c)|(b&c)}
2466      * to each lane.
2467      *
2468      * This method is also equivalent to the expression
2469      * {@link #lanewise(VectorOperators.Ternary,Vector,Vector)
2470      *    lanewise}{@code (}{@link VectorOperators#BITWISE_BLEND
2471      *    BITWISE_BLEND}{@code , bits, mask)}.
2472      *
2473      * @param bits input bits to blend into the current vector
2474      * @param mask a bitwise mask to enable blending of the input bits
2475      * @return the bitwise blend of the given bits into the current vector,
2476      *         under control of the bitwise mask
2477      * @see #bitwiseBlend(Vector,Vector)
2478      * @see VectorOperators#BITWISE_BLEND
2479      * @see #lanewise(VectorOperators.Ternary,short,Vector,VectorMask)
2480      */
2481     @ForceInline
2482     public final
2483     ShortVector bitwiseBlend(short bits, Vector<Short> mask) {
2484         return lanewise(BITWISE_BLEND, bits, mask);
2485     }
2486 
2487     /**
2488      * Blends together the bits of two vectors under
2489      * the control of a scalar, which supplies mask bits.
2490      *
2491      * This is a lane-wise ternary operation which performs
2492      * a bitwise blending operation {@code (a&~c)|(b&c)}
2493      * to each lane.
2494      *
2495      * This method is also equivalent to the expression
2496      * {@link #lanewise(VectorOperators.Ternary,Vector,Vector)
2497      *    lanewise}{@code (}{@link VectorOperators#BITWISE_BLEND
2498      *    BITWISE_BLEND}{@code , bits, mask)}.
2499      *
2500      * @param bits input bits to blend into the current vector
2501      * @param mask a bitwise mask to enable blending of the input bits
2502      * @return the bitwise blend of the given bits into the current vector,
2503      *         under control of the bitwise mask
2504      * @see #bitwiseBlend(Vector,Vector)
2505      * @see VectorOperators#BITWISE_BLEND
2506      * @see #lanewise(VectorOperators.Ternary,Vector,short,VectorMask)
2507      */
2508     @ForceInline
2509     public final
2510     ShortVector bitwiseBlend(Vector<Short> bits, short mask) {
2511         return lanewise(BITWISE_BLEND, bits, mask);
2512     }
2513 
2514 
2515     // Type specific horizontal reductions
2516 
2517     /**
2518      * Returns a value accumulated from all the lanes of this vector.
2519      *
2520      * This is an associative cross-lane reduction operation which
2521      * applies the specified operation to all the lane elements.
2522      * <p>
2523      * A few reduction operations do not support arbitrary reordering
2524      * of their operands, yet are included here because of their
2525      * usefulness.
2526      * <ul>
2527      * <li>
2528      * In the case of {@code FIRST_NONZERO}, the reduction returns
2529      * the value from the lowest-numbered non-zero lane.
2530      * <li>
2531      * All other reduction operations are fully commutative and
2532      * associative.  The implementation can choose any order of
2533      * processing, yet it will always produce the same result.
2534      * </ul>
2535      *
2536      * @param op the operation used to combine lane values
2537      * @return the accumulated result
2538      * @throws UnsupportedOperationException if this vector does
2539      *         not support the requested operation
2540      * @see #reduceLanes(VectorOperators.Associative,VectorMask)
2541      * @see #add(Vector)
2542      * @see #mul(Vector)
2543      * @see #min(Vector)
2544      * @see #max(Vector)
2545      * @see #and(Vector)
2546      * @see #or(Vector)
2547      * @see VectorOperators#XOR
2548      * @see VectorOperators#FIRST_NONZERO
2549      */
2550     public abstract short reduceLanes(VectorOperators.Associative op);
2551 
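    // Example (illustrative sketch, not generated code): full-width
    // reductions over an assumed vector "va".  ADD wraps around on overflow
    // just as scalar short addition does.
    //
    //   short sum = va.reduceLanes(VectorOperators.ADD);
    //   short max = va.reduceLanes(VectorOperators.MAX);
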
2552     /**
2553      * Returns a value accumulated from selected lanes of this vector,
2554      * controlled by a mask.
2555      *
2556      * This is an associative cross-lane reduction operation which
2557      * applies the specified operation to the selected lane elements.
2558      * <p>
2559      * If no elements are selected, an operation-specific identity
2560      * value is returned.
2561      * <ul>
2562      * <li>
2563      * If the operation is
2564      *  {@code ADD}, {@code XOR}, {@code OR},
2565      * or {@code FIRST_NONZERO},
2566      * then the identity value is zero, the default {@code short} value.
2567      * <li>
2568      * If the operation is {@code MUL},
2569      * then the identity value is one.
2570      * <li>
2571      * If the operation is {@code AND},
2572      * then the identity value is minus one (all bits set).
2573      * <li>
2574      * If the operation is {@code MAX},
2575      * then the identity value is {@code Short.MIN_VALUE}.
2576      * <li>
2577      * If the operation is {@code MIN},
2578      * then the identity value is {@code Short.MAX_VALUE}.
2579      * </ul>
2580      * <p>
2581      * A few reduction operations do not support arbitrary reordering
2582      * of their operands, yet are included here because of their
2583      * usefulness.
2584      * <ul>
2585      * <li>
2586      * In the case of {@code FIRST_NONZERO}, the reduction returns
2587      * the value from the lowest-numbered non-zero lane.
2588      * <li>
2589      * All other reduction operations are fully commutative and
2590      * associative.  The implementation can choose any order of
2591      * processing, yet it will always produce the same result.
2592      * </ul>
2593      *
2594      * @param op the operation used to combine lane values
2595      * @param m the mask controlling lane selection
2596      * @return the reduced result accumulated from the selected lane values
2597      * @throws UnsupportedOperationException if this vector does
2598      *         not support the requested operation
2599      * @see #reduceLanes(VectorOperators.Associative)
2600      */
2601     public abstract short reduceLanes(VectorOperators.Associative op,
2602                                        VectorMask<Short> m);
2603 
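    // Example (illustrative sketch, not generated code): with an all-unset
    // mask the masked reduction returns the identity listed above, here
    // Short.MIN_VALUE for MAX.  "va" is an assumed ShortVector.
    //
    //   VectorMask<Short> none = va.species().maskAll(false);
    //   short r = va.reduceLanes(VectorOperators.MAX, none);  // == Short.MIN_VALUE
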
2604     /*package-private*/
2605     @ForceInline
2606     final
2607     short reduceLanesTemplate(VectorOperators.Associative op,
2608                                Class<? extends VectorMask<Short>> maskClass,
2609                                VectorMask<Short> m) {
2610         m.check(maskClass, this);
2611         if (op == FIRST_NONZERO) {
2612             // FIXME:  The JIT should handle this.
2613             ShortVector v = broadcast((short) 0).blend(this, m);
2614             return v.reduceLanesTemplate(op);
2615         }
2616         int opc = opCode(op);
2617         return fromBits(VectorSupport.reductionCoerced(
2618             opc, getClass(), maskClass, short.class, length(),
2619             this, m,
2620             REDUCE_IMPL.find(op, opc, ShortVector::reductionOperations)));
2621     }
2622 
2623     /*package-private*/
2624     @ForceInline
2625     final
2626     short reduceLanesTemplate(VectorOperators.Associative op) {
2627         if (op == FIRST_NONZERO) {
2628             // FIXME:  The JIT should handle this.
2629             VectorMask<Short> thisNZ
2630                 = this.viewAsIntegralLanes().compare(NE, (short) 0);
2631             int ft = thisNZ.firstTrue();
2632             return ft < length() ? this.lane(ft) : (short) 0;
2633         }
2634         int opc = opCode(op);
2635         return fromBits(VectorSupport.reductionCoerced(
2636             opc, getClass(), null, short.class, length(),
2637             this, null,
2638             REDUCE_IMPL.find(op, opc, ShortVector::reductionOperations)));
2639     }
2640 
2641     private static final
2642     ImplCache<Associative, ReductionOperation<ShortVector, VectorMask<Short>>>
2643         REDUCE_IMPL = new ImplCache<>(Associative.class, ShortVector.class);
2644 
2645     private static ReductionOperation<ShortVector, VectorMask<Short>> reductionOperations(int opc_) {
2646         switch (opc_) {
2647             case VECTOR_OP_ADD: return (v, m) ->
2648                     toBits(v.rOp((short)0, m, (i, a, b) -> (short)(a + b)));
2649             case VECTOR_OP_MUL: return (v, m) ->
2650                     toBits(v.rOp((short)1, m, (i, a, b) -> (short)(a * b)));
2651             case VECTOR_OP_MIN: return (v, m) ->
2652                     toBits(v.rOp(MAX_OR_INF, m, (i, a, b) -> (short) Math.min(a, b)));
2653             case VECTOR_OP_MAX: return (v, m) ->
2654                     toBits(v.rOp(MIN_OR_INF, m, (i, a, b) -> (short) Math.max(a, b)));
2655             case VECTOR_OP_AND: return (v, m) ->
2656                     toBits(v.rOp((short)-1, m, (i, a, b) -> (short)(a & b)));
2657             case VECTOR_OP_OR: return (v, m) ->
2658                     toBits(v.rOp((short)0, m, (i, a, b) -> (short)(a | b)));
2659             case VECTOR_OP_XOR: return (v, m) ->
2660                     toBits(v.rOp((short)0, m, (i, a, b) -> (short)(a ^ b)));
2661             default: return null;
2662         }
2663     }
2664 
2665     private static final short MIN_OR_INF = Short.MIN_VALUE;
2666     private static final short MAX_OR_INF = Short.MAX_VALUE;
2667 
2668     public @Override abstract long reduceLanesToLong(VectorOperators.Associative op);
2669     public @Override abstract long reduceLanesToLong(VectorOperators.Associative op,
2670                                                      VectorMask<Short> m);
2671 
2672     // Type specific accessors
2673 
2674     /**
2675      * Gets the lane element at lane index {@code i}.
2676      *
2677      * @param i the lane index
2678      * @return the lane element at lane index {@code i}
2679      * @throws IllegalArgumentException if the index is out of range
2680      * ({@code < 0 || >= length()})
2681      */
2682     public abstract short lane(int i);
2683 
2684     /**
2685      * Replaces the lane element of this vector at lane index {@code i} with
2686      * value {@code e}.
2687      *
2688      * This is a cross-lane operation and behaves as if it returns the result
2689      * of blending this vector with an input vector that is the result of
2690      * broadcasting {@code e} and a mask that has only one lane set at lane
2691      * index {@code i}.
2692      *
2693      * @param i the lane index of the lane element to be replaced
2694      * @param e the value to be placed
2695      * @return the result of replacing the lane element of this vector at lane
2696      * index {@code i} with value {@code e}.
2697      * @throws IllegalArgumentException if the index is out of range
2698      * ({@code < 0 || >= length()})
2699      */
2700     public abstract ShortVector withLane(int i, short e);
2701 
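    // Example (illustrative sketch, not generated code): single-lane access on
    // an assumed vector "va"; withLane leaves all other lanes unchanged.
    //
    //   short first    = va.lane(0);
    //   ShortVector v2 = va.withLane(0, (short) 7);
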
2702     // Memory load operations
2703 
2704     /**
2705      * Returns an array of type {@code short[]}
2706      * containing all the lane values.
2707      * The array length is the same as the vector length.
2708      * The array elements are stored in lane order.
2709      * <p>
2710      * This method behaves as if it stores
2711      * this vector into an allocated array
2712      * (using {@link #intoArray(short[], int) intoArray})
2713      * and returns the array as follows:
2714      * <pre>{@code
2715      *   short[] a = new short[this.length()];
2716      *   this.intoArray(a, 0);
2717      *   return a;
2718      * }</pre>
2719      *
2720      * @return an array containing the lane values of this vector
2721      */
2722     @ForceInline
2723     @Override
2724     public final short[] toArray() {
2725         short[] a = new short[vspecies().laneCount()];
2726         intoArray(a, 0);
2727         return a;
2728     }
2729 
2730     /** {@inheritDoc} <!--workaround-->
2731      * @implNote
2732      * When this method is used on vectors
2733      * of type {@code ShortVector},
2734      * there will be no loss of precision or range,
2735      * and so no {@code UnsupportedOperationException} will
2736      * be thrown.
2737      */
2738     @ForceInline
2739     @Override
2740     public final int[] toIntArray() {
2741         short[] a = toArray();
2742         int[] res = new int[a.length];
2743         for (int i = 0; i < a.length; i++) {
2744             short e = a[i];
2745             res[i] = (int) ShortSpecies.toIntegralChecked(e, true);
2746         }
2747         return res;
2748     }
2749 
2750     /** {@inheritDoc} <!--workaround-->
2751      * @implNote
2752      * When this method is used on vectors
2753      * of type {@code ShortVector},
2754      * there will be no loss of precision or range,
2755      * and so no {@code UnsupportedOperationException} will
2756      * be thrown.
2757      */
2758     @ForceInline
2759     @Override
2760     public final long[] toLongArray() {
2761         short[] a = toArray();
2762         long[] res = new long[a.length];
2763         for (int i = 0; i < a.length; i++) {
2764             short e = a[i];
2765             res[i] = ShortSpecies.toIntegralChecked(e, false);
2766         }
2767         return res;
2768     }
2769 
2770     /** {@inheritDoc} <!--workaround-->
2771      * @implNote
2772      * When this method is used on vectors
2773      * of type {@code ShortVector},
2774      * there will be no loss of precision.
2775      */
2776     @ForceInline
2777     @Override
2778     public final double[] toDoubleArray() {
2779         short[] a = toArray();
2780         double[] res = new double[a.length];
2781         for (int i = 0; i < a.length; i++) {
2782             res[i] = (double) a[i];
2783         }
2784         return res;
2785     }
2786 
2787     /**
2788      * Loads a vector from a byte array starting at an offset.
2789      * Bytes are composed into primitive lane elements according
2790      * to the specified byte order.
2791      * The vector is arranged into lanes according to
2792      * <a href="Vector.html#lane-order">memory ordering</a>.
2793      * <p>
2794      * This method behaves as if it returns the result of calling
2795      * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
2796      * fromByteBuffer()} as follows:
2797      * <pre>{@code
2798      * var bb = ByteBuffer.wrap(a);
2799      * var m = species.maskAll(true);
2800      * return fromByteBuffer(species, bb, offset, bo, m);
2801      * }</pre>
2802      *
2803      * @param species species of desired vector
2804      * @param a the byte array
2805      * @param offset the offset into the array
2806      * @param bo the intended byte order
2807      * @return a vector loaded from a byte array
2808      * @throws IndexOutOfBoundsException
2809      *         if {@code offset+N*ESIZE < 0}
2810      *         or {@code offset+(N+1)*ESIZE > a.length}
2811      *         for any lane {@code N} in the vector
2812      */
2813     @ForceInline
2814     public static
2815     ShortVector fromByteArray(VectorSpecies<Short> species,
2816                                        byte[] a, int offset,
2817                                        ByteOrder bo) {
2818         offset = checkFromIndexSize(offset, species.vectorByteSize(), a.length);
2819         ShortSpecies vsp = (ShortSpecies) species;
2820         return vsp.dummyVector().fromByteArray0(a, offset).maybeSwap(bo);
2821     }
2822 
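    // Example (illustrative sketch, not generated code): composing shorts from
    // an assumed byte[] "bytes" in little-endian order; each lane consumes two
    // bytes.  "S" is an assumed species constant.
    //
    //   ShortVector vb = ShortVector.fromByteArray(S, bytes, 0, ByteOrder.LITTLE_ENDIAN);
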
2823     /**
2824      * Loads a vector from a byte array starting at an offset
2825      * and using a mask.
2826      * Lanes where the mask is unset are filled with the default
2827      * value of {@code short} (zero).
2828      * Bytes are composed into primitive lane elements according
2829      * to the specified byte order.
2830      * The vector is arranged into lanes according to
2831      * <a href="Vector.html#lane-order">memory ordering</a>.
2832      * <p>
2833      * This method behaves as if it returns the result of calling
2834      * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
2835      * fromByteBuffer()} as follows:
2836      * <pre>{@code
2837      * var bb = ByteBuffer.wrap(a);
2838      * return fromByteBuffer(species, bb, offset, bo, m);
2839      * }</pre>
2840      *
2841      * @param species species of desired vector
2842      * @param a the byte array
2843      * @param offset the offset into the array
2844      * @param bo the intended byte order
2845      * @param m the mask controlling lane selection
2846      * @return a vector loaded from a byte array
2847      * @throws IndexOutOfBoundsException
2848      *         if {@code offset+N*ESIZE < 0}
2849      *         or {@code offset+(N+1)*ESIZE > a.length}
2850      *         for any lane {@code N} in the vector
2851      *         where the mask is set
2852      */
2853     @ForceInline
2854     public static
2855     ShortVector fromByteArray(VectorSpecies<Short> species,
2856                                        byte[] a, int offset,
2857                                        ByteOrder bo,
2858                                        VectorMask<Short> m) {
2859         ShortSpecies vsp = (ShortSpecies) species;
2860         if (offset >= 0 && offset <= (a.length - species.vectorByteSize())) {
2861             return vsp.dummyVector().fromByteArray0(a, offset, m).maybeSwap(bo);
2862         }
2863 
2864         // FIXME: optimize
2865         checkMaskFromIndexSize(offset, vsp, m, 2, a.length);
2866         ByteBuffer wb = wrapper(a, bo);
2867         return vsp.ldOp(wb, offset, (AbstractMask<Short>)m,
2868                    (wb_, o, i)  -> wb_.getShort(o + i * 2));
2869     }
2870 
2871     /**
2872      * Loads a vector from an array of type {@code short[]}
2873      * starting at an offset.
2874      * For each vector lane, where {@code N} is the vector lane index, the
2875      * array element at index {@code offset + N} is placed into the
2876      * resulting vector at lane index {@code N}.
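          * <p> For example, this minimal, non-normative sketch uses the
          * eight-lane species constant {@code ShortVector.SPECIES_128}:
          * <pre>{@code
          * short[] a = {10, 11, 12, 13, 14, 15, 16, 17, 18, 19};
          * ShortVector v = ShortVector.fromArray(ShortVector.SPECIES_128, a, 2);
          * // v now holds the lane values [12, 13, 14, 15, 16, 17, 18, 19]
          * }</pre>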
2877      *
2878      * @param species species of desired vector
2879      * @param a the array
2880      * @param offset the offset into the array
2881      * @return the vector loaded from an array
2882      * @throws IndexOutOfBoundsException
2883      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
2884      *         for any lane {@code N} in the vector
2885      */
2886     @ForceInline
2887     public static
2888     ShortVector fromArray(VectorSpecies<Short> species,
2889                                    short[] a, int offset) {
2890         offset = checkFromIndexSize(offset, species.length(), a.length);
2891         ShortSpecies vsp = (ShortSpecies) species;
2892         return vsp.dummyVector().fromArray0(a, offset);
2893     }
2894 
2895     /**
2896      * Loads a vector from an array of type {@code short[]}
2897      * starting at an offset and using a mask.
2898      * Lanes where the mask is unset are filled with the default
2899      * value of {@code short} (zero).
2900      * For each vector lane, where {@code N} is the vector lane index,
2901      * if the mask lane at index {@code N} is set then the array element at
2902      * index {@code offset + N} is placed into the resulting vector at lane index
2903      * {@code N}, otherwise the default element value is placed into the
2904      * resulting vector at lane index {@code N}.
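          * <p> A common use, shown here as a non-normative sketch, is to
          * load the final partial group of an array, obtaining the mask from
          * {@link VectorSpecies#indexInRange(int,int) indexInRange()}:
          * <pre>{@code
          * var species = ShortVector.SPECIES_128;  // eight lanes
          * short[] a = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
          * int i = 8;  // only a[8] and a[9] remain
          * VectorMask<Short> m = species.indexInRange(i, a.length);
          * ShortVector v = ShortVector.fromArray(species, a, i, m);
          * // lanes 0 and 1 hold 8 and 9; lanes 2 through 7 are zero
          * }</pre>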
2905      *
2906      * @param species species of desired vector
2907      * @param a the array
2908      * @param offset the offset into the array
2909      * @param m the mask controlling lane selection
2910      * @return the vector loaded from an array
2911      * @throws IndexOutOfBoundsException
2912      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
2913      *         for any lane {@code N} in the vector
2914      *         where the mask is set
2915      */
2916     @ForceInline
2917     public static
2918     ShortVector fromArray(VectorSpecies<Short> species,
2919                                    short[] a, int offset,
2920                                    VectorMask<Short> m) {
2921         ShortSpecies vsp = (ShortSpecies) species;
2922         if (offset >= 0 && offset <= (a.length - species.length())) {
2923             return vsp.dummyVector().fromArray0(a, offset, m);
2924         }
2925 
2926         // FIXME: optimize
2927         checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
2928         return vsp.vOp(m, i -> a[offset + i]);
2929     }
2930 
2931     /**
2932      * Gathers a new vector composed of elements from an array of type
2933      * {@code short[]},
2934      * using indexes obtained by adding a fixed {@code offset} to a
2935      * series of secondary offsets from an <em>index map</em>.
2936      * The index map is a contiguous sequence of {@code VLENGTH}
2937      * elements in a second array of {@code int}s, starting at a given
2938      * {@code mapOffset}.
2939      * <p>
2940      * For each vector lane, where {@code N} is the vector lane index,
2941      * the lane is loaded from the array
2942      * element {@code a[f(N)]}, where {@code f(N)} is the
2943      * index mapping expression
2944      * {@code offset + indexMap[mapOffset + N]}.
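          * <p> For example, this non-normative sketch gathers eight lanes in
          * reverse order, using the eight-lane species constant
          * {@code ShortVector.SPECIES_128}:
          * <pre>{@code
          * short[] a = {0, 1, 2, 3, 4, 5, 6, 7};
          * int[] indexMap = {7, 6, 5, 4, 3, 2, 1, 0};
          * ShortVector v =
          *     ShortVector.fromArray(ShortVector.SPECIES_128, a, 0, indexMap, 0);
          * // v now holds the lane values [7, 6, 5, 4, 3, 2, 1, 0]
          * }</pre>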
2945      *
2946      * @param species species of desired vector
2947      * @param a the array
2948      * @param offset the offset into the array, may be negative if relative
2949      * indexes in the index map compensate to produce a value within the
2950      * array bounds
2951      * @param indexMap the index map
2952      * @param mapOffset the offset into the index map
2953      * @return the vector loaded from the indexed elements of the array
2954      * @throws IndexOutOfBoundsException
2955      *         if {@code mapOffset+N < 0}
2956      *         or if {@code mapOffset+N >= indexMap.length},
2957      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
2958      *         is an invalid index into {@code a},
2959      *         for any lane {@code N} in the vector
2960      * @see ShortVector#toIntArray()
2961      */
2962     @ForceInline
2963     public static
2964     ShortVector fromArray(VectorSpecies<Short> species,
2965                                    short[] a, int offset,
2966                                    int[] indexMap, int mapOffset) {
2967         ShortSpecies vsp = (ShortSpecies) species;
2968         return vsp.vOp(n -> a[offset + indexMap[mapOffset + n]]);
2969     }
2970 
2971     /**
2972      * Gathers a new vector composed of elements from an array of type
2973      * {@code short[]},
2974      * under the control of a mask, and
2975      * using indexes obtained by adding a fixed {@code offset} to a
2976      * series of secondary offsets from an <em>index map</em>.
2977      * The index map is a contiguous sequence of {@code VLENGTH}
2978      * elements in a second array of {@code int}s, starting at a given
2979      * {@code mapOffset}.
2980      * <p>
2981      * For each vector lane, where {@code N} is the vector lane index,
2982      * if the lane is set in the mask,
2983      * the lane is loaded from the array
2984      * element {@code a[f(N)]}, where {@code f(N)} is the
2985      * index mapping expression
2986      * {@code offset + indexMap[mapOffset + N]}.
2987      * Unset lanes in the resulting vector are set to zero.
2988      *
2989      * @param species species of desired vector
2990      * @param a the array
2991      * @param offset the offset into the array, may be negative if relative
2992      * indexes in the index map compensate to produce a value within the
2993      * array bounds
2994      * @param indexMap the index map
2995      * @param mapOffset the offset into the index map
2996      * @param m the mask controlling lane selection
2997      * @return the vector loaded from the indexed elements of the array
2998      * @throws IndexOutOfBoundsException
2999      *         if {@code mapOffset+N < 0}
3000      *         or if {@code mapOffset+N >= indexMap.length},
3001      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
3002      *         is an invalid index into {@code a},
3003      *         for any lane {@code N} in the vector
3004      *         where the mask is set
3005      * @see ShortVector#toIntArray()
3006      */
3007     @ForceInline
3008     public static
3009     ShortVector fromArray(VectorSpecies<Short> species,
3010                                    short[] a, int offset,
3011                                    int[] indexMap, int mapOffset,
3012                                    VectorMask<Short> m) {
3013         ShortSpecies vsp = (ShortSpecies) species;
3014         return vsp.vOp(m, n -> a[offset + indexMap[mapOffset + n]]);
3015     }
3016 
3017     /**
3018      * Loads a vector from an array of type {@code char[]}
3019      * starting at an offset.
3020      * For each vector lane, where {@code N} is the vector lane index, the
3021      * array element at index {@code offset + N}
3022      * is first cast to a {@code short} value and then
3023      * placed into the resulting vector at lane index {@code N}.
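          * <p> For example, this minimal, non-normative sketch uses the
          * four-lane species constant {@code ShortVector.SPECIES_64}:
          * <pre>{@code
          * char[] text = {'A', 'B', 'C', 'D'};
          * ShortVector v = ShortVector.fromCharArray(ShortVector.SPECIES_64, text, 0);
          * // v holds the UTF-16 code units as shorts: [65, 66, 67, 68]
          * }</pre>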
3024      *
3025      * @param species species of desired vector
3026      * @param a the array
3027      * @param offset the offset into the array
3028      * @return the vector loaded from an array
3029      * @throws IndexOutOfBoundsException
3030      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
3031      *         for any lane {@code N} in the vector
3032      */
3033     @ForceInline
3034     public static
3035     ShortVector fromCharArray(VectorSpecies<Short> species,
3036                                        char[] a, int offset) {
3037         offset = checkFromIndexSize(offset, species.length(), a.length);
3038         ShortSpecies vsp = (ShortSpecies) species;
3039         return vsp.dummyVector().fromCharArray0(a, offset);
3040     }
3041 
3042     /**
3043      * Loads a vector from an array of type {@code char[]}
3044      * starting at an offset and using a mask.
3045      * Lanes where the mask is unset are filled with the default
3046      * value of {@code short} (zero).
3047      * For each vector lane, where {@code N} is the vector lane index,
3048      * if the mask lane at index {@code N} is set then the array element at
3049      * index {@code offset + N}
3050      * is first cast to a {@code short} value and then
3051      * placed into the resulting vector at lane index
3052      * {@code N}, otherwise the default element value is placed into the
3053      * resulting vector at lane index {@code N}.
3054      *
3055      * @param species species of desired vector
3056      * @param a the array
3057      * @param offset the offset into the array
3058      * @param m the mask controlling lane selection
3059      * @return the vector loaded from an array
3060      * @throws IndexOutOfBoundsException
3061      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
3062      *         for any lane {@code N} in the vector
3063      *         where the mask is set
3064      */
3065     @ForceInline
3066     public static
3067     ShortVector fromCharArray(VectorSpecies<Short> species,
3068                                        char[] a, int offset,
3069                                        VectorMask<Short> m) {
3070         ShortSpecies vsp = (ShortSpecies) species;
3071         if (offset >= 0 && offset <= (a.length - species.length())) {
3072             return vsp.dummyVector().fromCharArray0(a, offset, m);
3073         }
3074 
3075         // FIXME: optimize
3076         checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
3077         return vsp.vOp(m, i -> (short) a[offset + i]);
3078     }
3079 
3080     /**
3081      * Gathers a new vector composed of elements from an array of type
3082      * {@code char[]},
3083      * using indexes obtained by adding a fixed {@code offset} to a
3084      * series of secondary offsets from an <em>index map</em>.
3085      * The index map is a contiguous sequence of {@code VLENGTH}
3086      * elements in a second array of {@code int}s, starting at a given
3087      * {@code mapOffset}.
3088      * <p>
3089      * For each vector lane, where {@code N} is the vector lane index,
3090      * the lane is loaded from the expression
3091      * {@code (short) a[f(N)]}, where {@code f(N)} is the
3092      * index mapping expression
3093      * {@code offset + indexMap[mapOffset + N]}.
3094      *
3095      * @param species species of desired vector
3096      * @param a the array
3097      * @param offset the offset into the array, may be negative if relative
3098      * indexes in the index map compensate to produce a value within the
3099      * array bounds
3100      * @param indexMap the index map
3101      * @param mapOffset the offset into the index map
3102      * @return the vector loaded from the indexed elements of the array
3103      * @throws IndexOutOfBoundsException
3104      *         if {@code mapOffset+N < 0}
3105      *         or if {@code mapOffset+N >= indexMap.length},
3106      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
3107      *         is an invalid index into {@code a},
3108      *         for any lane {@code N} in the vector
3109      * @see ShortVector#toIntArray()
3110      */
3111     @ForceInline
3112     public static
3113     ShortVector fromCharArray(VectorSpecies<Short> species,
3114                                        char[] a, int offset,
3115                                        int[] indexMap, int mapOffset) {
3116         // FIXME: optimize
3117         ShortSpecies vsp = (ShortSpecies) species;
3118         return vsp.vOp(n -> (short) a[offset + indexMap[mapOffset + n]]);
3119     }
3120 
3121     /**
3122      * Gathers a new vector composed of elements from an array of type
3123      * {@code char[]},
3124      * under the control of a mask, and
3125      * using indexes obtained by adding a fixed {@code offset} to a
3126      * series of secondary offsets from an <em>index map</em>.
3127      * The index map is a contiguous sequence of {@code VLENGTH}
3128      * elements in a second array of {@code int}s, starting at a given
3129      * {@code mapOffset}.
3130      * <p>
3131      * For each vector lane, where {@code N} is the vector lane index,
3132      * if the lane is set in the mask,
3133      * the lane is loaded from the expression
3134      * {@code (short) a[f(N)]}, where {@code f(N)} is the
3135      * index mapping expression
3136      * {@code offset + indexMap[mapOffset + N]}.
3137      * Unset lanes in the resulting vector are set to zero.
3138      *
3139      * @param species species of desired vector
3140      * @param a the array
3141      * @param offset the offset into the array, may be negative if relative
3142      * indexes in the index map compensate to produce a value within the
3143      * array bounds
3144      * @param indexMap the index map
3145      * @param mapOffset the offset into the index map
3146      * @param m the mask controlling lane selection
3147      * @return the vector loaded from the indexed elements of the array
3148      * @throws IndexOutOfBoundsException
3149      *         if {@code mapOffset+N < 0}
3150      *         or if {@code mapOffset+N >= indexMap.length},
3151      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
3152      *         is an invalid index into {@code a},
3153      *         for any lane {@code N} in the vector
3154      *         where the mask is set
3155      * @see ShortVector#toIntArray()
3156      */
3157     @ForceInline
3158     public static
3159     ShortVector fromCharArray(VectorSpecies<Short> species,
3160                                        char[] a, int offset,
3161                                        int[] indexMap, int mapOffset,
3162                                        VectorMask<Short> m) {
3163         // FIXME: optimize
3164         ShortSpecies vsp = (ShortSpecies) species;
3165         return vsp.vOp(m, n -> (short) a[offset + indexMap[mapOffset + n]]);
3166     }
3167 
3168 
3169     /**
3170      * Loads a vector from a {@linkplain ByteBuffer byte buffer}
3171      * starting at an offset into the byte buffer.
3172      * Bytes are composed into primitive lane elements according
3173      * to the specified byte order.
3174      * The vector is arranged into lanes according to
3175      * <a href="Vector.html#lane-order">memory ordering</a>.
3176      * <p>
3177      * This method behaves as if it returns the result of calling
3178      * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
3179      * fromByteBuffer()} as follows:
3180      * <pre>{@code
3181      * var m = species.maskAll(true);
3182      * return fromByteBuffer(species, bb, offset, bo, m);
3183      * }</pre>
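          *
          * <p> For example, this non-normative sketch reads four {@code short}
          * lanes from a heap buffer in big-endian order, using the four-lane
          * species constant {@code ShortVector.SPECIES_64}:
          * <pre>{@code
          * ByteBuffer buf = ByteBuffer.allocate(8);
          * buf.putShort(0, (short) 100).putShort(2, (short) 200)
          *    .putShort(4, (short) 300).putShort(6, (short) 400);
          * ShortVector v =
          *     ShortVector.fromByteBuffer(ShortVector.SPECIES_64,
          *                                buf, 0, ByteOrder.BIG_ENDIAN);
          * // v holds the lane values [100, 200, 300, 400]
          * }</pre>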
3184      *
3185      * @param species species of desired vector
3186      * @param bb the byte buffer
3187      * @param offset the offset into the byte buffer
3188      * @param bo the intended byte order
3189      * @return a vector loaded from a byte buffer
3190      * @throws IndexOutOfBoundsException
3191      *         if {@code offset+N*2 < 0}
3192      *         or {@code offset+N*2 >= bb.limit()}
3193      *         for any lane {@code N} in the vector
3194      */
3195     @ForceInline
3196     public static
3197     ShortVector fromByteBuffer(VectorSpecies<Short> species,
3198                                         ByteBuffer bb, int offset,
3199                                         ByteOrder bo) {
3200         offset = checkFromIndexSize(offset, species.vectorByteSize(), bb.limit());
3201         ShortSpecies vsp = (ShortSpecies) species;
3202         return vsp.dummyVector().fromByteBuffer0(bb, offset).maybeSwap(bo);
3203     }
3204 
3205     /**
3206      * Loads a vector from a {@linkplain ByteBuffer byte buffer}
3207      * starting at an offset into the byte buffer
3208      * and using a mask.
3209      * Lanes where the mask is unset are filled with the default
3210      * value of {@code short} (zero).
3211      * Bytes are composed into primitive lane elements according
3212      * to the specified byte order.
3213      * The vector is arranged into lanes according to
3214      * <a href="Vector.html#lane-order">memory ordering</a>.
3215      * <p>
3216      * The following pseudocode illustrates the behavior:
3217      * <pre>{@code
3218      * ShortBuffer eb = bb.duplicate()
3219      *     .position(offset)
3220      *     .order(bo).asShortBuffer();
3221      * short[] ar = new short[species.length()];
3222      * for (int n = 0; n < ar.length; n++) {
3223      *     if (m.laneIsSet(n)) {
3224      *         ar[n] = eb.get(n);
3225      *     }
3226      * }
3227      * ShortVector r = ShortVector.fromArray(species, ar, 0);
3228      * }</pre>
3229      * @implNote
3230      * This operation is likely to be more efficient if
3231      * the specified byte order is the same as
3232      * {@linkplain ByteOrder#nativeOrder()
3233      * the platform native order},
3234      * since this method will not need to reorder
3235      * the bytes of lane values.
3236      *
3237      * @param species species of desired vector
3238      * @param bb the byte buffer
3239      * @param offset the offset into the byte buffer
3240      * @param bo the intended byte order
3241      * @param m the mask controlling lane selection
3242      * @return a vector loaded from a byte buffer
3243      * @throws IndexOutOfBoundsException
3244      *         if {@code offset+N*2 < 0}
3245      *         or {@code offset+N*2 >= bb.limit()}
3246      *         for any lane {@code N} in the vector
3247      *         where the mask is set
3248      */
3249     @ForceInline
3250     public static
3251     ShortVector fromByteBuffer(VectorSpecies<Short> species,
3252                                         ByteBuffer bb, int offset,
3253                                         ByteOrder bo,
3254                                         VectorMask<Short> m) {
3255         ShortSpecies vsp = (ShortSpecies) species;
3256         if (offset >= 0 && offset <= (bb.limit() - species.vectorByteSize())) {
3257             return vsp.dummyVector().fromByteBuffer0(bb, offset, m).maybeSwap(bo);
3258         }
3259 
3260         // FIXME: optimize
3261         checkMaskFromIndexSize(offset, vsp, m, 2, bb.limit());
3262         ByteBuffer wb = wrapper(bb, bo);
3263         return vsp.ldOp(wb, offset, (AbstractMask<Short>)m,
3264                    (wb_, o, i)  -> wb_.getShort(o + i * 2));
3265     }
3266 
3267     // Memory store operations
3268 
3269     /**
3270      * Stores this vector into an array of type {@code short[]}
3271      * starting at an offset.
3272      * <p>
3273      * For each vector lane, where {@code N} is the vector lane index,
3274      * the lane element at index {@code N} is stored into the array
3275      * element {@code a[offset+N]}.
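          * <p> For example, this minimal, non-normative sketch doubles eight
          * elements in place, using the eight-lane species constant
          * {@code ShortVector.SPECIES_128}:
          * <pre>{@code
          * short[] a = {1, 2, 3, 4, 5, 6, 7, 8};
          * ShortVector v = ShortVector.fromArray(ShortVector.SPECIES_128, a, 0);
          * v.add(v).intoArray(a, 0);
          * // a is now {2, 4, 6, 8, 10, 12, 14, 16}
          * }</pre>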
3276      *
3277      * @param a the array, of type {@code short[]}
3278      * @param offset the offset into the array
3279      * @throws IndexOutOfBoundsException
3280      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
3281      *         for any lane {@code N} in the vector
3282      */
3283     @ForceInline
3284     public final
3285     void intoArray(short[] a, int offset) {
3286         offset = checkFromIndexSize(offset, length(), a.length);
3287         ShortSpecies vsp = vspecies();
3288         VectorSupport.store(
3289             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3290             a, arrayAddress(a, offset),
3291             this,
3292             a, offset,
3293             (arr, off, v)
3294             -> v.stOp(arr, off,
3295                       (arr_, off_, i, e) -> arr_[off_ + i] = e));
3296     }
3297 
3298     /**
3299      * Stores this vector into an array of type {@code short[]}
3300      * starting at an offset and using a mask.
3301      * <p>
3302      * For each vector lane, where {@code N} is the vector lane index,
3303      * the lane element at index {@code N} is stored into the array
3304      * element {@code a[offset+N]}.
3305      * If the mask lane at {@code N} is unset then the corresponding
3306      * array element {@code a[offset+N]} is left unchanged.
3307      * <p>
3308      * Array range checking is done for lanes where the mask is set.
3309      * Lanes where the mask is unset are not stored and do not need
3310      * to correspond to legitimate elements of {@code a}.
3311      * That is, unset lanes may correspond to array indexes less than
3312      * zero or beyond the end of the array.
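          * <p> This makes a masked store a natural way to handle the final
          * partial group of a loop, as in this non-normative sketch:
          * <pre>{@code
          * var species = ShortVector.SPECIES_PREFERRED;
          * short[] src = new short[13];
          * short[] dst = new short[13];
          * int i = 0;
          * int bound = species.loopBound(src.length);
          * for (; i < bound; i += species.length()) {
          *     ShortVector.fromArray(species, src, i).intoArray(dst, i);
          * }
          * // tail: only lanes with in-range indexes are loaded and stored
          * VectorMask<Short> m = species.indexInRange(i, src.length);
          * ShortVector.fromArray(species, src, i, m).intoArray(dst, i, m);
          * }</pre>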
3313      *
3314      * @param a the array, of type {@code short[]}
3315      * @param offset the offset into the array
3316      * @param m the mask controlling lane storage
3317      * @throws IndexOutOfBoundsException
3318      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
3319      *         for any lane {@code N} in the vector
3320      *         where the mask is set
3321      */
3322     @ForceInline
3323     public final
3324     void intoArray(short[] a, int offset,
3325                    VectorMask<Short> m) {
3326         if (m.allTrue()) {
3327             intoArray(a, offset);
3328         } else {
3329             ShortSpecies vsp = vspecies();
3330             checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
3331             intoArray0(a, offset, m);
3332         }
3333     }
3334 
3335     /**
3336      * Scatters this vector into an array of type {@code short[]}
3337      * using indexes obtained by adding a fixed {@code offset} to a
3338      * series of secondary offsets from an <em>index map</em>.
3339      * The index map is a contiguous sequence of {@code VLENGTH}
3340      * elements in a second array of {@code int}s, starting at a given
3341      * {@code mapOffset}.
3342      * <p>
3343      * For each vector lane, where {@code N} is the vector lane index,
3344      * the lane element at index {@code N} is stored into the array
3345      * element {@code a[f(N)]}, where {@code f(N)} is the
3346      * index mapping expression
3347      * {@code offset + indexMap[mapOffset + N]}.
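          * <p> For example, this non-normative sketch scatters eight lanes
          * into an array in reverse order, using the eight-lane species
          * constant {@code ShortVector.SPECIES_128}:
          * <pre>{@code
          * short[] src = {0, 1, 2, 3, 4, 5, 6, 7};
          * short[] dst = new short[8];
          * int[] indexMap = {7, 6, 5, 4, 3, 2, 1, 0};
          * ShortVector.fromArray(ShortVector.SPECIES_128, src, 0)
          *            .intoArray(dst, 0, indexMap, 0);
          * // dst is now {7, 6, 5, 4, 3, 2, 1, 0}
          * }</pre>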
3348      *
3349      * @param a the array
3350      * @param offset an offset to combine with the index map offsets
3351      * @param indexMap the index map
3352      * @param mapOffset the offset into the index map
3353      * @throws IndexOutOfBoundsException
3354      *         if {@code mapOffset+N < 0}
3355      *         or if {@code mapOffset+N >= indexMap.length},
3356      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
3357      *         is an invalid index into {@code a},
3358      *         for any lane {@code N} in the vector
3359      * @see ShortVector#toIntArray()
3360      */
3361     @ForceInline
3362     public final
3363     void intoArray(short[] a, int offset,
3364                    int[] indexMap, int mapOffset) {
3365         stOp(a, offset,
3366              (arr, off, i, e) -> {
3367                  int j = indexMap[mapOffset + i];
3368                  arr[off + j] = e;
3369              });
3370     }
3371 
3372     /**
3373      * Scatters this vector into an array of type {@code short[]},
3374      * under the control of a mask, and
3375      * using indexes obtained by adding a fixed {@code offset} to a
3376      * series of secondary offsets from an <em>index map</em>.
3377      * The index map is a contiguous sequence of {@code VLENGTH}
3378      * elements in a second array of {@code int}s, starting at a given
3379      * {@code mapOffset}.
3380      * <p>
3381      * For each vector lane, where {@code N} is the vector lane index,
3382      * if the mask lane at index {@code N} is set then
3383      * the lane element at index {@code N} is stored into the array
3384      * element {@code a[f(N)]}, where {@code f(N)} is the
3385      * index mapping expression
3386      * {@code offset + indexMap[mapOffset + N]}.
3387      *
3388      * @param a the array
3389      * @param offset an offset to combine with the index map offsets
3390      * @param indexMap the index map
3391      * @param mapOffset the offset into the index map
3392      * @param m the mask
3393      * @throws IndexOutOfBoundsException
3394      *         if {@code mapOffset+N < 0}
3395      *         or if {@code mapOffset+N >= indexMap.length},
3396      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
3397      *         is an invalid index into {@code a},
3398      *         for any lane {@code N} in the vector
3399      *         where the mask is set
3400      * @see ShortVector#toIntArray()
3401      */
3402     @ForceInline
3403     public final
3404     void intoArray(short[] a, int offset,
3405                    int[] indexMap, int mapOffset,
3406                    VectorMask<Short> m) {
3407         stOp(a, offset, m,
3408              (arr, off, i, e) -> {
3409                  int j = indexMap[mapOffset + i];
3410                  arr[off + j] = e;
3411              });
3412     }
3413 
3414     /**
3415      * Stores this vector into an array of type {@code char[]}
3416      * starting at an offset.
3417      * <p>
3418      * For each vector lane, where {@code N} is the vector lane index,
3419      * the lane element at index {@code N}
3420      * is first cast to a {@code char} value and then
3421      * stored into the array element {@code a[offset+N]}.
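          * <p> For example, this minimal, non-normative sketch uses the
          * four-lane species constant {@code ShortVector.SPECIES_64}:
          * <pre>{@code
          * short[] codes = {72, 105, 33, 32};
          * char[] out = new char[4];
          * ShortVector.fromArray(ShortVector.SPECIES_64, codes, 0)
          *            .intoCharArray(out, 0);
          * // out is now {'H', 'i', '!', ' '}
          * }</pre>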
3422      *
3423      * @param a the array, of type {@code char[]}
3424      * @param offset the offset into the array
3425      * @throws IndexOutOfBoundsException
3426      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
3427      *         for any lane {@code N} in the vector
3428      */
3429     @ForceInline
3430     public final
3431     void intoCharArray(char[] a, int offset) {
3432         offset = checkFromIndexSize(offset, length(), a.length);
3433         ShortSpecies vsp = vspecies();
3434         VectorSupport.store(
3435             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3436             a, charArrayAddress(a, offset),
3437             this,
3438             a, offset,
3439             (arr, off, v)
3440             -> v.stOp(arr, off,
3441                       (arr_, off_, i, e) -> arr_[off_ + i] = (char) e));
3442     }
3443 
3444     /**
3445      * Stores this vector into an array of type {@code char[]}
3446      * starting at an offset and using a mask.
3447      * <p>
3448      * For each vector lane, where {@code N} is the vector lane index,
3449      * the lane element at index {@code N}
3450      * is first cast to a {@code char} value and then
3451      * stored into the array element {@code a[offset+N]}.
3452      * If the mask lane at {@code N} is unset then the corresponding
3453      * array element {@code a[offset+N]} is left unchanged.
3454      * <p>
3455      * Array range checking is done for lanes where the mask is set.
3456      * Lanes where the mask is unset are not stored and do not need
3457      * to correspond to legitimate elements of {@code a}.
3458      * That is, unset lanes may correspond to array indexes less than
3459      * zero or beyond the end of the array.
3460      *
3461      * @param a the array, of type {@code char[]}
3462      * @param offset the offset into the array
3463      * @param m the mask controlling lane storage
3464      * @throws IndexOutOfBoundsException
3465      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
3466      *         for any lane {@code N} in the vector
3467      *         where the mask is set
3468      */
3469     @ForceInline
3470     public final
3471     void intoCharArray(char[] a, int offset,
3472                        VectorMask<Short> m) {
3473         if (m.allTrue()) {
3474             intoCharArray(a, offset);
3475         } else {
3476             ShortSpecies vsp = vspecies();
3477             checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
3478             intoCharArray0(a, offset, m);
3479         }
3480     }
3481 
3482     /**
3483      * Scatters this vector into an array of type {@code char[]}
3484      * using indexes obtained by adding a fixed {@code offset} to a
3485      * series of secondary offsets from an <em>index map</em>.
3486      * The index map is a contiguous sequence of {@code VLENGTH}
3487      * elements in a second array of {@code int}s, starting at a given
3488      * {@code mapOffset}.
3489      * <p>
3490      * For each vector lane, where {@code N} is the vector lane index,
3491      * the lane element at index {@code N}
3492      * is first cast to a {@code char} value and then
3493      * stored into the array
3494      * element {@code a[f(N)]}, where {@code f(N)} is the
3495      * index mapping expression
3496      * {@code offset + indexMap[mapOffset + N]}.
3497      *
3498      * @param a the array
3499      * @param offset an offset to combine with the index map offsets
3500      * @param indexMap the index map
3501      * @param mapOffset the offset into the index map
3502      * @throws IndexOutOfBoundsException
3503      *         if {@code mapOffset+N < 0}
3504      *         or if {@code mapOffset+N >= indexMap.length},
3505      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
3506      *         is an invalid index into {@code a},
3507      *         for any lane {@code N} in the vector
3508      * @see ShortVector#toIntArray()
3509      */
3510     @ForceInline
3511     public final
3512     void intoCharArray(char[] a, int offset,
3513                        int[] indexMap, int mapOffset) {
3514         // FIXME: optimize
3515         stOp(a, offset,
3516              (arr, off, i, e) -> {
3517                  int j = indexMap[mapOffset + i];
3518                  arr[off + j] = (char) e;
3519              });
3520     }
3521 
3522     /**
3523      * Scatters this vector into an array of type {@code char[]},
3524      * under the control of a mask, and
3525      * using indexes obtained by adding a fixed {@code offset} to a
3526      * series of secondary offsets from an <em>index map</em>.
3527      * The index map is a contiguous sequence of {@code VLENGTH}
3528      * elements in a second array of {@code int}s, starting at a given
3529      * {@code mapOffset}.
3530      * <p>
3531      * For each vector lane, where {@code N} is the vector lane index,
3532      * if the mask lane at index {@code N} is set then
3533      * the lane element at index {@code N}
3534      * is first cast to a {@code char} value and then
3535      * stored into the array
3536      * element {@code a[f(N)]}, where {@code f(N)} is the
3537      * index mapping expression
3538      * {@code offset + indexMap[mapOffset + N]}.
3539      *
3540      * @param a the array
3541      * @param offset an offset to combine with the index map offsets
3542      * @param indexMap the index map
3543      * @param mapOffset the offset into the index map
3544      * @param m the mask
3545      * @throws IndexOutOfBoundsException
3546      *         if {@code mapOffset+N < 0}
3547      *         or if {@code mapOffset+N >= indexMap.length},
3548      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
3549      *         is an invalid index into {@code a},
3550      *         for any lane {@code N} in the vector
3551      *         where the mask is set
3552      * @see ShortVector#toIntArray()
3553      */
3554     @ForceInline
3555     public final
3556     void intoCharArray(char[] a, int offset,
3557                        int[] indexMap, int mapOffset,
3558                        VectorMask<Short> m) {
3559         // FIXME: optimize
3560         stOp(a, offset, m,
3561              (arr, off, i, e) -> {
3562                  int j = indexMap[mapOffset + i];
3563                  arr[off + j] = (char) e;
3564              });
3565     }
3566 
3567 
3568     /**
3569      * {@inheritDoc} <!--workaround-->
3570      */
3571     @Override
3572     @ForceInline
3573     public final
3574     void intoByteArray(byte[] a, int offset,
3575                        ByteOrder bo) {
3576         offset = checkFromIndexSize(offset, byteSize(), a.length);
3577         maybeSwap(bo).intoByteArray0(a, offset);
3578     }
3579 
3580     /**
3581      * {@inheritDoc} <!--workaround-->
3582      */
3583     @Override
3584     @ForceInline
3585     public final
3586     void intoByteArray(byte[] a, int offset,
3587                        ByteOrder bo,
3588                        VectorMask<Short> m) {
3589         if (m.allTrue()) {
3590             intoByteArray(a, offset, bo);
3591         } else {
3592             ShortSpecies vsp = vspecies();
3593             checkMaskFromIndexSize(offset, vsp, m, 2, a.length);
3594             maybeSwap(bo).intoByteArray0(a, offset, m);
3595         }
3596     }
3597 
3598     /**
3599      * {@inheritDoc} <!--workaround-->
3600      */
3601     @Override
3602     @ForceInline
3603     public final
3604     void intoByteBuffer(ByteBuffer bb, int offset,
3605                         ByteOrder bo) {
3606         if (ScopedMemoryAccess.isReadOnly(bb)) {
3607             throw new ReadOnlyBufferException();
3608         }
3609         offset = checkFromIndexSize(offset, byteSize(), bb.limit());
3610         maybeSwap(bo).intoByteBuffer0(bb, offset);
3611     }
3612 
3613     /**
3614      * {@inheritDoc} <!--workaround-->
3615      */
3616     @Override
3617     @ForceInline
3618     public final
3619     void intoByteBuffer(ByteBuffer bb, int offset,
3620                         ByteOrder bo,
3621                         VectorMask<Short> m) {
3622         if (m.allTrue()) {
3623             intoByteBuffer(bb, offset, bo);
3624         } else {
3625             if (bb.isReadOnly()) {
3626                 throw new ReadOnlyBufferException();
3627             }
3628             ShortSpecies vsp = vspecies();
3629             checkMaskFromIndexSize(offset, vsp, m, 2, bb.limit());
3630             maybeSwap(bo).intoByteBuffer0(bb, offset, m);
3631         }
3632     }
3633 
3634     // ================================================
3635 
3636     // Low-level memory operations.
3637     //
3638     // Note that all of these operations *must* inline into a context
3639     // where the exact species of the involved vector is a
3640     // compile-time constant.  Otherwise, the intrinsic generation
3641     // will fail and performance will suffer.
3642     //
3643     // In many cases this is achieved by re-deriving a version of the
3644     // method in each concrete subclass (per species).  The re-derived
3645     // method simply calls one of these generic methods, with exact
3646     // parameters for the controlling metadata, which is either a
3647     // typed vector or constant species instance.
3648 
3649     // Unchecked loading operations in native byte order.
3650     // Caller is responsible for applying index checks, masking, and
3651     // byte swapping.
3652 
3653     /*package-private*/
3654     abstract
3655     ShortVector fromArray0(short[] a, int offset);
3656     @ForceInline
3657     final
3658     ShortVector fromArray0Template(short[] a, int offset) {
3659         ShortSpecies vsp = vspecies();
3660         return VectorSupport.load(
3661             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3662             a, arrayAddress(a, offset),
3663             a, offset, vsp,
3664             (arr, off, s) -> s.ldOp(arr, off,
3665                                     (arr_, off_, i) -> arr_[off_ + i]));
3666     }
3667 
3668     /*package-private*/
3669     abstract
3670     ShortVector fromArray0(short[] a, int offset, VectorMask<Short> m);
3671     @ForceInline
3672     final
3673     <M extends VectorMask<Short>>
3674     ShortVector fromArray0Template(Class<M> maskClass, short[] a, int offset, M m) {
3675         m.check(species());
3676         ShortSpecies vsp = vspecies();
3677         return VectorSupport.loadMasked(
3678             vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
3679             a, arrayAddress(a, offset), m,
3680             a, offset, vsp,
3681             (arr, off, s, vm) -> s.ldOp(arr, off, vm,
3682                                         (arr_, off_, i) -> arr_[off_ + i]));
3683     }
3684 
3685 
3686     /*package-private*/
3687     abstract
3688     ShortVector fromCharArray0(char[] a, int offset);
3689     @ForceInline
3690     final
3691     ShortVector fromCharArray0Template(char[] a, int offset) {
3692         ShortSpecies vsp = vspecies();
3693         return VectorSupport.load(
3694             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3695             a, charArrayAddress(a, offset),
3696             a, offset, vsp,
3697             (arr, off, s) -> s.ldOp(arr, off,
3698                                     (arr_, off_, i) -> (short) arr_[off_ + i]));
3699     }
3700 
3701     /*package-private*/
3702     abstract
3703     ShortVector fromCharArray0(char[] a, int offset, VectorMask<Short> m);
3704     @ForceInline
3705     final
3706     <M extends VectorMask<Short>>
3707     ShortVector fromCharArray0Template(Class<M> maskClass, char[] a, int offset, M m) {
3708         m.check(species());
3709         ShortSpecies vsp = vspecies();
3710         return VectorSupport.loadMasked(
3711                 vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
3712                 a, charArrayAddress(a, offset), m,
3713                 a, offset, vsp,
3714                 (arr, off, s, vm) -> s.ldOp(arr, off, vm,
3715                                             (arr_, off_, i) -> (short) arr_[off_ + i]));
3716     }
3717 
3718 
3719     @Override
3720     abstract
3721     ShortVector fromByteArray0(byte[] a, int offset);
3722     @ForceInline
3723     final
3724     ShortVector fromByteArray0Template(byte[] a, int offset) {
3725         ShortSpecies vsp = vspecies();
3726         return VectorSupport.load(
3727             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3728             a, byteArrayAddress(a, offset),
3729             a, offset, vsp,
3730             (arr, off, s) -> {
3731                 ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
3732                 return s.ldOp(wb, off,
3733                         (wb_, o, i) -> wb_.getShort(o + i * 2));
3734             });
3735     }
3736 
3737     abstract
3738     ShortVector fromByteArray0(byte[] a, int offset, VectorMask<Short> m);
3739     @ForceInline
3740     final
3741     <M extends VectorMask<Short>>
3742     ShortVector fromByteArray0Template(Class<M> maskClass, byte[] a, int offset, M m) {
3743         ShortSpecies vsp = vspecies();
3744         m.check(vsp);
3745         return VectorSupport.loadMasked(
3746             vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
3747             a, byteArrayAddress(a, offset), m,
3748             a, offset, vsp,
3749             (arr, off, s, vm) -> {
3750                 ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
3751                 return s.ldOp(wb, off, vm,
3752                         (wb_, o, i) -> wb_.getShort(o + i * 2));
3753             });
3754     }
3755 
3756     abstract
3757     ShortVector fromByteBuffer0(ByteBuffer bb, int offset);
3758     @ForceInline
3759     final
3760     ShortVector fromByteBuffer0Template(ByteBuffer bb, int offset) {
3761         ShortSpecies vsp = vspecies();
3762         return ScopedMemoryAccess.loadFromByteBuffer(
3763                 vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3764                 bb, offset, vsp,
3765                 (buf, off, s) -> {
3766                     ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
3767                     return s.ldOp(wb, off,
3768                             (wb_, o, i) -> wb_.getShort(o + i * 2));
3769                 });
3770     }
3771 
3772     abstract
3773     ShortVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Short> m);
3774     @ForceInline
3775     final
3776     <M extends VectorMask<Short>>
3777     ShortVector fromByteBuffer0Template(Class<M> maskClass, ByteBuffer bb, int offset, M m) {
3778         ShortSpecies vsp = vspecies();
3779         m.check(vsp);
3780         return ScopedMemoryAccess.loadFromByteBufferMasked(
3781                 vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
3782                 bb, offset, m, vsp,
3783                 (buf, off, s, vm) -> {
3784                     ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
3785                     return s.ldOp(wb, off, vm,
3786                             (wb_, o, i) -> wb_.getShort(o + i * 2));
3787                 });
3788     }
3789 
3790     // Unchecked storing operations in native byte order.
3791     // Caller is responsible for applying index checks, masking, and
3792     // byte swapping.
3793 
3794     abstract
3795     void intoArray0(short[] a, int offset);
3796     @ForceInline
3797     final
3798     void intoArray0Template(short[] a, int offset) {
3799         ShortSpecies vsp = vspecies();
3800         VectorSupport.store(
3801             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3802             a, arrayAddress(a, offset),
3803             this, a, offset,
3804             (arr, off, v)
3805             -> v.stOp(arr, off,
3806                       (arr_, off_, i, e) -> arr_[off_+i] = e));
3807     }
3808 
3809     abstract
3810     void intoArray0(short[] a, int offset, VectorMask<Short> m);
3811     @ForceInline
3812     final
3813     <M extends VectorMask<Short>>
3814     void intoArray0Template(Class<M> maskClass, short[] a, int offset, M m) {
3815         m.check(species());
3816         ShortSpecies vsp = vspecies();
3817         VectorSupport.storeMasked(
3818             vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
3819             a, arrayAddress(a, offset),
3820             this, m, a, offset,
3821             (arr, off, v, vm)
3822             -> v.stOp(arr, off, vm,
3823                       (arr_, off_, i, e) -> arr_[off_ + i] = e));
3824     }
3825 
3826 
3827 
3828     abstract
3829     void intoByteArray0(byte[] a, int offset);
3830     @ForceInline
3831     final
3832     void intoByteArray0Template(byte[] a, int offset) {
3833         ShortSpecies vsp = vspecies();
3834         VectorSupport.store(
3835             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3836             a, byteArrayAddress(a, offset),
3837             this, a, offset,
3838             (arr, off, v) -> {
3839                 ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
3840                 v.stOp(wb, off,
3841                         (tb_, o, i, e) -> tb_.putShort(o + i * 2, e));
3842             });
3843     }
3844 
3845     abstract
3846     void intoByteArray0(byte[] a, int offset, VectorMask<Short> m);
3847     @ForceInline
3848     final
3849     <M extends VectorMask<Short>>
3850     void intoByteArray0Template(Class<M> maskClass, byte[] a, int offset, M m) {
3851         ShortSpecies vsp = vspecies();
3852         m.check(vsp);
3853         VectorSupport.storeMasked(
3854             vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
3855             a, byteArrayAddress(a, offset),
3856             this, m, a, offset,
3857             (arr, off, v, vm) -> {
3858                 ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
3859                 v.stOp(wb, off, vm,
3860                         (tb_, o, i, e) -> tb_.putShort(o + i * 2, e));
3861             });
3862     }
3863 
3864     @ForceInline
3865     final
3866     void intoByteBuffer0(ByteBuffer bb, int offset) {
3867         ShortSpecies vsp = vspecies();
3868         ScopedMemoryAccess.storeIntoByteBuffer(
3869                 vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3870                 this, bb, offset,
3871                 (buf, off, v) -> {
3872                     ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
3873                     v.stOp(wb, off,
3874                             (wb_, o, i, e) -> wb_.putShort(o + i * 2, e));
3875                 });
3876     }
3877 
3878     abstract
3879     void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Short> m);
3880     @ForceInline
3881     final
3882     <M extends VectorMask<Short>>
3883     void intoByteBuffer0Template(Class<M> maskClass, ByteBuffer bb, int offset, M m) {
3884         ShortSpecies vsp = vspecies();
3885         m.check(vsp);
3886         ScopedMemoryAccess.storeIntoByteBufferMasked(
3887                 vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
3888                 this, m, bb, offset,
3889                 (buf, off, v, vm) -> {
3890                     ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
3891                     v.stOp(wb, off, vm,
3892                             (wb_, o, i, e) -> wb_.putShort(o + i * 2, e));
3893                 });
3894     }
3895 
3896     /*package-private*/
3897     abstract
3898     void intoCharArray0(char[] a, int offset, VectorMask<Short> m);
3899     @ForceInline
3900     final
3901     <M extends VectorMask<Short>>
3902     void intoCharArray0Template(Class<M> maskClass, char[] a, int offset, M m) {
3903         m.check(species());
3904         ShortSpecies vsp = vspecies();
3905         VectorSupport.storeMasked(
3906             vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
3907             a, charArrayAddress(a, offset),
3908             this, m, a, offset,
3909             (arr, off, v, vm)
3910             -> v.stOp(arr, off, vm,
3911                       (arr_, off_, i, e) -> arr_[off_ + i] = (char) e));
3912     }
3913 
3914     // End of low-level memory operations.
3915 
3916     private static
3917     void checkMaskFromIndexSize(int offset,
3918                                 ShortSpecies vsp,
3919                                 VectorMask<Short> m,
3920                                 int scale,
3921                                 int limit) {
3922         ((AbstractMask<Short>)m)
3923             .checkIndexByLane(offset, limit, vsp.iota(), scale);
3924     }
3925 
3926     @ForceInline
3927     private void conditionalStoreNYI(int offset,
3928                                      ShortSpecies vsp,
3929                                      VectorMask<Short> m,
3930                                      int scale,
3931                                      int limit) {
3932         if (offset < 0 || offset + vsp.laneCount() * scale > limit) {
3933             String msg =
3934                 String.format("unimplemented: store @%d in [0..%d), %s in %s",
3935                               offset, limit, m, vsp);
3936             throw new AssertionError(msg);
3937         }
3938     }
3939 
3940     /*package-private*/
3941     @Override
3942     @ForceInline
3943     final
3944     ShortVector maybeSwap(ByteOrder bo) {
3945         if (bo != NATIVE_ENDIAN) {
3946             return this.reinterpretAsBytes()
3947                 .rearrange(swapBytesShuffle())
3948                 .reinterpretAsShorts();
3949         }
3950         return this;
3951     }
3952 
3953     static final int ARRAY_SHIFT =
3954         31 - Integer.numberOfLeadingZeros(Unsafe.ARRAY_SHORT_INDEX_SCALE);
3955     static final long ARRAY_BASE =
3956         Unsafe.ARRAY_SHORT_BASE_OFFSET;
3957 
3958     @ForceInline
3959     static long arrayAddress(short[] a, int index) {
3960         return ARRAY_BASE + (((long)index) << ARRAY_SHIFT);
3961     }
3962 
3963     static final int ARRAY_CHAR_SHIFT =
3964             31 - Integer.numberOfLeadingZeros(Unsafe.ARRAY_CHAR_INDEX_SCALE);
3965     static final long ARRAY_CHAR_BASE =
3966             Unsafe.ARRAY_CHAR_BASE_OFFSET;
3967 
3968     @ForceInline
3969     static long charArrayAddress(char[] a, int index) {
3970         return ARRAY_CHAR_BASE + (((long)index) << ARRAY_CHAR_SHIFT);
3971     }
3972 
3973 
3974     @ForceInline
3975     static long byteArrayAddress(byte[] a, int index) {
3976         return Unsafe.ARRAY_BYTE_BASE_OFFSET + index;
3977     }
3978 
3979     // ================================================
3980 
3981     /// Reinterpreting view methods:
3982     //   lanewise reinterpret: viewAsXVector()
3983     //   keep shape, redraw lanes: reinterpretAsEs()
3984 
3985     /**
3986      * {@inheritDoc} <!--workaround-->
3987      */
3988     @ForceInline
3989     @Override
3990     public final ByteVector reinterpretAsBytes() {
3991          // Going to ByteVector, pay close attention to byte order.
3992          assert(REGISTER_ENDIAN == ByteOrder.LITTLE_ENDIAN);
3993          return asByteVectorRaw();
3994          //return asByteVectorRaw().rearrange(swapBytesShuffle());
3995     }
3996 
3997     /**
3998      * {@inheritDoc} <!--workaround-->
3999      */
4000     @ForceInline
4001     @Override
4002     public final ShortVector viewAsIntegralLanes() {
4003         return this;
4004     }
4005 
4006     /**
4007      * {@inheritDoc} <!--workaround-->
4008      *
4009      * @implNote This method always throws
4010      * {@code UnsupportedOperationException}, because there is no floating
4011      * point type of the same size as {@code short}.  The return type
4012      * of this method is arbitrarily designated as
4013      * {@code Vector<?>}.  Future versions of this API may change the return
4014      * type if additional floating point types become available.
4015      */
4016     @ForceInline
4017     @Override
4018     public final
4019     Vector<?>
4020     viewAsFloatingLanes() {
4021         LaneType flt = LaneType.SHORT.asFloating();
4022         // asFloating() will throw UnsupportedOperationException for the unsupported type short
4023         throw new AssertionError("Cannot reach here");
4024     }
4025 
4026     // ================================================
4027 
4028     /// Object methods: toString, equals, hashCode
4029     //
4030     // Object methods are defined as if Arrays.toString, etc.,
4031     // were applied to the array of elements.  Two equal vectors
4032     // are required to have equal species and equal lane values.
4033 
4034     /**
4035      * Returns a string representation of this vector, of the form
4036      * {@code "[0,1,2...]"}, reporting the lane values of this vector,
4037      * in lane order.
4038      *
4039      * The string is produced as if by a call to {@link
4040      * java.util.Arrays#toString(short[]) Arrays.toString()},
4041      * as appropriate to the {@code short} array returned by
4042      * {@link #toArray this.toArray()}.
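          *
          * For example, a zero vector of the four-lane species
          * {@code ShortVector.SPECIES_64} prints as shown in this
          * non-normative sketch:
          * <pre>{@code
          * ShortVector.zero(ShortVector.SPECIES_64).toString()
          * // returns "[0, 0, 0, 0]"
          * }</pre>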
4043      *
4044      * @return a string of the form {@code "[0,1,2...]"}
4045      * reporting the lane values of this vector
4046      */
4047     @Override
4048     @ForceInline
4049     public final
4050     String toString() {
4051         // now that toArray is strongly typed, we can define this
4052         return Arrays.toString(toArray());
4053     }
4054 
4055     /**
4056      * {@inheritDoc} <!--workaround-->
4057      */
4058     @Override
4059     @ForceInline
4060     public final
4061     boolean equals(Object obj) {
4062         if (obj instanceof Vector) {
4063             Vector<?> that = (Vector<?>) obj;
4064             if (this.species().equals(that.species())) {
4065                 return this.eq(that.check(this.species())).allTrue();
4066             }
4067         }
4068         return false;
4069     }
4070 
4071     /**
4072      * {@inheritDoc} <!--workaround-->
4073      */
4074     @Override
4075     @ForceInline
4076     public final
4077     int hashCode() {
4078         // now that toArray is strongly typed, we can define this
4079         return Objects.hash(species(), Arrays.hashCode(toArray()));
4080     }
4081 
4082     // ================================================
4083 
4084     // Species
4085 
4086     /**
4087      * Class representing {@link ShortVector}s of the same {@link VectorShape VectorShape}.
4088      */
4089     /*package-private*/
4090     static final class ShortSpecies extends AbstractSpecies<Short> {
4091         private ShortSpecies(VectorShape shape,
4092                 Class<? extends ShortVector> vectorType,
4093                 Class<? extends AbstractMask<Short>> maskType,
4094                 Function<Object, ShortVector> vectorFactory) {
4095             super(shape, LaneType.of(short.class),
4096                   vectorType, maskType,
4097                   vectorFactory);
4098             assert(this.elementSize() == Short.SIZE);
4099         }
4100 
4101         // Specializing overrides:
4102 
4103         @Override
4104         @ForceInline
4105         public final Class<Short> elementType() {
4106             return short.class;
4107         }
4108 
4109         @Override
4110         @ForceInline
4111         final Class<Short> genericElementType() {
4112             return Short.class;
4113         }
4114 
4115         @SuppressWarnings("unchecked")
4116         @Override
4117         @ForceInline
4118         public final Class<? extends ShortVector> vectorType() {
4119             return (Class<? extends ShortVector>) vectorType;
4120         }
4121 
4122         @Override
4123         @ForceInline
4124         public final long checkValue(long e) {
4125             longToElementBits(e);  // only for exception
4126             return e;
4127         }
4128 
4129         /*package-private*/
4130         @Override
4131         @ForceInline
4132         final ShortVector broadcastBits(long bits) {
4133             return (ShortVector)
4134                 VectorSupport.fromBitsCoerced(
4135                     vectorType, short.class, laneCount,
4136                     bits, MODE_BROADCAST, this,
4137                     (bits_, s_) -> s_.rvOp(i -> bits_));
4138         }
4139 
4140         /*package-private*/
4141         @ForceInline
4142         final ShortVector broadcast(short e) {
4143             return broadcastBits(toBits(e));
4144         }
4145 
4146         @Override
4147         @ForceInline
4148         public final ShortVector broadcast(long e) {
4149             return broadcastBits(longToElementBits(e));
4150         }
4151 
4152         /*package-private*/
4153         final @Override
4154         @ForceInline
4155         long longToElementBits(long value) {
4156             // Do the conversion, and then test it for failure.
4157             short e = (short) value;
4158             if ((long) e != value) {
4159                 throw badElementBits(value, e);
4160             }
4161             return toBits(e);
4162         }
4163 
4164         /*package-private*/
4165         @ForceInline
4166         static long toIntegralChecked(short e, boolean convertToInt) {
4167             long value = convertToInt ? (int) e : (long) e;
4168             if ((short) value != e) {
4169                 throw badArrayBits(e, convertToInt, value);
4170             }
4171             return value;
4172         }
4173 
4174         /* this non-public one is for internal conversions */
4175         @Override
4176         @ForceInline
4177         final ShortVector fromIntValues(int[] values) {
4178             VectorIntrinsics.requireLength(values.length, laneCount);
4179             short[] va = new short[laneCount()];
4180             for (int i = 0; i < va.length; i++) {
4181                 int lv = values[i];
4182                 short v = (short) lv;
4183                 va[i] = v;
4184                 if ((int)v != lv) {
4185                     throw badElementBits(lv, v);
4186                 }
4187             }
4188             return dummyVector().fromArray0(va, 0);
4189         }
4190 
4191         // Virtual constructors
4192 
4193         @ForceInline
4194         @Override final
4195         public ShortVector fromArray(Object a, int offset) {
4196             // User entry point:  Be careful with inputs.
4197             return ShortVector
4198                 .fromArray(this, (short[]) a, offset);
4199         }
4200 
4201         @ForceInline
4202         @Override final
4203         ShortVector dummyVector() {
4204             return (ShortVector) super.dummyVector();
4205         }
4206 
4207         /*package-private*/
4208         final @Override
4209         @ForceInline
4210         ShortVector rvOp(RVOp f) {
4211             short[] res = new short[laneCount()];
4212             for (int i = 0; i < res.length; i++) {
4213                 short bits = (short) f.apply(i);
4214                 res[i] = fromBits(bits);
4215             }
4216             return dummyVector().vectorFactory(res);
4217         }
4218 
             // Build a new vector by applying f to each lane index.
4219         ShortVector vOp(FVOp f) {
4220             short[] res = new short[laneCount()];
4221             for (int i = 0; i < res.length; i++) {
4222                 res[i] = f.apply(i);
4223             }
4224             return dummyVector().vectorFactory(res);
4225         }
4226 
             // Build a new vector by applying f only in lanes selected by m;
             // unselected lanes are left at their default of zero.
4227         ShortVector vOp(VectorMask<Short> m, FVOp f) {
4228             short[] res = new short[laneCount()];
4229             boolean[] mbits = ((AbstractMask<Short>)m).getBits();
4230             for (int i = 0; i < res.length; i++) {
4231                 if (mbits[i]) {
4232                     res[i] = f.apply(i);
4233                 }
4234             }
4235             return dummyVector().vectorFactory(res);
4236         }
4237 
             // Memory access helpers, delegated to this species' dummy vector.

4238         /*package-private*/
4239         @ForceInline
4240         <M> ShortVector ldOp(M memory, int offset,
4241                                       FLdOp<M> f) {
4242             return dummyVector().ldOp(memory, offset, f);
4243         }
4244 
4245         /*package-private*/
4246         @ForceInline
4247         <M> ShortVector ldOp(M memory, int offset,
4248                                       VectorMask<Short> m,
4249                                       FLdOp<M> f) {
4250             return dummyVector().ldOp(memory, offset, m, f);
4251         }
4252 
4253         /*package-private*/
4254         @ForceInline
4255         <M> void stOp(M memory, int offset, FStOp<M> f) {
4256             dummyVector().stOp(memory, offset, f);
4257         }
4258 
4259         /*package-private*/
4260         @ForceInline
4261         <M> void stOp(M memory, int offset,
4262                       AbstractMask<Short> m,
4263                       FStOp<M> f) {
4264             dummyVector().stOp(memory, offset, m, f);
4265         }
4266 
4267         // N.B. Make sure these constant vectors and
4268         // masks load up correctly into registers.
4269         //
4270         // Also, see if we can avoid all that switching.
4271         // Could we cache both vectors and both masks in
4272         // this species object?
4273 
4274         // Zero and iota vector access
4275         @Override
4276         @ForceInline
4277         public final ShortVector zero() {
4278             if ((Class<?>) vectorType() == ShortMaxVector.class)
4279                 return ShortMaxVector.ZERO;
4280             switch (vectorBitSize()) {
4281                 case 64: return Short64Vector.ZERO;
4282                 case 128: return Short128Vector.ZERO;
4283                 case 256: return Short256Vector.ZERO;
4284                 case 512: return Short512Vector.ZERO;
4285             }
4286             throw new AssertionError();
4287         }
4288 
4289         @Override
4290         @ForceInline
4291         public final ShortVector iota() {
4292             if ((Class<?>) vectorType() == ShortMaxVector.class)
4293                 return ShortMaxVector.IOTA;
4294             switch (vectorBitSize()) {
4295                 case 64: return Short64Vector.IOTA;
4296                 case 128: return Short128Vector.IOTA;
4297                 case 256: return Short256Vector.IOTA;
4298                 case 512: return Short512Vector.IOTA;
4299             }
4300             throw new AssertionError();
4301         }
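
             /*
              * Illustrative sketch (not generated code): what the constant
              * accessors above yield for a 128-bit species (8 short lanes).
              * The local names are hypothetical.
              *
              *   ShortSpecies s = (ShortSpecies) ShortVector.SPECIES_128;
              *   ShortVector z = s.zero();   // [0, 0, 0, 0, 0, 0, 0, 0]
              *   ShortVector i = s.iota();   // [0, 1, 2, 3, 4, 5, 6, 7]
              */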
4302 
4303         // Mask access
4304         @Override
4305         @ForceInline
4306         public final VectorMask<Short> maskAll(boolean bit) {
4307             if ((Class<?>) vectorType() == ShortMaxVector.class)
4308                 return ShortMaxVector.ShortMaxMask.maskAll(bit);
4309             switch (vectorBitSize()) {
4310                 case 64: return Short64Vector.Short64Mask.maskAll(bit);
4311                 case 128: return Short128Vector.Short128Mask.maskAll(bit);
4312                 case 256: return Short256Vector.Short256Mask.maskAll(bit);
4313                 case 512: return Short512Vector.Short512Mask.maskAll(bit);
4314             }
4315             throw new AssertionError();
4316         }
4317     }
4318 
4319     /**
4320      * Finds a species for an element type of {@code short} and shape.
4321      *
4322      * @param s the shape
4323      * @return a species for an element type of {@code short} and shape
4324      * @throws IllegalArgumentException if no such species exists for the shape
4325      */
4326     static ShortSpecies species(VectorShape s) {
4327         Objects.requireNonNull(s);
4328         switch (s.switchKey) {
4329             case VectorShape.SK_64_BIT: return (ShortSpecies) SPECIES_64;
4330             case VectorShape.SK_128_BIT: return (ShortSpecies) SPECIES_128;
4331             case VectorShape.SK_256_BIT: return (ShortSpecies) SPECIES_256;
4332             case VectorShape.SK_512_BIT: return (ShortSpecies) SPECIES_512;
4333             case VectorShape.SK_Max_BIT: return (ShortSpecies) SPECIES_MAX;
4334             default: throw new IllegalArgumentException("Bad shape: " + s);
4335         }
4336     }
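
         /*
          * Illustrative sketch (not generated code): shape-based lookup as
          * used inside this package; the local name is hypothetical.
          *
          *   ShortSpecies s128 = ShortVector.species(VectorShape.S_128_BIT);
          *   assert s128 == ShortVector.SPECIES_128;
          *   assert s128.length() == 128 / Short.SIZE;   // 8 lanes of 16 bits
          */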
4337 
4338     /** Species representing {@link ShortVector}s of {@link VectorShape#S_64_BIT VectorShape.S_64_BIT}. */
4339     public static final VectorSpecies<Short> SPECIES_64
4340         = new ShortSpecies(VectorShape.S_64_BIT,
4341                             Short64Vector.class,
4342                             Short64Vector.Short64Mask.class,
4343                             Short64Vector::new);
4344 
4345     /** Species representing {@link ShortVector}s of {@link VectorShape#S_128_BIT VectorShape.S_128_BIT}. */
4346     public static final VectorSpecies<Short> SPECIES_128
4347         = new ShortSpecies(VectorShape.S_128_BIT,
4348                             Short128Vector.class,
4349                             Short128Vector.Short128Mask.class,
4350                             Short128Vector::new);
4351 
4352     /** Species representing {@link ShortVector}s of {@link VectorShape#S_256_BIT VectorShape.S_256_BIT}. */
4353     public static final VectorSpecies<Short> SPECIES_256
4354         = new ShortSpecies(VectorShape.S_256_BIT,
4355                             Short256Vector.class,
4356                             Short256Vector.Short256Mask.class,
4357                             Short256Vector::new);
4358 
4359     /** Species representing {@link ShortVector}s of {@link VectorShape#S_512_BIT VectorShape.S_512_BIT}. */
4360     public static final VectorSpecies<Short> SPECIES_512
4361         = new ShortSpecies(VectorShape.S_512_BIT,
4362                             Short512Vector.class,
4363                             Short512Vector.Short512Mask.class,
4364                             Short512Vector::new);
4365 
4366     /** Species representing {@link ShortVector}s of {@link VectorShape#S_Max_BIT VectorShape.S_Max_BIT}. */
4367     public static final VectorSpecies<Short> SPECIES_MAX
4368         = new ShortSpecies(VectorShape.S_Max_BIT,
4369                             ShortMaxVector.class,
4370                             ShortMaxVector.ShortMaxMask.class,
4371                             ShortMaxVector::new);
4372 
4373     /**
4374      * Preferred species for {@link ShortVector}s.
4375      * A preferred species is a species of maximal bit-size for the platform.
4376      */
4377     public static final VectorSpecies<Short> SPECIES_PREFERRED
4378         = (ShortSpecies) VectorSpecies.ofPreferred(short.class);
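
     /*
      * Illustrative sketch (not generated code): a typical strip-mined loop
      * over a short[] using the preferred species.  The array names and the
      * doubling operation are hypothetical, not part of this class.
      *
      *   VectorSpecies<Short> sp = ShortVector.SPECIES_PREFERRED;
      *   short[] a = new short[1000];
      *   short[] r = new short[a.length];
      *   int i = 0;
      *   for (int upper = sp.loopBound(a.length); i < upper; i += sp.length()) {
      *       ShortVector v = ShortVector.fromArray(sp, a, i);
      *       v.add(v).intoArray(r, i);         // r[i..i+len) = 2 * a[i..i+len)
      *   }
      *   for (; i < a.length; i++) {
      *       r[i] = (short) (a[i] + a[i]);     // scalar tail
      *   }
      */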
4379 }