1 /*
   2  * Copyright (c) 2017, 2021, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 package jdk.incubator.vector;
  26 
  27 import java.nio.ByteBuffer;
  28 import java.nio.ByteOrder;
  29 import java.nio.ReadOnlyBufferException;
  30 import java.util.Arrays;
  31 import java.util.Objects;
  32 import java.util.function.BinaryOperator;
  33 import java.util.function.Function;
  34 import java.util.function.UnaryOperator;
  35 
  36 import jdk.internal.misc.ScopedMemoryAccess;
  37 import jdk.internal.misc.Unsafe;
  38 import jdk.internal.vm.annotation.ForceInline;
  39 import jdk.internal.vm.vector.VectorSupport;
  40 
  41 import static jdk.internal.vm.vector.VectorSupport.*;
  42 import static jdk.incubator.vector.VectorIntrinsics.*;
  43 
  44 import static jdk.incubator.vector.VectorOperators.*;
  45 
  46 // -- This file was mechanically generated: Do not edit! -- //
  47 
  48 /**
  49  * A specialized {@link Vector} representing an ordered immutable sequence of
  50  * {@code short} values.
  51  */
  52 @SuppressWarnings("cast")  // warning: redundant cast
  53 public abstract class ShortVector extends AbstractVector<Short> {
  54 
    ShortVector(short[] vec) {
        // The backing array is owned by AbstractVector; it must not be
        // aliased elsewhere, since vectors are logically immutable.
        super(vec);
    }

    // Short is an integral lane type, so floating-point-only operators
    // (VO_ONLYFP) are rejected by opCode().
    static final int FORBID_OPCODE_KIND = VO_ONLYFP;

    // Maps an operator token to its numeric opcode, rejecting operators
    // that are not valid for short lanes.
    @ForceInline
    static int opCode(Operator op) {
        return VectorOperators.opCode(op, VO_OPCODE_VALID, FORBID_OPCODE_KIND);
    }
    // As above, but the caller may demand additional operator kind bits.
    @ForceInline
    static int opCode(Operator op, int requireKind) {
        requireKind |= VO_OPCODE_VALID;
        return VectorOperators.opCode(op, requireKind, FORBID_OPCODE_KIND);
    }
    // Tests whether the operator carries the given kind bit(s).
    @ForceInline
    static boolean opKind(Operator op, int bit) {
        return VectorOperators.opKind(op, bit);
    }
  74 
  75     // Virtualized factories and operators,
  76     // coded with portable definitions.
  77     // These are all @ForceInline in case
  78     // they need to be used performantly.
  79     // The various shape-specific subclasses
  80     // also specialize them by wrapping
  81     // them in a call like this:
  82     //    return (Byte128Vector)
  83     //       super.bOp((Byte128Vector) o);
  84     // The purpose of that is to forcibly inline
  85     // the generic definition from this file
  86     // into a sharply type- and size-specific
  87     // wrapper in the subclass file, so that
  88     // the JIT can specialize the code.
  89     // The code is only inlined and expanded
  90     // if it gets hot.  Think of it as a cheap
  91     // and lazy version of C++ templates.
  92 
    // Virtualized getter

    // Returns the backing array; implemented by each shape-specific subclass.
    /*package-private*/
    abstract short[] vec();

    // Virtualized constructors

    /**
     * Build a vector directly using my own constructor.
     * It is an error if the array is aliased elsewhere.
     */
    /*package-private*/
    abstract ShortVector vectorFactory(short[] vec);

    /**
     * Build a mask directly using my species.
     * It is an error if the array is aliased elsewhere.
     */
    /*package-private*/
    @ForceInline
    final
    AbstractMask<Short> maskFactory(boolean[] bits) {
        return vspecies().maskFactory(bits);
    }

    // Constant loader (takes dummy as vector arg)
    // Lane generator used to build a vector element by element; i is the lane index.
    interface FVOp {
        short apply(int i);
    }
 122 
 123     /*package-private*/
 124     @ForceInline
 125     final
 126     ShortVector vOp(FVOp f) {
 127         short[] res = new short[length()];
 128         for (int i = 0; i < res.length; i++) {
 129             res[i] = f.apply(i);
 130         }
 131         return vectorFactory(res);
 132     }
 133 
 134     @ForceInline
 135     final
 136     ShortVector vOp(VectorMask<Short> m, FVOp f) {
 137         short[] res = new short[length()];
 138         boolean[] mbits = ((AbstractMask<Short>)m).getBits();
 139         for (int i = 0; i < res.length; i++) {
 140             if (mbits[i]) {
 141                 res[i] = f.apply(i);
 142             }
 143         }
 144         return vectorFactory(res);
 145     }
 146 
 147     // Unary operator
 148 
 149     /*package-private*/
 150     interface FUnOp {
 151         short apply(int i, short a);
 152     }
 153 
 154     /*package-private*/
 155     abstract
 156     ShortVector uOp(FUnOp f);
 157     @ForceInline
 158     final
 159     ShortVector uOpTemplate(FUnOp f) {
 160         short[] vec = vec();
 161         short[] res = new short[length()];
 162         for (int i = 0; i < res.length; i++) {
 163             res[i] = f.apply(i, vec[i]);
 164         }
 165         return vectorFactory(res);
 166     }
 167 
 168     /*package-private*/
 169     abstract
 170     ShortVector uOp(VectorMask<Short> m,
 171                              FUnOp f);
 172     @ForceInline
 173     final
 174     ShortVector uOpTemplate(VectorMask<Short> m,
 175                                      FUnOp f) {
 176         short[] vec = vec();
 177         short[] res = new short[length()];
 178         boolean[] mbits = ((AbstractMask<Short>)m).getBits();
 179         for (int i = 0; i < res.length; i++) {
 180             res[i] = mbits[i] ? f.apply(i, vec[i]) : vec[i];
 181         }
 182         return vectorFactory(res);
 183     }
 184 
 185     // Binary operator
 186 
 187     /*package-private*/
 188     interface FBinOp {
 189         short apply(int i, short a, short b);
 190     }
 191 
 192     /*package-private*/
 193     abstract
 194     ShortVector bOp(Vector<Short> o,
 195                              FBinOp f);
 196     @ForceInline
 197     final
 198     ShortVector bOpTemplate(Vector<Short> o,
 199                                      FBinOp f) {
 200         short[] res = new short[length()];
 201         short[] vec1 = this.vec();
 202         short[] vec2 = ((ShortVector)o).vec();
 203         for (int i = 0; i < res.length; i++) {
 204             res[i] = f.apply(i, vec1[i], vec2[i]);
 205         }
 206         return vectorFactory(res);
 207     }
 208 
 209     /*package-private*/
 210     abstract
 211     ShortVector bOp(Vector<Short> o,
 212                              VectorMask<Short> m,
 213                              FBinOp f);
 214     @ForceInline
 215     final
 216     ShortVector bOpTemplate(Vector<Short> o,
 217                                      VectorMask<Short> m,
 218                                      FBinOp f) {
 219         short[] res = new short[length()];
 220         short[] vec1 = this.vec();
 221         short[] vec2 = ((ShortVector)o).vec();
 222         boolean[] mbits = ((AbstractMask<Short>)m).getBits();
 223         for (int i = 0; i < res.length; i++) {
 224             res[i] = mbits[i] ? f.apply(i, vec1[i], vec2[i]) : vec1[i];
 225         }
 226         return vectorFactory(res);
 227     }
 228 
 229     // Ternary operator
 230 
 231     /*package-private*/
 232     interface FTriOp {
 233         short apply(int i, short a, short b, short c);
 234     }
 235 
 236     /*package-private*/
 237     abstract
 238     ShortVector tOp(Vector<Short> o1,
 239                              Vector<Short> o2,
 240                              FTriOp f);
 241     @ForceInline
 242     final
 243     ShortVector tOpTemplate(Vector<Short> o1,
 244                                      Vector<Short> o2,
 245                                      FTriOp f) {
 246         short[] res = new short[length()];
 247         short[] vec1 = this.vec();
 248         short[] vec2 = ((ShortVector)o1).vec();
 249         short[] vec3 = ((ShortVector)o2).vec();
 250         for (int i = 0; i < res.length; i++) {
 251             res[i] = f.apply(i, vec1[i], vec2[i], vec3[i]);
 252         }
 253         return vectorFactory(res);
 254     }
 255 
 256     /*package-private*/
 257     abstract
 258     ShortVector tOp(Vector<Short> o1,
 259                              Vector<Short> o2,
 260                              VectorMask<Short> m,
 261                              FTriOp f);
 262     @ForceInline
 263     final
 264     ShortVector tOpTemplate(Vector<Short> o1,
 265                                      Vector<Short> o2,
 266                                      VectorMask<Short> m,
 267                                      FTriOp f) {
 268         short[] res = new short[length()];
 269         short[] vec1 = this.vec();
 270         short[] vec2 = ((ShortVector)o1).vec();
 271         short[] vec3 = ((ShortVector)o2).vec();
 272         boolean[] mbits = ((AbstractMask<Short>)m).getBits();
 273         for (int i = 0; i < res.length; i++) {
 274             res[i] = mbits[i] ? f.apply(i, vec1[i], vec2[i], vec3[i]) : vec1[i];
 275         }
 276         return vectorFactory(res);
 277     }
 278 
 279     // Reduction operator
 280 
 281     /*package-private*/
 282     abstract
 283     short rOp(short v, FBinOp f);
 284     @ForceInline
 285     final
 286     short rOpTemplate(short v, FBinOp f) {
 287         short[] vec = vec();
 288         for (int i = 0; i < vec.length; i++) {
 289             v = f.apply(i, v, vec[i]);
 290         }
 291         return v;
 292     }
 293 
 294     // Memory reference
 295 
 296     /*package-private*/
 297     interface FLdOp<M> {
 298         short apply(M memory, int offset, int i);
 299     }
 300 
 301     /*package-private*/
 302     @ForceInline
 303     final
 304     <M> ShortVector ldOp(M memory, int offset,
 305                                   FLdOp<M> f) {
 306         //dummy; no vec = vec();
 307         short[] res = new short[length()];
 308         for (int i = 0; i < res.length; i++) {
 309             res[i] = f.apply(memory, offset, i);
 310         }
 311         return vectorFactory(res);
 312     }
 313 
 314     /*package-private*/
 315     @ForceInline
 316     final
 317     <M> ShortVector ldOp(M memory, int offset,
 318                                   VectorMask<Short> m,
 319                                   FLdOp<M> f) {
 320         //short[] vec = vec();
 321         short[] res = new short[length()];
 322         boolean[] mbits = ((AbstractMask<Short>)m).getBits();
 323         for (int i = 0; i < res.length; i++) {
 324             if (mbits[i]) {
 325                 res[i] = f.apply(memory, offset, i);
 326             }
 327         }
 328         return vectorFactory(res);
 329     }
 330 
 331     interface FStOp<M> {
 332         void apply(M memory, int offset, int i, short a);
 333     }
 334 
 335     /*package-private*/
 336     @ForceInline
 337     final
 338     <M> void stOp(M memory, int offset,
 339                   FStOp<M> f) {
 340         short[] vec = vec();
 341         for (int i = 0; i < vec.length; i++) {
 342             f.apply(memory, offset, i, vec[i]);
 343         }
 344     }
 345 
 346     /*package-private*/
 347     @ForceInline
 348     final
 349     <M> void stOp(M memory, int offset,
 350                   VectorMask<Short> m,
 351                   FStOp<M> f) {
 352         short[] vec = vec();
 353         boolean[] mbits = ((AbstractMask<Short>)m).getBits();
 354         for (int i = 0; i < vec.length; i++) {
 355             if (mbits[i]) {
 356                 f.apply(memory, offset, i, vec[i]);
 357             }
 358         }
 359     }
 360 
 361     // Binary test
 362 
 363     /*package-private*/
 364     interface FBinTest {
 365         boolean apply(int cond, int i, short a, short b);
 366     }
 367 
 368     /*package-private*/
 369     @ForceInline
 370     final
 371     AbstractMask<Short> bTest(int cond,
 372                                   Vector<Short> o,
 373                                   FBinTest f) {
 374         short[] vec1 = vec();
 375         short[] vec2 = ((ShortVector)o).vec();
 376         boolean[] bits = new boolean[length()];
 377         for (int i = 0; i < length(); i++){
 378             bits[i] = f.apply(cond, i, vec1[i], vec2[i]);
 379         }
 380         return maskFactory(bits);
 381     }
 382 
 383     /*package-private*/
 384     @ForceInline
 385     static short rotateLeft(short a, int n) {
 386         return (short)(((((short)a) & Short.toUnsignedInt((short)-1)) << (n & Short.SIZE-1)) | ((((short)a) & Short.toUnsignedInt((short)-1)) >>> (Short.SIZE - (n & Short.SIZE-1))));
 387     }
 388 
 389     /*package-private*/
 390     @ForceInline
 391     static short rotateRight(short a, int n) {
 392         return (short)(((((short)a) & Short.toUnsignedInt((short)-1)) >>> (n & Short.SIZE-1)) | ((((short)a) & Short.toUnsignedInt((short)-1)) << (Short.SIZE - (n & Short.SIZE-1))));
 393     }
 394 
    /*package-private*/
    // Species accessor, sharpened to the short-specific species type.
    @Override
    abstract ShortSpecies vspecies();

    /*package-private*/
    // Widens a lane value to the 64-bit container used by generic code
    // (sign-extending, via the implicit short-to-long conversion).
    @ForceInline
    static long toBits(short e) {
        return  e;
    }

    /*package-private*/
    // Narrows a 64-bit container value back to a lane value.
    @ForceInline
    static short fromBits(long bits) {
        return ((short)bits);
    }
 410 
 411     // Static factories (other than memory operations)
 412 
 413     // Note: A surprising behavior in javadoc
 414     // sometimes makes a lone /** {@inheritDoc} */
 415     // comment drop the method altogether,
    // apparently if the method mentions a
    // parameter or return type of Vector<Short>
 418     // instead of Vector<E> as originally specified.
 419     // Adding an empty HTML fragment appears to
 420     // nudge javadoc into providing the desired
 421     // inherited documentation.  We use the HTML
 422     // comment <!--workaround--> for this.
 423 
 424     /**
 425      * Returns a vector of the given species
 426      * where all lane elements are set to
 427      * zero, the default primitive value.
 428      *
 429      * @param species species of the desired zero vector
 430      * @return a zero vector
 431      */
 432     @ForceInline
 433     public static ShortVector zero(VectorSpecies<Short> species) {
 434         ShortSpecies vsp = (ShortSpecies) species;
 435         return VectorSupport.broadcastCoerced(vsp.vectorType(), short.class, species.length(),
 436                                 0, vsp,
 437                                 ((bits_, s_) -> s_.rvOp(i -> bits_)));
 438     }
 439 
    /**
     * Returns a vector of the same species as this one
     * where all lane elements are set to
     * the primitive value {@code e}.
     *
     * The contents of the current vector are discarded;
     * only the species is relevant to this operation.
     *
     * <p> This method returns the value of this expression:
     * {@code ShortVector.broadcast(this.species(), e)}.
     *
     * @apiNote
     * Unlike the similar method named {@code broadcast()}
     * in the supertype {@code Vector}, this method does not
     * need to validate its argument, and cannot throw
     * {@code IllegalArgumentException}.  This method is
     * therefore preferable to the supertype method.
     *
     * @param e the value to broadcast
     * @return a vector where all lane elements are set to
     *         the primitive value {@code e}
     * @see #broadcast(VectorSpecies,long)
     * @see Vector#broadcast(long)
     * @see VectorSpecies#broadcast(long)
     */
    public abstract ShortVector broadcast(short e);

    /**
     * Returns a vector of the given species
     * where all lane elements are set to
     * the primitive value {@code e}.
     *
     * @param species species of the desired vector
     * @param e the value to broadcast
     * @return a vector where all lane elements are set to
     *         the primitive value {@code e}
     * @see #broadcast(long)
     * @see Vector#broadcast(long)
     * @see VectorSpecies#broadcast(long)
     */
    @ForceInline
    public static ShortVector broadcast(VectorSpecies<Short> species, short e) {
        ShortSpecies vsp = (ShortSpecies) species;
        return vsp.broadcast(e);
    }

    /*package-private*/
    // Shared implementation behind the abstract broadcast(short) above;
    // shape-specific subclasses wrap this for sharp typing.
    @ForceInline
    final ShortVector broadcastTemplate(short e) {
        ShortSpecies vsp = vspecies();
        return vsp.broadcast(e);
    }

    /**
     * {@inheritDoc} <!--workaround-->
     * @apiNote
     * When working with vector subtypes like {@code ShortVector},
     * {@linkplain #broadcast(short) the more strongly typed method}
     * is typically selected.  It can be explicitly selected
     * using a cast: {@code v.broadcast((short)e)}.
     * The two expressions will produce numerically identical results.
     */
    @Override
    public abstract ShortVector broadcast(long e);

    /**
     * Returns a vector of the given species
     * where all lane elements are set to
     * the primitive value {@code e}.
     *
     * The {@code long} value must be accurately representable
     * by the {@code ETYPE} of the vector species, so that
     * {@code e==(long)(ETYPE)e}.
     *
     * @param species species of the desired vector
     * @param e the value to broadcast
     * @return a vector where all lane elements are set to
     *         the primitive value {@code e}
     * @throws IllegalArgumentException
     *         if the given {@code long} value cannot
     *         be represented by the vector's {@code ETYPE}
     * @see #broadcast(VectorSpecies,short)
     * @see VectorSpecies#checkValue(long)
     */
    @ForceInline
    public static ShortVector broadcast(VectorSpecies<Short> species, long e) {
        ShortSpecies vsp = (ShortSpecies) species;
        return vsp.broadcast(e);
    }

    /*package-private*/
    // Shared implementation behind the abstract broadcast(long) above;
    // the species performs the representability check.
    @ForceInline
    final ShortVector broadcastTemplate(long e) {
        return vspecies().broadcast(e);
    }
 535 
    // Unary lanewise support

    /**
     * {@inheritDoc} <!--workaround-->
     */
    public abstract
    ShortVector lanewise(VectorOperators.Unary op);

    @ForceInline
    final
    ShortVector lanewiseTemplate(VectorOperators.Unary op) {
        // Rewrite "special" operators into supported primitives before
        // dispatching to the intrinsic.
        if (opKind(op, VO_SPECIAL)) {
            if (op == ZOMO) {
                // ZOMO: zero lanes stay 0, nonzero lanes become -1.
                return blend(broadcast(-1), compare(NE, 0));
            }
            if (op == NOT) {
                // Bitwise NOT is (-1) XOR this.
                return broadcast(-1).lanewiseTemplate(XOR, this);
            } else if (op == NEG) {
                // FIXME: Support this in the JIT.
                return broadcast(0).lanewiseTemplate(SUB, this);
            }
        }
        int opc = opCode(op);
        // Intrinsic dispatch; the lambda supplies the portable scalar
        // fallback, cached per operator in UN_IMPL.
        return VectorSupport.unaryOp(
            opc, getClass(), short.class, length(),
            this,
            UN_IMPL.find(op, opc, (opc_) -> {
              switch (opc_) {
                case VECTOR_OP_NEG: return v0 ->
                        v0.uOp((i, a) -> (short) -a);
                case VECTOR_OP_ABS: return v0 ->
                        v0.uOp((i, a) -> (short) Math.abs(a));
                default: return null;
              }}));
    }
    // Caches the scalar fallback implementation for each unary operator.
    private static final
    ImplCache<Unary,UnaryOperator<ShortVector>> UN_IMPL
        = new ImplCache<>(Unary.class, ShortVector.class);

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @ForceInline
    public final
    ShortVector lanewise(VectorOperators.Unary op,
                                  VectorMask<Short> m) {
        // Apply unmasked, then blend: unset lanes keep this vector's values.
        return blend(lanewise(op), m);
    }
 584 
    // Binary lanewise support

    /**
     * {@inheritDoc} <!--workaround-->
     * @see #lanewise(VectorOperators.Binary,short)
     * @see #lanewise(VectorOperators.Binary,short,VectorMask)
     */
    @Override
    public abstract
    ShortVector lanewise(VectorOperators.Binary op,
                                  Vector<Short> v);
    @ForceInline
    final
    ShortVector lanewiseTemplate(VectorOperators.Binary op,
                                          Vector<Short> v) {
        ShortVector that = (ShortVector) v;
        that.check(this);
        // Rewrite "special" and shift operators before the intrinsic call.
        // NOTE: the order of these rewrites matters; e.g. FIRST_NONZERO
        // becomes OR_UNCHECKED and must not fall into the shift handling.
        if (opKind(op, VO_SPECIAL  | VO_SHIFT)) {
            if (op == FIRST_NONZERO) {
                // FIXME: Support this in the JIT.
                // FIRST_NONZERO == OR after zeroing `that` wherever
                // `this` is nonzero.
                VectorMask<Short> thisNZ
                    = this.viewAsIntegralLanes().compare(NE, (short) 0);
                that = that.blend((short) 0, thisNZ.cast(vspecies()));
                op = OR_UNCHECKED;
            }
            if (opKind(op, VO_SHIFT)) {
                // As per shift specification for Java, mask the shift count.
                // This allows the JIT to ignore some ISA details.
                that = that.lanewise(AND, SHIFT_MASK);
            }
            if (op == AND_NOT) {
                // FIXME: Support this in the JIT.
                that = that.lanewise(NOT);
                op = AND;
            } else if (op == DIV) {
                // Fail eagerly on any zero divisor, like scalar division.
                VectorMask<Short> eqz = that.eq((short)0);
                if (eqz.anyTrue()) {
                    throw that.divZeroException();
                }
            }
        }
        int opc = opCode(op);
        // Intrinsic dispatch; the lambda supplies the portable scalar
        // fallback, cached per operator in BIN_IMPL.
        return VectorSupport.binaryOp(
            opc, getClass(), short.class, length(),
            this, that,
            BIN_IMPL.find(op, opc, (opc_) -> {
              switch (opc_) {
                case VECTOR_OP_ADD: return (v0, v1) ->
                        v0.bOp(v1, (i, a, b) -> (short)(a + b));
                case VECTOR_OP_SUB: return (v0, v1) ->
                        v0.bOp(v1, (i, a, b) -> (short)(a - b));
                case VECTOR_OP_MUL: return (v0, v1) ->
                        v0.bOp(v1, (i, a, b) -> (short)(a * b));
                case VECTOR_OP_DIV: return (v0, v1) ->
                        v0.bOp(v1, (i, a, b) -> (short)(a / b));
                case VECTOR_OP_MAX: return (v0, v1) ->
                        v0.bOp(v1, (i, a, b) -> (short)Math.max(a, b));
                case VECTOR_OP_MIN: return (v0, v1) ->
                        v0.bOp(v1, (i, a, b) -> (short)Math.min(a, b));
                case VECTOR_OP_AND: return (v0, v1) ->
                        v0.bOp(v1, (i, a, b) -> (short)(a & b));
                case VECTOR_OP_OR: return (v0, v1) ->
                        v0.bOp(v1, (i, a, b) -> (short)(a | b));
                case VECTOR_OP_XOR: return (v0, v1) ->
                        v0.bOp(v1, (i, a, b) -> (short)(a ^ b));
                case VECTOR_OP_LSHIFT: return (v0, v1) ->
                        v0.bOp(v1, (i, a, n) -> (short)(a << n));
                case VECTOR_OP_RSHIFT: return (v0, v1) ->
                        v0.bOp(v1, (i, a, n) -> (short)(a >> n));
                case VECTOR_OP_URSHIFT: return (v0, v1) ->
                        v0.bOp(v1, (i, a, n) -> (short)((a & LSHR_SETUP_MASK) >>> n));
                case VECTOR_OP_LROTATE: return (v0, v1) ->
                        v0.bOp(v1, (i, a, n) -> rotateLeft(a, (int)n));
                case VECTOR_OP_RROTATE: return (v0, v1) ->
                        v0.bOp(v1, (i, a, n) -> rotateRight(a, (int)n));
                default: return null;
                }}));
    }
    // Caches the scalar fallback implementation for each binary operator.
    private static final
    ImplCache<Binary,BinaryOperator<ShortVector>> BIN_IMPL
        = new ImplCache<>(Binary.class, ShortVector.class);
 666 
    /**
     * {@inheritDoc} <!--workaround-->
     * @see #lanewise(VectorOperators.Binary,short,VectorMask)
     */
    @ForceInline
    public final
    ShortVector lanewise(VectorOperators.Binary op,
                                  Vector<Short> v,
                                  VectorMask<Short> m) {
        ShortVector that = (ShortVector) v;
        if (op == DIV) {
            // Throw only if a zero divisor appears in a *set* lane.
            VectorMask<Short> eqz = that.eq((short)0);
            if (eqz.and(m).anyTrue()) {
                throw that.divZeroException();
            }
            // suppress div/0 exceptions in unset lanes
            // (NOT turns the zero divisors in unset lanes into nonzero
            // values; those lanes are then discarded by the blend)
            that = that.lanewise(NOT, eqz);
            return blend(lanewise(DIV, that), m);
        }
        return blend(lanewise(op, v), m);
    }
    // FIXME: Maybe all of the public final methods in this file (the
    // simple ones that just call lanewise) should be pushed down to
    // the X-VectorBits template.  They can't optimize properly at
    // this level, and must rely on inlining.  Does it work?
    // (If it works, of course keep the code here.)
 693 
    /**
     * Combines the lane values of this vector
     * with the value of a broadcast scalar.
     *
     * This is a lane-wise binary operation which applies
     * the selected operation to each lane.
     * The return value will be equal to this expression:
     * {@code this.lanewise(op, this.broadcast(e))}.
     *
     * @param op the operation used to process lane values
     * @param e the input scalar
     * @return the result of applying the operation lane-wise
     *         to the two input vectors
     * @throws UnsupportedOperationException if this vector does
     *         not support the requested operation
     * @see #lanewise(VectorOperators.Binary,Vector)
     * @see #lanewise(VectorOperators.Binary,short,VectorMask)
     */
    @ForceInline
    public final
    ShortVector lanewise(VectorOperators.Binary op,
                                  short e) {
        // NOTE(review): for a short argument the test (short)(int)e == e is
        // always true; it is an artifact of the shared template, where it
        // matters for lane types wider than int.
        if (opKind(op, VO_SHIFT) && (short)(int)e == e) {
            return lanewiseShift(op, (int) e);
        }
        if (op == AND_NOT) {
            // Rewrite AND_NOT(e) as AND(~e).
            op = AND; e = (short) ~e;
        }
        return lanewise(op, broadcast(e));
    }

    /**
     * Combines the lane values of this vector
     * with the value of a broadcast scalar,
     * with selection of lane elements controlled by a mask.
     *
     * This is a masked lane-wise binary operation which applies
     * the selected operation to each lane.
     * The return value will be equal to this expression:
     * {@code this.lanewise(op, this.broadcast(e), m)}.
     *
     * @param op the operation used to process lane values
     * @param e the input scalar
     * @param m the mask controlling lane selection
     * @return the result of applying the operation lane-wise
     *         to the input vector and the scalar
     * @throws UnsupportedOperationException if this vector does
     *         not support the requested operation
     * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
     * @see #lanewise(VectorOperators.Binary,short)
     */
    @ForceInline
    public final
    ShortVector lanewise(VectorOperators.Binary op,
                                  short e,
                                  VectorMask<Short> m) {
        // Apply unmasked, then blend: unset lanes keep this vector's values.
        return blend(lanewise(op, e), m);
    }
 752 
    /**
     * {@inheritDoc} <!--workaround-->
     * @apiNote
     * When working with vector subtypes like {@code ShortVector},
     * {@linkplain #lanewise(VectorOperators.Binary,short)
     * the more strongly typed method}
     * is typically selected.  It can be explicitly selected
     * using a cast: {@code v.lanewise(op,(short)e)}.
     * The two expressions will produce numerically identical results.
     */
    @ForceInline
    public final
    ShortVector lanewise(VectorOperators.Binary op,
                                  long e) {
        short e1 = (short) e;
        // Reject long values that do not fit a short lane, except that
        // shift counts only need to fit an int (they are masked anyway).
        if ((long)e1 != e
            // allow shift ops to clip down their int parameters
            && !(opKind(op, VO_SHIFT) && (int)e1 == e)
            ) {
            vspecies().checkValue(e);  // for exception
        }
        return lanewise(op, e1);
    }

    /**
     * {@inheritDoc} <!--workaround-->
     * @apiNote
     * When working with vector subtypes like {@code ShortVector},
     * {@linkplain #lanewise(VectorOperators.Binary,short,VectorMask)
     * the more strongly typed method}
     * is typically selected.  It can be explicitly selected
     * using a cast: {@code v.lanewise(op,(short)e,m)}.
     * The two expressions will produce numerically identical results.
     */
    @ForceInline
    public final
    ShortVector lanewise(VectorOperators.Binary op,
                                  long e, VectorMask<Short> m) {
        // Apply unmasked, then blend: unset lanes keep this vector's values.
        return blend(lanewise(op, e), m);
    }
 793 
 794     /*package-private*/
 795     abstract ShortVector
 796     lanewiseShift(VectorOperators.Binary op, int e);
 797 
    /*package-private*/
    // Shared implementation for shift-by-scalar operations.  Masks the
    // count, then hands off to the VectorSupport intrinsic; the lambdas
    // below are the scalar fallbacks used when the JIT does not
    // intrinsify the call.
    @ForceInline
    final ShortVector
    lanewiseShiftTemplate(VectorOperators.Binary op, int e) {
        // Special handling for these.  FIXME: Refactor?
        assert(opKind(op, VO_SHIFT));
        // As per shift specification for Java, mask the shift count.
        e &= SHIFT_MASK;
        int opc = opCode(op);
        return VectorSupport.broadcastInt(
            opc, getClass(), short.class, length(),
            this, e,
            BIN_INT_IMPL.find(op, opc, (opc_) -> {
              switch (opc_) {
                case VECTOR_OP_LSHIFT: return (v, n) ->
                        v.uOp((i, a) -> (short)(a << n));
                case VECTOR_OP_RSHIFT: return (v, n) ->
                        v.uOp((i, a) -> (short)(a >> n));
                // For >>>, first mask to 16 bits so the promoted int is
                // not sign-extended; see LSHR_SETUP_MASK below.
                case VECTOR_OP_URSHIFT: return (v, n) ->
                        v.uOp((i, a) -> (short)((a & LSHR_SETUP_MASK) >>> n));
                case VECTOR_OP_LROTATE: return (v, n) ->
                        v.uOp((i, a) -> rotateLeft(a, (int)n));
                case VECTOR_OP_RROTATE: return (v, n) ->
                        v.uOp((i, a) -> rotateRight(a, (int)n));
                default: return null;
                }}));
    }
    // Cache of scalar fallback implementations for shift-by-int
    // operators, keyed by operator token and opcode.
    private static final
    ImplCache<Binary,VectorBroadcastIntOp<ShortVector>> BIN_INT_IMPL
        = new ImplCache<>(Binary.class, ShortVector.class);

    // As per shift specification for Java, mask the shift count.
    // We mask 0X3F (long), 0X1F (int), 0x0F (short), 0x7 (byte).
    // The latter two maskings go beyond the JLS, but seem reasonable
    // since our lane types are first-class types, not just dressed
    // up ints.
    private static final int SHIFT_MASK = (Short.SIZE - 1);
    // Also simulate >>> on sub-word variables with a mask.
    private static final int LSHR_SETUP_MASK = ((1 << Short.SIZE) - 1);
 837 
 838     // Ternary lanewise support
 839 
 840     // Ternary operators come in eight variations:
 841     //   lanewise(op, [broadcast(e1)|v1], [broadcast(e2)|v2])
 842     //   lanewise(op, [broadcast(e1)|v1], [broadcast(e2)|v2], mask)
 843 
 844     // It is annoying to support all of these variations of masking
 845     // and broadcast, but it would be more surprising not to continue
 846     // the obvious pattern started by unary and binary.
 847 
 848    /**
 849      * {@inheritDoc} <!--workaround-->
 850      * @see #lanewise(VectorOperators.Ternary,short,short,VectorMask)
 851      * @see #lanewise(VectorOperators.Ternary,Vector,short,VectorMask)
 852      * @see #lanewise(VectorOperators.Ternary,short,Vector,VectorMask)
 853      * @see #lanewise(VectorOperators.Ternary,short,short)
 854      * @see #lanewise(VectorOperators.Ternary,Vector,short)
 855      * @see #lanewise(VectorOperators.Ternary,short,Vector)
 856      */
 857     @Override
 858     public abstract
 859     ShortVector lanewise(VectorOperators.Ternary op,
 860                                                   Vector<Short> v1,
 861                                                   Vector<Short> v2);
    // Shared implementation for the all-vector ternary overload.
    // Casts and shape-checks the operands, then dispatches to the
    // VectorSupport intrinsic (TERN_IMPL provides scalar fallbacks).
    @ForceInline
    final
    ShortVector lanewiseTemplate(VectorOperators.Ternary op,
                                          Vector<Short> v1,
                                          Vector<Short> v2) {
        ShortVector that = (ShortVector) v1;
        ShortVector tother = (ShortVector) v2;
        // It's a word: https://www.dictionary.com/browse/tother
        // See also Chapter 11 of Dickens, Our Mutual Friend:
        // "Totherest Governor," replied Mr Riderhood...
        that.check(this);
        tother.check(this);
        if (op == BITWISE_BLEND) {
            // FIXME: Support this in the JIT.
            // Uses the identity a ^ ((a ^ b) & c): selects bits of
            // v1 where v2's bit is set, bits of this where it is clear.
            that = this.lanewise(XOR, that).lanewise(AND, tother);
            return this.lanewise(XOR, that);
        }
        int opc = opCode(op);
        return VectorSupport.ternaryOp(
            opc, getClass(), short.class, length(),
            this, that, tother,
            TERN_IMPL.find(op, opc, (opc_) -> {
              switch (opc_) {
                default: return null;
                }}));
    }
    // Cache of scalar fallback implementations for ternary operators.
    private static final
    ImplCache<Ternary,TernaryOperation<ShortVector>> TERN_IMPL
        = new ImplCache<>(Ternary.class, ShortVector.class);
 891 
 892     /**
 893      * {@inheritDoc} <!--workaround-->
 894      * @see #lanewise(VectorOperators.Ternary,short,short,VectorMask)
 895      * @see #lanewise(VectorOperators.Ternary,Vector,short,VectorMask)
 896      * @see #lanewise(VectorOperators.Ternary,short,Vector,VectorMask)
 897      */
 898     @ForceInline
 899     public final
 900     ShortVector lanewise(VectorOperators.Ternary op,
 901                                   Vector<Short> v1,
 902                                   Vector<Short> v2,
 903                                   VectorMask<Short> m) {
 904         return blend(lanewise(op, v1, v2), m);
 905     }
 906 
 907     /**
 908      * Combines the lane values of this vector
 909      * with the values of two broadcast scalars.
 910      *
 911      * This is a lane-wise ternary operation which applies
 912      * the selected operation to each lane.
 913      * The return value will be equal to this expression:
 914      * {@code this.lanewise(op, this.broadcast(e1), this.broadcast(e2))}.
 915      *
 916      * @param op the operation used to combine lane values
 917      * @param e1 the first input scalar
 918      * @param e2 the second input scalar
 919      * @return the result of applying the operation lane-wise
 920      *         to the input vector and the scalars
 921      * @throws UnsupportedOperationException if this vector does
 922      *         not support the requested operation
 923      * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
 924      * @see #lanewise(VectorOperators.Ternary,short,short,VectorMask)
 925      */
 926     @ForceInline
 927     public final
 928     ShortVector lanewise(VectorOperators.Ternary op, //(op,e1,e2)
 929                                   short e1,
 930                                   short e2) {
 931         return lanewise(op, broadcast(e1), broadcast(e2));
 932     }
 933 
 934     /**
 935      * Combines the lane values of this vector
 936      * with the values of two broadcast scalars,
 937      * with selection of lane elements controlled by a mask.
 938      *
 939      * This is a masked lane-wise ternary operation which applies
 940      * the selected operation to each lane.
 941      * The return value will be equal to this expression:
 942      * {@code this.lanewise(op, this.broadcast(e1), this.broadcast(e2), m)}.
 943      *
 944      * @param op the operation used to combine lane values
 945      * @param e1 the first input scalar
 946      * @param e2 the second input scalar
 947      * @param m the mask controlling lane selection
 948      * @return the result of applying the operation lane-wise
 949      *         to the input vector and the scalars
 950      * @throws UnsupportedOperationException if this vector does
 951      *         not support the requested operation
 952      * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
 953      * @see #lanewise(VectorOperators.Ternary,short,short)
 954      */
 955     @ForceInline
 956     public final
 957     ShortVector lanewise(VectorOperators.Ternary op, //(op,e1,e2,m)
 958                                   short e1,
 959                                   short e2,
 960                                   VectorMask<Short> m) {
 961         return blend(lanewise(op, e1, e2), m);
 962     }
 963 
 964     /**
 965      * Combines the lane values of this vector
 966      * with the values of another vector and a broadcast scalar.
 967      *
 968      * This is a lane-wise ternary operation which applies
 969      * the selected operation to each lane.
 970      * The return value will be equal to this expression:
 971      * {@code this.lanewise(op, v1, this.broadcast(e2))}.
 972      *
 973      * @param op the operation used to combine lane values
 974      * @param v1 the other input vector
 975      * @param e2 the input scalar
 976      * @return the result of applying the operation lane-wise
 977      *         to the input vectors and the scalar
 978      * @throws UnsupportedOperationException if this vector does
 979      *         not support the requested operation
 980      * @see #lanewise(VectorOperators.Ternary,short,short)
 981      * @see #lanewise(VectorOperators.Ternary,Vector,short,VectorMask)
 982      */
 983     @ForceInline
 984     public final
 985     ShortVector lanewise(VectorOperators.Ternary op, //(op,v1,e2)
 986                                   Vector<Short> v1,
 987                                   short e2) {
 988         return lanewise(op, v1, broadcast(e2));
 989     }
 990 
 991     /**
 992      * Combines the lane values of this vector
 993      * with the values of another vector and a broadcast scalar,
 994      * with selection of lane elements controlled by a mask.
 995      *
 996      * This is a masked lane-wise ternary operation which applies
 997      * the selected operation to each lane.
 998      * The return value will be equal to this expression:
 999      * {@code this.lanewise(op, v1, this.broadcast(e2), m)}.
1000      *
1001      * @param op the operation used to combine lane values
1002      * @param v1 the other input vector
1003      * @param e2 the input scalar
1004      * @param m the mask controlling lane selection
1005      * @return the result of applying the operation lane-wise
1006      *         to the input vectors and the scalar
1007      * @throws UnsupportedOperationException if this vector does
1008      *         not support the requested operation
1009      * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
1010      * @see #lanewise(VectorOperators.Ternary,short,short,VectorMask)
1011      * @see #lanewise(VectorOperators.Ternary,Vector,short)
1012      */
1013     @ForceInline
1014     public final
1015     ShortVector lanewise(VectorOperators.Ternary op, //(op,v1,e2,m)
1016                                   Vector<Short> v1,
1017                                   short e2,
1018                                   VectorMask<Short> m) {
1019         return blend(lanewise(op, v1, e2), m);
1020     }
1021 
1022     /**
1023      * Combines the lane values of this vector
1024      * with the values of another vector and a broadcast scalar.
1025      *
1026      * This is a lane-wise ternary operation which applies
1027      * the selected operation to each lane.
1028      * The return value will be equal to this expression:
1029      * {@code this.lanewise(op, this.broadcast(e1), v2)}.
1030      *
1031      * @param op the operation used to combine lane values
1032      * @param e1 the input scalar
1033      * @param v2 the other input vector
1034      * @return the result of applying the operation lane-wise
1035      *         to the input vectors and the scalar
1036      * @throws UnsupportedOperationException if this vector does
1037      *         not support the requested operation
1038      * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
1039      * @see #lanewise(VectorOperators.Ternary,short,Vector,VectorMask)
1040      */
1041     @ForceInline
1042     public final
1043     ShortVector lanewise(VectorOperators.Ternary op, //(op,e1,v2)
1044                                   short e1,
1045                                   Vector<Short> v2) {
1046         return lanewise(op, broadcast(e1), v2);
1047     }
1048 
1049     /**
1050      * Combines the lane values of this vector
1051      * with the values of another vector and a broadcast scalar,
1052      * with selection of lane elements controlled by a mask.
1053      *
1054      * This is a masked lane-wise ternary operation which applies
1055      * the selected operation to each lane.
1056      * The return value will be equal to this expression:
1057      * {@code this.lanewise(op, this.broadcast(e1), v2, m)}.
1058      *
1059      * @param op the operation used to combine lane values
1060      * @param e1 the input scalar
1061      * @param v2 the other input vector
1062      * @param m the mask controlling lane selection
1063      * @return the result of applying the operation lane-wise
1064      *         to the input vectors and the scalar
1065      * @throws UnsupportedOperationException if this vector does
1066      *         not support the requested operation
1067      * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
1068      * @see #lanewise(VectorOperators.Ternary,short,Vector)
1069      */
1070     @ForceInline
1071     public final
1072     ShortVector lanewise(VectorOperators.Ternary op, //(op,e1,v2,m)
1073                                   short e1,
1074                                   Vector<Short> v2,
1075                                   VectorMask<Short> m) {
1076         return blend(lanewise(op, e1, v2), m);
1077     }
1078 
1079     // (Thus endeth the Great and Mighty Ternary Ogdoad.)
1080     // https://en.wikipedia.org/wiki/Ogdoad
1081 
1082     /// FULL-SERVICE BINARY METHODS: ADD, SUB, MUL, DIV
1083     //
1084     // These include masked and non-masked versions.
1085     // This subclass adds broadcast (masked or not).
1086 
1087     /**
1088      * {@inheritDoc} <!--workaround-->
1089      * @see #add(short)
1090      */
1091     @Override
1092     @ForceInline
1093     public final ShortVector add(Vector<Short> v) {
1094         return lanewise(ADD, v);
1095     }
1096 
1097     /**
1098      * Adds this vector to the broadcast of an input scalar.
1099      *
1100      * This is a lane-wise binary operation which applies
1101      * the primitive addition operation ({@code +}) to each lane.
1102      *
1103      * This method is also equivalent to the expression
1104      * {@link #lanewise(VectorOperators.Binary,short)
1105      *    lanewise}{@code (}{@link VectorOperators#ADD
1106      *    ADD}{@code , e)}.
1107      *
1108      * @param e the input scalar
1109      * @return the result of adding each lane of this vector to the scalar
1110      * @see #add(Vector)
1111      * @see #broadcast(short)
1112      * @see #add(short,VectorMask)
1113      * @see VectorOperators#ADD
1114      * @see #lanewise(VectorOperators.Binary,Vector)
1115      * @see #lanewise(VectorOperators.Binary,short)
1116      */
1117     @ForceInline
1118     public final
1119     ShortVector add(short e) {
1120         return lanewise(ADD, e);
1121     }
1122 
1123     /**
1124      * {@inheritDoc} <!--workaround-->
1125      * @see #add(short,VectorMask)
1126      */
1127     @Override
1128     @ForceInline
1129     public final ShortVector add(Vector<Short> v,
1130                                           VectorMask<Short> m) {
1131         return lanewise(ADD, v, m);
1132     }
1133 
1134     /**
1135      * Adds this vector to the broadcast of an input scalar,
1136      * selecting lane elements controlled by a mask.
1137      *
1138      * This is a masked lane-wise binary operation which applies
1139      * the primitive addition operation ({@code +}) to each lane.
1140      *
1141      * This method is also equivalent to the expression
1142      * {@link #lanewise(VectorOperators.Binary,short,VectorMask)
1143      *    lanewise}{@code (}{@link VectorOperators#ADD
1144      *    ADD}{@code , s, m)}.
1145      *
1146      * @param e the input scalar
1147      * @param m the mask controlling lane selection
1148      * @return the result of adding each lane of this vector to the scalar
1149      * @see #add(Vector,VectorMask)
1150      * @see #broadcast(short)
1151      * @see #add(short)
1152      * @see VectorOperators#ADD
1153      * @see #lanewise(VectorOperators.Binary,Vector)
1154      * @see #lanewise(VectorOperators.Binary,short)
1155      */
1156     @ForceInline
1157     public final ShortVector add(short e,
1158                                           VectorMask<Short> m) {
1159         return lanewise(ADD, e, m);
1160     }
1161 
1162     /**
1163      * {@inheritDoc} <!--workaround-->
1164      * @see #sub(short)
1165      */
1166     @Override
1167     @ForceInline
1168     public final ShortVector sub(Vector<Short> v) {
1169         return lanewise(SUB, v);
1170     }
1171 
1172     /**
1173      * Subtracts an input scalar from this vector.
1174      *
1175      * This is a masked lane-wise binary operation which applies
1176      * the primitive subtraction operation ({@code -}) to each lane.
1177      *
1178      * This method is also equivalent to the expression
1179      * {@link #lanewise(VectorOperators.Binary,short)
1180      *    lanewise}{@code (}{@link VectorOperators#SUB
1181      *    SUB}{@code , e)}.
1182      *
1183      * @param e the input scalar
1184      * @return the result of subtracting the scalar from each lane of this vector
1185      * @see #sub(Vector)
1186      * @see #broadcast(short)
1187      * @see #sub(short,VectorMask)
1188      * @see VectorOperators#SUB
1189      * @see #lanewise(VectorOperators.Binary,Vector)
1190      * @see #lanewise(VectorOperators.Binary,short)
1191      */
1192     @ForceInline
1193     public final ShortVector sub(short e) {
1194         return lanewise(SUB, e);
1195     }
1196 
1197     /**
1198      * {@inheritDoc} <!--workaround-->
1199      * @see #sub(short,VectorMask)
1200      */
1201     @Override
1202     @ForceInline
1203     public final ShortVector sub(Vector<Short> v,
1204                                           VectorMask<Short> m) {
1205         return lanewise(SUB, v, m);
1206     }
1207 
1208     /**
1209      * Subtracts an input scalar from this vector
1210      * under the control of a mask.
1211      *
1212      * This is a masked lane-wise binary operation which applies
1213      * the primitive subtraction operation ({@code -}) to each lane.
1214      *
1215      * This method is also equivalent to the expression
1216      * {@link #lanewise(VectorOperators.Binary,short,VectorMask)
1217      *    lanewise}{@code (}{@link VectorOperators#SUB
1218      *    SUB}{@code , s, m)}.
1219      *
1220      * @param e the input scalar
1221      * @param m the mask controlling lane selection
1222      * @return the result of subtracting the scalar from each lane of this vector
1223      * @see #sub(Vector,VectorMask)
1224      * @see #broadcast(short)
1225      * @see #sub(short)
1226      * @see VectorOperators#SUB
1227      * @see #lanewise(VectorOperators.Binary,Vector)
1228      * @see #lanewise(VectorOperators.Binary,short)
1229      */
1230     @ForceInline
1231     public final ShortVector sub(short e,
1232                                           VectorMask<Short> m) {
1233         return lanewise(SUB, e, m);
1234     }
1235 
1236     /**
1237      * {@inheritDoc} <!--workaround-->
1238      * @see #mul(short)
1239      */
1240     @Override
1241     @ForceInline
1242     public final ShortVector mul(Vector<Short> v) {
1243         return lanewise(MUL, v);
1244     }
1245 
1246     /**
1247      * Multiplies this vector by the broadcast of an input scalar.
1248      *
1249      * This is a lane-wise binary operation which applies
1250      * the primitive multiplication operation ({@code *}) to each lane.
1251      *
1252      * This method is also equivalent to the expression
1253      * {@link #lanewise(VectorOperators.Binary,short)
1254      *    lanewise}{@code (}{@link VectorOperators#MUL
1255      *    MUL}{@code , e)}.
1256      *
1257      * @param e the input scalar
1258      * @return the result of multiplying this vector by the given scalar
1259      * @see #mul(Vector)
1260      * @see #broadcast(short)
1261      * @see #mul(short,VectorMask)
1262      * @see VectorOperators#MUL
1263      * @see #lanewise(VectorOperators.Binary,Vector)
1264      * @see #lanewise(VectorOperators.Binary,short)
1265      */
1266     @ForceInline
1267     public final ShortVector mul(short e) {
1268         return lanewise(MUL, e);
1269     }
1270 
1271     /**
1272      * {@inheritDoc} <!--workaround-->
1273      * @see #mul(short,VectorMask)
1274      */
1275     @Override
1276     @ForceInline
1277     public final ShortVector mul(Vector<Short> v,
1278                                           VectorMask<Short> m) {
1279         return lanewise(MUL, v, m);
1280     }
1281 
1282     /**
1283      * Multiplies this vector by the broadcast of an input scalar,
1284      * selecting lane elements controlled by a mask.
1285      *
1286      * This is a masked lane-wise binary operation which applies
1287      * the primitive multiplication operation ({@code *}) to each lane.
1288      *
1289      * This method is also equivalent to the expression
1290      * {@link #lanewise(VectorOperators.Binary,short,VectorMask)
1291      *    lanewise}{@code (}{@link VectorOperators#MUL
1292      *    MUL}{@code , s, m)}.
1293      *
1294      * @param e the input scalar
1295      * @param m the mask controlling lane selection
1296      * @return the result of muling each lane of this vector to the scalar
1297      * @see #mul(Vector,VectorMask)
1298      * @see #broadcast(short)
1299      * @see #mul(short)
1300      * @see VectorOperators#MUL
1301      * @see #lanewise(VectorOperators.Binary,Vector)
1302      * @see #lanewise(VectorOperators.Binary,short)
1303      */
1304     @ForceInline
1305     public final ShortVector mul(short e,
1306                                           VectorMask<Short> m) {
1307         return lanewise(MUL, e, m);
1308     }
1309 
1310     /**
1311      * {@inheritDoc} <!--workaround-->
1312      * @apiNote If there is a zero divisor, {@code
1313      * ArithmeticException} will be thrown.
1314      */
1315     @Override
1316     @ForceInline
1317     public final ShortVector div(Vector<Short> v) {
1318         return lanewise(DIV, v);
1319     }
1320 
1321     /**
1322      * Divides this vector by the broadcast of an input scalar.
1323      *
1324      * This is a lane-wise binary operation which applies
1325      * the primitive division operation ({@code /}) to each lane.
1326      *
1327      * This method is also equivalent to the expression
1328      * {@link #lanewise(VectorOperators.Binary,short)
1329      *    lanewise}{@code (}{@link VectorOperators#DIV
1330      *    DIV}{@code , e)}.
1331      *
1332      * @apiNote If there is a zero divisor, {@code
1333      * ArithmeticException} will be thrown.
1334      *
1335      * @param e the input scalar
1336      * @return the result of dividing each lane of this vector by the scalar
1337      * @see #div(Vector)
1338      * @see #broadcast(short)
1339      * @see #div(short,VectorMask)
1340      * @see VectorOperators#DIV
1341      * @see #lanewise(VectorOperators.Binary,Vector)
1342      * @see #lanewise(VectorOperators.Binary,short)
1343      */
1344     @ForceInline
1345     public final ShortVector div(short e) {
1346         return lanewise(DIV, e);
1347     }
1348 
1349     /**
1350      * {@inheritDoc} <!--workaround-->
1351      * @see #div(short,VectorMask)
1352      * @apiNote If there is a zero divisor, {@code
1353      * ArithmeticException} will be thrown.
1354      */
1355     @Override
1356     @ForceInline
1357     public final ShortVector div(Vector<Short> v,
1358                                           VectorMask<Short> m) {
1359         return lanewise(DIV, v, m);
1360     }
1361 
1362     /**
1363      * Divides this vector by the broadcast of an input scalar,
1364      * selecting lane elements controlled by a mask.
1365      *
1366      * This is a masked lane-wise binary operation which applies
1367      * the primitive division operation ({@code /}) to each lane.
1368      *
1369      * This method is also equivalent to the expression
1370      * {@link #lanewise(VectorOperators.Binary,short,VectorMask)
1371      *    lanewise}{@code (}{@link VectorOperators#DIV
1372      *    DIV}{@code , s, m)}.
1373      *
1374      * @apiNote If there is a zero divisor, {@code
1375      * ArithmeticException} will be thrown.
1376      *
1377      * @param e the input scalar
1378      * @param m the mask controlling lane selection
1379      * @return the result of dividing each lane of this vector by the scalar
1380      * @see #div(Vector,VectorMask)
1381      * @see #broadcast(short)
1382      * @see #div(short)
1383      * @see VectorOperators#DIV
1384      * @see #lanewise(VectorOperators.Binary,Vector)
1385      * @see #lanewise(VectorOperators.Binary,short)
1386      */
1387     @ForceInline
1388     public final ShortVector div(short e,
1389                                           VectorMask<Short> m) {
1390         return lanewise(DIV, e, m);
1391     }
1392 
1393     /// END OF FULL-SERVICE BINARY METHODS
1394 
1395     /// SECOND-TIER BINARY METHODS
1396     //
1397     // There are no masked versions.
1398 
1399     /**
1400      * {@inheritDoc} <!--workaround-->
1401      */
1402     @Override
1403     @ForceInline
1404     public final ShortVector min(Vector<Short> v) {
1405         return lanewise(MIN, v);
1406     }
1407 
    // FIXME:  "broadcast of an input scalar" is really wordy.  Reduce?
    /**
     * Computes the smaller of this vector and the broadcast of an input scalar.
     *
     * This is a lane-wise binary operation which applies the
     * operation {@code Math.min()} to each pair of
     * corresponding lane values.
     *
     * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Binary,short)
     *    lanewise}{@code (}{@link VectorOperators#MIN
     *    MIN}{@code , e)}.
     *
     * @param e the input scalar
     * @return the lane-wise minimum of this vector and the given scalar
     * @see #min(Vector)
     * @see #broadcast(short)
     * @see VectorOperators#MIN
     * @see #lanewise(VectorOperators.Binary,short,VectorMask)
     */
    @ForceInline
    public final ShortVector min(short e) {
        return lanewise(MIN, e);
    }
1432 
1433     /**
1434      * {@inheritDoc} <!--workaround-->
1435      */
1436     @Override
1437     @ForceInline
1438     public final ShortVector max(Vector<Short> v) {
1439         return lanewise(MAX, v);
1440     }
1441 
1442     /**
1443      * Computes the larger of this vector and the broadcast of an input scalar.
1444      *
1445      * This is a lane-wise binary operation which applies the
1446      * operation {@code Math.max()} to each pair of
1447      * corresponding lane values.
1448      *
1449      * This method is also equivalent to the expression
1450      * {@link #lanewise(VectorOperators.Binary,short)
1451      *    lanewise}{@code (}{@link VectorOperators#MAX
1452      *    MAX}{@code , e)}.
1453      *
1454      * @param e the input scalar
1455      * @return the result of multiplying this vector by the given scalar
1456      * @see #max(Vector)
1457      * @see #broadcast(short)
1458      * @see VectorOperators#MAX
1459      * @see #lanewise(VectorOperators.Binary,short,VectorMask)
1460      */
1461     @ForceInline
1462     public final ShortVector max(short e) {
1463         return lanewise(MAX, e);
1464     }
1465 
    // common bitwise operators: and, or, not (with scalar versions)
    /**
     * Computes the bitwise logical conjunction ({@code &})
     * of this vector and a second input vector.
     *
     * This is a lane-wise binary operation which applies
     * the primitive bitwise "and" operation ({@code &})
     * to each pair of corresponding lane values.
     *
     * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Binary,Vector)
     *    lanewise}{@code (}{@link VectorOperators#AND
     *    AND}{@code , v)}.
     *
     * <p>
     * This is not a full-service named operation like
     * {@link #add(Vector) add}.  A masked version of
     * this operation is not directly available
     * but may be obtained via the masked version of
     * {@code lanewise}.
     *
     * @param v a second input vector
     * @return the bitwise {@code &} of this vector and the second input vector
     * @see #and(short)
     * @see #or(Vector)
     * @see #not()
     * @see VectorOperators#AND
     * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
     */
    @ForceInline
    public final ShortVector and(Vector<Short> v) {
        return lanewise(AND, v);
    }
1499 
1500     /**
1501      * Computes the bitwise logical conjunction ({@code &})
1502      * of this vector and a scalar.
1503      *
1504      * This is a lane-wise binary operation which applies the
1505      * the primitive bitwise "and" operation ({@code &})
1506      * to each pair of corresponding lane values.
1507      *
1508      * This method is also equivalent to the expression
1509      * {@link #lanewise(VectorOperators.Binary,Vector)
1510      *    lanewise}{@code (}{@link VectorOperators#AND
1511      *    AND}{@code , e)}.
1512      *
1513      * @param e an input scalar
1514      * @return the bitwise {@code &} of this vector and scalar
1515      * @see #and(Vector)
1516      * @see VectorOperators#AND
1517      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
1518      */
1519     @ForceInline
1520     public final ShortVector and(short e) {
1521         return lanewise(AND, e);
1522     }
1523 
1524     /**
1525      * Computes the bitwise logical disjunction ({@code |})
1526      * of this vector and a second input vector.
1527      *
1528      * This is a lane-wise binary operation which applies the
1529      * the primitive bitwise "or" operation ({@code |})
1530      * to each pair of corresponding lane values.
1531      *
1532      * This method is also equivalent to the expression
1533      * {@link #lanewise(VectorOperators.Binary,Vector)
1534      *    lanewise}{@code (}{@link VectorOperators#OR
1535      *    AND}{@code , v)}.
1536      *
1537      * <p>
1538      * This is not a full-service named operation like
1539      * {@link #add(Vector) add}.  A masked version of
1540      * this operation is not directly available
1541      * but may be obtained via the masked version of
1542      * {@code lanewise}.
1543      *
1544      * @param v a second input vector
1545      * @return the bitwise {@code |} of this vector and the second input vector
1546      * @see #or(short)
1547      * @see #and(Vector)
1548      * @see #not()
1549      * @see VectorOperators#OR
1550      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
1551      */
1552     @ForceInline
1553     public final ShortVector or(Vector<Short> v) {
1554         return lanewise(OR, v);
1555     }
1556 
1557     /**
1558      * Computes the bitwise logical disjunction ({@code |})
1559      * of this vector and a scalar.
1560      *
1561      * This is a lane-wise binary operation which applies the
1562      * the primitive bitwise "or" operation ({@code |})
1563      * to each pair of corresponding lane values.
1564      *
1565      * This method is also equivalent to the expression
1566      * {@link #lanewise(VectorOperators.Binary,Vector)
1567      *    lanewise}{@code (}{@link VectorOperators#OR
1568      *    OR}{@code , e)}.
1569      *
1570      * @param e an input scalar
1571      * @return the bitwise {@code |} of this vector and scalar
1572      * @see #or(Vector)
1573      * @see VectorOperators#OR
1574      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
1575      */
1576     @ForceInline
1577     public final ShortVector or(short e) {
1578         return lanewise(OR, e);
1579     }
1580 
1581 
1582 
1583     /// UNARY METHODS
1584 
1585     /**
1586      * {@inheritDoc} <!--workaround-->
1587      */
1588     @Override
1589     @ForceInline
1590     public final
1591     ShortVector neg() {
1592         return lanewise(NEG);
1593     }
1594 
1595     /**
1596      * {@inheritDoc} <!--workaround-->
1597      */
1598     @Override
1599     @ForceInline
1600     public final
1601     ShortVector abs() {
1602         return lanewise(ABS);
1603     }
1604 
    // not (~)
    /**
     * Computes the bitwise logical complement ({@code ~})
     * of this vector.
     *
     * This is a lane-wise unary operation which applies
     * the primitive bitwise "not" operation ({@code ~})
     * to each lane value.
     *
     * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Unary)
     *    lanewise}{@code (}{@link VectorOperators#NOT
     *    NOT}{@code )}.
     *
     * <p>
     * This is not a full-service named operation like
     * {@link #add(Vector) add}.  A masked version of
     * this operation is not directly available
     * but may be obtained via the masked version of
     * {@code lanewise}.
     *
     * @return the bitwise complement {@code ~} of this vector
     * @see #and(Vector)
     * @see VectorOperators#NOT
     * @see #lanewise(VectorOperators.Unary,VectorMask)
     */
    @ForceInline
    public final ShortVector not() {
        return lanewise(NOT);
    }
1635 
1636 
1637     /// COMPARISONS
1638 
1639     /**
1640      * {@inheritDoc} <!--workaround-->
1641      */
1642     @Override
1643     @ForceInline
1644     public final
1645     VectorMask<Short> eq(Vector<Short> v) {
1646         return compare(EQ, v);
1647     }
1648 
1649     /**
1650      * Tests if this vector is equal to an input scalar.
1651      *
1652      * This is a lane-wise binary test operation which applies
1653      * the primitive equals operation ({@code ==}) to each lane.
1654      * The result is the same as {@code compare(VectorOperators.Comparison.EQ, e)}.
1655      *
1656      * @param e the input scalar
1657      * @return the result mask of testing if this vector
1658      *         is equal to {@code e}
1659      * @see #compare(VectorOperators.Comparison,short)
1660      */
1661     @ForceInline
1662     public final
1663     VectorMask<Short> eq(short e) {
1664         return compare(EQ, e);
1665     }
1666 
1667     /**
1668      * {@inheritDoc} <!--workaround-->
1669      */
1670     @Override
1671     @ForceInline
1672     public final
1673     VectorMask<Short> lt(Vector<Short> v) {
1674         return compare(LT, v);
1675     }
1676 
1677     /**
1678      * Tests if this vector is less than an input scalar.
1679      *
1680      * This is a lane-wise binary test operation which applies
1681      * the primitive less than operation ({@code <}) to each lane.
1682      * The result is the same as {@code compare(VectorOperators.LT, e)}.
1683      *
1684      * @param e the input scalar
1685      * @return the mask result of testing if this vector
1686      *         is less than the input scalar
1687      * @see #compare(VectorOperators.Comparison,short)
1688      */
1689     @ForceInline
1690     public final
1691     VectorMask<Short> lt(short e) {
1692         return compare(LT, e);
1693     }
1694 
1695     /**
1696      * {@inheritDoc} <!--workaround-->
1697      */
    @Override
    public abstract
    VectorMask<Short> test(VectorOperators.Test op);
    // Shared implementation logic: testTemplate, below.
1701 
    /*package-private*/
    @ForceInline
    final
    <M extends VectorMask<Short>>
    M testTemplate(Class<M> maskType, Test op) {
        ShortSpecies vsp = vspecies();
        if (opKind(op, VO_SPECIAL)) {
            // Special test ops are rewritten as comparisons against zero.
            ShortVector bits = this.viewAsIntegralLanes();
            VectorMask<Short> m;
            if (op == IS_DEFAULT) {
                m = bits.compare(EQ, (short) 0);
            } else if (op == IS_NEGATIVE) {
                m = bits.compare(LT, (short) 0);
            }
            else {
                throw new AssertionError(op);
            }
            return maskType.cast(m);
        }
        // opCode(op) is kept for its validation side effect; no non-special
        // test op is expected to reach this point.
        int opc = opCode(op);
        throw new AssertionError(op);
    }
1724 
1725     /**
1726      * {@inheritDoc} <!--workaround-->
1727      */
1728     @Override
1729     @ForceInline
1730     public final
1731     VectorMask<Short> test(VectorOperators.Test op,
1732                                   VectorMask<Short> m) {
1733         return test(op).and(m);
1734     }
1735 
1736     /**
1737      * {@inheritDoc} <!--workaround-->
1738      */
    @Override
    public abstract
    VectorMask<Short> compare(VectorOperators.Comparison op, Vector<Short> v);
    // Shared implementation logic: compareTemplate(Class, Comparison, Vector), below.
1742 
    /*package-private*/
    @ForceInline
    final
    <M extends VectorMask<Short>>
    M compareTemplate(Class<M> maskType, Comparison op, Vector<Short> v) {
        Objects.requireNonNull(v);
        ShortSpecies vsp = vspecies();
        ShortVector that = (ShortVector) v;
        that.check(this);  // both operands must have the same species
        int opc = opCode(op);
        return VectorSupport.compare(
            opc, getClass(), maskType, short.class, length(),
            this, that,
            // Scalar fallback used when the intrinsic is unavailable:
            // test each lane pair with the decoded comparison condition.
            (cond, v0, v1) -> {
                AbstractMask<Short> m
                    = v0.bTest(cond, v1, (cond_, i, a, b)
                               -> compareWithOp(cond, a, b));
                @SuppressWarnings("unchecked")
                M m2 = (M) m;
                return m2;
            });
    }
1765 
1766     @ForceInline
1767     private static boolean compareWithOp(int cond, short a, short b) {
1768         return switch (cond) {
1769             case BT_eq -> a == b;
1770             case BT_ne -> a != b;
1771             case BT_lt -> a < b;
1772             case BT_le -> a <= b;
1773             case BT_gt -> a > b;
1774             case BT_ge -> a >= b;
1775             case BT_ult -> Short.compareUnsigned(a, b) < 0;
1776             case BT_ule -> Short.compareUnsigned(a, b) <= 0;
1777             case BT_ugt -> Short.compareUnsigned(a, b) > 0;
1778             case BT_uge -> Short.compareUnsigned(a, b) >= 0;
1779             default -> throw new AssertionError();
1780         };
1781     }
1782 
1783     /**
1784      * {@inheritDoc} <!--workaround-->
1785      */
1786     @Override
1787     @ForceInline
1788     public final
1789     VectorMask<Short> compare(VectorOperators.Comparison op,
1790                                   Vector<Short> v,
1791                                   VectorMask<Short> m) {
1792         return compare(op, v).and(m);
1793     }
1794 
1795     /**
1796      * Tests this vector by comparing it with an input scalar,
1797      * according to the given comparison operation.
1798      *
1799      * This is a lane-wise binary test operation which applies
1800      * the comparison operation to each lane.
1801      * <p>
1802      * The result is the same as
1803      * {@code compare(op, broadcast(species(), e))}.
1804      * That is, the scalar may be regarded as broadcast to
1805      * a vector of the same species, and then compared
1806      * against the original vector, using the selected
1807      * comparison operation.
1808      *
1809      * @param op the operation used to compare lane values
1810      * @param e the input scalar
1811      * @return the mask result of testing lane-wise if this vector
1812      *         compares to the input, according to the selected
1813      *         comparison operator
1814      * @see ShortVector#compare(VectorOperators.Comparison,Vector)
1815      * @see #eq(short)
1816      * @see #lt(short)
1817      */
    public abstract
    VectorMask<Short> compare(Comparison op, short e);
    // Shared implementation logic: compareTemplate(Class, Comparison, short), below.
1820 
    /*package-private*/
    @ForceInline
    final
    <M extends VectorMask<Short>>
    M compareTemplate(Class<M> maskType, Comparison op, short e) {
        // Scalar comparison is defined as comparison against the broadcast value.
        return compareTemplate(maskType, op, broadcast(e));
    }
1828 
1829     /**
1830      * Tests this vector by comparing it with an input scalar,
1831      * according to the given comparison operation,
1832      * in lanes selected by a mask.
1833      *
1834      * This is a masked lane-wise binary test operation which applies
1835      * to each pair of corresponding lane values.
1836      *
1837      * The returned result is equal to the expression
1838      * {@code compare(op,s).and(m)}.
1839      *
1840      * @param op the operation used to compare lane values
1841      * @param e the input scalar
1842      * @param m the mask controlling lane selection
1843      * @return the mask result of testing lane-wise if this vector
1844      *         compares to the input, according to the selected
1845      *         comparison operator,
1846      *         and only in the lanes selected by the mask
1847      * @see ShortVector#compare(VectorOperators.Comparison,Vector,VectorMask)
1848      */
1849     @ForceInline
1850     public final VectorMask<Short> compare(VectorOperators.Comparison op,
1851                                                short e,
1852                                                VectorMask<Short> m) {
1853         return compare(op, e).and(m);
1854     }
1855 
1856     /**
1857      * {@inheritDoc} <!--workaround-->
1858      */
    @Override
    public abstract
    VectorMask<Short> compare(Comparison op, long e);
    // Shared implementation logic: compareTemplate(Class, Comparison, long), below.
1862 
    /*package-private*/
    @ForceInline
    final
    <M extends VectorMask<Short>>
    M compareTemplate(Class<M> maskType, Comparison op, long e) {
        // broadcast(long) range-checks the value before widening to a vector.
        return compareTemplate(maskType, op, broadcast(e));
    }
1870 
1871     /**
1872      * {@inheritDoc} <!--workaround-->
1873      */
1874     @Override
1875     @ForceInline
1876     public final
1877     VectorMask<Short> compare(Comparison op, long e, VectorMask<Short> m) {
1878         return compare(op, broadcast(e), m);
1879     }
1880 
1881 
1882 
1883     /**
1884      * {@inheritDoc} <!--workaround-->
1885      */
    @Override public abstract
    ShortVector blend(Vector<Short> v, VectorMask<Short> m);
    // Shared implementation logic: blendTemplate, below.
1888 
    /*package-private*/
    @ForceInline
    final
    <M extends VectorMask<Short>>
    ShortVector
    blendTemplate(Class<M> maskType, ShortVector v, M m) {
        v.check(this);  // both operands must have the same species
        return VectorSupport.blend(
            getClass(), maskType, short.class, length(),
            this, v, m,
            // Scalar fallback: where the mask is set, take the lane from v.
            (v0, v1, m_) -> v0.bOp(v1, m_, (i, a, b) -> b));
    }
1901 
1902     /**
1903      * {@inheritDoc} <!--workaround-->
1904      */
    @Override public abstract ShortVector addIndex(int scale);  // shared logic: addIndexTemplate, below
1906 
    /*package-private*/
    @ForceInline
    final ShortVector addIndexTemplate(int scale) {
        ShortSpecies vsp = vspecies();
        // make sure VLENGTH*scale doesn't overflow:
        vsp.checkScale(scale);
        return VectorSupport.indexVector(
            getClass(), short.class, length(),
            this, scale, vsp,
            (v, scale_, s)
            -> {
                // If the platform doesn't support an INDEX
                // instruction directly, load IOTA from memory
                // and multiply.
                ShortVector iota = s.iota();
                short sc = (short) scale_;
                // Skip the multiply entirely when the scale is one.
                return v.add(sc == 1 ? iota : iota.mul(sc));
            });
    }
1926 
1927     /**
1928      * Replaces selected lanes of this vector with
1929      * a scalar value
1930      * under the control of a mask.
1931      *
1932      * This is a masked lane-wise binary operation which
1933      * selects each lane value from one or the other input.
1934      *
1935      * The returned result is equal to the expression
1936      * {@code blend(broadcast(e),m)}.
1937      *
1938      * @param e the input scalar, containing the replacement lane value
1939      * @param m the mask controlling lane selection of the scalar
1940      * @return the result of blending the lane elements of this vector with
1941      *         the scalar value
1942      */
1943     @ForceInline
1944     public final ShortVector blend(short e,
1945                                             VectorMask<Short> m) {
1946         return blend(broadcast(e), m);
1947     }
1948 
1949     /**
1950      * Replaces selected lanes of this vector with
1951      * a scalar value
1952      * under the control of a mask.
1953      *
1954      * This is a masked lane-wise binary operation which
1955      * selects each lane value from one or the other input.
1956      *
1957      * The returned result is equal to the expression
1958      * {@code blend(broadcast(e),m)}.
1959      *
1960      * @param e the input scalar, containing the replacement lane value
1961      * @param m the mask controlling lane selection of the scalar
1962      * @return the result of blending the lane elements of this vector with
1963      *         the scalar value
1964      */
1965     @ForceInline
1966     public final ShortVector blend(long e,
1967                                             VectorMask<Short> m) {
1968         return blend(broadcast(e), m);
1969     }
1970 
1971     /**
1972      * {@inheritDoc} <!--workaround-->
1973      */
    @Override
    public abstract
    ShortVector slice(int origin, Vector<Short> v1);
    // Shared implementation logic: sliceTemplate(int, Vector), below.
1977 
    /*package-private*/
    final
    @ForceInline
    ShortVector sliceTemplate(int origin, Vector<Short> v1) {
        ShortVector that = (ShortVector) v1;
        that.check(this);
        Objects.checkIndex(origin, length() + 1);  // origin == length() is permitted
        // Lanes below (length - origin) come from this vector shifted down;
        // the remaining upper lanes are filled from the start of v1.
        VectorShuffle<Short> iota = iotaShuffle();
        VectorMask<Short> blendMask = iota.toVector().compare(VectorOperators.LT, (broadcast((short)(length() - origin))));
        iota = iotaShuffle(origin, 1, true);
        return that.rearrange(iota).blend(this.rearrange(iota), blendMask);
    }
1990 
1991     /**
1992      * {@inheritDoc} <!--workaround-->
1993      */
1994     @Override
1995     @ForceInline
1996     public final
1997     ShortVector slice(int origin,
1998                                Vector<Short> w,
1999                                VectorMask<Short> m) {
2000         return broadcast(0).blend(slice(origin, w), m);
2001     }
2002 
2003     /**
2004      * {@inheritDoc} <!--workaround-->
2005      */
    @Override
    public abstract
    ShortVector slice(int origin);
    // Shared implementation logic: sliceTemplate(int), below.
2009 
    /*package-private*/
    final
    @ForceInline
    ShortVector sliceTemplate(int origin) {
        Objects.checkIndex(origin, length() + 1);  // origin == length() is permitted
        // Same as the two-vector slice, but upper lanes are filled with zero.
        VectorShuffle<Short> iota = iotaShuffle();
        VectorMask<Short> blendMask = iota.toVector().compare(VectorOperators.LT, (broadcast((short)(length() - origin))));
        iota = iotaShuffle(origin, 1, true);
        return vspecies().zero().blend(this.rearrange(iota), blendMask);
    }
2020 
2021     /**
2022      * {@inheritDoc} <!--workaround-->
2023      */
    @Override
    public abstract
    ShortVector unslice(int origin, Vector<Short> w, int part);
    // Shared implementation logic: unsliceTemplate(int, Vector, int), below.
2027 
    /*package-private*/
    final
    @ForceInline
    ShortVector
    unsliceTemplate(int origin, Vector<Short> w, int part) {
        ShortVector that = (ShortVector) w;
        that.check(this);
        Objects.checkIndex(origin, length() + 1);
        // part selects which half of the expanded result is produced:
        // part 0 keeps w's lanes below origin, part 1 keeps those at or above it.
        VectorShuffle<Short> iota = iotaShuffle();
        VectorMask<Short> blendMask = iota.toVector().compare((part == 0) ? VectorOperators.GE : VectorOperators.LT,
                                                                  (broadcast((short)(origin))));
        iota = iotaShuffle(-origin, 1, true);
        return that.blend(this.rearrange(iota), blendMask);
    }
2042 
    /*package-private*/
    final
    @ForceInline
    <M extends VectorMask<Short>>
    ShortVector
    unsliceTemplate(Class<M> maskType, int origin, Vector<Short> w, int part, M m) {
        ShortVector that = (ShortVector) w;
        that.check(this);
        // Masked unslice: slice w back down, blend in this vector's lanes
        // under the mask, then unslice the combined result into place.
        ShortVector slice = that.sliceTemplate(origin, that);
        slice = slice.blendTemplate(maskType, this, m);
        return slice.unsliceTemplate(origin, w, part);
    }
2055 
2056     /**
2057      * {@inheritDoc} <!--workaround-->
2058      */
    @Override
    public abstract
    ShortVector unslice(int origin, Vector<Short> w, int part, VectorMask<Short> m);
    // Shared implementation logic: the masked unsliceTemplate, above.
2062 
2063     /**
2064      * {@inheritDoc} <!--workaround-->
2065      */
    @Override
    public abstract
    ShortVector unslice(int origin);
    // Shared implementation logic: unsliceTemplate(int), below.
2069 
    /*package-private*/
    final
    @ForceInline
    ShortVector
    unsliceTemplate(int origin) {
        Objects.checkIndex(origin, length() + 1);
        // Shift lanes up by origin; the vacated lower lanes are filled with zero.
        VectorShuffle<Short> iota = iotaShuffle();
        VectorMask<Short> blendMask = iota.toVector().compare(VectorOperators.GE,
                                                                  (broadcast((short)(origin))));
        iota = iotaShuffle(-origin, 1, true);
        return vspecies().zero().blend(this.rearrange(iota), blendMask);
    }
2082 
2083     private ArrayIndexOutOfBoundsException
2084     wrongPartForSlice(int part) {
2085         String msg = String.format("bad part number %d for slice operation",
2086                                    part);
2087         return new ArrayIndexOutOfBoundsException(msg);
2088     }
2089 
2090     /**
2091      * {@inheritDoc} <!--workaround-->
2092      */
    @Override
    public abstract
    ShortVector rearrange(VectorShuffle<Short> m);
    // Shared implementation logic: rearrangeTemplate(Class, S), below.
2096 
    /*package-private*/
    @ForceInline
    final
    <S extends VectorShuffle<Short>>
    ShortVector rearrangeTemplate(Class<S> shuffletype, S shuffle) {
        shuffle.checkIndexes();  // rejects exceptional (negative) source indexes up front
        return VectorSupport.rearrangeOp(
            getClass(), shuffletype, short.class, length(),
            this, shuffle,
            // Scalar fallback: gather each lane from its shuffle source index.
            (v1, s_) -> v1.uOp((i, a) -> {
                int ei = s_.laneSource(i);
                return v1.lane(ei);
            }));
    }
2111 
2112     /**
2113      * {@inheritDoc} <!--workaround-->
2114      */
    @Override
    public abstract
    ShortVector rearrange(VectorShuffle<Short> s,
                                   VectorMask<Short> m);
    // Shared implementation logic: the masked rearrangeTemplate, below.
2119 
    /*package-private*/
    @ForceInline
    final
    <S extends VectorShuffle<Short>>
    ShortVector rearrangeTemplate(Class<S> shuffletype,
                                           S shuffle,
                                           VectorMask<Short> m) {
        // Compute the unmasked rearrangement first; in the scalar fallback a
        // negative source index contributes a zero lane.
        ShortVector unmasked =
            VectorSupport.rearrangeOp(
                getClass(), shuffletype, short.class, length(),
                this, shuffle,
                (v1, s_) -> v1.uOp((i, a) -> {
                    int ei = s_.laneSource(i);
                    return ei < 0 ? 0 : v1.lane(ei);
                }));
        VectorMask<Short> valid = shuffle.laneIsValid();
        // A selected lane with an invalid source index is an error;
        // checkIndexes() is expected to throw before the AssertionError.
        if (m.andNot(valid).anyTrue()) {
            shuffle.checkIndexes();
            throw new AssertionError();
        }
        // Lanes not selected by the mask are zeroed.
        return broadcast((short)0).blend(unmasked, m);
    }
2142 
2143     /**
2144      * {@inheritDoc} <!--workaround-->
2145      */
    @Override
    public abstract
    ShortVector rearrange(VectorShuffle<Short> s,
                                   Vector<Short> v);
    // Shared implementation logic: the two-vector rearrangeTemplate, below.
2150 
    /*package-private*/
    @ForceInline
    final
    <S extends VectorShuffle<Short>>
    ShortVector rearrangeTemplate(Class<S> shuffletype,
                                           S shuffle,
                                           ShortVector v) {
        // Two-vector rearrange: lanes with valid (in-range) indexes gather
        // from this vector, the rest gather from v via the wrapped indexes.
        VectorMask<Short> valid = shuffle.laneIsValid();
        @SuppressWarnings("unchecked")
        S ws = (S) shuffle.wrapIndexes();
        ShortVector r0 =
            VectorSupport.rearrangeOp(
                getClass(), shuffletype, short.class, length(),
                this, ws,
                (v0, s_) -> v0.uOp((i, a) -> {
                    int ei = s_.laneSource(i);
                    return v0.lane(ei);
                }));
        ShortVector r1 =
            VectorSupport.rearrangeOp(
                getClass(), shuffletype, short.class, length(),
                v, ws,
                (v1, s_) -> v1.uOp((i, a) -> {
                    int ei = s_.laneSource(i);
                    return v1.lane(ei);
                }));
        return r1.blend(r0, valid);
    }
2179 
2180     @ForceInline
2181     private final
2182     VectorShuffle<Short> toShuffle0(ShortSpecies dsp) {
2183         short[] a = toArray();
2184         int[] sa = new int[a.length];
2185         for (int i = 0; i < a.length; i++) {
2186             sa[i] = (int) a[i];
2187         }
2188         return VectorShuffle.fromArray(dsp, sa, 0);
2189     }
2190 
    /*package-private*/
    @ForceInline
    final
    VectorShuffle<Short> toShuffleTemplate(Class<?> shuffleType) {
        ShortSpecies vsp = vspecies();
        // Convert via the CAST intrinsic; the shuffle's payload lane type is
        // byte (see byte.class below).  Falls back to the scalar toShuffle0.
        return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST,
                                     getClass(), short.class, length(),
                                     shuffleType, byte.class, length(),
                                     this, vsp,
                                     ShortVector::toShuffle0);
    }
2202 
2203     /**
2204      * {@inheritDoc} <!--workaround-->
2205      */
    @Override
    public abstract
    ShortVector selectFrom(Vector<Short> v);
    // Shared implementation logic: selectFromTemplate(ShortVector), below.
2209 
2210     /*package-private*/
2211     @ForceInline
2212     final ShortVector selectFromTemplate(ShortVector v) {
2213         return v.rearrange(this.toShuffle());
2214     }
2215 
2216     /**
2217      * {@inheritDoc} <!--workaround-->
2218      */
    @Override
    public abstract
    ShortVector selectFrom(Vector<Short> s, VectorMask<Short> m);
    // Shared implementation logic: the masked selectFromTemplate, below.
2222 
2223     /*package-private*/
2224     @ForceInline
2225     final ShortVector selectFromTemplate(ShortVector v,
2226                                                   AbstractMask<Short> m) {
2227         return v.rearrange(this.toShuffle(), m);
2228     }
2229 
2230     /// Ternary operations
2231 
2232     /**
2233      * Blends together the bits of two vectors under
2234      * the control of a third, which supplies mask bits.
2235      *
2236      * This is a lane-wise ternary operation which performs
2237      * a bitwise blending operation {@code (a&~c)|(b&c)}
2238      * to each lane.
2239      *
2240      * This method is also equivalent to the expression
2241      * {@link #lanewise(VectorOperators.Ternary,Vector,Vector)
2242      *    lanewise}{@code (}{@link VectorOperators#BITWISE_BLEND
2243      *    BITWISE_BLEND}{@code , bits, mask)}.
2244      *
2245      * @param bits input bits to blend into the current vector
2246      * @param mask a bitwise mask to enable blending of the input bits
2247      * @return the bitwise blend of the given bits into the current vector,
2248      *         under control of the bitwise mask
2249      * @see #bitwiseBlend(short,short)
2250      * @see #bitwiseBlend(short,Vector)
2251      * @see #bitwiseBlend(Vector,short)
2252      * @see VectorOperators#BITWISE_BLEND
2253      * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
2254      */
2255     @ForceInline
2256     public final
2257     ShortVector bitwiseBlend(Vector<Short> bits, Vector<Short> mask) {
2258         return lanewise(BITWISE_BLEND, bits, mask);
2259     }
2260 
2261     /**
2262      * Blends together the bits of a vector and a scalar under
2263      * the control of another scalar, which supplies mask bits.
2264      *
2265      * This is a lane-wise ternary operation which performs
2266      * a bitwise blending operation {@code (a&~c)|(b&c)}
2267      * to each lane.
2268      *
2269      * This method is also equivalent to the expression
2270      * {@link #lanewise(VectorOperators.Ternary,Vector,Vector)
2271      *    lanewise}{@code (}{@link VectorOperators#BITWISE_BLEND
2272      *    BITWISE_BLEND}{@code , bits, mask)}.
2273      *
2274      * @param bits input bits to blend into the current vector
2275      * @param mask a bitwise mask to enable blending of the input bits
2276      * @return the bitwise blend of the given bits into the current vector,
2277      *         under control of the bitwise mask
2278      * @see #bitwiseBlend(Vector,Vector)
2279      * @see VectorOperators#BITWISE_BLEND
2280      * @see #lanewise(VectorOperators.Ternary,short,short,VectorMask)
2281      */
2282     @ForceInline
2283     public final
2284     ShortVector bitwiseBlend(short bits, short mask) {
2285         return lanewise(BITWISE_BLEND, bits, mask);
2286     }
2287 
2288     /**
2289      * Blends together the bits of a vector and a scalar under
2290      * the control of another vector, which supplies mask bits.
2291      *
2292      * This is a lane-wise ternary operation which performs
2293      * a bitwise blending operation {@code (a&~c)|(b&c)}
2294      * to each lane.
2295      *
2296      * This method is also equivalent to the expression
2297      * {@link #lanewise(VectorOperators.Ternary,Vector,Vector)
2298      *    lanewise}{@code (}{@link VectorOperators#BITWISE_BLEND
2299      *    BITWISE_BLEND}{@code , bits, mask)}.
2300      *
2301      * @param bits input bits to blend into the current vector
2302      * @param mask a bitwise mask to enable blending of the input bits
2303      * @return the bitwise blend of the given bits into the current vector,
2304      *         under control of the bitwise mask
2305      * @see #bitwiseBlend(Vector,Vector)
2306      * @see VectorOperators#BITWISE_BLEND
2307      * @see #lanewise(VectorOperators.Ternary,short,Vector,VectorMask)
2308      */
2309     @ForceInline
2310     public final
2311     ShortVector bitwiseBlend(short bits, Vector<Short> mask) {
2312         return lanewise(BITWISE_BLEND, bits, mask);
2313     }
2314 
2315     /**
2316      * Blends together the bits of two vectors under
2317      * the control of a scalar, which supplies mask bits.
2318      *
2319      * This is a lane-wise ternary operation which performs
2320      * a bitwise blending operation {@code (a&~c)|(b&c)}
2321      * to each lane.
2322      *
2323      * This method is also equivalent to the expression
2324      * {@link #lanewise(VectorOperators.Ternary,Vector,Vector)
2325      *    lanewise}{@code (}{@link VectorOperators#BITWISE_BLEND
2326      *    BITWISE_BLEND}{@code , bits, mask)}.
2327      *
2328      * @param bits input bits to blend into the current vector
2329      * @param mask a bitwise mask to enable blending of the input bits
2330      * @return the bitwise blend of the given bits into the current vector,
2331      *         under control of the bitwise mask
2332      * @see #bitwiseBlend(Vector,Vector)
2333      * @see VectorOperators#BITWISE_BLEND
2334      * @see #lanewise(VectorOperators.Ternary,Vector,short,VectorMask)
2335      */
2336     @ForceInline
2337     public final
2338     ShortVector bitwiseBlend(Vector<Short> bits, short mask) {
2339         return lanewise(BITWISE_BLEND, bits, mask);
2340     }
2341 
2342 
2343     // Type specific horizontal reductions
2344 
2345     /**
2346      * Returns a value accumulated from all the lanes of this vector.
2347      *
2348      * This is an associative cross-lane reduction operation which
2349      * applies the specified operation to all the lane elements.
2350      * <p>
2351      * A few reduction operations do not support arbitrary reordering
2352      * of their operands, yet are included here because of their
2353      * usefulness.
2354      * <ul>
2355      * <li>
2356      * In the case of {@code FIRST_NONZERO}, the reduction returns
2357      * the value from the lowest-numbered non-zero lane.
2358      * <li>
2359      * All other reduction operations are fully commutative and
2360      * associative.  The implementation can choose any order of
2361      * processing, yet it will always produce the same result.
2362      * </ul>
2363      *
2364      * @param op the operation used to combine lane values
2365      * @return the accumulated result
2366      * @throws UnsupportedOperationException if this vector does
2367      *         not support the requested operation
2368      * @see #reduceLanes(VectorOperators.Associative,VectorMask)
2369      * @see #add(Vector)
2370      * @see #mul(Vector)
2371      * @see #min(Vector)
2372      * @see #max(Vector)
2373      * @see #and(Vector)
2374      * @see #or(Vector)
2375      * @see VectorOperators#XOR
2376      * @see VectorOperators#FIRST_NONZERO
2377      */
    public abstract short reduceLanes(VectorOperators.Associative op);  // shared logic: reduceLanesTemplate, below
2379 
2380     /**
2381      * Returns a value accumulated from selected lanes of this vector,
2382      * controlled by a mask.
2383      *
2384      * This is an associative cross-lane reduction operation which
2385      * applies the specified operation to the selected lane elements.
2386      * <p>
2387      * If no elements are selected, an operation-specific identity
2388      * value is returned.
2389      * <ul>
2390      * <li>
2391      * If the operation is
2392      *  {@code ADD}, {@code XOR}, {@code OR},
2393      * or {@code FIRST_NONZERO},
2394      * then the identity value is zero, the default {@code short} value.
2395      * <li>
2396      * If the operation is {@code MUL},
2397      * then the identity value is one.
2398      * <li>
2399      * If the operation is {@code AND},
2400      * then the identity value is minus one (all bits set).
2401      * <li>
2402      * If the operation is {@code MAX},
2403      * then the identity value is {@code Short.MIN_VALUE}.
2404      * <li>
2405      * If the operation is {@code MIN},
2406      * then the identity value is {@code Short.MAX_VALUE}.
2407      * </ul>
2408      * <p>
2409      * A few reduction operations do not support arbitrary reordering
2410      * of their operands, yet are included here because of their
2411      * usefulness.
2412      * <ul>
2413      * <li>
2414      * In the case of {@code FIRST_NONZERO}, the reduction returns
2415      * the value from the lowest-numbered non-zero lane.
2416      * <li>
2417      * All other reduction operations are fully commutative and
2418      * associative.  The implementation can choose any order of
2419      * processing, yet it will always produce the same result.
2420      * </ul>
2421      *
2422      * @param op the operation used to combine lane values
2423      * @param m the mask controlling lane selection
2424      * @return the reduced result accumulated from the selected lane values
2425      * @throws UnsupportedOperationException if this vector does
2426      *         not support the requested operation
2427      * @see #reduceLanes(VectorOperators.Associative)
2428      */
    public abstract short reduceLanes(VectorOperators.Associative op,
                                       VectorMask<Short> m);
    // Shared implementation logic: the masked reduceLanesTemplate, below.
2431 
    /*package-private*/
    @ForceInline
    final
    short reduceLanesTemplate(VectorOperators.Associative op,
                               VectorMask<Short> m) {
        // Fill unselected lanes with the operation's identity value,
        // then reduce as if unmasked.
        ShortVector v = reduceIdentityVector(op).blend(this, m);
        return v.reduceLanesTemplate(op);
    }
2440 
    /*package-private*/
    @ForceInline
    final
    short reduceLanesTemplate(VectorOperators.Associative op) {
        if (op == FIRST_NONZERO) {
            // FIXME:  The JIT should handle this, and other scan ops also.
            VectorMask<Short> thisNZ
                = this.viewAsIntegralLanes().compare(NE, (short) 0);
            return this.lane(thisNZ.firstTrue());
        }
        int opc = opCode(op);
        return fromBits(VectorSupport.reductionCoerced(
            opc, getClass(), short.class, length(),
            this,
            // Scalar fallbacks, cached per operator; each folds the lanes
            // starting from the operator's identity value.
            REDUCE_IMPL.find(op, opc, (opc_) -> {
              switch (opc_) {
              case VECTOR_OP_ADD: return v ->
                      toBits(v.rOp((short)0, (i, a, b) -> (short)(a + b)));
              case VECTOR_OP_MUL: return v ->
                      toBits(v.rOp((short)1, (i, a, b) -> (short)(a * b)));
              case VECTOR_OP_MIN: return v ->
                      toBits(v.rOp(MAX_OR_INF, (i, a, b) -> (short) Math.min(a, b)));
              case VECTOR_OP_MAX: return v ->
                      toBits(v.rOp(MIN_OR_INF, (i, a, b) -> (short) Math.max(a, b)));
              case VECTOR_OP_AND: return v ->
                      toBits(v.rOp((short)-1, (i, a, b) -> (short)(a & b)));
              case VECTOR_OP_OR: return v ->
                      toBits(v.rOp((short)0, (i, a, b) -> (short)(a | b)));
              case VECTOR_OP_XOR: return v ->
                      toBits(v.rOp((short)0, (i, a, b) -> (short)(a ^ b)));
              default: return null;
              }})));
    }
    // Cache of per-opcode scalar fallbacks used by reduceLanesTemplate.
    private static final
    ImplCache<Associative,Function<ShortVector,Long>> REDUCE_IMPL
        = new ImplCache<>(Associative.class, ShortVector.class);
2477 
2478     private
2479     @ForceInline
2480     ShortVector reduceIdentityVector(VectorOperators.Associative op) {
2481         int opc = opCode(op);
2482         UnaryOperator<ShortVector> fn
2483             = REDUCE_ID_IMPL.find(op, opc, (opc_) -> {
2484                 switch (opc_) {
2485                 case VECTOR_OP_ADD:
2486                 case VECTOR_OP_OR:
2487                 case VECTOR_OP_XOR:
2488                     return v -> v.broadcast(0);
2489                 case VECTOR_OP_MUL:
2490                     return v -> v.broadcast(1);
2491                 case VECTOR_OP_AND:
2492                     return v -> v.broadcast(-1);
2493                 case VECTOR_OP_MIN:
2494                     return v -> v.broadcast(MAX_OR_INF);
2495                 case VECTOR_OP_MAX:
2496                     return v -> v.broadcast(MIN_OR_INF);
2497                 default: return null;
2498                 }
2499             });
2500         return fn.apply(this);
2501     }
    // Cache of per-opcode identity-vector makers for reduceIdentityVector.
    private static final
    ImplCache<Associative,UnaryOperator<ShortVector>> REDUCE_ID_IMPL
        = new ImplCache<>(Associative.class, ShortVector.class);

    // Extreme short values: MIN_OR_INF is the identity for MAX reductions,
    // MAX_OR_INF the identity for MIN reductions.
    private static final short MIN_OR_INF = Short.MIN_VALUE;
    private static final short MAX_OR_INF = Short.MAX_VALUE;
2508 
    // Reductions widened to long; implemented by concrete subclasses.
    public @Override abstract long reduceLanesToLong(VectorOperators.Associative op);
    public @Override abstract long reduceLanesToLong(VectorOperators.Associative op,
                                                     VectorMask<Short> m);
2512 
2513     // Type specific accessors
2514 
2515     /**
2516      * Gets the lane element at lane index {@code i}
2517      *
2518      * @param i the lane index
2519      * @return the lane element at lane index {@code i}
2520      * @throws IllegalArgumentException if the index is is out of range
2521      * ({@code < 0 || >= length()})
2522      */
2523     public abstract short lane(int i);
2524 
2525     /**
2526      * Replaces the lane element of this vector at lane index {@code i} with
2527      * value {@code e}.
2528      *
2529      * This is a cross-lane operation and behaves as if it returns the result
2530      * of blending this vector with an input vector that is the result of
2531      * broadcasting {@code e} and a mask that has only one lane set at lane
2532      * index {@code i}.
2533      *
2534      * @param i the lane index of the lane element to be replaced
2535      * @param e the value to be placed
2536      * @return the result of replacing the lane element of this vector at lane
2537      * index {@code i} with value {@code e}.
2538      * @throws IllegalArgumentException if the index is is out of range
2539      * ({@code < 0 || >= length()})
2540      */
2541     public abstract ShortVector withLane(int i, short e);
2542 
2543     // Memory load operations
2544 
2545     /**
2546      * Returns an array of type {@code short[]}
2547      * containing all the lane values.
2548      * The array length is the same as the vector length.
2549      * The array elements are stored in lane order.
2550      * <p>
2551      * This method behaves as if it stores
2552      * this vector into an allocated array
2553      * (using {@link #intoArray(short[], int) intoArray})
2554      * and returns the array as follows:
2555      * <pre>{@code
2556      *   short[] a = new short[this.length()];
2557      *   this.intoArray(a, 0);
2558      *   return a;
2559      * }</pre>
2560      *
2561      * @return an array containing the lane values of this vector
2562      */
2563     @ForceInline
2564     @Override
2565     public final short[] toArray() {
2566         short[] a = new short[vspecies().laneCount()];
2567         intoArray(a, 0);
2568         return a;
2569     }
2570 
    /** {@inheritDoc} <!--workaround-->
     * @implNote
     * When this method is used on vectors
     * of type {@code ShortVector},
     * there will be no loss of precision or range,
     * and so no {@code UnsupportedOperationException} will
     * be thrown.
     */
    @ForceInline
    @Override
    public final int[] toIntArray() {
        short[] a = toArray();
        int[] res = new int[a.length];
        for (int i = 0; i < a.length; i++) {
            short e = a[i];
            // Widen each lane; per the note above the check cannot fail here.
            res[i] = (int) ShortSpecies.toIntegralChecked(e, true);
        }
        return res;
    }
2590 
    /** {@inheritDoc} <!--workaround-->
     * @implNote
     * When this method is used on vectors
     * of type {@code ShortVector},
     * there will be no loss of precision or range,
     * and so no {@code UnsupportedOperationException} will
     * be thrown.
     */
    @ForceInline
    @Override
    public final long[] toLongArray() {
        short[] a = toArray();
        long[] res = new long[a.length];
        for (int i = 0; i < a.length; i++) {
            short e = a[i];
            // Widen each lane; per the note above the check cannot fail here.
            res[i] = ShortSpecies.toIntegralChecked(e, false);
        }
        return res;
    }
2610 
    /** {@inheritDoc} <!--workaround-->
     * @implNote
     * When this method is used on vectors
     * of type {@code ShortVector},
     * there will be no loss of precision.
     */
    @ForceInline
    @Override
    public final double[] toDoubleArray() {
        short[] a = toArray();
        double[] res = new double[a.length];
        for (int i = 0; i < a.length; i++) {
            // Every short value is exactly representable as a double.
            res[i] = (double) a[i];
        }
        return res;
    }
2627 
2628     /**
2629      * Loads a vector from a byte array starting at an offset.
2630      * Bytes are composed into primitive lane elements according
2631      * to the specified byte order.
2632      * The vector is arranged into lanes according to
2633      * <a href="Vector.html#lane-order">memory ordering</a>.
2634      * <p>
2635      * This method behaves as if it returns the result of calling
2636      * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
2637      * fromByteBuffer()} as follows:
2638      * <pre>{@code
2639      * var bb = ByteBuffer.wrap(a);
2640      * var m = species.maskAll(true);
2641      * return fromByteBuffer(species, bb, offset, bo, m);
2642      * }</pre>
2643      *
2644      * @param species species of desired vector
2645      * @param a the byte array
2646      * @param offset the offset into the array
2647      * @param bo the intended byte order
2648      * @return a vector loaded from a byte array
2649      * @throws IndexOutOfBoundsException
2650      *         if {@code offset+N*ESIZE < 0}
2651      *         or {@code offset+(N+1)*ESIZE > a.length}
2652      *         for any lane {@code N} in the vector
2653      */
2654     @ForceInline
2655     public static
2656     ShortVector fromByteArray(VectorSpecies<Short> species,
2657                                        byte[] a, int offset,
2658                                        ByteOrder bo) {
2659         offset = checkFromIndexSize(offset, species.vectorByteSize(), a.length);
2660         ShortSpecies vsp = (ShortSpecies) species;
2661         return vsp.dummyVector().fromByteArray0(a, offset).maybeSwap(bo);
2662     }
2663 
2664     /**
2665      * Loads a vector from a byte array starting at an offset
2666      * and using a mask.
2667      * Lanes where the mask is unset are filled with the default
2668      * value of {@code short} (zero).
2669      * Bytes are composed into primitive lane elements according
2670      * to the specified byte order.
2671      * The vector is arranged into lanes according to
2672      * <a href="Vector.html#lane-order">memory ordering</a>.
2673      * <p>
2674      * This method behaves as if it returns the result of calling
2675      * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
2676      * fromByteBuffer()} as follows:
2677      * <pre>{@code
2678      * var bb = ByteBuffer.wrap(a);
2679      * return fromByteBuffer(species, bb, offset, bo, m);
2680      * }</pre>
2681      *
2682      * @param species species of desired vector
2683      * @param a the byte array
2684      * @param offset the offset into the array
2685      * @param bo the intended byte order
2686      * @param m the mask controlling lane selection
2687      * @return a vector loaded from a byte array
2688      * @throws IndexOutOfBoundsException
2689      *         if {@code offset+N*ESIZE < 0}
2690      *         or {@code offset+(N+1)*ESIZE > a.length}
2691      *         for any lane {@code N} in the vector
2692      *         where the mask is set
2693      */
2694     @ForceInline
2695     public static
2696     ShortVector fromByteArray(VectorSpecies<Short> species,
2697                                        byte[] a, int offset,
2698                                        ByteOrder bo,
2699                                        VectorMask<Short> m) {
2700         ShortSpecies vsp = (ShortSpecies) species;
2701         if (offset >= 0 && offset <= (a.length - species.vectorByteSize())) {
2702             ShortVector zero = vsp.zero();
2703             ShortVector v = zero.fromByteArray0(a, offset);
2704             return zero.blend(v.maybeSwap(bo), m);
2705         }
2706 
2707         // FIXME: optimize
2708         checkMaskFromIndexSize(offset, vsp, m, 2, a.length);
2709         ByteBuffer wb = wrapper(a, bo);
2710         return vsp.ldOp(wb, offset, (AbstractMask<Short>)m,
2711                    (wb_, o, i)  -> wb_.getShort(o + i * 2));
2712     }
2713 
2714     /**
2715      * Loads a vector from an array of type {@code short[]}
2716      * starting at an offset.
2717      * For each vector lane, where {@code N} is the vector lane index, the
2718      * array element at index {@code offset + N} is placed into the
2719      * resulting vector at lane index {@code N}.
2720      *
2721      * @param species species of desired vector
2722      * @param a the array
2723      * @param offset the offset into the array
2724      * @return the vector loaded from an array
2725      * @throws IndexOutOfBoundsException
2726      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
2727      *         for any lane {@code N} in the vector
2728      */
2729     @ForceInline
2730     public static
2731     ShortVector fromArray(VectorSpecies<Short> species,
2732                                    short[] a, int offset) {
2733         offset = checkFromIndexSize(offset, species.length(), a.length);
2734         ShortSpecies vsp = (ShortSpecies) species;
2735         return vsp.dummyVector().fromArray0(a, offset);
2736     }
2737 
2738     /**
2739      * Loads a vector from an array of type {@code short[]}
2740      * starting at an offset and using a mask.
2741      * Lanes where the mask is unset are filled with the default
2742      * value of {@code short} (zero).
2743      * For each vector lane, where {@code N} is the vector lane index,
2744      * if the mask lane at index {@code N} is set then the array element at
2745      * index {@code offset + N} is placed into the resulting vector at lane index
2746      * {@code N}, otherwise the default element value is placed into the
2747      * resulting vector at lane index {@code N}.
2748      *
2749      * @param species species of desired vector
2750      * @param a the array
2751      * @param offset the offset into the array
2752      * @param m the mask controlling lane selection
2753      * @return the vector loaded from an array
2754      * @throws IndexOutOfBoundsException
2755      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
2756      *         for any lane {@code N} in the vector
2757      *         where the mask is set
2758      */
2759     @ForceInline
2760     public static
2761     ShortVector fromArray(VectorSpecies<Short> species,
2762                                    short[] a, int offset,
2763                                    VectorMask<Short> m) {
2764         ShortSpecies vsp = (ShortSpecies) species;
2765         if (offset >= 0 && offset <= (a.length - species.length())) {
2766             ShortVector zero = vsp.zero();
2767             return zero.blend(zero.fromArray0(a, offset), m);
2768         }
2769 
2770         // FIXME: optimize
2771         checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
2772         return vsp.vOp(m, i -> a[offset + i]);
2773     }
2774 
2775     /**
2776      * Gathers a new vector composed of elements from an array of type
2777      * {@code short[]},
2778      * using indexes obtained by adding a fixed {@code offset} to a
2779      * series of secondary offsets from an <em>index map</em>.
2780      * The index map is a contiguous sequence of {@code VLENGTH}
2781      * elements in a second array of {@code int}s, starting at a given
2782      * {@code mapOffset}.
2783      * <p>
2784      * For each vector lane, where {@code N} is the vector lane index,
2785      * the lane is loaded from the array
2786      * element {@code a[f(N)]}, where {@code f(N)} is the
2787      * index mapping expression
2788      * {@code offset + indexMap[mapOffset + N]]}.
2789      *
2790      * @param species species of desired vector
2791      * @param a the array
2792      * @param offset the offset into the array, may be negative if relative
2793      * indexes in the index map compensate to produce a value within the
2794      * array bounds
2795      * @param indexMap the index map
2796      * @param mapOffset the offset into the index map
2797      * @return the vector loaded from the indexed elements of the array
2798      * @throws IndexOutOfBoundsException
2799      *         if {@code mapOffset+N < 0}
2800      *         or if {@code mapOffset+N >= indexMap.length},
2801      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
2802      *         is an invalid index into {@code a},
2803      *         for any lane {@code N} in the vector
2804      * @see ShortVector#toIntArray()
2805      */
2806     @ForceInline
2807     public static
2808     ShortVector fromArray(VectorSpecies<Short> species,
2809                                    short[] a, int offset,
2810                                    int[] indexMap, int mapOffset) {
2811         ShortSpecies vsp = (ShortSpecies) species;
2812         return vsp.vOp(n -> a[offset + indexMap[mapOffset + n]]);
2813     }
2814 
2815     /**
2816      * Gathers a new vector composed of elements from an array of type
2817      * {@code short[]},
2818      * under the control of a mask, and
2819      * using indexes obtained by adding a fixed {@code offset} to a
2820      * series of secondary offsets from an <em>index map</em>.
2821      * The index map is a contiguous sequence of {@code VLENGTH}
2822      * elements in a second array of {@code int}s, starting at a given
2823      * {@code mapOffset}.
2824      * <p>
2825      * For each vector lane, where {@code N} is the vector lane index,
2826      * if the lane is set in the mask,
2827      * the lane is loaded from the array
2828      * element {@code a[f(N)]}, where {@code f(N)} is the
2829      * index mapping expression
2830      * {@code offset + indexMap[mapOffset + N]]}.
2831      * Unset lanes in the resulting vector are set to zero.
2832      *
2833      * @param species species of desired vector
2834      * @param a the array
2835      * @param offset the offset into the array, may be negative if relative
2836      * indexes in the index map compensate to produce a value within the
2837      * array bounds
2838      * @param indexMap the index map
2839      * @param mapOffset the offset into the index map
2840      * @param m the mask controlling lane selection
2841      * @return the vector loaded from the indexed elements of the array
2842      * @throws IndexOutOfBoundsException
2843      *         if {@code mapOffset+N < 0}
2844      *         or if {@code mapOffset+N >= indexMap.length},
2845      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
2846      *         is an invalid index into {@code a},
2847      *         for any lane {@code N} in the vector
2848      *         where the mask is set
2849      * @see ShortVector#toIntArray()
2850      */
2851     @ForceInline
2852     public static
2853     ShortVector fromArray(VectorSpecies<Short> species,
2854                                    short[] a, int offset,
2855                                    int[] indexMap, int mapOffset,
2856                                    VectorMask<Short> m) {
2857         ShortSpecies vsp = (ShortSpecies) species;
2858         return vsp.vOp(m, n -> a[offset + indexMap[mapOffset + n]]);
2859     }
2860 
2861     /**
2862      * Loads a vector from an array of type {@code char[]}
2863      * starting at an offset.
2864      * For each vector lane, where {@code N} is the vector lane index, the
2865      * array element at index {@code offset + N}
2866      * is first cast to a {@code short} value and then
2867      * placed into the resulting vector at lane index {@code N}.
2868      *
2869      * @param species species of desired vector
2870      * @param a the array
2871      * @param offset the offset into the array
2872      * @return the vector loaded from an array
2873      * @throws IndexOutOfBoundsException
2874      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
2875      *         for any lane {@code N} in the vector
2876      */
2877     @ForceInline
2878     public static
2879     ShortVector fromCharArray(VectorSpecies<Short> species,
2880                                        char[] a, int offset) {
2881         offset = checkFromIndexSize(offset, species.length(), a.length);
2882         ShortSpecies vsp = (ShortSpecies) species;
2883         return vsp.dummyVector().fromCharArray0(a, offset);
2884     }
2885 
2886     /**
2887      * Loads a vector from an array of type {@code char[]}
2888      * starting at an offset and using a mask.
2889      * Lanes where the mask is unset are filled with the default
2890      * value of {@code short} (zero).
2891      * For each vector lane, where {@code N} is the vector lane index,
2892      * if the mask lane at index {@code N} is set then the array element at
2893      * index {@code offset + N}
2894      * is first cast to a {@code short} value and then
2895      * placed into the resulting vector at lane index
2896      * {@code N}, otherwise the default element value is placed into the
2897      * resulting vector at lane index {@code N}.
2898      *
2899      * @param species species of desired vector
2900      * @param a the array
2901      * @param offset the offset into the array
2902      * @param m the mask controlling lane selection
2903      * @return the vector loaded from an array
2904      * @throws IndexOutOfBoundsException
2905      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
2906      *         for any lane {@code N} in the vector
2907      *         where the mask is set
2908      */
2909     @ForceInline
2910     public static
2911     ShortVector fromCharArray(VectorSpecies<Short> species,
2912                                        char[] a, int offset,
2913                                        VectorMask<Short> m) {
2914         ShortSpecies vsp = (ShortSpecies) species;
2915         if (offset >= 0 && offset <= (a.length - species.length())) {
2916             ShortVector zero = vsp.zero();
2917             return zero.blend(zero.fromCharArray0(a, offset), m);
2918         }
2919 
2920         // FIXME: optimize
2921         checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
2922         return vsp.vOp(m, i -> (short) a[offset + i]);
2923     }
2924 
2925     /**
2926      * Gathers a new vector composed of elements from an array of type
2927      * {@code char[]},
2928      * using indexes obtained by adding a fixed {@code offset} to a
2929      * series of secondary offsets from an <em>index map</em>.
2930      * The index map is a contiguous sequence of {@code VLENGTH}
2931      * elements in a second array of {@code int}s, starting at a given
2932      * {@code mapOffset}.
2933      * <p>
2934      * For each vector lane, where {@code N} is the vector lane index,
2935      * the lane is loaded from the expression
2936      * {@code (short) a[f(N)]}, where {@code f(N)} is the
2937      * index mapping expression
2938      * {@code offset + indexMap[mapOffset + N]]}.
2939      *
2940      * @param species species of desired vector
2941      * @param a the array
2942      * @param offset the offset into the array, may be negative if relative
2943      * indexes in the index map compensate to produce a value within the
2944      * array bounds
2945      * @param indexMap the index map
2946      * @param mapOffset the offset into the index map
2947      * @return the vector loaded from the indexed elements of the array
2948      * @throws IndexOutOfBoundsException
2949      *         if {@code mapOffset+N < 0}
2950      *         or if {@code mapOffset+N >= indexMap.length},
2951      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
2952      *         is an invalid index into {@code a},
2953      *         for any lane {@code N} in the vector
2954      * @see ShortVector#toIntArray()
2955      */
2956     @ForceInline
2957     public static
2958     ShortVector fromCharArray(VectorSpecies<Short> species,
2959                                        char[] a, int offset,
2960                                        int[] indexMap, int mapOffset) {
2961         // FIXME: optimize
2962         ShortSpecies vsp = (ShortSpecies) species;
2963         return vsp.vOp(n -> (short) a[offset + indexMap[mapOffset + n]]);
2964     }
2965 
2966     /**
2967      * Gathers a new vector composed of elements from an array of type
2968      * {@code char[]},
2969      * under the control of a mask, and
2970      * using indexes obtained by adding a fixed {@code offset} to a
2971      * series of secondary offsets from an <em>index map</em>.
2972      * The index map is a contiguous sequence of {@code VLENGTH}
2973      * elements in a second array of {@code int}s, starting at a given
2974      * {@code mapOffset}.
2975      * <p>
2976      * For each vector lane, where {@code N} is the vector lane index,
2977      * if the lane is set in the mask,
2978      * the lane is loaded from the expression
2979      * {@code (short) a[f(N)]}, where {@code f(N)} is the
2980      * index mapping expression
2981      * {@code offset + indexMap[mapOffset + N]]}.
2982      * Unset lanes in the resulting vector are set to zero.
2983      *
2984      * @param species species of desired vector
2985      * @param a the array
2986      * @param offset the offset into the array, may be negative if relative
2987      * indexes in the index map compensate to produce a value within the
2988      * array bounds
2989      * @param indexMap the index map
2990      * @param mapOffset the offset into the index map
2991      * @param m the mask controlling lane selection
2992      * @return the vector loaded from the indexed elements of the array
2993      * @throws IndexOutOfBoundsException
2994      *         if {@code mapOffset+N < 0}
2995      *         or if {@code mapOffset+N >= indexMap.length},
2996      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
2997      *         is an invalid index into {@code a},
2998      *         for any lane {@code N} in the vector
2999      *         where the mask is set
3000      * @see ShortVector#toIntArray()
3001      */
3002     @ForceInline
3003     public static
3004     ShortVector fromCharArray(VectorSpecies<Short> species,
3005                                        char[] a, int offset,
3006                                        int[] indexMap, int mapOffset,
3007                                        VectorMask<Short> m) {
3008         // FIXME: optimize
3009         ShortSpecies vsp = (ShortSpecies) species;
3010         return vsp.vOp(m, n -> (short) a[offset + indexMap[mapOffset + n]]);
3011     }
3012 
3013 
3014     /**
3015      * Loads a vector from a {@linkplain ByteBuffer byte buffer}
3016      * starting at an offset into the byte buffer.
3017      * Bytes are composed into primitive lane elements according
3018      * to the specified byte order.
3019      * The vector is arranged into lanes according to
3020      * <a href="Vector.html#lane-order">memory ordering</a>.
3021      * <p>
3022      * This method behaves as if it returns the result of calling
3023      * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
3024      * fromByteBuffer()} as follows:
3025      * <pre>{@code
3026      * var m = species.maskAll(true);
3027      * return fromByteBuffer(species, bb, offset, bo, m);
3028      * }</pre>
3029      *
3030      * @param species species of desired vector
3031      * @param bb the byte buffer
3032      * @param offset the offset into the byte buffer
3033      * @param bo the intended byte order
3034      * @return a vector loaded from a byte buffer
3035      * @throws IndexOutOfBoundsException
3036      *         if {@code offset+N*2 < 0}
3037      *         or {@code offset+N*2 >= bb.limit()}
3038      *         for any lane {@code N} in the vector
3039      */
3040     @ForceInline
3041     public static
3042     ShortVector fromByteBuffer(VectorSpecies<Short> species,
3043                                         ByteBuffer bb, int offset,
3044                                         ByteOrder bo) {
3045         offset = checkFromIndexSize(offset, species.vectorByteSize(), bb.limit());
3046         ShortSpecies vsp = (ShortSpecies) species;
3047         return vsp.dummyVector().fromByteBuffer0(bb, offset).maybeSwap(bo);
3048     }
3049 
3050     /**
3051      * Loads a vector from a {@linkplain ByteBuffer byte buffer}
3052      * starting at an offset into the byte buffer
3053      * and using a mask.
3054      * Lanes where the mask is unset are filled with the default
3055      * value of {@code short} (zero).
3056      * Bytes are composed into primitive lane elements according
3057      * to the specified byte order.
3058      * The vector is arranged into lanes according to
3059      * <a href="Vector.html#lane-order">memory ordering</a>.
3060      * <p>
3061      * The following pseudocode illustrates the behavior:
3062      * <pre>{@code
3063      * ShortBuffer eb = bb.duplicate()
3064      *     .position(offset)
3065      *     .order(bo).asShortBuffer();
3066      * short[] ar = new short[species.length()];
3067      * for (int n = 0; n < ar.length; n++) {
3068      *     if (m.laneIsSet(n)) {
3069      *         ar[n] = eb.get(n);
3070      *     }
3071      * }
3072      * ShortVector r = ShortVector.fromArray(species, ar, 0);
3073      * }</pre>
3074      * @implNote
3075      * This operation is likely to be more efficient if
3076      * the specified byte order is the same as
3077      * {@linkplain ByteOrder#nativeOrder()
3078      * the platform native order},
3079      * since this method will not need to reorder
3080      * the bytes of lane values.
3081      *
3082      * @param species species of desired vector
3083      * @param bb the byte buffer
3084      * @param offset the offset into the byte buffer
3085      * @param bo the intended byte order
3086      * @param m the mask controlling lane selection
3087      * @return a vector loaded from a byte buffer
3088      * @throws IndexOutOfBoundsException
3089      *         if {@code offset+N*2 < 0}
3090      *         or {@code offset+N*2 >= bb.limit()}
3091      *         for any lane {@code N} in the vector
3092      *         where the mask is set
3093      */
3094     @ForceInline
3095     public static
3096     ShortVector fromByteBuffer(VectorSpecies<Short> species,
3097                                         ByteBuffer bb, int offset,
3098                                         ByteOrder bo,
3099                                         VectorMask<Short> m) {
3100         ShortSpecies vsp = (ShortSpecies) species;
3101         if (offset >= 0 && offset <= (bb.limit() - species.vectorByteSize())) {
3102             ShortVector zero = vsp.zero();
3103             ShortVector v = zero.fromByteBuffer0(bb, offset);
3104             return zero.blend(v.maybeSwap(bo), m);
3105         }
3106 
3107         // FIXME: optimize
3108         checkMaskFromIndexSize(offset, vsp, m, 2, bb.limit());
3109         ByteBuffer wb = wrapper(bb, bo);
3110         return vsp.ldOp(wb, offset, (AbstractMask<Short>)m,
3111                    (wb_, o, i)  -> wb_.getShort(o + i * 2));
3112     }
3113 
3114     // Memory store operations
3115 
3116     /**
3117      * Stores this vector into an array of type {@code short[]}
3118      * starting at an offset.
3119      * <p>
3120      * For each vector lane, where {@code N} is the vector lane index,
3121      * the lane element at index {@code N} is stored into the array
3122      * element {@code a[offset+N]}.
3123      *
3124      * @param a the array, of type {@code short[]}
3125      * @param offset the offset into the array
3126      * @throws IndexOutOfBoundsException
3127      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
3128      *         for any lane {@code N} in the vector
3129      */
3130     @ForceInline
3131     public final
3132     void intoArray(short[] a, int offset) {
3133         offset = checkFromIndexSize(offset, length(), a.length);
3134         ShortSpecies vsp = vspecies();
3135         VectorSupport.store(
3136             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3137             a, arrayAddress(a, offset),
3138             this,
3139             a, offset,
3140             (arr, off, v)
3141             -> v.stOp(arr, off,
3142                       (arr_, off_, i, e) -> arr_[off_ + i] = e));
3143     }
3144 
3145     /**
3146      * Stores this vector into an array of type {@code short[]}
3147      * starting at offset and using a mask.
3148      * <p>
3149      * For each vector lane, where {@code N} is the vector lane index,
3150      * the lane element at index {@code N} is stored into the array
3151      * element {@code a[offset+N]}.
3152      * If the mask lane at {@code N} is unset then the corresponding
3153      * array element {@code a[offset+N]} is left unchanged.
3154      * <p>
3155      * Array range checking is done for lanes where the mask is set.
3156      * Lanes where the mask is unset are not stored and do not need
3157      * to correspond to legitimate elements of {@code a}.
3158      * That is, unset lanes may correspond to array indexes less than
3159      * zero or beyond the end of the array.
3160      *
3161      * @param a the array, of type {@code short[]}
3162      * @param offset the offset into the array
3163      * @param m the mask controlling lane storage
3164      * @throws IndexOutOfBoundsException
3165      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
3166      *         for any lane {@code N} in the vector
3167      *         where the mask is set
3168      */
3169     @ForceInline
3170     public final
3171     void intoArray(short[] a, int offset,
3172                    VectorMask<Short> m) {
3173         if (m.allTrue()) {
3174             intoArray(a, offset);
3175         } else {
3176             // FIXME: optimize
3177             ShortSpecies vsp = vspecies();
3178             checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
3179             stOp(a, offset, m, (arr, off, i, v) -> arr[off+i] = v);
3180         }
3181     }
3182 
3183     /**
3184      * Scatters this vector into an array of type {@code short[]}
3185      * using indexes obtained by adding a fixed {@code offset} to a
3186      * series of secondary offsets from an <em>index map</em>.
3187      * The index map is a contiguous sequence of {@code VLENGTH}
3188      * elements in a second array of {@code int}s, starting at a given
3189      * {@code mapOffset}.
3190      * <p>
3191      * For each vector lane, where {@code N} is the vector lane index,
3192      * the lane element at index {@code N} is stored into the array
3193      * element {@code a[f(N)]}, where {@code f(N)} is the
3194      * index mapping expression
3195      * {@code offset + indexMap[mapOffset + N]]}.
3196      *
3197      * @param a the array
3198      * @param offset an offset to combine with the index map offsets
3199      * @param indexMap the index map
3200      * @param mapOffset the offset into the index map
3201      * @throws IndexOutOfBoundsException
3202      *         if {@code mapOffset+N < 0}
3203      *         or if {@code mapOffset+N >= indexMap.length},
3204      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
3205      *         is an invalid index into {@code a},
3206      *         for any lane {@code N} in the vector
3207      * @see ShortVector#toIntArray()
3208      */
3209     @ForceInline
3210     public final
3211     void intoArray(short[] a, int offset,
3212                    int[] indexMap, int mapOffset) {
3213         stOp(a, offset,
3214              (arr, off, i, e) -> {
3215                  int j = indexMap[mapOffset + i];
3216                  arr[off + j] = e;
3217              });
3218     }
3219 
3220     /**
3221      * Scatters this vector into an array of type {@code short[]},
3222      * under the control of a mask, and
3223      * using indexes obtained by adding a fixed {@code offset} to a
3224      * series of secondary offsets from an <em>index map</em>.
3225      * The index map is a contiguous sequence of {@code VLENGTH}
3226      * elements in a second array of {@code int}s, starting at a given
3227      * {@code mapOffset}.
3228      * <p>
3229      * For each vector lane, where {@code N} is the vector lane index,
3230      * if the mask lane at index {@code N} is set then
3231      * the lane element at index {@code N} is stored into the array
3232      * element {@code a[f(N)]}, where {@code f(N)} is the
3233      * index mapping expression
3234      * {@code offset + indexMap[mapOffset + N]]}.
3235      *
3236      * @param a the array
3237      * @param offset an offset to combine with the index map offsets
3238      * @param indexMap the index map
3239      * @param mapOffset the offset into the index map
3240      * @param m the mask
3241      * @throws IndexOutOfBoundsException
3242      *         if {@code mapOffset+N < 0}
3243      *         or if {@code mapOffset+N >= indexMap.length},
3244      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
3245      *         is an invalid index into {@code a},
3246      *         for any lane {@code N} in the vector
3247      *         where the mask is set
3248      * @see ShortVector#toIntArray()
3249      */
3250     @ForceInline
3251     public final
3252     void intoArray(short[] a, int offset,
3253                    int[] indexMap, int mapOffset,
3254                    VectorMask<Short> m) {
3255         stOp(a, offset, m,
3256              (arr, off, i, e) -> {
3257                  int j = indexMap[mapOffset + i];
3258                  arr[off + j] = e;
3259              });
3260     }
3261 
3262     /**
3263      * Stores this vector into an array of type {@code char[]}
3264      * starting at an offset.
3265      * <p>
3266      * For each vector lane, where {@code N} is the vector lane index,
3267      * the lane element at index {@code N}
3268      * is first cast to a {@code char} value and then
3269      * stored into the array element {@code a[offset+N]}.
3270      *
3271      * @param a the array, of type {@code char[]}
3272      * @param offset the offset into the array
3273      * @throws IndexOutOfBoundsException
3274      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
3275      *         for any lane {@code N} in the vector
3276      */
3277     @ForceInline
3278     public final
3279     void intoCharArray(char[] a, int offset) {
3280         offset = checkFromIndexSize(offset, length(), a.length);
3281         ShortSpecies vsp = vspecies();
3282         VectorSupport.store(
3283             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3284             a, charArrayAddress(a, offset),
3285             this,
3286             a, offset,
3287             (arr, off, v)
3288             -> v.stOp(arr, off,
3289                       (arr_, off_, i, e) -> arr_[off_ + i] = (char) e));
3290     }
3291 
3292     /**
3293      * Stores this vector into an array of type {@code char[]}
3294      * starting at offset and using a mask.
3295      * <p>
3296      * For each vector lane, where {@code N} is the vector lane index,
3297      * the lane element at index {@code N}
3298      * is first cast to a {@code char} value and then
3299      * stored into the array element {@code a[offset+N]}.
3300      * If the mask lane at {@code N} is unset then the corresponding
3301      * array element {@code a[offset+N]} is left unchanged.
3302      * <p>
3303      * Array range checking is done for lanes where the mask is set.
3304      * Lanes where the mask is unset are not stored and do not need
3305      * to correspond to legitimate elements of {@code a}.
3306      * That is, unset lanes may correspond to array indexes less than
3307      * zero or beyond the end of the array.
3308      *
3309      * @param a the array, of type {@code char[]}
3310      * @param offset the offset into the array
3311      * @param m the mask controlling lane storage
3312      * @throws IndexOutOfBoundsException
3313      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
3314      *         for any lane {@code N} in the vector
3315      *         where the mask is set
3316      */
3317     @ForceInline
3318     public final
3319     void intoCharArray(char[] a, int offset,
3320                        VectorMask<Short> m) {
3321         if (m.allTrue()) {
3322             intoCharArray(a, offset);
3323         } else {
3324             // FIXME: optimize
3325             ShortSpecies vsp = vspecies();
3326             checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
3327             stOp(a, offset, m, (arr, off, i, v) -> arr[off+i] = (char) v);
3328         }
3329     }
3330 
3331     /**
3332      * Scatters this vector into an array of type {@code char[]}
3333      * using indexes obtained by adding a fixed {@code offset} to a
3334      * series of secondary offsets from an <em>index map</em>.
3335      * The index map is a contiguous sequence of {@code VLENGTH}
3336      * elements in a second array of {@code int}s, starting at a given
3337      * {@code mapOffset}.
3338      * <p>
3339      * For each vector lane, where {@code N} is the vector lane index,
3340      * the lane element at index {@code N}
3341      * is first cast to a {@code char} value and then
3342      * stored into the array
3343      * element {@code a[f(N)]}, where {@code f(N)} is the
3344      * index mapping expression
3345      * {@code offset + indexMap[mapOffset + N]]}.
3346      *
3347      * @param a the array
3348      * @param offset an offset to combine with the index map offsets
3349      * @param indexMap the index map
3350      * @param mapOffset the offset into the index map
3351      * @throws IndexOutOfBoundsException
3352      *         if {@code mapOffset+N < 0}
3353      *         or if {@code mapOffset+N >= indexMap.length},
3354      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
3355      *         is an invalid index into {@code a},
3356      *         for any lane {@code N} in the vector
3357      * @see ShortVector#toIntArray()
3358      */
3359     @ForceInline
3360     public final
3361     void intoCharArray(char[] a, int offset,
3362                        int[] indexMap, int mapOffset) {
3363         // FIXME: optimize
3364         stOp(a, offset,
3365              (arr, off, i, e) -> {
3366                  int j = indexMap[mapOffset + i];
3367                  arr[off + j] = (char) e;
3368              });
3369     }
3370 
3371     /**
3372      * Scatters this vector into an array of type {@code char[]},
3373      * under the control of a mask, and
3374      * using indexes obtained by adding a fixed {@code offset} to a
3375      * series of secondary offsets from an <em>index map</em>.
3376      * The index map is a contiguous sequence of {@code VLENGTH}
3377      * elements in a second array of {@code int}s, starting at a given
3378      * {@code mapOffset}.
3379      * <p>
3380      * For each vector lane, where {@code N} is the vector lane index,
3381      * if the mask lane at index {@code N} is set then
3382      * the lane element at index {@code N}
3383      * is first cast to a {@code char} value and then
3384      * stored into the array
3385      * element {@code a[f(N)]}, where {@code f(N)} is the
3386      * index mapping expression
3387      * {@code offset + indexMap[mapOffset + N]]}.
3388      *
3389      * @param a the array
3390      * @param offset an offset to combine with the index map offsets
3391      * @param indexMap the index map
3392      * @param mapOffset the offset into the index map
3393      * @param m the mask
3394      * @throws IndexOutOfBoundsException
3395      *         if {@code mapOffset+N < 0}
3396      *         or if {@code mapOffset+N >= indexMap.length},
3397      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
3398      *         is an invalid index into {@code a},
3399      *         for any lane {@code N} in the vector
3400      *         where the mask is set
3401      * @see ShortVector#toIntArray()
3402      */
3403     @ForceInline
3404     public final
3405     void intoCharArray(char[] a, int offset,
3406                        int[] indexMap, int mapOffset,
3407                        VectorMask<Short> m) {
3408         // FIXME: optimize
3409         stOp(a, offset, m,
3410              (arr, off, i, e) -> {
3411                  int j = indexMap[mapOffset + i];
3412                  arr[off + j] = (char) e;
3413              });
3414     }
3415 
3416 
3417     /**
3418      * {@inheritDoc} <!--workaround-->
3419      */
3420     @Override
3421     @ForceInline
3422     public final
3423     void intoByteArray(byte[] a, int offset,
3424                        ByteOrder bo) {
3425         offset = checkFromIndexSize(offset, byteSize(), a.length);
3426         maybeSwap(bo).intoByteArray0(a, offset);
3427     }
3428 
3429     /**
3430      * {@inheritDoc} <!--workaround-->
3431      */
3432     @Override
3433     @ForceInline
3434     public final
3435     void intoByteArray(byte[] a, int offset,
3436                        ByteOrder bo,
3437                        VectorMask<Short> m) {
3438         if (m.allTrue()) {
3439             intoByteArray(a, offset, bo);
3440         } else {
3441             // FIXME: optimize
3442             ShortSpecies vsp = vspecies();
3443             checkMaskFromIndexSize(offset, vsp, m, 2, a.length);
3444             ByteBuffer wb = wrapper(a, bo);
3445             this.stOp(wb, offset, m,
3446                     (wb_, o, i, e) -> wb_.putShort(o + i * 2, e));
3447         }
3448     }
3449 
3450     /**
3451      * {@inheritDoc} <!--workaround-->
3452      */
3453     @Override
3454     @ForceInline
3455     public final
3456     void intoByteBuffer(ByteBuffer bb, int offset,
3457                         ByteOrder bo) {
3458         if (bb.isReadOnly()) {
3459             throw new ReadOnlyBufferException();
3460         }
3461         offset = checkFromIndexSize(offset, byteSize(), bb.limit());
3462         maybeSwap(bo).intoByteBuffer0(bb, offset);
3463     }
3464 
3465     /**
3466      * {@inheritDoc} <!--workaround-->
3467      */
3468     @Override
3469     @ForceInline
3470     public final
3471     void intoByteBuffer(ByteBuffer bb, int offset,
3472                         ByteOrder bo,
3473                         VectorMask<Short> m) {
3474         if (m.allTrue()) {
3475             intoByteBuffer(bb, offset, bo);
3476         } else {
3477             // FIXME: optimize
3478             if (bb.isReadOnly()) {
3479                 throw new ReadOnlyBufferException();
3480             }
3481             ShortSpecies vsp = vspecies();
3482             checkMaskFromIndexSize(offset, vsp, m, 2, bb.limit());
3483             ByteBuffer wb = wrapper(bb, bo);
3484             this.stOp(wb, offset, m,
3485                     (wb_, o, i, e) -> wb_.putShort(o + i * 2, e));
3486         }
3487     }
3488 
3489     // ================================================
3490 
3491     // Low-level memory operations.
3492     //
3493     // Note that all of these operations *must* inline into a context
3494     // where the exact species of the involved vector is a
3495     // compile-time constant.  Otherwise, the intrinsic generation
3496     // will fail and performance will suffer.
3497     //
3498     // In many cases this is achieved by re-deriving a version of the
3499     // method in each concrete subclass (per species).  The re-derived
3500     // method simply calls one of these generic methods, with exact
3501     // parameters for the controlling metadata, which is either a
3502     // typed vector or constant species instance.
3503 
3504     // Unchecked loading operations in native byte order.
3505     // Caller is responsible for applying index checks, masking, and
3506     // byte swapping.
3507 
    /*package-private*/
    // Unchecked load from a short[] in native order; callers must have
    // already applied index checks.
    abstract
    ShortVector fromArray0(short[] a, int offset);
    @ForceInline
    final
    ShortVector fromArray0Template(short[] a, int offset) {
        ShortSpecies vsp = vspecies();
        // Intrinsified load; the trailing lambda is the scalar fallback.
        return VectorSupport.load(
            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
            a, arrayAddress(a, offset),
            a, offset, vsp,
            (arr, off, s) -> s.ldOp(arr, off,
                                    (arr_, off_, i) -> arr_[off_ + i]));
    }
3522 
    /*package-private*/
    // Unchecked load from a char[]; each char is reinterpreted as a short.
    abstract
    ShortVector fromCharArray0(char[] a, int offset);
    @ForceInline
    final
    ShortVector fromCharArray0Template(char[] a, int offset) {
        ShortSpecies vsp = vspecies();
        // Intrinsified load; the fallback casts each char lane to short.
        return VectorSupport.load(
            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
            a, charArrayAddress(a, offset),
            a, offset, vsp,
            (arr, off, s) -> s.ldOp(arr, off,
                                    (arr_, off_, i) -> (short) arr_[off_ + i]));
    }
3537 
3538 
    @Override
    // Unchecked load from a byte[] in native byte order; caller is
    // responsible for index checks and any byte swapping.
    abstract
    ShortVector fromByteArray0(byte[] a, int offset);
    @ForceInline
    final
    ShortVector fromByteArray0Template(byte[] a, int offset) {
        ShortSpecies vsp = vspecies();
        return VectorSupport.load(
            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
            a, byteArrayAddress(a, offset),
            a, offset, vsp,
            (arr, off, s) -> {
                // Fallback: view the byte[] through a native-order buffer
                // and read each lane as 2 bytes.
                ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
                return s.ldOp(wb, off,
                        (wb_, o, i) -> wb_.getShort(o + i * 2));
            });
    }
3556 
    // Unchecked load from a ByteBuffer in native byte order; caller is
    // responsible for index checks and any byte swapping.
    abstract
    ShortVector fromByteBuffer0(ByteBuffer bb, int offset);
    @ForceInline
    final
    ShortVector fromByteBuffer0Template(ByteBuffer bb, int offset) {
        ShortSpecies vsp = vspecies();
        // ScopedMemoryAccess keeps the buffer's backing session alive
        // for the duration of the access.
        return ScopedMemoryAccess.loadFromByteBuffer(
                vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
                bb, offset, vsp,
                (buf, off, s) -> {
                    // Fallback: scalar reads of 2 bytes per lane.
                    ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
                    return s.ldOp(wb, off,
                            (wb_, o, i) -> wb_.getShort(o + i * 2));
                });
    }
3572 
3573     // Unchecked storing operations in native byte order.
3574     // Caller is responsible for applying index checks, masking, and
3575     // byte swapping.
3576 
    // Unchecked store into a short[]; callers must have already applied
    // index checks.
    abstract
    void intoArray0(short[] a, int offset);
    @ForceInline
    final
    void intoArray0Template(short[] a, int offset) {
        ShortSpecies vsp = vspecies();
        // Intrinsified store; the trailing lambda is the scalar fallback.
        VectorSupport.store(
            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
            a, arrayAddress(a, offset),
            this, a, offset,
            (arr, off, v)
            -> v.stOp(arr, off,
                      (arr_, off_, i, e) -> arr_[off_+i] = e));
    }
3591 
3592     abstract
3593     void intoByteArray0(byte[] a, int offset);
3594     @ForceInline
3595     final
3596     void intoByteArray0Template(byte[] a, int offset) {
3597         ShortSpecies vsp = vspecies();
3598         VectorSupport.store(
3599             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3600             a, byteArrayAddress(a, offset),
3601             this, a, offset,
3602             (arr, off, v) -> {
3603                 ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
3604                 v.stOp(wb, off,
3605                         (tb_, o, i, e) -> tb_.putShort(o + i * 2, e));
3606             });
3607     }
3608 
    // Unchecked store into a ByteBuffer in native byte order; caller is
    // responsible for index checks, the read-only check, and byte swapping.
    @ForceInline
    final
    void intoByteBuffer0(ByteBuffer bb, int offset) {
        ShortSpecies vsp = vspecies();
        // ScopedMemoryAccess keeps the buffer's backing session alive
        // for the duration of the access.
        ScopedMemoryAccess.storeIntoByteBuffer(
                vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
                this, bb, offset,
                (buf, off, v) -> {
                    // Fallback: scalar writes of 2 bytes per lane.
                    ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
                    v.stOp(wb, off,
                            (wb_, o, i, e) -> wb_.putShort(o + i * 2, e));
                });
    }
3622 
3623     // End of low-level memory operations.
3624 
3625     private static
3626     void checkMaskFromIndexSize(int offset,
3627                                 ShortSpecies vsp,
3628                                 VectorMask<Short> m,
3629                                 int scale,
3630                                 int limit) {
3631         ((AbstractMask<Short>)m)
3632             .checkIndexByLane(offset, limit, vsp.iota(), scale);
3633     }
3634 
3635     @ForceInline
3636     private void conditionalStoreNYI(int offset,
3637                                      ShortSpecies vsp,
3638                                      VectorMask<Short> m,
3639                                      int scale,
3640                                      int limit) {
3641         if (offset < 0 || offset + vsp.laneCount() * scale > limit) {
3642             String msg =
3643                 String.format("unimplemented: store @%d in [0..%d), %s in %s",
3644                               offset, limit, m, vsp);
3645             throw new AssertionError(msg);
3646         }
3647     }
3648 
3649     /*package-private*/
3650     @Override
3651     @ForceInline
3652     final
3653     ShortVector maybeSwap(ByteOrder bo) {
3654         if (bo != NATIVE_ENDIAN) {
3655             return this.reinterpretAsBytes()
3656                 .rearrange(swapBytesShuffle())
3657                 .reinterpretAsShorts();
3658         }
3659         return this;
3660     }
3661 
    // log2 of the short[] element size (for index -> byte-offset scaling).
    static final int ARRAY_SHIFT =
        31 - Integer.numberOfLeadingZeros(Unsafe.ARRAY_SHORT_INDEX_SCALE);
    // Byte offset of element 0 within a short[] object.
    static final long ARRAY_BASE =
        Unsafe.ARRAY_SHORT_BASE_OFFSET;

    // Converts a short[] index into the raw byte offset used by Unsafe.
    @ForceInline
    static long arrayAddress(short[] a, int index) {
        return ARRAY_BASE + (((long)index) << ARRAY_SHIFT);
    }
3671 
    // log2 of the char[] element size (for index -> byte-offset scaling).
    static final int ARRAY_CHAR_SHIFT =
            31 - Integer.numberOfLeadingZeros(Unsafe.ARRAY_CHAR_INDEX_SCALE);
    // Byte offset of element 0 within a char[] object.
    static final long ARRAY_CHAR_BASE =
            Unsafe.ARRAY_CHAR_BASE_OFFSET;

    // Converts a char[] index into the raw byte offset used by Unsafe.
    @ForceInline
    static long charArrayAddress(char[] a, int index) {
        return ARRAY_CHAR_BASE + (((long)index) << ARRAY_CHAR_SHIFT);
    }
3681 
3682 
    // Converts a byte[] index into the raw byte offset used by Unsafe
    // (byte elements have scale 1, so no shift is needed).
    @ForceInline
    static long byteArrayAddress(byte[] a, int index) {
        return Unsafe.ARRAY_BYTE_BASE_OFFSET + index;
    }
3687 
3688     // ================================================
3689 
3690     /// Reinterpreting view methods:
3691     //   lanewise reinterpret: viewAsXVector()
3692     //   keep shape, redraw lanes: reinterpretAsEs()
3693 
3694     /**
3695      * {@inheritDoc} <!--workaround-->
3696      */
3697     @ForceInline
3698     @Override
3699     public final ByteVector reinterpretAsBytes() {
3700          // Going to ByteVector, pay close attention to byte order.
3701          assert(REGISTER_ENDIAN == ByteOrder.LITTLE_ENDIAN);
3702          return asByteVectorRaw();
3703          //return asByteVectorRaw().rearrange(swapBytesShuffle());
3704     }
3705 
3706     /**
3707      * {@inheritDoc} <!--workaround-->
3708      */
3709     @ForceInline
3710     @Override
3711     public final ShortVector viewAsIntegralLanes() {
3712         return this;
3713     }
3714 
3715     /**
3716      * {@inheritDoc} <!--workaround-->
3717      *
3718      * @implNote This method always throws
3719      * {@code UnsupportedOperationException}, because there is no floating
3720      * point type of the same size as {@code short}.  The return type
3721      * of this method is arbitrarily designated as
3722      * {@code Vector<?>}.  Future versions of this API may change the return
3723      * type if additional floating point types become available.
3724      */
3725     @ForceInline
3726     @Override
3727     public final
3728     Vector<?>
3729     viewAsFloatingLanes() {
3730         LaneType flt = LaneType.SHORT.asFloating();
3731         // asFloating() will throw UnsupportedOperationException for the unsupported type short
3732         throw new AssertionError("Cannot reach here");
3733     }
3734 
3735     // ================================================
3736 
3737     /// Object methods: toString, equals, hashCode
3738     //
3739     // Object methods are defined as if via Arrays.toString, etc.,
3740     // is applied to the array of elements.  Two equal vectors
3741     // are required to have equal species and equal lane values.
3742 
3743     /**
3744      * Returns a string representation of this vector, of the form
3745      * {@code "[0,1,2...]"}, reporting the lane values of this vector,
3746      * in lane order.
3747      *
3748      * The string is produced as if by a call to {@link
3749      * java.util.Arrays#toString(short[]) Arrays.toString()},
3750      * as appropriate to the {@code short} array returned by
3751      * {@link #toArray this.toArray()}.
3752      *
3753      * @return a string of the form {@code "[0,1,2...]"}
3754      * reporting the lane values of this vector
3755      */
3756     @Override
3757     @ForceInline
3758     public final
3759     String toString() {
3760         // now that toArray is strongly typed, we can define this
3761         return Arrays.toString(toArray());
3762     }
3763 
3764     /**
3765      * {@inheritDoc} <!--workaround-->
3766      */
3767     @Override
3768     @ForceInline
3769     public final
3770     boolean equals(Object obj) {
3771         if (obj instanceof Vector) {
3772             Vector<?> that = (Vector<?>) obj;
3773             if (this.species().equals(that.species())) {
3774                 return this.eq(that.check(this.species())).allTrue();
3775             }
3776         }
3777         return false;
3778     }
3779 
3780     /**
3781      * {@inheritDoc} <!--workaround-->
3782      */
3783     @Override
3784     @ForceInline
3785     public final
3786     int hashCode() {
3787         // now that toArray is strongly typed, we can define this
3788         return Objects.hash(species(), Arrays.hashCode(toArray()));
3789     }
3790 
3791     // ================================================
3792 
3793     // Species
3794 
3795     /**
3796      * Class representing {@link ShortVector}'s of the same {@link VectorShape VectorShape}.
3797      */
3798     /*package-private*/
3799     static final class ShortSpecies extends AbstractSpecies<Short> {
        // Private: species instances are created only by this file's
        // species machinery, one per (shape, vector class) pair.
        private ShortSpecies(VectorShape shape,
                Class<? extends ShortVector> vectorType,
                Class<? extends AbstractMask<Short>> maskType,
                Function<Object, ShortVector> vectorFactory) {
            super(shape, LaneType.of(short.class),
                  vectorType, maskType,
                  vectorFactory);
            // Sanity check: the lane type registered above must be 16 bits.
            assert(this.elementSize() == Short.SIZE);
        }
3809 
3810         // Specializing overrides:
3811 
        // The primitive lane type of this species.
        @Override
        @ForceInline
        public final Class<Short> elementType() {
            return short.class;
        }
3817 
        // The boxed counterpart of elementType(), used for generic APIs.
        @Override
        @ForceInline
        final Class<Short> genericElementType() {
            return Short.class;
        }
3823 
        // The superclass field is declared with a wildcard; the cast is safe
        // because the constructor only accepts ShortVector subclasses.
        @SuppressWarnings("unchecked")
        @Override
        @ForceInline
        public final Class<? extends ShortVector> vectorType() {
            return (Class<? extends ShortVector>) vectorType;
        }
3830 
        // Validates that e fits losslessly in a short, returning e unchanged;
        // longToElementBits throws IllegalArgumentException otherwise.
        @Override
        @ForceInline
        public final long checkValue(long e) {
            longToElementBits(e);  // only for exception
            return e;
        }
3837 
        /*package-private*/
        // Broadcasts the given lane bits into every lane of a new vector.
        @Override
        @ForceInline
        final ShortVector broadcastBits(long bits) {
            // Intrinsified broadcast; the lambda is the scalar fallback that
            // fills every lane with the same bits.
            return (ShortVector)
                VectorSupport.broadcastCoerced(
                    vectorType, short.class, laneCount,
                    bits, this,
                    (bits_, s_) -> s_.rvOp(i -> bits_));
        }
3848 
        /*package-private*/
        // Broadcasts a short value; no range check needed since e is
        // already a short.
        @ForceInline
        final ShortVector broadcast(short e) {
            return broadcastBits(toBits(e));
        }
3854 
        // Broadcasts a long value; longToElementBits throws if e does not
        // fit losslessly in a short.
        @Override
        @ForceInline
        public final ShortVector broadcast(long e) {
            return broadcastBits(longToElementBits(e));
        }
3860 
3861         /*package-private*/
3862         final @Override
3863         @ForceInline
3864         long longToElementBits(long value) {
3865             // Do the conversion, and then test it for failure.
3866             short e = (short) value;
3867             if ((long) e != value) {
3868                 throw badElementBits(value, e);
3869             }
3870             return toBits(e);
3871         }
3872 
3873         /*package-private*/
3874         @ForceInline
3875         static long toIntegralChecked(short e, boolean convertToInt) {
3876             long value = convertToInt ? (int) e : (long) e;
3877             if ((short) value != e) {
3878                 throw badArrayBits(e, convertToInt, value);
3879             }
3880             return value;
3881         }
3882 
3883         /* this non-public one is for internal conversions */
3884         @Override
3885         @ForceInline
3886         final ShortVector fromIntValues(int[] values) {
3887             VectorIntrinsics.requireLength(values.length, laneCount);
3888             short[] va = new short[laneCount()];
3889             for (int i = 0; i < va.length; i++) {
3890                 int lv = values[i];
3891                 short v = (short) lv;
3892                 va[i] = v;
3893                 if ((int)v != lv) {
3894                     throw badElementBits(lv, v);
3895                 }
3896             }
3897             return dummyVector().fromArray0(va, 0);
3898         }
3899 
3900         // Virtual constructors
3901 
        // Erased entry point: a is cast to short[]; the public fromArray
        // performs the index checking.
        @ForceInline
        @Override final
        public ShortVector fromArray(Object a, int offset) {
            // User entry point:  Be careful with inputs.
            return ShortVector
                .fromArray(this, (short[]) a, offset);
        }
3909 
        // Covariant narrowing of the superclass's cached dummy vector,
        // used as a factory receiver by the ldOp/stOp/vOp helpers below.
        @ForceInline
        @Override final
        ShortVector dummyVector() {
            return (ShortVector) super.dummyVector();
        }
3915 
3916         /*package-private*/
3917         final @Override
3918         @ForceInline
3919         ShortVector rvOp(RVOp f) {
3920             short[] res = new short[laneCount()];
3921             for (int i = 0; i < res.length; i++) {
3922                 short bits = (short) f.apply(i);
3923                 res[i] = fromBits(bits);
3924             }
3925             return dummyVector().vectorFactory(res);
3926         }
3927 
3928         ShortVector vOp(FVOp f) {
3929             short[] res = new short[laneCount()];
3930             for (int i = 0; i < res.length; i++) {
3931                 res[i] = f.apply(i);
3932             }
3933             return dummyVector().vectorFactory(res);
3934         }
3935 
3936         ShortVector vOp(VectorMask<Short> m, FVOp f) {
3937             short[] res = new short[laneCount()];
3938             boolean[] mbits = ((AbstractMask<Short>)m).getBits();
3939             for (int i = 0; i < res.length; i++) {
3940                 if (mbits[i]) {
3941                     res[i] = f.apply(i);
3942                 }
3943             }
3944             return dummyVector().vectorFactory(res);
3945         }
3946 
        /*package-private*/
        // Species-level load: delegates to the dummy vector's ldOp so the
        // per-species lane count is applied.
        @ForceInline
        <M> ShortVector ldOp(M memory, int offset,
                                      FLdOp<M> f) {
            return dummyVector().ldOp(memory, offset, f);
        }
3953 
        /*package-private*/
        // Species-level masked load: delegates to the dummy vector's
        // masked ldOp.
        @ForceInline
        <M> ShortVector ldOp(M memory, int offset,
                                      AbstractMask<Short> m,
                                      FLdOp<M> f) {
            return dummyVector().ldOp(memory, offset, m, f);
        }
3961 
        /*package-private*/
        // Species-level store: delegates to the dummy vector's stOp.
        @ForceInline
        <M> void stOp(M memory, int offset, FStOp<M> f) {
            dummyVector().stOp(memory, offset, f);
        }
3967 
        /*package-private*/
        // Species-level masked store: delegates to the dummy vector's
        // masked stOp.
        @ForceInline
        <M> void stOp(M memory, int offset,
                      AbstractMask<Short> m,
                      FStOp<M> f) {
            dummyVector().stOp(memory, offset, m, f);
        }
3975 
3976         // N.B. Make sure these constant vectors and
3977         // masks load up correctly into registers.
3978         //
3979         // Also, see if we can avoid all that switching.
3980         // Could we cache both vectors and both masks in
3981         // this species object?
3982 
        // Zero and iota vector access
        @Override
        @ForceInline
        public final ShortVector zero() {
            // Return the cached all-zero constant for this species.
            // The Max shape is tested by class first: its bit size can
            // coincide with one of the fixed sizes (which is presumably
            // why the size switch alone is not sufficient here).
            if ((Class<?>) vectorType() == ShortMaxVector.class)
                return ShortMaxVector.ZERO;
            switch (vectorBitSize()) {
                case 64: return Short64Vector.ZERO;
                case 128: return Short128Vector.ZERO;
                case 256: return Short256Vector.ZERO;
                case 512: return Short512Vector.ZERO;
            }
            // Unreachable: every supported shape is handled above.
            throw new AssertionError();
        }
3997 
        @Override
        @ForceInline
        public final ShortVector iota() {
            // Return the cached iota constant (lane i == i) for this
            // species; dispatch mirrors zero() — class check for the Max
            // shape first, then the fixed bit sizes.
            if ((Class<?>) vectorType() == ShortMaxVector.class)
                return ShortMaxVector.IOTA;
            switch (vectorBitSize()) {
                case 64: return Short64Vector.IOTA;
                case 128: return Short128Vector.IOTA;
                case 256: return Short256Vector.IOTA;
                case 512: return Short512Vector.IOTA;
            }
            // Unreachable: every supported shape is handled above.
            throw new AssertionError();
        }
4011 
        // Mask access
        @Override
        @ForceInline
        public final VectorMask<Short> maskAll(boolean bit) {
            // Return a mask with every lane set to 'bit', using the
            // shape-specific cached mask; dispatch mirrors zero()/iota().
            if ((Class<?>) vectorType() == ShortMaxVector.class)
                return ShortMaxVector.ShortMaxMask.maskAll(bit);
            switch (vectorBitSize()) {
                case 64: return Short64Vector.Short64Mask.maskAll(bit);
                case 128: return Short128Vector.Short128Mask.maskAll(bit);
                case 256: return Short256Vector.Short256Mask.maskAll(bit);
                case 512: return Short512Vector.Short512Mask.maskAll(bit);
            }
            // Unreachable: every supported shape is handled above.
            throw new AssertionError();
        }
4026     }
4027 
4028     /**
4029      * Finds a species for an element type of {@code short} and shape.
4030      *
4031      * @param s the shape
4032      * @return a species for an element type of {@code short} and shape
4033      * @throws IllegalArgumentException if no such species exists for the shape
4034      */
4035     static ShortSpecies species(VectorShape s) {
4036         Objects.requireNonNull(s);
4037         switch (s) {
4038             case S_64_BIT: return (ShortSpecies) SPECIES_64;
4039             case S_128_BIT: return (ShortSpecies) SPECIES_128;
4040             case S_256_BIT: return (ShortSpecies) SPECIES_256;
4041             case S_512_BIT: return (ShortSpecies) SPECIES_512;
4042             case S_Max_BIT: return (ShortSpecies) SPECIES_MAX;
4043             default: throw new IllegalArgumentException("Bad shape: " + s);
4044         }
4045     }
4046 
    /** Species representing {@link ShortVector}s of {@link VectorShape#S_64_BIT VectorShape.S_64_BIT}. */
    public static final VectorSpecies<Short> SPECIES_64
        = new ShortSpecies(VectorShape.S_64_BIT,
                            Short64Vector.class,
                            Short64Vector.Short64Mask.class,
                            Short64Vector::new);

    /** Species representing {@link ShortVector}s of {@link VectorShape#S_128_BIT VectorShape.S_128_BIT}. */
    public static final VectorSpecies<Short> SPECIES_128
        = new ShortSpecies(VectorShape.S_128_BIT,
                            Short128Vector.class,
                            Short128Vector.Short128Mask.class,
                            Short128Vector::new);

    /** Species representing {@link ShortVector}s of {@link VectorShape#S_256_BIT VectorShape.S_256_BIT}. */
    public static final VectorSpecies<Short> SPECIES_256
        = new ShortSpecies(VectorShape.S_256_BIT,
                            Short256Vector.class,
                            Short256Vector.Short256Mask.class,
                            Short256Vector::new);

    /** Species representing {@link ShortVector}s of {@link VectorShape#S_512_BIT VectorShape.S_512_BIT}. */
    public static final VectorSpecies<Short> SPECIES_512
        = new ShortSpecies(VectorShape.S_512_BIT,
                            Short512Vector.class,
                            Short512Vector.Short512Mask.class,
                            Short512Vector::new);

    /** Species representing {@link ShortVector}s of {@link VectorShape#S_Max_BIT VectorShape.S_Max_BIT}. */
    public static final VectorSpecies<Short> SPECIES_MAX
        = new ShortSpecies(VectorShape.S_Max_BIT,
                            ShortMaxVector.class,
                            ShortMaxVector.ShortMaxMask.class,
                            ShortMaxVector::new);

    /**
     * Preferred species for {@link ShortVector}s.
     * A preferred species is a species of maximal bit-size for the platform.
     */
    // The cast doubles as a runtime check that the platform-preferred
    // species for short.class really is a ShortSpecies.
    public static final VectorSpecies<Short> SPECIES_PREFERRED
        = (ShortSpecies) VectorSpecies.ofPreferred(short.class);
4088 }