1 /*
   2  * Copyright (c) 2017, 2021, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 package jdk.incubator.vector;
  26 
  27 import java.nio.ByteBuffer;
  28 import java.nio.ByteOrder;
  29 import java.nio.ReadOnlyBufferException;
  30 import java.util.Arrays;
  31 import java.util.Objects;
  32 import java.util.function.Function;
  33 import java.util.function.UnaryOperator;
  34 
  35 import jdk.internal.misc.ScopedMemoryAccess;
  36 import jdk.internal.misc.Unsafe;
  37 import jdk.internal.vm.annotation.ForceInline;
  38 import jdk.internal.vm.vector.VectorSupport;
  39 
  40 import static jdk.internal.vm.vector.VectorSupport.*;
  41 import static jdk.incubator.vector.VectorIntrinsics.*;
  42 
  43 import static jdk.incubator.vector.VectorOperators.*;
  44 
  45 // -- This file was mechanically generated: Do not edit! -- //
  46 
  47 /**
  48  * A specialized {@link Vector} representing an ordered immutable sequence of
  49  * {@code short} values.
  50  */
  51 @SuppressWarnings("cast")  // warning: redundant cast
  52 public abstract class ShortVector extends AbstractVector<Short> {
  53 
  54     ShortVector(short[] vec) {
  55         super(vec);
  56     }
  57 
  58     static final int FORBID_OPCODE_KIND = VO_ONLYFP;
  59 
  60     @ForceInline
  61     static int opCode(Operator op) {
  62         return VectorOperators.opCode(op, VO_OPCODE_VALID, FORBID_OPCODE_KIND);
  63     }
  64     @ForceInline
  65     static int opCode(Operator op, int requireKind) {
  66         requireKind |= VO_OPCODE_VALID;
  67         return VectorOperators.opCode(op, requireKind, FORBID_OPCODE_KIND);
  68     }
  69     @ForceInline
  70     static boolean opKind(Operator op, int bit) {
  71         return VectorOperators.opKind(op, bit);
  72     }
  73 
  74     // Virtualized factories and operators,
  75     // coded with portable definitions.
  76     // These are all @ForceInline in case
  77     // they need to be used in performance-critical code.
  78     // The various shape-specific subclasses
  79     // also specialize them by wrapping
  80     // them in a call like this:
  81     //    return (Byte128Vector)
  82     //       super.bOp((Byte128Vector) o);
  83     // The purpose of that is to forcibly inline
  84     // the generic definition from this file
  85     // into a sharply type- and size-specific
  86     // wrapper in the subclass file, so that
  87     // the JIT can specialize the code.
  88     // The code is only inlined and expanded
  89     // if it gets hot.  Think of it as a cheap
  90     // and lazy version of C++ templates.
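    //
    // A minimal sketch of the pattern (illustrative; Short128Vector stands
    // in for any shape-specific subclass):
    //
    //    // in ShortVector (this file):
    //    abstract ShortVector uOp(FUnOp f);
    //    @ForceInline final ShortVector uOpTemplate(FUnOp f) { ... }
    //
    //    // in the subclass:
    //    @ForceInline
    //    final Short128Vector uOp(FUnOp f) {
    //        return (Short128Vector) super.uOpTemplate(f);
    //    }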
  91 
  92     // Virtualized getter
  93 
  94     /*package-private*/
  95     abstract short[] vec();
  96 
  97     // Virtualized constructors
  98 
  99     /**
 100      * Build a vector directly using my own constructor.
 101      * It is an error if the array is aliased elsewhere.
 102      */
 103     /*package-private*/
 104     abstract ShortVector vectorFactory(short[] vec);
 105 
 106     /**
 107      * Build a mask directly using my species.
 108      * It is an error if the array is aliased elsewhere.
 109      */
 110     /*package-private*/
 111     @ForceInline
 112     final
 113     AbstractMask<Short> maskFactory(boolean[] bits) {
 114         return vspecies().maskFactory(bits);
 115     }
 116 
 117     // Constant loader (takes dummy as vector arg)
 118     interface FVOp {
 119         short apply(int i);
 120     }
 121 
 122     /*package-private*/
 123     @ForceInline
 124     final
 125     ShortVector vOp(FVOp f) {
 126         short[] res = new short[length()];
 127         for (int i = 0; i < res.length; i++) {
 128             res[i] = f.apply(i);
 129         }
 130         return vectorFactory(res);
 131     }
 132 
 133     @ForceInline
 134     final
 135     ShortVector vOp(VectorMask<Short> m, FVOp f) {
 136         short[] res = new short[length()];
 137         boolean[] mbits = ((AbstractMask<Short>)m).getBits();
 138         for (int i = 0; i < res.length; i++) {
 139             if (mbits[i]) {
 140                 res[i] = f.apply(i);
 141             }
 142         }
 143         return vectorFactory(res);
 144     }
 145 
 146     // Unary operator
 147 
 148     /*package-private*/
 149     interface FUnOp {
 150         short apply(int i, short a);
 151     }
 152 
 153     /*package-private*/
 154     abstract
 155     ShortVector uOp(FUnOp f);
 156     @ForceInline
 157     final
 158     ShortVector uOpTemplate(FUnOp f) {
 159         short[] vec = vec();
 160         short[] res = new short[length()];
 161         for (int i = 0; i < res.length; i++) {
 162             res[i] = f.apply(i, vec[i]);
 163         }
 164         return vectorFactory(res);
 165     }
 166 
 167     /*package-private*/
 168     abstract
 169     ShortVector uOp(VectorMask<Short> m,
 170                              FUnOp f);
 171     @ForceInline
 172     final
 173     ShortVector uOpTemplate(VectorMask<Short> m,
 174                                      FUnOp f) {
 175         if (m == null) {
 176             return uOpTemplate(f);
 177         }
 178         short[] vec = vec();
 179         short[] res = new short[length()];
 180         boolean[] mbits = ((AbstractMask<Short>)m).getBits();
 181         for (int i = 0; i < res.length; i++) {
 182             res[i] = mbits[i] ? f.apply(i, vec[i]) : vec[i];
 183         }
 184         return vectorFactory(res);
 185     }
 186 
 187     // Binary operator
 188 
 189     /*package-private*/
 190     interface FBinOp {
 191         short apply(int i, short a, short b);
 192     }
 193 
 194     /*package-private*/
 195     abstract
 196     ShortVector bOp(Vector<Short> o,
 197                              FBinOp f);
 198     @ForceInline
 199     final
 200     ShortVector bOpTemplate(Vector<Short> o,
 201                                      FBinOp f) {
 202         short[] res = new short[length()];
 203         short[] vec1 = this.vec();
 204         short[] vec2 = ((ShortVector)o).vec();
 205         for (int i = 0; i < res.length; i++) {
 206             res[i] = f.apply(i, vec1[i], vec2[i]);
 207         }
 208         return vectorFactory(res);
 209     }
 210 
 211     /*package-private*/
 212     abstract
 213     ShortVector bOp(Vector<Short> o,
 214                              VectorMask<Short> m,
 215                              FBinOp f);
 216     @ForceInline
 217     final
 218     ShortVector bOpTemplate(Vector<Short> o,
 219                                      VectorMask<Short> m,
 220                                      FBinOp f) {
 221         if (m == null) {
 222             return bOpTemplate(o, f);
 223         }
 224         short[] res = new short[length()];
 225         short[] vec1 = this.vec();
 226         short[] vec2 = ((ShortVector)o).vec();
 227         boolean[] mbits = ((AbstractMask<Short>)m).getBits();
 228         for (int i = 0; i < res.length; i++) {
 229             res[i] = mbits[i] ? f.apply(i, vec1[i], vec2[i]) : vec1[i];
 230         }
 231         return vectorFactory(res);
 232     }
 233 
 234     // Ternary operator
 235 
 236     /*package-private*/
 237     interface FTriOp {
 238         short apply(int i, short a, short b, short c);
 239     }
 240 
 241     /*package-private*/
 242     abstract
 243     ShortVector tOp(Vector<Short> o1,
 244                              Vector<Short> o2,
 245                              FTriOp f);
 246     @ForceInline
 247     final
 248     ShortVector tOpTemplate(Vector<Short> o1,
 249                                      Vector<Short> o2,
 250                                      FTriOp f) {
 251         short[] res = new short[length()];
 252         short[] vec1 = this.vec();
 253         short[] vec2 = ((ShortVector)o1).vec();
 254         short[] vec3 = ((ShortVector)o2).vec();
 255         for (int i = 0; i < res.length; i++) {
 256             res[i] = f.apply(i, vec1[i], vec2[i], vec3[i]);
 257         }
 258         return vectorFactory(res);
 259     }
 260 
 261     /*package-private*/
 262     abstract
 263     ShortVector tOp(Vector<Short> o1,
 264                              Vector<Short> o2,
 265                              VectorMask<Short> m,
 266                              FTriOp f);
 267     @ForceInline
 268     final
 269     ShortVector tOpTemplate(Vector<Short> o1,
 270                                      Vector<Short> o2,
 271                                      VectorMask<Short> m,
 272                                      FTriOp f) {
 273         if (m == null) {
 274             return tOpTemplate(o1, o2, f);
 275         }
 276         short[] res = new short[length()];
 277         short[] vec1 = this.vec();
 278         short[] vec2 = ((ShortVector)o1).vec();
 279         short[] vec3 = ((ShortVector)o2).vec();
 280         boolean[] mbits = ((AbstractMask<Short>)m).getBits();
 281         for (int i = 0; i < res.length; i++) {
 282             res[i] = mbits[i] ? f.apply(i, vec1[i], vec2[i], vec3[i]) : vec1[i];
 283         }
 284         return vectorFactory(res);
 285     }
 286 
 287     // Reduction operator
 288 
 289     /*package-private*/
 290     abstract
 291     short rOp(short v, VectorMask<Short> m, FBinOp f);
 292 
 293     @ForceInline
 294     final
 295     short rOpTemplate(short v, VectorMask<Short> m, FBinOp f) {
 296         if (m == null) {
 297             return rOpTemplate(v, f);
 298         }
 299         short[] vec = vec();
 300         boolean[] mbits = ((AbstractMask<Short>)m).getBits();
 301         for (int i = 0; i < vec.length; i++) {
 302             v = mbits[i] ? f.apply(i, v, vec[i]) : v;
 303         }
 304         return v;
 305     }
 306 
 307     @ForceInline
 308     final
 309     short rOpTemplate(short v, FBinOp f) {
 310         short[] vec = vec();
 311         for (int i = 0; i < vec.length; i++) {
 312             v = f.apply(i, v, vec[i]);
 313         }
 314         return v;
 315     }
 316 
 317     // Memory reference
 318 
 319     /*package-private*/
 320     interface FLdOp<M> {
 321         short apply(M memory, int offset, int i);
 322     }
 323 
 324     /*package-private*/
 325     @ForceInline
 326     final
 327     <M> ShortVector ldOp(M memory, int offset,
 328                                   FLdOp<M> f) {
 329         //dummy; no vec = vec();
 330         short[] res = new short[length()];
 331         for (int i = 0; i < res.length; i++) {
 332             res[i] = f.apply(memory, offset, i);
 333         }
 334         return vectorFactory(res);
 335     }
 336 
 337     /*package-private*/
 338     @ForceInline
 339     final
 340     <M> ShortVector ldOp(M memory, int offset,
 341                                   VectorMask<Short> m,
 342                                   FLdOp<M> f) {
 343         //short[] vec = vec();
 344         short[] res = new short[length()];
 345         boolean[] mbits = ((AbstractMask<Short>)m).getBits();
 346         for (int i = 0; i < res.length; i++) {
 347             if (mbits[i]) {
 348                 res[i] = f.apply(memory, offset, i);
 349             }
 350         }
 351         return vectorFactory(res);
 352     }
 353 
 354     interface FStOp<M> {
 355         void apply(M memory, int offset, int i, short a);
 356     }
 357 
 358     /*package-private*/
 359     @ForceInline
 360     final
 361     <M> void stOp(M memory, int offset,
 362                   FStOp<M> f) {
 363         short[] vec = vec();
 364         for (int i = 0; i < vec.length; i++) {
 365             f.apply(memory, offset, i, vec[i]);
 366         }
 367     }
 368 
 369     /*package-private*/
 370     @ForceInline
 371     final
 372     <M> void stOp(M memory, int offset,
 373                   VectorMask<Short> m,
 374                   FStOp<M> f) {
 375         short[] vec = vec();
 376         boolean[] mbits = ((AbstractMask<Short>)m).getBits();
 377         for (int i = 0; i < vec.length; i++) {
 378             if (mbits[i]) {
 379                 f.apply(memory, offset, i, vec[i]);
 380             }
 381         }
 382     }
 383 
 384     // Binary test
 385 
 386     /*package-private*/
 387     interface FBinTest {
 388         boolean apply(int cond, int i, short a, short b);
 389     }
 390 
 391     /*package-private*/
 392     @ForceInline
 393     final
 394     AbstractMask<Short> bTest(int cond,
 395                                   Vector<Short> o,
 396                                   FBinTest f) {
 397         short[] vec1 = vec();
 398         short[] vec2 = ((ShortVector)o).vec();
 399         boolean[] bits = new boolean[length()];
 400         for (int i = 0; i < length(); i++){
 401             bits[i] = f.apply(cond, i, vec1[i], vec2[i]);
 402         }
 403         return maskFactory(bits);
 404     }
 405 
 406     /*package-private*/
 407     @ForceInline
 408     static short rotateLeft(short a, int n) {
 409         return (short)(((((short)a) & Short.toUnsignedInt((short)-1)) << (n & Short.SIZE-1)) | ((((short)a) & Short.toUnsignedInt((short)-1)) >>> (Short.SIZE - (n & Short.SIZE-1))));
 410     }
 411 
 412     /*package-private*/
 413     @ForceInline
 414     static short rotateRight(short a, int n) {
 415         return (short)(((((short)a) & Short.toUnsignedInt((short)-1)) >>> (n & Short.SIZE-1)) | ((((short)a) & Short.toUnsignedInt((short)-1)) << (Short.SIZE - (n & Short.SIZE-1))));
 416     }
 417 
 418     /*package-private*/
 419     @Override
 420     abstract ShortSpecies vspecies();
 421 
 422     /*package-private*/
 423     @ForceInline
 424     static long toBits(short e) {
 425         return e;
 426     }
 427 
 428     /*package-private*/
 429     @ForceInline
 430     static short fromBits(long bits) {
 431         return ((short)bits);
 432     }
 433 
 434     // Static factories (other than memory operations)
 435 
 436     // Note: A surprising behavior in javadoc
 437     // sometimes makes a lone /** {@inheritDoc} */
 438     // comment drop the method altogether,
 439     // apparently if the method mentions a
 440     // parameter or return type of Vector<Short>
 441     // instead of Vector<E> as originally specified.
 442     // Adding an empty HTML fragment appears to
 443     // nudge javadoc into providing the desired
 444     // inherited documentation.  We use the HTML
 445     // comment <!--workaround--> for this.
 446 
 447     /**
 448      * Returns a vector of the given species
 449      * where all lane elements are set to
 450      * zero, the default primitive value.
 451      *
 452      * @param species species of the desired zero vector
 453      * @return a zero vector
 454      */
 455     @ForceInline
 456     public static ShortVector zero(VectorSpecies<Short> species) {
 457         ShortSpecies vsp = (ShortSpecies) species;
 458         return VectorSupport.broadcastCoerced(vsp.vectorType(), short.class, species.length(),
 459                                 0, vsp,
 460                                 ((bits_, s_) -> s_.rvOp(i -> bits_)));
 461     }
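
    // A minimal usage sketch for the factory above (illustrative only;
    // SPECIES_128 is one of the species constants declared elsewhere in
    // this class):
    private static ShortVector zeroUsageSketch() {
        // every lane of the result holds the default value, (short) 0
        return ShortVector.zero(ShortVector.SPECIES_128);
    }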
 462 
 463     /**
 464      * Returns a vector of the same species as this one
 465      * where all lane elements are set to
 466      * the primitive value {@code e}.
 467      *
 468      * The contents of the current vector are discarded;
 469      * only the species is relevant to this operation.
 470      *
 471      * <p> This method returns the value of this expression:
 472      * {@code ShortVector.broadcast(this.species(), e)}.
 473      *
 474      * @apiNote
 475      * Unlike the similar method named {@code broadcast()}
 476      * in the supertype {@code Vector}, this method does not
 477      * need to validate its argument, and cannot throw
 478      * {@code IllegalArgumentException}.  This method is
 479      * therefore preferable to the supertype method.
 480      *
 481      * @param e the value to broadcast
 482      * @return a vector where all lane elements are set to
 483      *         the primitive value {@code e}
 484      * @see #broadcast(VectorSpecies,long)
 485      * @see Vector#broadcast(long)
 486      * @see VectorSpecies#broadcast(long)
 487      */
 488     public abstract ShortVector broadcast(short e);
 489 
 490     /**
 491      * Returns a vector of the given species
 492      * where all lane elements are set to
 493      * the primitive value {@code e}.
 494      *
 495      * @param species species of the desired vector
 496      * @param e the value to broadcast
 497      * @return a vector where all lane elements are set to
 498      *         the primitive value {@code e}
 499      * @see #broadcast(long)
 500      * @see Vector#broadcast(long)
 501      * @see VectorSpecies#broadcast(long)
 502      */
 503     @ForceInline
 504     public static ShortVector broadcast(VectorSpecies<Short> species, short e) {
 505         ShortSpecies vsp = (ShortSpecies) species;
 506         return vsp.broadcast(e);
 507     }
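
    // A brief usage sketch of the broadcast factories above (illustrative
    // only; SPECIES_128 is a species constant declared elsewhere in this
    // class):
    private static ShortVector broadcastUsageSketch() {
        VectorSpecies<Short> sp = ShortVector.SPECIES_128;
        ShortVector ones = ShortVector.broadcast(sp, (short) 1);  // static factory
        ShortVector twos = ones.broadcast((short) 2);             // same species, no validation needed
        return ones.add(twos);                                    // every lane is (short) 3
    }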
 508 
 509     /*package-private*/
 510     @ForceInline
 511     final ShortVector broadcastTemplate(short e) {
 512         ShortSpecies vsp = vspecies();
 513         return vsp.broadcast(e);
 514     }
 515 
 516     /**
 517      * {@inheritDoc} <!--workaround-->
 518      * @apiNote
 519      * When working with vector subtypes like {@code ShortVector},
 520      * {@linkplain #broadcast(short) the more strongly typed method}
 521      * is typically selected.  It can be explicitly selected
 522      * using a cast: {@code v.broadcast((short)e)}.
 523      * The two expressions will produce numerically identical results.
 524      */
 525     @Override
 526     public abstract ShortVector broadcast(long e);
 527 
 528     /**
 529      * Returns a vector of the given species
 530      * where all lane elements are set to
 531      * the primitive value {@code e}.
 532      *
 533      * The {@code long} value must be accurately representable
 534      * by the {@code ETYPE} of the vector species, so that
 535      * {@code e==(long)(ETYPE)e}.
 536      *
 537      * @param species species of the desired vector
 538      * @param e the value to broadcast
 539      * @return a vector where all lane elements are set to
 540      *         the primitive value {@code e}
 541      * @throws IllegalArgumentException
 542      *         if the given {@code long} value cannot
 543      *         be represented by the vector's {@code ETYPE}
 544      * @see #broadcast(VectorSpecies,short)
 545      * @see VectorSpecies#checkValue(long)
 546      */
 547     @ForceInline
 548     public static ShortVector broadcast(VectorSpecies<Short> species, long e) {
 549         ShortSpecies vsp = (ShortSpecies) species;
 550         return vsp.broadcast(e);
 551     }
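
    // A small sketch of the range rule above (illustrative only): a long
    // value is accepted only if it round-trips through the lane type,
    // that is, e == (long)(short) e.
    private static ShortVector broadcastRangeSketch(VectorSpecies<Short> species) {
        // -1L fits in a short lane, so this succeeds:
        ShortVector allOnes = ShortVector.broadcast(species, -1L);
        // ShortVector.broadcast(species, 100_000L) would throw
        // IllegalArgumentException, since (long)(short) 100_000L != 100_000L
        return allOnes;
    }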
 552 
 553     /*package-private*/
 554     @ForceInline
 555     final ShortVector broadcastTemplate(long e) {
 556         return vspecies().broadcast(e);
 557     }
 558 
 559     // Unary lanewise support
 560 
 561     /**
 562      * {@inheritDoc} <!--workaround-->
 563      */
 564     public abstract
 565     ShortVector lanewise(VectorOperators.Unary op);
 566 
 567     @ForceInline
 568     final
 569     ShortVector lanewiseTemplate(VectorOperators.Unary op) {
 570         if (opKind(op, VO_SPECIAL)) {
 571             if (op == ZOMO) {
 572                 return blend(broadcast(-1), compare(NE, 0));
 573             }
 574             if (op == NOT) {
 575                 return broadcast(-1).lanewise(XOR, this);
 576             } else if (op == NEG) {
 577                 // FIXME: Support this in the JIT.
 578                 return broadcast(0).lanewise(SUB, this);
 579             }
 580         }
 581         int opc = opCode(op);
 582         return VectorSupport.unaryOp(
 583             opc, getClass(), null, short.class, length(),
 584             this, null,
 585             UN_IMPL.find(op, opc, ShortVector::unaryOperations));
 586     }
 587 
 588     /**
 589      * {@inheritDoc} <!--workaround-->
 590      */
 591     @Override
 592     public abstract
 593     ShortVector lanewise(VectorOperators.Unary op,
 594                                   VectorMask<Short> m);
 595     @ForceInline
 596     final
 597     ShortVector lanewiseTemplate(VectorOperators.Unary op,
 598                                           Class<? extends VectorMask<Short>> maskClass,
 599                                           VectorMask<Short> m) {
 600         m.check(maskClass, this);
 601         if (opKind(op, VO_SPECIAL)) {
 602             if (op == ZOMO) {
 603                 return blend(broadcast(-1), compare(NE, 0, m));
 604             }
 605             if (op == NOT) {
 606                 return lanewise(XOR, broadcast(-1), m);
 607             } else if (op == NEG) {
 608                 return lanewise(NOT, m).lanewise(ADD, broadcast(1), m);
 609             }
 610         }
 611         int opc = opCode(op);
 612         return VectorSupport.unaryOp(
 613             opc, getClass(), maskClass, short.class, length(),
 614             this, m,
 615             UN_IMPL.find(op, opc, ShortVector::unaryOperations));
 616     }
 617 
 618     private static final
 619     ImplCache<Unary, UnaryOperation<ShortVector, VectorMask<Short>>>
 620         UN_IMPL = new ImplCache<>(Unary.class, ShortVector.class);
 621 
 622     private static UnaryOperation<ShortVector, VectorMask<Short>> unaryOperations(int opc_) {
 623         switch (opc_) {
 624             case VECTOR_OP_NEG: return (v0, m) ->
 625                     v0.uOp(m, (i, a) -> (short) -a);
 626             case VECTOR_OP_ABS: return (v0, m) ->
 627                     v0.uOp(m, (i, a) -> (short) Math.abs(a));
 628             default: return null;
 629         }
 630     }
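
    // A brief sketch of the unary entry points above (illustrative only;
    // v and m stand for any vector and mask of this species):
    private static ShortVector unaryUsageSketch(ShortVector v, VectorMask<Short> m) {
        ShortVector neg = v.lanewise(NEG);     // (short) -a in every lane
        ShortVector inv = v.lanewise(NOT, m);  // complement in set lanes, original value elsewhere
        return neg.lanewise(ABS).add(inv);
    }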
 631 
 632     // Binary lanewise support
 633 
 634     /**
 635      * {@inheritDoc} <!--workaround-->
 636      * @see #lanewise(VectorOperators.Binary,short)
 637      * @see #lanewise(VectorOperators.Binary,short,VectorMask)
 638      */
 639     @Override
 640     public abstract
 641     ShortVector lanewise(VectorOperators.Binary op,
 642                                   Vector<Short> v);
 643     @ForceInline
 644     final
 645     ShortVector lanewiseTemplate(VectorOperators.Binary op,
 646                                           Vector<Short> v) {
 647         ShortVector that = (ShortVector) v;
 648         that.check(this);
 649 
 650         if (opKind(op, VO_SPECIAL | VO_SHIFT)) {
 651             if (op == FIRST_NONZERO) {
 652                 // FIXME: Support this in the JIT.
 653                 VectorMask<Short> thisNZ
 654                     = this.viewAsIntegralLanes().compare(NE, (short) 0);
 655                 that = that.blend((short) 0, thisNZ.cast(vspecies()));
 656                 op = OR_UNCHECKED;
 657             }
 658             if (opKind(op, VO_SHIFT)) {
 659                 // As per shift specification for Java, mask the shift count.
 660                 // This allows the JIT to ignore some ISA details.
 661                 that = that.lanewise(AND, SHIFT_MASK);
 662             }
 663             if (op == AND_NOT) {
 664                 // FIXME: Support this in the JIT.
 665                 that = that.lanewise(NOT);
 666                 op = AND;
 667             } else if (op == DIV) {
 668                 VectorMask<Short> eqz = that.eq((short) 0);
 669                 if (eqz.anyTrue()) {
 670                     throw that.divZeroException();
 671                 }
 672             }
 673         }
 674 
 675         int opc = opCode(op);
 676         return VectorSupport.binaryOp(
 677             opc, getClass(), null, short.class, length(),
 678             this, that, null,
 679             BIN_IMPL.find(op, opc, ShortVector::binaryOperations));
 680     }
 681 
 682     /**
 683      * {@inheritDoc} <!--workaround-->
 684      * @see #lanewise(VectorOperators.Binary,short,VectorMask)
 685      */
 686     @Override
 687     public abstract
 688     ShortVector lanewise(VectorOperators.Binary op,
 689                                   Vector<Short> v,
 690                                   VectorMask<Short> m);
 691     @ForceInline
 692     final
 693     ShortVector lanewiseTemplate(VectorOperators.Binary op,
 694                                           Class<? extends VectorMask<Short>> maskClass,
 695                                           Vector<Short> v, VectorMask<Short> m) {
 696         ShortVector that = (ShortVector) v;
 697         that.check(this);
 698         m.check(maskClass, this);
 699 
 700         if (opKind(op, VO_SPECIAL | VO_SHIFT)) {
 701             if (op == FIRST_NONZERO) {
 702                 // FIXME: Support this in the JIT.
 703                 VectorMask<Short> thisNZ
 704                     = this.viewAsIntegralLanes().compare(NE, (short) 0);
 705                 that = that.blend((short) 0, thisNZ.cast(vspecies()));
 706                 op = OR_UNCHECKED;
 707             }
 708             if (opKind(op, VO_SHIFT)) {
 709                 // As per shift specification for Java, mask the shift count.
 710                 // This allows the JIT to ignore some ISA details.
 711                 that = that.lanewise(AND, SHIFT_MASK);
 712             }
 713             if (op == AND_NOT) {
 714                 // FIXME: Support this in the JIT.
 715                 that = that.lanewise(NOT);
 716                 op = AND;
 717             } else if (op == DIV) {
 718                 VectorMask<Short> eqz = that.eq((short)0);
 719                 if (eqz.and(m).anyTrue()) {
 720                     throw that.divZeroException();
 721                 }
 722                 // suppress div/0 exceptions in unset lanes
 723                 that = that.lanewise(NOT, eqz);
 724             }
 725         }
 726 
 727         int opc = opCode(op);
 728         return VectorSupport.binaryOp(
 729             opc, getClass(), maskClass, short.class, length(),
 730             this, that, m,
 731             BIN_IMPL.find(op, opc, ShortVector::binaryOperations));
 732     }
 733 
 734     private static final
 735     ImplCache<Binary, BinaryOperation<ShortVector, VectorMask<Short>>>
 736         BIN_IMPL = new ImplCache<>(Binary.class, ShortVector.class);
 737 
 738     private static BinaryOperation<ShortVector, VectorMask<Short>> binaryOperations(int opc_) {
 739         switch (opc_) {
 740             case VECTOR_OP_ADD: return (v0, v1, vm) ->
 741                     v0.bOp(v1, vm, (i, a, b) -> (short)(a + b));
 742             case VECTOR_OP_SUB: return (v0, v1, vm) ->
 743                     v0.bOp(v1, vm, (i, a, b) -> (short)(a - b));
 744             case VECTOR_OP_MUL: return (v0, v1, vm) ->
 745                     v0.bOp(v1, vm, (i, a, b) -> (short)(a * b));
 746             case VECTOR_OP_DIV: return (v0, v1, vm) ->
 747                     v0.bOp(v1, vm, (i, a, b) -> (short)(a / b));
 748             case VECTOR_OP_MAX: return (v0, v1, vm) ->
 749                     v0.bOp(v1, vm, (i, a, b) -> (short)Math.max(a, b));
 750             case VECTOR_OP_MIN: return (v0, v1, vm) ->
 751                     v0.bOp(v1, vm, (i, a, b) -> (short)Math.min(a, b));
 752             case VECTOR_OP_AND: return (v0, v1, vm) ->
 753                     v0.bOp(v1, vm, (i, a, b) -> (short)(a & b));
 754             case VECTOR_OP_OR: return (v0, v1, vm) ->
 755                     v0.bOp(v1, vm, (i, a, b) -> (short)(a | b));
 756             case VECTOR_OP_XOR: return (v0, v1, vm) ->
 757                     v0.bOp(v1, vm, (i, a, b) -> (short)(a ^ b));
 758             case VECTOR_OP_LSHIFT: return (v0, v1, vm) ->
 759                     v0.bOp(v1, vm, (i, a, n) -> (short)(a << n));
 760             case VECTOR_OP_RSHIFT: return (v0, v1, vm) ->
 761                     v0.bOp(v1, vm, (i, a, n) -> (short)(a >> n));
 762             case VECTOR_OP_URSHIFT: return (v0, v1, vm) ->
 763                     v0.bOp(v1, vm, (i, a, n) -> (short)((a & LSHR_SETUP_MASK) >>> n));
 764             case VECTOR_OP_LROTATE: return (v0, v1, vm) ->
 765                     v0.bOp(v1, vm, (i, a, n) -> rotateLeft(a, (int)n));
 766             case VECTOR_OP_RROTATE: return (v0, v1, vm) ->
 767                     v0.bOp(v1, vm, (i, a, n) -> rotateRight(a, (int)n));
 768             default: return null;
 769         }
 770     }
 771 
 772     // FIXME: Maybe all of the public final methods in this file (the
 773     // simple ones that just call lanewise) should be pushed down to
 774     // the X-VectorBits template.  They can't optimize properly at
 775     // this level, and must rely on inlining.  Does it work?
 776     // (If it works, of course keep the code here.)
 777 
 778     /**
 779      * Combines the lane values of this vector
 780      * with the value of a broadcast scalar.
 781      *
 782      * This is a lane-wise binary operation which applies
 783      * the selected operation to each lane.
 784      * The return value will be equal to this expression:
 785      * {@code this.lanewise(op, this.broadcast(e))}.
 786      *
 787      * @param op the operation used to process lane values
 788      * @param e the input scalar
 789      * @return the result of applying the operation lane-wise
 790      *         to the two input vectors
 791      * @throws UnsupportedOperationException if this vector does
 792      *         not support the requested operation
 793      * @see #lanewise(VectorOperators.Binary,Vector)
 794      * @see #lanewise(VectorOperators.Binary,short,VectorMask)
 795      */
 796     @ForceInline
 797     public final
 798     ShortVector lanewise(VectorOperators.Binary op,
 799                                   short e) {
 800         if (opKind(op, VO_SHIFT) && (short)(int)e == e) {
 801             return lanewiseShift(op, (int) e);
 802         }
 803         if (op == AND_NOT) {
 804             op = AND; e = (short) ~e;
 805         }
 806         return lanewise(op, broadcast(e));
 807     }
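
    // A brief sketch of the scalar paths above (illustrative only):
    private static ShortVector binaryScalarSketch(ShortVector v) {
        ShortVector a = v.lanewise(ADD, (short) 7);         // same result as v.add((short) 7)
        ShortVector b = v.lanewise(AND_NOT, (short) 0xFF);  // rewritten to AND with (short) ~0xFF
        ShortVector c = v.lanewise(LSHL, (short) 3);        // shift ops route through lanewiseShift
        return a.lanewise(OR, b).lanewise(XOR, c);
    }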
 808 
 809     /**
 810      * Combines the lane values of this vector
 811      * with the value of a broadcast scalar,
 812      * with selection of lane elements controlled by a mask.
 813      *
 814      * This is a masked lane-wise binary operation which applies
 815      * the selected operation to each lane.
 816      * The return value will be equal to this expression:
 817      * {@code this.lanewise(op, this.broadcast(e), m)}.
 818      *
 819      * @param op the operation used to process lane values
 820      * @param e the input scalar
 821      * @param m the mask controlling lane selection
 822      * @return the result of applying the operation lane-wise
 823      *         to the input vector and the scalar
 824      * @throws UnsupportedOperationException if this vector does
 825      *         not support the requested operation
 826      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
 827      * @see #lanewise(VectorOperators.Binary,short)
 828      */
 829     @ForceInline
 830     public final
 831     ShortVector lanewise(VectorOperators.Binary op,
 832                                   short e,
 833                                   VectorMask<Short> m) {
 834         if (opKind(op, VO_SHIFT) && (short)(int)e == e) {
 835             return lanewiseShift(op, (int) e, m);
 836         }
 837         if (op == AND_NOT) {
 838             op = AND; e = (short) ~e;
 839         }
 840         return lanewise(op, broadcast(e), m);
 841     }
 842 
 843     /**
 844      * {@inheritDoc} <!--workaround-->
 845      * @apiNote
 846      * When working with vector subtypes like {@code ShortVector},
 847      * {@linkplain #lanewise(VectorOperators.Binary,short)
 848      * the more strongly typed method}
 849      * is typically selected.  It can be explicitly selected
 850      * using a cast: {@code v.lanewise(op,(short)e)}.
 851      * The two expressions will produce numerically identical results.
 852      */
 853     @ForceInline
 854     public final
 855     ShortVector lanewise(VectorOperators.Binary op,
 856                                   long e) {
 857         short e1 = (short) e;
 858         if ((long)e1 != e
 859             // allow shift ops to clip down their int parameters
 860             && !(opKind(op, VO_SHIFT) && (int)e1 == e)) {
 861             vspecies().checkValue(e);  // for exception
 862         }
 863         return lanewise(op, e1);
 864     }
 865 
 866     /**
 867      * {@inheritDoc} <!--workaround-->
 868      * @apiNote
 869      * When working with vector subtypes like {@code ShortVector},
 870      * {@linkplain #lanewise(VectorOperators.Binary,short,VectorMask)
 871      * the more strongly typed method}
 872      * is typically selected.  It can be explicitly selected
 873      * using a cast: {@code v.lanewise(op,(short)e,m)}.
 874      * The two expressions will produce numerically identical results.
 875      */
 876     @ForceInline
 877     public final
 878     ShortVector lanewise(VectorOperators.Binary op,
 879                                   long e, VectorMask<Short> m) {
 880         short e1 = (short) e;
 881         if ((long)e1 != e
 882             // allow shift ops to clip down their int parameters
 883             && !(opKind(op, VO_SHIFT) && (int)e1 == e)) {
 884             vspecies().checkValue(e);  // for exception
 885         }
 886         return lanewise(op, e1, m);
 887     }
 888 
 889     /*package-private*/
 890     abstract ShortVector
 891     lanewiseShift(VectorOperators.Binary op, int e);
 892 
 893     /*package-private*/
 894     @ForceInline
 895     final ShortVector
 896     lanewiseShiftTemplate(VectorOperators.Binary op, int e) {
 897         // Special handling for these.  FIXME: Refactor?
 898         assert(opKind(op, VO_SHIFT));
 899         // As per shift specification for Java, mask the shift count.
 900         e &= SHIFT_MASK;
 901         int opc = opCode(op);
 902         return VectorSupport.broadcastInt(
 903             opc, getClass(), null, short.class, length(),
 904             this, e, null,
 905             BIN_INT_IMPL.find(op, opc, ShortVector::broadcastIntOperations));
 906     }
 907 
 908     /*package-private*/
 909     abstract ShortVector
 910     lanewiseShift(VectorOperators.Binary op, int e, VectorMask<Short> m);
 911 
 912     /*package-private*/
 913     @ForceInline
 914     final ShortVector
 915     lanewiseShiftTemplate(VectorOperators.Binary op,
 916                           Class<? extends VectorMask<Short>> maskClass,
 917                           int e, VectorMask<Short> m) {
 918         m.check(maskClass, this);
 919         assert(opKind(op, VO_SHIFT));
 920         // As per shift specification for Java, mask the shift count.
 921         e &= SHIFT_MASK;
 922         int opc = opCode(op);
 923         return VectorSupport.broadcastInt(
 924             opc, getClass(), maskClass, short.class, length(),
 925             this, e, m,
 926             BIN_INT_IMPL.find(op, opc, ShortVector::broadcastIntOperations));
 927     }
 928 
 929     private static final
 930     ImplCache<Binary,VectorBroadcastIntOp<ShortVector, VectorMask<Short>>> BIN_INT_IMPL
 931         = new ImplCache<>(Binary.class, ShortVector.class);
 932 
 933     private static VectorBroadcastIntOp<ShortVector, VectorMask<Short>> broadcastIntOperations(int opc_) {
 934         switch (opc_) {
 935             case VECTOR_OP_LSHIFT: return (v, n, m) ->
 936                     v.uOp(m, (i, a) -> (short)(a << n));
 937             case VECTOR_OP_RSHIFT: return (v, n, m) ->
 938                     v.uOp(m, (i, a) -> (short)(a >> n));
 939             case VECTOR_OP_URSHIFT: return (v, n, m) ->
 940                     v.uOp(m, (i, a) -> (short)((a & LSHR_SETUP_MASK) >>> n));
 941             case VECTOR_OP_LROTATE: return (v, n, m) ->
 942                     v.uOp(m, (i, a) -> rotateLeft(a, (int)n));
 943             case VECTOR_OP_RROTATE: return (v, n, m) ->
 944                     v.uOp(m, (i, a) -> rotateRight(a, (int)n));
 945             default: return null;
 946         }
 947     }
 948 
 949     // As per shift specification for Java, mask the shift count.
 950     // We mask 0x3F (long), 0x1F (int), 0x0F (short), 0x7 (byte).
 951     // The latter two maskings go beyond the JLS, but seem reasonable
 952     // since our lane types are first-class types, not just dressed
 953     // up ints.
 954     private static final int SHIFT_MASK = (Short.SIZE - 1);
 955     // Also simulate >>> on sub-word variables with a mask.
 956     private static final int LSHR_SETUP_MASK = ((1 << Short.SIZE) - 1);
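
    // A worked example of the two constants above (illustrative only): a
    // shift count of 20 is masked to 20 & 0x0F == 4, and unsigned right
    // shift first zero-extends the 16-bit lane through LSHR_SETUP_MASK so
    // that copies of the sign bit do not leak into the result.
    private static ShortVector shiftMaskSketch(ShortVector v) {
        ShortVector a = v.lanewise(LSHL, 20);  // behaves like a shift by 4
        ShortVector b = v.lanewise(LSHR, 1);   // per lane: (a & 0xFFFF) >>> 1
        return a.lanewise(OR, b);
    }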
 957 
 958     // Ternary lanewise support
 959 
 960     // Ternary operators come in eight variations:
 961     //   lanewise(op, [broadcast(e1)|v1], [broadcast(e2)|v2])
 962     //   lanewise(op, [broadcast(e1)|v1], [broadcast(e2)|v2], mask)
 963 
 964     // It is annoying to support all of these variations of masking
 965     // and broadcast, but it would be more surprising not to continue
 966     // the obvious pattern started by unary and binary.
 967 
 968     /**
 969      * {@inheritDoc} <!--workaround-->
 970      * @see #lanewise(VectorOperators.Ternary,short,short,VectorMask)
 971      * @see #lanewise(VectorOperators.Ternary,Vector,short,VectorMask)
 972      * @see #lanewise(VectorOperators.Ternary,short,Vector,VectorMask)
 973      * @see #lanewise(VectorOperators.Ternary,short,short)
 974      * @see #lanewise(VectorOperators.Ternary,Vector,short)
 975      * @see #lanewise(VectorOperators.Ternary,short,Vector)
 976      */
 977     @Override
 978     public abstract
 979     ShortVector lanewise(VectorOperators.Ternary op,
 980                                                   Vector<Short> v1,
 981                                                   Vector<Short> v2);
 982     @ForceInline
 983     final
 984     ShortVector lanewiseTemplate(VectorOperators.Ternary op,
 985                                           Vector<Short> v1,
 986                                           Vector<Short> v2) {
 987         ShortVector that = (ShortVector) v1;
 988         ShortVector tother = (ShortVector) v2;
 989         // It's a word: https://www.dictionary.com/browse/tother
 990         // See also Chapter 11 of Dickens, Our Mutual Friend:
 991         // "Totherest Governor," replied Mr Riderhood...
 992         that.check(this);
 993         tother.check(this);
 994         if (op == BITWISE_BLEND) {
 995             // FIXME: Support this in the JIT.
 996             that = this.lanewise(XOR, that).lanewise(AND, tother);
 997             return this.lanewise(XOR, that);
 998         }
 999         int opc = opCode(op);
1000         return VectorSupport.ternaryOp(
1001             opc, getClass(), null, short.class, length(),
1002             this, that, tother, null,
1003             TERN_IMPL.find(op, opc, ShortVector::ternaryOperations));
1004     }
1005 
1006     /**
1007      * {@inheritDoc} <!--workaround-->
1008      * @see #lanewise(VectorOperators.Ternary,short,short,VectorMask)
1009      * @see #lanewise(VectorOperators.Ternary,Vector,short,VectorMask)
1010      * @see #lanewise(VectorOperators.Ternary,short,Vector,VectorMask)
1011      */
1012     @Override
1013     public abstract
1014     ShortVector lanewise(VectorOperators.Ternary op,
1015                                   Vector<Short> v1,
1016                                   Vector<Short> v2,
1017                                   VectorMask<Short> m);
1018     @ForceInline
1019     final
1020     ShortVector lanewiseTemplate(VectorOperators.Ternary op,
1021                                           Class<? extends VectorMask<Short>> maskClass,
1022                                           Vector<Short> v1,
1023                                           Vector<Short> v2,
1024                                           VectorMask<Short> m) {
1025         ShortVector that = (ShortVector) v1;
1026         ShortVector tother = (ShortVector) v2;
1027         // It's a word: https://www.dictionary.com/browse/tother
1028         // See also Chapter 11 of Dickens, Our Mutual Friend:
1029         // "Totherest Governor," replied Mr Riderhood...
1030         that.check(this);
1031         tother.check(this);
1032         m.check(maskClass, this);
1033 
1034         if (op == BITWISE_BLEND) {
1035             // FIXME: Support this in the JIT.
1036             that = this.lanewise(XOR, that).lanewise(AND, tother);
1037             return this.lanewise(XOR, that, m);
1038         }
1039         int opc = opCode(op);
1040         return VectorSupport.ternaryOp(
1041             opc, getClass(), maskClass, short.class, length(),
1042             this, that, tother, m,
1043             TERN_IMPL.find(op, opc, ShortVector::ternaryOperations));
1044     }
1045 
1046     private static final
1047     ImplCache<Ternary, TernaryOperation<ShortVector, VectorMask<Short>>>
1048         TERN_IMPL = new ImplCache<>(Ternary.class, ShortVector.class);
1049 
1050     private static TernaryOperation<ShortVector, VectorMask<Short>> ternaryOperations(int opc_) {
1051         switch (opc_) {
1052             default: return null;
1053         }
1054     }
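
    // A worked example of the BITWISE_BLEND rewrite above (illustrative
    // only): a ^ ((a ^ b) & c) picks each bit of b where c has a one bit
    // and each bit of a where c has a zero bit.
    private static ShortVector bitwiseBlendSketch(ShortVector a,
                                                  ShortVector b,
                                                  ShortVector c) {
        return a.lanewise(BITWISE_BLEND, b, c);
    }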
1055 
1056     /**
1057      * Combines the lane values of this vector
1058      * with the values of two broadcast scalars.
1059      *
1060      * This is a lane-wise ternary operation which applies
1061      * the selected operation to each lane.
1062      * The return value will be equal to this expression:
1063      * {@code this.lanewise(op, this.broadcast(e1), this.broadcast(e2))}.
1064      *
1065      * @param op the operation used to combine lane values
1066      * @param e1 the first input scalar
1067      * @param e2 the second input scalar
1068      * @return the result of applying the operation lane-wise
1069      *         to the input vector and the scalars
1070      * @throws UnsupportedOperationException if this vector does
1071      *         not support the requested operation
1072      * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
1073      * @see #lanewise(VectorOperators.Ternary,short,short,VectorMask)
1074      */
1075     @ForceInline
1076     public final
1077     ShortVector lanewise(VectorOperators.Ternary op, //(op,e1,e2)
1078                                   short e1,
1079                                   short e2) {
1080         return lanewise(op, broadcast(e1), broadcast(e2));
1081     }
1082 
1083     /**
1084      * Combines the lane values of this vector
1085      * with the values of two broadcast scalars,
1086      * with selection of lane elements controlled by a mask.
1087      *
1088      * This is a masked lane-wise ternary operation which applies
1089      * the selected operation to each lane.
1090      * The return value will be equal to this expression:
1091      * {@code this.lanewise(op, this.broadcast(e1), this.broadcast(e2), m)}.
1092      *
1093      * @param op the operation used to combine lane values
1094      * @param e1 the first input scalar
1095      * @param e2 the second input scalar
1096      * @param m the mask controlling lane selection
1097      * @return the result of applying the operation lane-wise
1098      *         to the input vector and the scalars
1099      * @throws UnsupportedOperationException if this vector does
1100      *         not support the requested operation
1101      * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
1102      * @see #lanewise(VectorOperators.Ternary,short,short)
1103      */
1104     @ForceInline
1105     public final
1106     ShortVector lanewise(VectorOperators.Ternary op, //(op,e1,e2,m)
1107                                   short e1,
1108                                   short e2,
1109                                   VectorMask<Short> m) {
1110         return lanewise(op, broadcast(e1), broadcast(e2), m);
1111     }
1112 
1113     /**
1114      * Combines the lane values of this vector
1115      * with the values of another vector and a broadcast scalar.
1116      *
1117      * This is a lane-wise ternary operation which applies
1118      * the selected operation to each lane.
1119      * The return value will be equal to this expression:
1120      * {@code this.lanewise(op, v1, this.broadcast(e2))}.
1121      *
1122      * @param op the operation used to combine lane values
1123      * @param v1 the other input vector
1124      * @param e2 the input scalar
1125      * @return the result of applying the operation lane-wise
1126      *         to the input vectors and the scalar
1127      * @throws UnsupportedOperationException if this vector does
1128      *         not support the requested operation
1129      * @see #lanewise(VectorOperators.Ternary,short,short)
1130      * @see #lanewise(VectorOperators.Ternary,Vector,short,VectorMask)
1131      */
1132     @ForceInline
1133     public final
1134     ShortVector lanewise(VectorOperators.Ternary op, //(op,v1,e2)
1135                                   Vector<Short> v1,
1136                                   short e2) {
1137         return lanewise(op, v1, broadcast(e2));
1138     }
1139 
1140     /**
1141      * Combines the lane values of this vector
1142      * with the values of another vector and a broadcast scalar,
1143      * with selection of lane elements controlled by a mask.
1144      *
1145      * This is a masked lane-wise ternary operation which applies
1146      * the selected operation to each lane.
1147      * The return value will be equal to this expression:
1148      * {@code this.lanewise(op, v1, this.broadcast(e2), m)}.
1149      *
1150      * @param op the operation used to combine lane values
1151      * @param v1 the other input vector
1152      * @param e2 the input scalar
1153      * @param m the mask controlling lane selection
1154      * @return the result of applying the operation lane-wise
1155      *         to the input vectors and the scalar
1156      * @throws UnsupportedOperationException if this vector does
1157      *         not support the requested operation
1158      * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
1159      * @see #lanewise(VectorOperators.Ternary,short,short,VectorMask)
1160      * @see #lanewise(VectorOperators.Ternary,Vector,short)
1161      */
1162     @ForceInline
1163     public final
1164     ShortVector lanewise(VectorOperators.Ternary op, //(op,v1,e2,m)
1165                                   Vector<Short> v1,
1166                                   short e2,
1167                                   VectorMask<Short> m) {
1168         return lanewise(op, v1, broadcast(e2), m);
1169     }
1170 
1171     /**
1172      * Combines the lane values of this vector
1173      * with the values of another vector and a broadcast scalar.
1174      *
1175      * This is a lane-wise ternary operation which applies
1176      * the selected operation to each lane.
1177      * The return value will be equal to this expression:
1178      * {@code this.lanewise(op, this.broadcast(e1), v2)}.
1179      *
1180      * @param op the operation used to combine lane values
1181      * @param e1 the input scalar
1182      * @param v2 the other input vector
1183      * @return the result of applying the operation lane-wise
1184      *         to the input vectors and the scalar
1185      * @throws UnsupportedOperationException if this vector does
1186      *         not support the requested operation
1187      * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
1188      * @see #lanewise(VectorOperators.Ternary,short,Vector,VectorMask)
1189      */
1190     @ForceInline
1191     public final
1192     ShortVector lanewise(VectorOperators.Ternary op, //(op,e1,v2)
1193                                   short e1,
1194                                   Vector<Short> v2) {
1195         return lanewise(op, broadcast(e1), v2);
1196     }
1197 
1198     /**
1199      * Combines the lane values of this vector
1200      * with the values of another vector and a broadcast scalar,
1201      * with selection of lane elements controlled by a mask.
1202      *
1203      * This is a masked lane-wise ternary operation which applies
1204      * the selected operation to each lane.
1205      * The return value will be equal to this expression:
1206      * {@code this.lanewise(op, this.broadcast(e1), v2, m)}.
1207      *
1208      * @param op the operation used to combine lane values
1209      * @param e1 the input scalar
1210      * @param v2 the other input vector
1211      * @param m the mask controlling lane selection
1212      * @return the result of applying the operation lane-wise
1213      *         to the input vectors and the scalar
1214      * @throws UnsupportedOperationException if this vector does
1215      *         not support the requested operation
1216      * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
1217      * @see #lanewise(VectorOperators.Ternary,short,Vector)
1218      */
1219     @ForceInline
1220     public final
1221     ShortVector lanewise(VectorOperators.Ternary op, //(op,e1,v2,m)
1222                                   short e1,
1223                                   Vector<Short> v2,
1224                                   VectorMask<Short> m) {
1225         return lanewise(op, broadcast(e1), v2, m);
1226     }
1227 
1228     // (Thus endeth the Great and Mighty Ternary Ogdoad.)
1229     // https://en.wikipedia.org/wiki/Ogdoad
1230 
1231     /// FULL-SERVICE BINARY METHODS: ADD, SUB, MUL, DIV
1232     //
1233     // These include masked and non-masked versions.
1234     // This subclass adds broadcast (masked or not).
1235 
1236     /**
1237      * {@inheritDoc} <!--workaround-->
1238      * @see #add(short)
1239      */
1240     @Override
1241     @ForceInline
1242     public final ShortVector add(Vector<Short> v) {
1243         return lanewise(ADD, v);
1244     }
1245 
1246     /**
1247      * Adds this vector to the broadcast of an input scalar.
1248      *
1249      * This is a lane-wise binary operation which applies
1250      * the primitive addition operation ({@code +}) to each lane.
1251      *
1252      * This method is also equivalent to the expression
1253      * {@link #lanewise(VectorOperators.Binary,short)
1254      *    lanewise}{@code (}{@link VectorOperators#ADD
1255      *    ADD}{@code , e)}.
1256      *
1257      * @param e the input scalar
1258      * @return the result of adding each lane of this vector to the scalar
1259      * @see #add(Vector)
1260      * @see #broadcast(short)
1261      * @see #add(short,VectorMask)
1262      * @see VectorOperators#ADD
1263      * @see #lanewise(VectorOperators.Binary,Vector)
1264      * @see #lanewise(VectorOperators.Binary,short)
1265      */
1266     @ForceInline
1267     public final
1268     ShortVector add(short e) {
1269         return lanewise(ADD, e);
1270     }
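
    // A brief sketch of the equivalences documented above (illustrative
    // only): the three expressions below compute the same result.
    private static ShortVector addScalarSketch(ShortVector v) {
        ShortVector r1 = v.add((short) 5);
        ShortVector r2 = v.lanewise(ADD, (short) 5);
        ShortVector r3 = v.lanewise(ADD, v.broadcast((short) 5));
        return r1.lanewise(OR, r2).lanewise(OR, r3);  // r1, r2 and r3 are equal
    }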
1271 
1272     /**
1273      * {@inheritDoc} <!--workaround-->
1274      * @see #add(short,VectorMask)
1275      */
1276     @Override
1277     @ForceInline
1278     public final ShortVector add(Vector<Short> v,
1279                                           VectorMask<Short> m) {
1280         return lanewise(ADD, v, m);
1281     }
1282 
1283     /**
1284      * Adds this vector to the broadcast of an input scalar,
1285      * selecting lane elements controlled by a mask.
1286      *
1287      * This is a masked lane-wise binary operation which applies
1288      * the primitive addition operation ({@code +}) to each lane.
1289      *
1290      * This method is also equivalent to the expression
1291      * {@link #lanewise(VectorOperators.Binary,short,VectorMask)
1292      *    lanewise}{@code (}{@link VectorOperators#ADD
1293      *    ADD}{@code , e, m)}.
1294      *
1295      * @param e the input scalar
1296      * @param m the mask controlling lane selection
1297      * @return the result of adding each lane of this vector to the scalar
1298      * @see #add(Vector,VectorMask)
1299      * @see #broadcast(short)
1300      * @see #add(short)
1301      * @see VectorOperators#ADD
1302      * @see #lanewise(VectorOperators.Binary,Vector)
1303      * @see #lanewise(VectorOperators.Binary,short)
1304      */
1305     @ForceInline
1306     public final ShortVector add(short e,
1307                                           VectorMask<Short> m) {
1308         return lanewise(ADD, e, m);
1309     }
1310 
1311     /**
1312      * {@inheritDoc} <!--workaround-->
1313      * @see #sub(short)
1314      */
1315     @Override
1316     @ForceInline
1317     public final ShortVector sub(Vector<Short> v) {
1318         return lanewise(SUB, v);
1319     }
1320 
1321     /**
1322      * Subtracts an input scalar from this vector.
1323      *
1324      * This is a lane-wise binary operation which applies
1325      * the primitive subtraction operation ({@code -}) to each lane.
1326      *
1327      * This method is also equivalent to the expression
1328      * {@link #lanewise(VectorOperators.Binary,short)
1329      *    lanewise}{@code (}{@link VectorOperators#SUB
1330      *    SUB}{@code , e)}.
1331      *
1332      * @param e the input scalar
1333      * @return the result of subtracting the scalar from each lane of this vector
1334      * @see #sub(Vector)
1335      * @see #broadcast(short)
1336      * @see #sub(short,VectorMask)
1337      * @see VectorOperators#SUB
1338      * @see #lanewise(VectorOperators.Binary,Vector)
1339      * @see #lanewise(VectorOperators.Binary,short)
1340      */
1341     @ForceInline
1342     public final ShortVector sub(short e) {
1343         return lanewise(SUB, e);
1344     }
1345 
1346     /**
1347      * {@inheritDoc} <!--workaround-->
1348      * @see #sub(short,VectorMask)
1349      */
1350     @Override
1351     @ForceInline
1352     public final ShortVector sub(Vector<Short> v,
1353                                           VectorMask<Short> m) {
1354         return lanewise(SUB, v, m);
1355     }
1356 
1357     /**
1358      * Subtracts an input scalar from this vector
1359      * under the control of a mask.
1360      *
1361      * This is a masked lane-wise binary operation which applies
1362      * the primitive subtraction operation ({@code -}) to each lane.
1363      *
1364      * This method is also equivalent to the expression
1365      * {@link #lanewise(VectorOperators.Binary,short,VectorMask)
1366      *    lanewise}{@code (}{@link VectorOperators#SUB
1367      *    SUB}{@code , e, m)}.
1368      *
1369      * @param e the input scalar
1370      * @param m the mask controlling lane selection
1371      * @return the result of subtracting the scalar from each lane of this vector
1372      * @see #sub(Vector,VectorMask)
1373      * @see #broadcast(short)
1374      * @see #sub(short)
1375      * @see VectorOperators#SUB
1376      * @see #lanewise(VectorOperators.Binary,Vector)
1377      * @see #lanewise(VectorOperators.Binary,short)
1378      */
1379     @ForceInline
1380     public final ShortVector sub(short e,
1381                                           VectorMask<Short> m) {
1382         return lanewise(SUB, e, m);
1383     }
1384 
1385     /**
1386      * {@inheritDoc} <!--workaround-->
1387      * @see #mul(short)
1388      */
1389     @Override
1390     @ForceInline
1391     public final ShortVector mul(Vector<Short> v) {
1392         return lanewise(MUL, v);
1393     }
1394 
1395     /**
1396      * Multiplies this vector by the broadcast of an input scalar.
1397      *
1398      * This is a lane-wise binary operation which applies
1399      * the primitive multiplication operation ({@code *}) to each lane.
1400      *
1401      * This method is also equivalent to the expression
1402      * {@link #lanewise(VectorOperators.Binary,short)
1403      *    lanewise}{@code (}{@link VectorOperators#MUL
1404      *    MUL}{@code , e)}.
1405      *
1406      * @param e the input scalar
1407      * @return the result of multiplying this vector by the given scalar
1408      * @see #mul(Vector)
1409      * @see #broadcast(short)
1410      * @see #mul(short,VectorMask)
1411      * @see VectorOperators#MUL
1412      * @see #lanewise(VectorOperators.Binary,Vector)
1413      * @see #lanewise(VectorOperators.Binary,short)
1414      */
1415     @ForceInline
1416     public final ShortVector mul(short e) {
1417         return lanewise(MUL, e);
1418     }
1419 
1420     /**
1421      * {@inheritDoc} <!--workaround-->
1422      * @see #mul(short,VectorMask)
1423      */
1424     @Override
1425     @ForceInline
1426     public final ShortVector mul(Vector<Short> v,
1427                                           VectorMask<Short> m) {
1428         return lanewise(MUL, v, m);
1429     }
1430 
1431     /**
1432      * Multiplies this vector by the broadcast of an input scalar,
1433      * selecting lane elements controlled by a mask.
1434      *
1435      * This is a masked lane-wise binary operation which applies
1436      * the primitive multiplication operation ({@code *}) to each lane.
1437      *
1438      * This method is also equivalent to the expression
1439      * {@link #lanewise(VectorOperators.Binary,short,VectorMask)
1440      *    lanewise}{@code (}{@link VectorOperators#MUL
     *    MUL}{@code , e, m)}.
1442      *
1443      * @param e the input scalar
1444      * @param m the mask controlling lane selection
     * @return the result of multiplying each lane of this vector by the scalar
1446      * @see #mul(Vector,VectorMask)
1447      * @see #broadcast(short)
1448      * @see #mul(short)
1449      * @see VectorOperators#MUL
1450      * @see #lanewise(VectorOperators.Binary,Vector)
1451      * @see #lanewise(VectorOperators.Binary,short)
1452      */
1453     @ForceInline
1454     public final ShortVector mul(short e,
1455                                           VectorMask<Short> m) {
1456         return lanewise(MUL, e, m);
1457     }
1458 
1459     /**
1460      * {@inheritDoc} <!--workaround-->
1461      * @apiNote If there is a zero divisor, {@code
1462      * ArithmeticException} will be thrown.
1463      */
1464     @Override
1465     @ForceInline
1466     public final ShortVector div(Vector<Short> v) {
1467         return lanewise(DIV, v);
1468     }
1469 
1470     /**
1471      * Divides this vector by the broadcast of an input scalar.
1472      *
1473      * This is a lane-wise binary operation which applies
1474      * the primitive division operation ({@code /}) to each lane.
1475      *
1476      * This method is also equivalent to the expression
1477      * {@link #lanewise(VectorOperators.Binary,short)
1478      *    lanewise}{@code (}{@link VectorOperators#DIV
1479      *    DIV}{@code , e)}.
1480      *
1481      * @apiNote If there is a zero divisor, {@code
1482      * ArithmeticException} will be thrown.
1483      *
1484      * @param e the input scalar
1485      * @return the result of dividing each lane of this vector by the scalar
1486      * @see #div(Vector)
1487      * @see #broadcast(short)
1488      * @see #div(short,VectorMask)
1489      * @see VectorOperators#DIV
1490      * @see #lanewise(VectorOperators.Binary,Vector)
1491      * @see #lanewise(VectorOperators.Binary,short)
1492      */
1493     @ForceInline
1494     public final ShortVector div(short e) {
1495         return lanewise(DIV, e);
1496     }
1497 
1498     /**
1499      * {@inheritDoc} <!--workaround-->
1500      * @see #div(short,VectorMask)
1501      * @apiNote If there is a zero divisor, {@code
1502      * ArithmeticException} will be thrown.
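     * To avoid the exception, one illustrative (non-normative) idiom is to
     * mask off the zero-divisor lanes first, assuming two vectors {@code a}
     * and {@code b} of the same species:
     * <pre>{@code
     * VectorMask<Short> nz = b.compare(VectorOperators.NE, (short) 0);
     * ShortVector q = a.div(b, nz);  // lanes where b is zero keep a's original value
     * }</pre>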
1503      */
1504     @Override
1505     @ForceInline
1506     public final ShortVector div(Vector<Short> v,
1507                                           VectorMask<Short> m) {
1508         return lanewise(DIV, v, m);
1509     }
1510 
1511     /**
1512      * Divides this vector by the broadcast of an input scalar,
1513      * selecting lane elements controlled by a mask.
1514      *
1515      * This is a masked lane-wise binary operation which applies
1516      * the primitive division operation ({@code /}) to each lane.
1517      *
1518      * This method is also equivalent to the expression
1519      * {@link #lanewise(VectorOperators.Binary,short,VectorMask)
1520      *    lanewise}{@code (}{@link VectorOperators#DIV
     *    DIV}{@code , e, m)}.
1522      *
1523      * @apiNote If there is a zero divisor, {@code
1524      * ArithmeticException} will be thrown.
1525      *
1526      * @param e the input scalar
1527      * @param m the mask controlling lane selection
1528      * @return the result of dividing each lane of this vector by the scalar
1529      * @see #div(Vector,VectorMask)
1530      * @see #broadcast(short)
1531      * @see #div(short)
1532      * @see VectorOperators#DIV
1533      * @see #lanewise(VectorOperators.Binary,Vector)
1534      * @see #lanewise(VectorOperators.Binary,short)
1535      */
1536     @ForceInline
1537     public final ShortVector div(short e,
1538                                           VectorMask<Short> m) {
1539         return lanewise(DIV, e, m);
1540     }
1541 
1542     /// END OF FULL-SERVICE BINARY METHODS
1543 
1544     /// SECOND-TIER BINARY METHODS
1545     //
1546     // There are no masked versions.
1547 
1548     /**
1549      * {@inheritDoc} <!--workaround-->
1550      */
1551     @Override
1552     @ForceInline
1553     public final ShortVector min(Vector<Short> v) {
1554         return lanewise(MIN, v);
1555     }
1556 
1557     // FIXME:  "broadcast of an input scalar" is really wordy.  Reduce?
1558     /**
1559      * Computes the smaller of this vector and the broadcast of an input scalar.
1560      *
1561      * This is a lane-wise binary operation which applies the
1562      * operation {@code Math.min()} to each pair of
1563      * corresponding lane values.
1564      *
1565      * This method is also equivalent to the expression
1566      * {@link #lanewise(VectorOperators.Binary,short)
1567      *    lanewise}{@code (}{@link VectorOperators#MIN
1568      *    MIN}{@code , e)}.
1569      *
1570      * @param e the input scalar
     * @return the lane-wise minimum of this vector and the given scalar
1572      * @see #min(Vector)
1573      * @see #broadcast(short)
1574      * @see VectorOperators#MIN
1575      * @see #lanewise(VectorOperators.Binary,short,VectorMask)
1576      */
1577     @ForceInline
1578     public final ShortVector min(short e) {
1579         return lanewise(MIN, e);
1580     }
1581 
1582     /**
1583      * {@inheritDoc} <!--workaround-->
1584      */
1585     @Override
1586     @ForceInline
1587     public final ShortVector max(Vector<Short> v) {
1588         return lanewise(MAX, v);
1589     }
1590 
1591     /**
1592      * Computes the larger of this vector and the broadcast of an input scalar.
1593      *
1594      * This is a lane-wise binary operation which applies the
1595      * operation {@code Math.max()} to each pair of
1596      * corresponding lane values.
1597      *
1598      * This method is also equivalent to the expression
1599      * {@link #lanewise(VectorOperators.Binary,short)
1600      *    lanewise}{@code (}{@link VectorOperators#MAX
1601      *    MAX}{@code , e)}.
1602      *
1603      * @param e the input scalar
     * @return the lane-wise maximum of this vector and the given scalar
1605      * @see #max(Vector)
1606      * @see #broadcast(short)
1607      * @see VectorOperators#MAX
1608      * @see #lanewise(VectorOperators.Binary,short,VectorMask)
1609      */
1610     @ForceInline
1611     public final ShortVector max(short e) {
1612         return lanewise(MAX, e);
1613     }
1614 
1615     // common bitwise operators: and, or, not (with scalar versions)
1616     /**
1617      * Computes the bitwise logical conjunction ({@code &})
1618      * of this vector and a second input vector.
1619      *
     * This is a lane-wise binary operation which applies
     * the primitive bitwise "and" operation ({@code &})
1622      * to each pair of corresponding lane values.
1623      *
1624      * This method is also equivalent to the expression
1625      * {@link #lanewise(VectorOperators.Binary,Vector)
1626      *    lanewise}{@code (}{@link VectorOperators#AND
1627      *    AND}{@code , v)}.
1628      *
1629      * <p>
1630      * This is not a full-service named operation like
1631      * {@link #add(Vector) add}.  A masked version of
1632      * this operation is not directly available
1633      * but may be obtained via the masked version of
1634      * {@code lanewise}.
1635      *
1636      * @param v a second input vector
1637      * @return the bitwise {@code &} of this vector and the second input vector
1638      * @see #and(short)
1639      * @see #or(Vector)
1640      * @see #not()
1641      * @see VectorOperators#AND
1642      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
1643      */
1644     @ForceInline
1645     public final ShortVector and(Vector<Short> v) {
1646         return lanewise(AND, v);
1647     }
1648 
1649     /**
1650      * Computes the bitwise logical conjunction ({@code &})
1651      * of this vector and a scalar.
1652      *
     * This is a lane-wise binary operation which applies
     * the primitive bitwise "and" operation ({@code &})
1655      * to each pair of corresponding lane values.
1656      *
1657      * This method is also equivalent to the expression
1658      * {@link #lanewise(VectorOperators.Binary,Vector)
1659      *    lanewise}{@code (}{@link VectorOperators#AND
1660      *    AND}{@code , e)}.
1661      *
1662      * @param e an input scalar
1663      * @return the bitwise {@code &} of this vector and scalar
1664      * @see #and(Vector)
1665      * @see VectorOperators#AND
1666      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
1667      */
1668     @ForceInline
1669     public final ShortVector and(short e) {
1670         return lanewise(AND, e);
1671     }
1672 
1673     /**
1674      * Computes the bitwise logical disjunction ({@code |})
1675      * of this vector and a second input vector.
1676      *
     * This is a lane-wise binary operation which applies
     * the primitive bitwise "or" operation ({@code |})
1679      * to each pair of corresponding lane values.
1680      *
1681      * This method is also equivalent to the expression
1682      * {@link #lanewise(VectorOperators.Binary,Vector)
1683      *    lanewise}{@code (}{@link VectorOperators#OR
     *    OR}{@code , v)}.
1685      *
1686      * <p>
1687      * This is not a full-service named operation like
1688      * {@link #add(Vector) add}.  A masked version of
1689      * this operation is not directly available
1690      * but may be obtained via the masked version of
1691      * {@code lanewise}.
1692      *
1693      * @param v a second input vector
1694      * @return the bitwise {@code |} of this vector and the second input vector
1695      * @see #or(short)
1696      * @see #and(Vector)
1697      * @see #not()
1698      * @see VectorOperators#OR
1699      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
1700      */
1701     @ForceInline
1702     public final ShortVector or(Vector<Short> v) {
1703         return lanewise(OR, v);
1704     }
1705 
1706     /**
1707      * Computes the bitwise logical disjunction ({@code |})
1708      * of this vector and a scalar.
1709      *
     * This is a lane-wise binary operation which applies
     * the primitive bitwise "or" operation ({@code |})
1712      * to each pair of corresponding lane values.
1713      *
1714      * This method is also equivalent to the expression
1715      * {@link #lanewise(VectorOperators.Binary,Vector)
1716      *    lanewise}{@code (}{@link VectorOperators#OR
1717      *    OR}{@code , e)}.
1718      *
1719      * @param e an input scalar
1720      * @return the bitwise {@code |} of this vector and scalar
1721      * @see #or(Vector)
1722      * @see VectorOperators#OR
1723      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
1724      */
1725     @ForceInline
1726     public final ShortVector or(short e) {
1727         return lanewise(OR, e);
1728     }
1729 
1730 
1731 
1732     /// UNARY METHODS
1733 
1734     /**
1735      * {@inheritDoc} <!--workaround-->
1736      */
1737     @Override
1738     @ForceInline
1739     public final
1740     ShortVector neg() {
1741         return lanewise(NEG);
1742     }
1743 
1744     /**
1745      * {@inheritDoc} <!--workaround-->
1746      */
1747     @Override
1748     @ForceInline
1749     public final
1750     ShortVector abs() {
1751         return lanewise(ABS);
1752     }
1753 
1754     // not (~)
1755     /**
1756      * Computes the bitwise logical complement ({@code ~})
1757      * of this vector.
1758      *
     * This is a lane-wise unary operation which applies
     * the primitive bitwise "not" operation ({@code ~})
1761      * to each lane value.
1762      *
1763      * This method is also equivalent to the expression
1764      * {@link #lanewise(VectorOperators.Unary)
1765      *    lanewise}{@code (}{@link VectorOperators#NOT
1766      *    NOT}{@code )}.
1767      *
1768      * <p>
1769      * This is not a full-service named operation like
1770      * {@link #add(Vector) add}.  A masked version of
1771      * this operation is not directly available
1772      * but may be obtained via the masked version of
1773      * {@code lanewise}.
1774      *
1775      * @return the bitwise complement {@code ~} of this vector
1776      * @see #and(Vector)
1777      * @see VectorOperators#NOT
1778      * @see #lanewise(VectorOperators.Unary,VectorMask)
1779      */
1780     @ForceInline
1781     public final ShortVector not() {
1782         return lanewise(NOT);
1783     }
1784 
1785 
1786     /// COMPARISONS
1787 
1788     /**
1789      * {@inheritDoc} <!--workaround-->
1790      */
1791     @Override
1792     @ForceInline
1793     public final
1794     VectorMask<Short> eq(Vector<Short> v) {
1795         return compare(EQ, v);
1796     }
1797 
1798     /**
1799      * Tests if this vector is equal to an input scalar.
1800      *
1801      * This is a lane-wise binary test operation which applies
1802      * the primitive equals operation ({@code ==}) to each lane.
     * The result is the same as {@code compare(VectorOperators.EQ, e)}.
1804      *
1805      * @param e the input scalar
1806      * @return the result mask of testing if this vector
1807      *         is equal to {@code e}
1808      * @see #compare(VectorOperators.Comparison,short)
1809      */
1810     @ForceInline
1811     public final
1812     VectorMask<Short> eq(short e) {
1813         return compare(EQ, e);
1814     }
1815 
1816     /**
1817      * {@inheritDoc} <!--workaround-->
1818      */
1819     @Override
1820     @ForceInline
1821     public final
1822     VectorMask<Short> lt(Vector<Short> v) {
1823         return compare(LT, v);
1824     }
1825 
1826     /**
1827      * Tests if this vector is less than an input scalar.
1828      *
1829      * This is a lane-wise binary test operation which applies
1830      * the primitive less than operation ({@code <}) to each lane.
1831      * The result is the same as {@code compare(VectorOperators.LT, e)}.
1832      *
1833      * @param e the input scalar
1834      * @return the mask result of testing if this vector
1835      *         is less than the input scalar
1836      * @see #compare(VectorOperators.Comparison,short)
1837      */
1838     @ForceInline
1839     public final
1840     VectorMask<Short> lt(short e) {
1841         return compare(LT, e);
1842     }
1843 
1844     /**
1845      * {@inheritDoc} <!--workaround-->
1846      */
1847     @Override
1848     public abstract
1849     VectorMask<Short> test(VectorOperators.Test op);
1850 
1851     /*package-private*/
1852     @ForceInline
1853     final
1854     <M extends VectorMask<Short>>
1855     M testTemplate(Class<M> maskType, Test op) {
1856         ShortSpecies vsp = vspecies();
1857         if (opKind(op, VO_SPECIAL)) {
1858             ShortVector bits = this.viewAsIntegralLanes();
1859             VectorMask<Short> m;
1860             if (op == IS_DEFAULT) {
1861                 m = bits.compare(EQ, (short) 0);
1862             } else if (op == IS_NEGATIVE) {
1863                 m = bits.compare(LT, (short) 0);
1864             }
1865             else {
1866                 throw new AssertionError(op);
1867             }
1868             return maskType.cast(m);
1869         }
1870         int opc = opCode(op);
1871         throw new AssertionError(op);
1872     }
1873 
1874     /**
1875      * {@inheritDoc} <!--workaround-->
1876      */
1877     @Override
1878     @ForceInline
1879     public final
1880     VectorMask<Short> test(VectorOperators.Test op,
1881                                   VectorMask<Short> m) {
1882         return test(op).and(m);
1883     }
1884 
1885     /**
1886      * {@inheritDoc} <!--workaround-->
1887      */
1888     @Override
1889     public abstract
1890     VectorMask<Short> compare(VectorOperators.Comparison op, Vector<Short> v);
1891 
1892     /*package-private*/
1893     @ForceInline
1894     final
1895     <M extends VectorMask<Short>>
1896     M compareTemplate(Class<M> maskType, Comparison op, Vector<Short> v) {
1897         ShortVector that = (ShortVector) v;
1898         that.check(this);
1899         int opc = opCode(op);
1900         return VectorSupport.compare(
1901             opc, getClass(), maskType, short.class, length(),
1902             this, that, null,
1903             (cond, v0, v1, m1) -> {
1904                 AbstractMask<Short> m
1905                     = v0.bTest(cond, v1, (cond_, i, a, b)
1906                                -> compareWithOp(cond, a, b));
1907                 @SuppressWarnings("unchecked")
1908                 M m2 = (M) m;
1909                 return m2;
1910             });
1911     }
1912 
1913     /*package-private*/
1914     @ForceInline
1915     final
1916     <M extends VectorMask<Short>>
1917     M compareTemplate(Class<M> maskType, Comparison op, Vector<Short> v, M m) {
1918         ShortVector that = (ShortVector) v;
1919         that.check(this);
1920         m.check(maskType, this);
1921         int opc = opCode(op);
1922         return VectorSupport.compare(
1923             opc, getClass(), maskType, short.class, length(),
1924             this, that, m,
1925             (cond, v0, v1, m1) -> {
1926                 AbstractMask<Short> cmpM
1927                     = v0.bTest(cond, v1, (cond_, i, a, b)
1928                                -> compareWithOp(cond, a, b));
1929                 @SuppressWarnings("unchecked")
1930                 M m2 = (M) cmpM.and(m1);
1931                 return m2;
1932             });
1933     }
1934 
1935     @ForceInline
1936     private static boolean compareWithOp(int cond, short a, short b) {
1937         return switch (cond) {
1938             case BT_eq -> a == b;
1939             case BT_ne -> a != b;
1940             case BT_lt -> a < b;
1941             case BT_le -> a <= b;
1942             case BT_gt -> a > b;
1943             case BT_ge -> a >= b;
1944             case BT_ult -> Short.compareUnsigned(a, b) < 0;
1945             case BT_ule -> Short.compareUnsigned(a, b) <= 0;
1946             case BT_ugt -> Short.compareUnsigned(a, b) > 0;
1947             case BT_uge -> Short.compareUnsigned(a, b) >= 0;
1948             default -> throw new AssertionError();
1949         };
1950     }
1951 
1952     /**
1953      * Tests this vector by comparing it with an input scalar,
1954      * according to the given comparison operation.
1955      *
1956      * This is a lane-wise binary test operation which applies
1957      * the comparison operation to each lane.
1958      * <p>
1959      * The result is the same as
1960      * {@code compare(op, broadcast(species(), e))}.
1961      * That is, the scalar may be regarded as broadcast to
1962      * a vector of the same species, and then compared
1963      * against the original vector, using the selected
1964      * comparison operation.
1965      *
1966      * @param op the operation used to compare lane values
1967      * @param e the input scalar
1968      * @return the mask result of testing lane-wise if this vector
1969      *         compares to the input, according to the selected
1970      *         comparison operator
1971      * @see ShortVector#compare(VectorOperators.Comparison,Vector)
1972      * @see #eq(short)
1973      * @see #lt(short)
1974      */
1975     public abstract
1976     VectorMask<Short> compare(Comparison op, short e);
1977 
1978     /*package-private*/
1979     @ForceInline
1980     final
1981     <M extends VectorMask<Short>>
1982     M compareTemplate(Class<M> maskType, Comparison op, short e) {
1983         return compareTemplate(maskType, op, broadcast(e));
1984     }
1985 
1986     /**
1987      * Tests this vector by comparing it with an input scalar,
1988      * according to the given comparison operation,
1989      * in lanes selected by a mask.
1990      *
     * This is a masked lane-wise binary test operation which applies
     * the comparison operation to each pair of corresponding lane values.
1993      *
1994      * The returned result is equal to the expression
     * {@code compare(op,e).and(m)}.
1996      *
1997      * @param op the operation used to compare lane values
1998      * @param e the input scalar
1999      * @param m the mask controlling lane selection
2000      * @return the mask result of testing lane-wise if this vector
2001      *         compares to the input, according to the selected
2002      *         comparison operator,
2003      *         and only in the lanes selected by the mask
2004      * @see ShortVector#compare(VectorOperators.Comparison,Vector,VectorMask)
2005      */
2006     @ForceInline
2007     public final VectorMask<Short> compare(VectorOperators.Comparison op,
2008                                                short e,
2009                                                VectorMask<Short> m) {
2010         return compare(op, broadcast(e), m);
2011     }
2012 
2013     /**
2014      * {@inheritDoc} <!--workaround-->
2015      */
2016     @Override
2017     public abstract
2018     VectorMask<Short> compare(Comparison op, long e);
2019 
2020     /*package-private*/
2021     @ForceInline
2022     final
2023     <M extends VectorMask<Short>>
2024     M compareTemplate(Class<M> maskType, Comparison op, long e) {
2025         return compareTemplate(maskType, op, broadcast(e));
2026     }
2027 
2028     /**
2029      * {@inheritDoc} <!--workaround-->
2030      */
2031     @Override
2032     @ForceInline
2033     public final
2034     VectorMask<Short> compare(Comparison op, long e, VectorMask<Short> m) {
2035         return compare(op, broadcast(e), m);
2036     }
2037 
2038 
2039 
2040     /**
2041      * {@inheritDoc} <!--workaround-->
2042      */
2043     @Override public abstract
2044     ShortVector blend(Vector<Short> v, VectorMask<Short> m);
2045 
2046     /*package-private*/
2047     @ForceInline
2048     final
2049     <M extends VectorMask<Short>>
2050     ShortVector
2051     blendTemplate(Class<M> maskType, ShortVector v, M m) {
2052         v.check(this);
2053         return VectorSupport.blend(
2054             getClass(), maskType, short.class, length(),
2055             this, v, m,
2056             (v0, v1, m_) -> v0.bOp(v1, m_, (i, a, b) -> b));
2057     }
2058 
2059     /**
2060      * {@inheritDoc} <!--workaround-->
2061      */
2062     @Override public abstract ShortVector addIndex(int scale);
2063 
2064     /*package-private*/
2065     @ForceInline
2066     final ShortVector addIndexTemplate(int scale) {
2067         ShortSpecies vsp = vspecies();
2068         // make sure VLENGTH*scale doesn't overflow:
2069         vsp.checkScale(scale);
2070         return VectorSupport.indexVector(
2071             getClass(), short.class, length(),
2072             this, scale, vsp,
2073             (v, scale_, s)
2074             -> {
2075                 // If the platform doesn't support an INDEX
2076                 // instruction directly, load IOTA from memory
2077                 // and multiply.
2078                 ShortVector iota = s.iota();
2079                 short sc = (short) scale_;
2080                 return v.add(sc == 1 ? iota : iota.mul(sc));
2081             });
2082     }
2083 
2084     /**
2085      * Replaces selected lanes of this vector with
2086      * a scalar value
2087      * under the control of a mask.
2088      *
2089      * This is a masked lane-wise binary operation which
2090      * selects each lane value from one or the other input.
2091      *
2092      * The returned result is equal to the expression
2093      * {@code blend(broadcast(e),m)}.
2094      *
2095      * @param e the input scalar, containing the replacement lane value
2096      * @param m the mask controlling lane selection of the scalar
2097      * @return the result of blending the lane elements of this vector with
2098      *         the scalar value
2099      */
2100     @ForceInline
2101     public final ShortVector blend(short e,
2102                                             VectorMask<Short> m) {
2103         return blend(broadcast(e), m);
2104     }
2105 
2106     /**
2107      * Replaces selected lanes of this vector with
2108      * a scalar value
2109      * under the control of a mask.
2110      *
2111      * This is a masked lane-wise binary operation which
2112      * selects each lane value from one or the other input.
2113      *
2114      * The returned result is equal to the expression
2115      * {@code blend(broadcast(e),m)}.
2116      *
2117      * @param e the input scalar, containing the replacement lane value
2118      * @param m the mask controlling lane selection of the scalar
2119      * @return the result of blending the lane elements of this vector with
2120      *         the scalar value
2121      */
2122     @ForceInline
2123     public final ShortVector blend(long e,
2124                                             VectorMask<Short> m) {
2125         return blend(broadcast(e), m);
2126     }
2127 
2128     /**
2129      * {@inheritDoc} <!--workaround-->
2130      */
2131     @Override
2132     public abstract
2133     ShortVector slice(int origin, Vector<Short> v1);
2134 
2135     /*package-private*/
2136     final
2137     @ForceInline
2138     ShortVector sliceTemplate(int origin, Vector<Short> v1) {
2139         ShortVector that = (ShortVector) v1;
2140         that.check(this);
2141         Objects.checkIndex(origin, length() + 1);
2142         VectorShuffle<Short> iota = iotaShuffle();
2143         VectorMask<Short> blendMask = iota.toVector().compare(VectorOperators.LT, (broadcast((short)(length() - origin))));
2144         iota = iotaShuffle(origin, 1, true);
2145         return that.rearrange(iota).blend(this.rearrange(iota), blendMask);
2146     }
2147 
2148     /**
2149      * {@inheritDoc} <!--workaround-->
2150      */
2151     @Override
2152     @ForceInline
2153     public final
2154     ShortVector slice(int origin,
2155                                Vector<Short> w,
2156                                VectorMask<Short> m) {
2157         return broadcast(0).blend(slice(origin, w), m);
2158     }
2159 
2160     /**
2161      * {@inheritDoc} <!--workaround-->
2162      */
2163     @Override
2164     public abstract
2165     ShortVector slice(int origin);
2166 
2167     /*package-private*/
2168     final
2169     @ForceInline
2170     ShortVector sliceTemplate(int origin) {
2171         Objects.checkIndex(origin, length() + 1);
2172         VectorShuffle<Short> iota = iotaShuffle();
2173         VectorMask<Short> blendMask = iota.toVector().compare(VectorOperators.LT, (broadcast((short)(length() - origin))));
2174         iota = iotaShuffle(origin, 1, true);
2175         return vspecies().zero().blend(this.rearrange(iota), blendMask);
2176     }
2177 
2178     /**
2179      * {@inheritDoc} <!--workaround-->
2180      */
2181     @Override
2182     public abstract
2183     ShortVector unslice(int origin, Vector<Short> w, int part);
2184 
2185     /*package-private*/
2186     final
2187     @ForceInline
2188     ShortVector
2189     unsliceTemplate(int origin, Vector<Short> w, int part) {
2190         ShortVector that = (ShortVector) w;
2191         that.check(this);
2192         Objects.checkIndex(origin, length() + 1);
2193         VectorShuffle<Short> iota = iotaShuffle();
2194         VectorMask<Short> blendMask = iota.toVector().compare((part == 0) ? VectorOperators.GE : VectorOperators.LT,
2195                                                                   (broadcast((short)(origin))));
2196         iota = iotaShuffle(-origin, 1, true);
2197         return that.blend(this.rearrange(iota), blendMask);
2198     }
2199 
2200     /*package-private*/
2201     final
2202     @ForceInline
2203     <M extends VectorMask<Short>>
2204     ShortVector
2205     unsliceTemplate(Class<M> maskType, int origin, Vector<Short> w, int part, M m) {
2206         ShortVector that = (ShortVector) w;
2207         that.check(this);
2208         ShortVector slice = that.sliceTemplate(origin, that);
2209         slice = slice.blendTemplate(maskType, this, m);
2210         return slice.unsliceTemplate(origin, w, part);
2211     }
2212 
2213     /**
2214      * {@inheritDoc} <!--workaround-->
2215      */
2216     @Override
2217     public abstract
2218     ShortVector unslice(int origin, Vector<Short> w, int part, VectorMask<Short> m);
2219 
2220     /**
2221      * {@inheritDoc} <!--workaround-->
2222      */
2223     @Override
2224     public abstract
2225     ShortVector unslice(int origin);
2226 
2227     /*package-private*/
2228     final
2229     @ForceInline
2230     ShortVector
2231     unsliceTemplate(int origin) {
2232         Objects.checkIndex(origin, length() + 1);
2233         VectorShuffle<Short> iota = iotaShuffle();
2234         VectorMask<Short> blendMask = iota.toVector().compare(VectorOperators.GE,
2235                                                                   (broadcast((short)(origin))));
2236         iota = iotaShuffle(-origin, 1, true);
2237         return vspecies().zero().blend(this.rearrange(iota), blendMask);
2238     }
2239 
2240     private ArrayIndexOutOfBoundsException
2241     wrongPartForSlice(int part) {
2242         String msg = String.format("bad part number %d for slice operation",
2243                                    part);
2244         return new ArrayIndexOutOfBoundsException(msg);
2245     }
2246 
2247     /**
2248      * {@inheritDoc} <!--workaround-->
2249      */
2250     @Override
2251     public abstract
2252     ShortVector rearrange(VectorShuffle<Short> m);
2253 
2254     /*package-private*/
2255     @ForceInline
2256     final
2257     <S extends VectorShuffle<Short>>
2258     ShortVector rearrangeTemplate(Class<S> shuffletype, S shuffle) {
2259         shuffle.checkIndexes();
2260         return VectorSupport.rearrangeOp(
2261             getClass(), shuffletype, null, short.class, length(),
2262             this, shuffle, null,
2263             (v1, s_, m_) -> v1.uOp((i, a) -> {
2264                 int ei = s_.laneSource(i);
2265                 return v1.lane(ei);
2266             }));
2267     }
2268 
2269     /**
2270      * {@inheritDoc} <!--workaround-->
2271      */
2272     @Override
2273     public abstract
2274     ShortVector rearrange(VectorShuffle<Short> s,
2275                                    VectorMask<Short> m);
2276 
2277     /*package-private*/
2278     @ForceInline
2279     final
2280     <S extends VectorShuffle<Short>, M extends VectorMask<Short>>
2281     ShortVector rearrangeTemplate(Class<S> shuffletype,
2282                                            Class<M> masktype,
2283                                            S shuffle,
2284                                            M m) {
2285 
2286         m.check(masktype, this);
2287         VectorMask<Short> valid = shuffle.laneIsValid();
2288         if (m.andNot(valid).anyTrue()) {
2289             shuffle.checkIndexes();
2290             throw new AssertionError();
2291         }
2292         return VectorSupport.rearrangeOp(
2293                    getClass(), shuffletype, masktype, short.class, length(),
2294                    this, shuffle, m,
2295                    (v1, s_, m_) -> v1.uOp((i, a) -> {
2296                         int ei = s_.laneSource(i);
2297                         return ei < 0  || !m_.laneIsSet(i) ? 0 : v1.lane(ei);
2298                    }));
2299     }
2300 
2301     /**
2302      * {@inheritDoc} <!--workaround-->
2303      */
2304     @Override
2305     public abstract
2306     ShortVector rearrange(VectorShuffle<Short> s,
2307                                    Vector<Short> v);
2308 
2309     /*package-private*/
2310     @ForceInline
2311     final
2312     <S extends VectorShuffle<Short>>
2313     ShortVector rearrangeTemplate(Class<S> shuffletype,
2314                                            S shuffle,
2315                                            ShortVector v) {
2316         VectorMask<Short> valid = shuffle.laneIsValid();
2317         @SuppressWarnings("unchecked")
2318         S ws = (S) shuffle.wrapIndexes();
2319         ShortVector r0 =
2320             VectorSupport.rearrangeOp(
2321                 getClass(), shuffletype, null, short.class, length(),
2322                 this, ws, null,
2323                 (v0, s_, m_) -> v0.uOp((i, a) -> {
2324                     int ei = s_.laneSource(i);
2325                     return v0.lane(ei);
2326                 }));
2327         ShortVector r1 =
2328             VectorSupport.rearrangeOp(
2329                 getClass(), shuffletype, null, short.class, length(),
2330                 v, ws, null,
2331                 (v1, s_, m_) -> v1.uOp((i, a) -> {
2332                     int ei = s_.laneSource(i);
2333                     return v1.lane(ei);
2334                 }));
2335         return r1.blend(r0, valid);
2336     }
2337 
2338     @ForceInline
2339     private final
2340     VectorShuffle<Short> toShuffle0(ShortSpecies dsp) {
2341         short[] a = toArray();
2342         int[] sa = new int[a.length];
2343         for (int i = 0; i < a.length; i++) {
2344             sa[i] = (int) a[i];
2345         }
2346         return VectorShuffle.fromArray(dsp, sa, 0);
2347     }
2348 
2349     /*package-private*/
2350     @ForceInline
2351     final
2352     VectorShuffle<Short> toShuffleTemplate(Class<?> shuffleType) {
2353         ShortSpecies vsp = vspecies();
2354         return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST,
2355                                      getClass(), short.class, length(),
2356                                      shuffleType, byte.class, length(),
2357                                      this, vsp,
2358                                      ShortVector::toShuffle0);
2359     }
2360 
2361     /**
2362      * {@inheritDoc} <!--workaround-->
2363      */
2364     @Override
2365     public abstract
2366     ShortVector selectFrom(Vector<Short> v);
2367 
2368     /*package-private*/
2369     @ForceInline
2370     final ShortVector selectFromTemplate(ShortVector v) {
2371         return v.rearrange(this.toShuffle());
2372     }
2373 
2374     /**
2375      * {@inheritDoc} <!--workaround-->
2376      */
2377     @Override
2378     public abstract
2379     ShortVector selectFrom(Vector<Short> s, VectorMask<Short> m);
2380 
2381     /*package-private*/
2382     @ForceInline
2383     final ShortVector selectFromTemplate(ShortVector v,
2384                                                   AbstractMask<Short> m) {
2385         return v.rearrange(this.toShuffle(), m);
2386     }
2387 
2388     /// Ternary operations
2389 
2390     /**
2391      * Blends together the bits of two vectors under
2392      * the control of a third, which supplies mask bits.
2393      *
     * This is a lane-wise ternary operation which applies
     * the bitwise blending operation {@code (a&~c)|(b&c)}
     * to each lane.
2397      *
2398      * This method is also equivalent to the expression
2399      * {@link #lanewise(VectorOperators.Ternary,Vector,Vector)
2400      *    lanewise}{@code (}{@link VectorOperators#BITWISE_BLEND
2401      *    BITWISE_BLEND}{@code , bits, mask)}.
2402      *
2403      * @param bits input bits to blend into the current vector
2404      * @param mask a bitwise mask to enable blending of the input bits
2405      * @return the bitwise blend of the given bits into the current vector,
2406      *         under control of the bitwise mask
2407      * @see #bitwiseBlend(short,short)
2408      * @see #bitwiseBlend(short,Vector)
2409      * @see #bitwiseBlend(Vector,short)
2410      * @see VectorOperators#BITWISE_BLEND
2411      * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
2412      */
2413     @ForceInline
2414     public final
2415     ShortVector bitwiseBlend(Vector<Short> bits, Vector<Short> mask) {
2416         return lanewise(BITWISE_BLEND, bits, mask);
2417     }
2418 
2419     /**
2420      * Blends together the bits of a vector and a scalar under
2421      * the control of another scalar, which supplies mask bits.
2422      *
     * This is a lane-wise ternary operation which applies
     * the bitwise blending operation {@code (a&~c)|(b&c)}
     * to each lane.
2426      *
2427      * This method is also equivalent to the expression
2428      * {@link #lanewise(VectorOperators.Ternary,Vector,Vector)
2429      *    lanewise}{@code (}{@link VectorOperators#BITWISE_BLEND
2430      *    BITWISE_BLEND}{@code , bits, mask)}.
2431      *
2432      * @param bits input bits to blend into the current vector
2433      * @param mask a bitwise mask to enable blending of the input bits
2434      * @return the bitwise blend of the given bits into the current vector,
2435      *         under control of the bitwise mask
2436      * @see #bitwiseBlend(Vector,Vector)
2437      * @see VectorOperators#BITWISE_BLEND
2438      * @see #lanewise(VectorOperators.Ternary,short,short,VectorMask)
2439      */
2440     @ForceInline
2441     public final
2442     ShortVector bitwiseBlend(short bits, short mask) {
2443         return lanewise(BITWISE_BLEND, bits, mask);
2444     }
2445 
2446     /**
2447      * Blends together the bits of a vector and a scalar under
2448      * the control of another vector, which supplies mask bits.
2449      *
     * This is a lane-wise ternary operation which applies
     * the bitwise blending operation {@code (a&~c)|(b&c)}
     * to each lane.
2453      *
2454      * This method is also equivalent to the expression
2455      * {@link #lanewise(VectorOperators.Ternary,Vector,Vector)
2456      *    lanewise}{@code (}{@link VectorOperators#BITWISE_BLEND
2457      *    BITWISE_BLEND}{@code , bits, mask)}.
2458      *
2459      * @param bits input bits to blend into the current vector
2460      * @param mask a bitwise mask to enable blending of the input bits
2461      * @return the bitwise blend of the given bits into the current vector,
2462      *         under control of the bitwise mask
2463      * @see #bitwiseBlend(Vector,Vector)
2464      * @see VectorOperators#BITWISE_BLEND
2465      * @see #lanewise(VectorOperators.Ternary,short,Vector,VectorMask)
2466      */
2467     @ForceInline
2468     public final
2469     ShortVector bitwiseBlend(short bits, Vector<Short> mask) {
2470         return lanewise(BITWISE_BLEND, bits, mask);
2471     }
2472 
2473     /**
2474      * Blends together the bits of two vectors under
2475      * the control of a scalar, which supplies mask bits.
2476      *
     * This is a lane-wise ternary operation which applies
     * the bitwise blending operation {@code (a&~c)|(b&c)}
     * to each lane.
2480      *
2481      * This method is also equivalent to the expression
2482      * {@link #lanewise(VectorOperators.Ternary,Vector,Vector)
2483      *    lanewise}{@code (}{@link VectorOperators#BITWISE_BLEND
2484      *    BITWISE_BLEND}{@code , bits, mask)}.
2485      *
2486      * @param bits input bits to blend into the current vector
2487      * @param mask a bitwise mask to enable blending of the input bits
2488      * @return the bitwise blend of the given bits into the current vector,
2489      *         under control of the bitwise mask
2490      * @see #bitwiseBlend(Vector,Vector)
2491      * @see VectorOperators#BITWISE_BLEND
2492      * @see #lanewise(VectorOperators.Ternary,Vector,short,VectorMask)
2493      */
2494     @ForceInline
2495     public final
2496     ShortVector bitwiseBlend(Vector<Short> bits, short mask) {
2497         return lanewise(BITWISE_BLEND, bits, mask);
2498     }
2499 
2500 
2501     // Type specific horizontal reductions
2502 
2503     /**
2504      * Returns a value accumulated from all the lanes of this vector.
2505      *
2506      * This is an associative cross-lane reduction operation which
2507      * applies the specified operation to all the lane elements.
2508      * <p>
2509      * A few reduction operations do not support arbitrary reordering
2510      * of their operands, yet are included here because of their
2511      * usefulness.
2512      * <ul>
2513      * <li>
2514      * In the case of {@code FIRST_NONZERO}, the reduction returns
2515      * the value from the lowest-numbered non-zero lane.
2516      * <li>
2517      * All other reduction operations are fully commutative and
2518      * associative.  The implementation can choose any order of
2519      * processing, yet it will always produce the same result.
2520      * </ul>
2521      *
2522      * @param op the operation used to combine lane values
2523      * @return the accumulated result
2524      * @throws UnsupportedOperationException if this vector does
2525      *         not support the requested operation
2526      * @see #reduceLanes(VectorOperators.Associative,VectorMask)
2527      * @see #add(Vector)
2528      * @see #mul(Vector)
2529      * @see #min(Vector)
2530      * @see #max(Vector)
2531      * @see #and(Vector)
2532      * @see #or(Vector)
2533      * @see VectorOperators#XOR
2534      * @see VectorOperators#FIRST_NONZERO
2535      */
2536     public abstract short reduceLanes(VectorOperators.Associative op);
2537 
2538     /**
2539      * Returns a value accumulated from selected lanes of this vector,
2540      * controlled by a mask.
2541      *
2542      * This is an associative cross-lane reduction operation which
2543      * applies the specified operation to the selected lane elements.
2544      * <p>
2545      * If no elements are selected, an operation-specific identity
2546      * value is returned.
2547      * <ul>
2548      * <li>
2549      * If the operation is
2550      *  {@code ADD}, {@code XOR}, {@code OR},
2551      * or {@code FIRST_NONZERO},
2552      * then the identity value is zero, the default {@code short} value.
2553      * <li>
2554      * If the operation is {@code MUL},
2555      * then the identity value is one.
2556      * <li>
2557      * If the operation is {@code AND},
2558      * then the identity value is minus one (all bits set).
2559      * <li>
2560      * If the operation is {@code MAX},
2561      * then the identity value is {@code Short.MIN_VALUE}.
2562      * <li>
2563      * If the operation is {@code MIN},
2564      * then the identity value is {@code Short.MAX_VALUE}.
2565      * </ul>
2566      * <p>
2567      * A few reduction operations do not support arbitrary reordering
2568      * of their operands, yet are included here because of their
2569      * usefulness.
2570      * <ul>
2571      * <li>
2572      * In the case of {@code FIRST_NONZERO}, the reduction returns
2573      * the value from the lowest-numbered non-zero lane.
2574      * <li>
2575      * All other reduction operations are fully commutative and
2576      * associative.  The implementation can choose any order of
2577      * processing, yet it will always produce the same result.
2578      * </ul>
2579      *
2580      * @param op the operation used to combine lane values
2581      * @param m the mask controlling lane selection
2582      * @return the reduced result accumulated from the selected lane values
2583      * @throws UnsupportedOperationException if this vector does
2584      *         not support the requested operation
2585      * @see #reduceLanes(VectorOperators.Associative)
2586      */
2587     public abstract short reduceLanes(VectorOperators.Associative op,
2588                                        VectorMask<Short> m);
2589 
2590     /*package-private*/
2591     @ForceInline
2592     final
2593     short reduceLanesTemplate(VectorOperators.Associative op,
2594                                Class<? extends VectorMask<Short>> maskClass,
2595                                VectorMask<Short> m) {
2596         m.check(maskClass, this);
2597         if (op == FIRST_NONZERO) {
2598             ShortVector v = reduceIdentityVector(op).blend(this, m);
2599             return v.reduceLanesTemplate(op);
2600         }
2601         int opc = opCode(op);
2602         return fromBits(VectorSupport.reductionCoerced(
2603             opc, getClass(), maskClass, short.class, length(),
2604             this, m,
2605             REDUCE_IMPL.find(op, opc, ShortVector::reductionOperations)));
2606     }
2607 
2608     /*package-private*/
2609     @ForceInline
2610     final
2611     short reduceLanesTemplate(VectorOperators.Associative op) {
2612         if (op == FIRST_NONZERO) {
            // FIXME:  The JIT should handle this, and other scan ops also.
2614             VectorMask<Short> thisNZ
2615                 = this.viewAsIntegralLanes().compare(NE, (short) 0);
2616             return this.lane(thisNZ.firstTrue());
2617         }
2618         int opc = opCode(op);
2619         return fromBits(VectorSupport.reductionCoerced(
2620             opc, getClass(), null, short.class, length(),
2621             this, null,
2622             REDUCE_IMPL.find(op, opc, ShortVector::reductionOperations)));
2623     }
2624 
2625     private static final
2626     ImplCache<Associative, ReductionOperation<ShortVector, VectorMask<Short>>>
2627         REDUCE_IMPL = new ImplCache<>(Associative.class, ShortVector.class);
2628 
2629     private static ReductionOperation<ShortVector, VectorMask<Short>> reductionOperations(int opc_) {
2630         switch (opc_) {
2631             case VECTOR_OP_ADD: return (v, m) ->
2632                     toBits(v.rOp((short)0, m, (i, a, b) -> (short)(a + b)));
2633             case VECTOR_OP_MUL: return (v, m) ->
2634                     toBits(v.rOp((short)1, m, (i, a, b) -> (short)(a * b)));
2635             case VECTOR_OP_MIN: return (v, m) ->
2636                     toBits(v.rOp(MAX_OR_INF, m, (i, a, b) -> (short) Math.min(a, b)));
2637             case VECTOR_OP_MAX: return (v, m) ->
2638                     toBits(v.rOp(MIN_OR_INF, m, (i, a, b) -> (short) Math.max(a, b)));
2639             case VECTOR_OP_AND: return (v, m) ->
2640                     toBits(v.rOp((short)-1, m, (i, a, b) -> (short)(a & b)));
2641             case VECTOR_OP_OR: return (v, m) ->
2642                     toBits(v.rOp((short)0, m, (i, a, b) -> (short)(a | b)));
2643             case VECTOR_OP_XOR: return (v, m) ->
2644                     toBits(v.rOp((short)0, m, (i, a, b) -> (short)(a ^ b)));
2645             default: return null;
2646         }
2647     }
2648 
2649     private
2650     @ForceInline
2651     ShortVector reduceIdentityVector(VectorOperators.Associative op) {
2652         int opc = opCode(op);
2653         UnaryOperator<ShortVector> fn
2654             = REDUCE_ID_IMPL.find(op, opc, (opc_) -> {
2655                 switch (opc_) {
2656                 case VECTOR_OP_ADD:
2657                 case VECTOR_OP_OR:
2658                 case VECTOR_OP_XOR:
2659                     return v -> v.broadcast(0);
2660                 case VECTOR_OP_MUL:
2661                     return v -> v.broadcast(1);
2662                 case VECTOR_OP_AND:
2663                     return v -> v.broadcast(-1);
2664                 case VECTOR_OP_MIN:
2665                     return v -> v.broadcast(MAX_OR_INF);
2666                 case VECTOR_OP_MAX:
2667                     return v -> v.broadcast(MIN_OR_INF);
2668                 default: return null;
2669                 }
2670             });
2671         return fn.apply(this);
2672     }
2673     private static final
2674     ImplCache<Associative,UnaryOperator<ShortVector>> REDUCE_ID_IMPL
2675         = new ImplCache<>(Associative.class, ShortVector.class);
2676 
2677     private static final short MIN_OR_INF = Short.MIN_VALUE;
2678     private static final short MAX_OR_INF = Short.MAX_VALUE;
2679 
2680     public @Override abstract long reduceLanesToLong(VectorOperators.Associative op);
2681     public @Override abstract long reduceLanesToLong(VectorOperators.Associative op,
2682                                                      VectorMask<Short> m);
2683 
2684     // Type specific accessors
2685 
2686     /**
     * Gets the lane element at lane index {@code i}.
2688      *
2689      * @param i the lane index
2690      * @return the lane element at lane index {@code i}
     * @throws IllegalArgumentException if the index is out of range
2692      * ({@code < 0 || >= length()})
2693      */
2694     public abstract short lane(int i);
2695 
2696     /**
2697      * Replaces the lane element of this vector at lane index {@code i} with
2698      * value {@code e}.
2699      *
2700      * This is a cross-lane operation and behaves as if it returns the result
2701      * of blending this vector with an input vector that is the result of
2702      * broadcasting {@code e} and a mask that has only one lane set at lane
2703      * index {@code i}.
2704      *
2705      * @param i the lane index of the lane element to be replaced
2706      * @param e the value to be placed
2707      * @return the result of replacing the lane element of this vector at lane
2708      * index {@code i} with value {@code e}.
     * @throws IllegalArgumentException if the index is out of range
2710      * ({@code < 0 || >= length()})
2711      */
2712     public abstract ShortVector withLane(int i, short e);
2713 
2714     // Memory load operations
2715 
2716     /**
2717      * Returns an array of type {@code short[]}
2718      * containing all the lane values.
2719      * The array length is the same as the vector length.
2720      * The array elements are stored in lane order.
2721      * <p>
2722      * This method behaves as if it stores
2723      * this vector into an allocated array
2724      * (using {@link #intoArray(short[], int) intoArray})
2725      * and returns the array as follows:
2726      * <pre>{@code
2727      *   short[] a = new short[this.length()];
2728      *   this.intoArray(a, 0);
2729      *   return a;
2730      * }</pre>
2731      *
2732      * @return an array containing the lane values of this vector
2733      */
2734     @ForceInline
2735     @Override
2736     public final short[] toArray() {
2737         short[] a = new short[vspecies().laneCount()];
2738         intoArray(a, 0);
2739         return a;
2740     }
2741 
2742     /** {@inheritDoc} <!--workaround-->
2743      * @implNote
     * When this method is used on vectors
2745      * of type {@code ShortVector},
2746      * there will be no loss of precision or range,
2747      * and so no {@code UnsupportedOperationException} will
2748      * be thrown.
2749      */
2750     @ForceInline
2751     @Override
2752     public final int[] toIntArray() {
2753         short[] a = toArray();
2754         int[] res = new int[a.length];
2755         for (int i = 0; i < a.length; i++) {
2756             short e = a[i];
2757             res[i] = (int) ShortSpecies.toIntegralChecked(e, true);
2758         }
2759         return res;
2760     }
2761 
2762     /** {@inheritDoc} <!--workaround-->
2763      * @implNote
     * When this method is used on vectors
2765      * of type {@code ShortVector},
2766      * there will be no loss of precision or range,
2767      * and so no {@code UnsupportedOperationException} will
2768      * be thrown.
2769      */
2770     @ForceInline
2771     @Override
2772     public final long[] toLongArray() {
2773         short[] a = toArray();
2774         long[] res = new long[a.length];
2775         for (int i = 0; i < a.length; i++) {
2776             short e = a[i];
2777             res[i] = ShortSpecies.toIntegralChecked(e, false);
2778         }
2779         return res;
2780     }
2781 
2782     /** {@inheritDoc} <!--workaround-->
2783      * @implNote
     * When this method is used on vectors
2785      * of type {@code ShortVector},
2786      * there will be no loss of precision.
2787      */
2788     @ForceInline
2789     @Override
2790     public final double[] toDoubleArray() {
2791         short[] a = toArray();
2792         double[] res = new double[a.length];
2793         for (int i = 0; i < a.length; i++) {
2794             res[i] = (double) a[i];
2795         }
2796         return res;
2797     }
2798 
2799     /**
2800      * Loads a vector from a byte array starting at an offset.
2801      * Bytes are composed into primitive lane elements according
2802      * to the specified byte order.
2803      * The vector is arranged into lanes according to
2804      * <a href="Vector.html#lane-order">memory ordering</a>.
2805      * <p>
2806      * This method behaves as if it returns the result of calling
2807      * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
2808      * fromByteBuffer()} as follows:
2809      * <pre>{@code
2810      * var bb = ByteBuffer.wrap(a);
2811      * var m = species.maskAll(true);
2812      * return fromByteBuffer(species, bb, offset, bo, m);
2813      * }</pre>
2814      *
2815      * @param species species of desired vector
2816      * @param a the byte array
2817      * @param offset the offset into the array
2818      * @param bo the intended byte order
2819      * @return a vector loaded from a byte array
2820      * @throws IndexOutOfBoundsException
2821      *         if {@code offset+N*ESIZE < 0}
2822      *         or {@code offset+(N+1)*ESIZE > a.length}
2823      *         for any lane {@code N} in the vector
2824      */
2825     @ForceInline
2826     public static
2827     ShortVector fromByteArray(VectorSpecies<Short> species,
2828                                        byte[] a, int offset,
2829                                        ByteOrder bo) {
2830         offset = checkFromIndexSize(offset, species.vectorByteSize(), a.length);
2831         ShortSpecies vsp = (ShortSpecies) species;
2832         return vsp.dummyVector().fromByteArray0(a, offset).maybeSwap(bo);
2833     }
2834 
2835     /**
2836      * Loads a vector from a byte array starting at an offset
2837      * and using a mask.
2838      * Lanes where the mask is unset are filled with the default
2839      * value of {@code short} (zero).
2840      * Bytes are composed into primitive lane elements according
2841      * to the specified byte order.
2842      * The vector is arranged into lanes according to
2843      * <a href="Vector.html#lane-order">memory ordering</a>.
2844      * <p>
2845      * This method behaves as if it returns the result of calling
2846      * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
2847      * fromByteBuffer()} as follows:
2848      * <pre>{@code
2849      * var bb = ByteBuffer.wrap(a);
2850      * return fromByteBuffer(species, bb, offset, bo, m);
2851      * }</pre>
2852      *
2853      * @param species species of desired vector
2854      * @param a the byte array
2855      * @param offset the offset into the array
2856      * @param bo the intended byte order
2857      * @param m the mask controlling lane selection
2858      * @return a vector loaded from a byte array
2859      * @throws IndexOutOfBoundsException
2860      *         if {@code offset+N*ESIZE < 0}
2861      *         or {@code offset+(N+1)*ESIZE > a.length}
2862      *         for any lane {@code N} in the vector
2863      *         where the mask is set
2864      */
2865     @ForceInline
2866     public static
2867     ShortVector fromByteArray(VectorSpecies<Short> species,
2868                                        byte[] a, int offset,
2869                                        ByteOrder bo,
2870                                        VectorMask<Short> m) {
2871         ShortSpecies vsp = (ShortSpecies) species;
2872         if (offset >= 0 && offset <= (a.length - species.vectorByteSize())) {
2873             return vsp.dummyVector().fromByteArray0(a, offset, m).maybeSwap(bo);
2874         }
2875 
2876         // FIXME: optimize
2877         checkMaskFromIndexSize(offset, vsp, m, 2, a.length);
2878         ByteBuffer wb = wrapper(a, bo);
2879         return vsp.ldOp(wb, offset, (AbstractMask<Short>)m,
2880                    (wb_, o, i)  -> wb_.getShort(o + i * 2));
2881     }
2882 
2883     /**
2884      * Loads a vector from an array of type {@code short[]}
2885      * starting at an offset.
2886      * For each vector lane, where {@code N} is the vector lane index, the
2887      * array element at index {@code offset + N} is placed into the
2888      * resulting vector at lane index {@code N}.
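     *
     * <p> For example, a typical strip-mined loop over full-width chunks
     * might look like this sketch (illustrative only; {@code SPECIES} and
     * {@code data} are hypothetical names, not part of this specification):
     * <pre>{@code
     * var SPECIES = ShortVector.SPECIES_PREFERRED;   // illustrative species choice
     * short[] data = new short[1000];                // hypothetical input
     * int upper = SPECIES.loopBound(data.length);
     * for (int i = 0; i < upper; i += SPECIES.length()) {
     *     ShortVector v = ShortVector.fromArray(SPECIES, data, i);
     *     // ... operate on v ...
     * }
     * }</pre>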
2889      *
2890      * @param species species of desired vector
2891      * @param a the array
2892      * @param offset the offset into the array
2893      * @return the vector loaded from an array
2894      * @throws IndexOutOfBoundsException
2895      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
2896      *         for any lane {@code N} in the vector
2897      */
2898     @ForceInline
2899     public static
2900     ShortVector fromArray(VectorSpecies<Short> species,
2901                                    short[] a, int offset) {
2902         offset = checkFromIndexSize(offset, species.length(), a.length);
2903         ShortSpecies vsp = (ShortSpecies) species;
2904         return vsp.dummyVector().fromArray0(a, offset);
2905     }
2906 
2907     /**
2908      * Loads a vector from an array of type {@code short[]}
2909      * starting at an offset and using a mask.
2910      * Lanes where the mask is unset are filled with the default
2911      * value of {@code short} (zero).
2912      * For each vector lane, where {@code N} is the vector lane index,
2913      * if the mask lane at index {@code N} is set then the array element at
2914      * index {@code offset + N} is placed into the resulting vector at lane index
2915      * {@code N}, otherwise the default element value is placed into the
2916      * resulting vector at lane index {@code N}.
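     *
     * <p> For example, the tail of an array that does not fill a whole
     * vector can be loaded under a mask, as in this sketch (illustrative
     * only; {@code SPECIES} and {@code data} are hypothetical names):
     * <pre>{@code
     * var SPECIES = ShortVector.SPECIES_PREFERRED;   // illustrative species choice
     * short[] data = new short[1000];                // hypothetical input
     * int i = SPECIES.loopBound(data.length);        // start of the tail
     * VectorMask<Short> m = SPECIES.indexInRange(i, data.length);
     * ShortVector tail = ShortVector.fromArray(SPECIES, data, i, m);
     * }</pre>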
2917      *
2918      * @param species species of desired vector
2919      * @param a the array
2920      * @param offset the offset into the array
2921      * @param m the mask controlling lane selection
2922      * @return the vector loaded from an array
2923      * @throws IndexOutOfBoundsException
2924      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
2925      *         for any lane {@code N} in the vector
2926      *         where the mask is set
2927      */
2928     @ForceInline
2929     public static
2930     ShortVector fromArray(VectorSpecies<Short> species,
2931                                    short[] a, int offset,
2932                                    VectorMask<Short> m) {
2933         ShortSpecies vsp = (ShortSpecies) species;
2934         if (offset >= 0 && offset <= (a.length - species.length())) {
2935             return vsp.dummyVector().fromArray0(a, offset, m);
2936         }
2937 
2938         // FIXME: optimize
2939         checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
2940         return vsp.vOp(m, i -> a[offset + i]);
2941     }
2942 
2943     /**
2944      * Gathers a new vector composed of elements from an array of type
2945      * {@code short[]},
2946      * using indexes obtained by adding a fixed {@code offset} to a
2947      * series of secondary offsets from an <em>index map</em>.
2948      * The index map is a contiguous sequence of {@code VLENGTH}
2949      * elements in a second array of {@code int}s, starting at a given
2950      * {@code mapOffset}.
2951      * <p>
2952      * For each vector lane, where {@code N} is the vector lane index,
2953      * the lane is loaded from the array
2954      * element {@code a[f(N)]}, where {@code f(N)} is the
2955      * index mapping expression
2956      * {@code offset + indexMap[mapOffset + N]}.
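     *
     * <p> For example, a table lookup (gather) might look like this sketch
     * (illustrative only; {@code SPECIES}, {@code table}, and
     * {@code indexes} are hypothetical names):
     * <pre>{@code
     * var SPECIES = ShortVector.SPECIES_PREFERRED;   // illustrative species choice
     * short[] table = new short[256];                // hypothetical lookup table
     * int[] indexes = new int[SPECIES.length()];     // hypothetical indexes into table
     * ShortVector gathered = ShortVector.fromArray(SPECIES, table, 0, indexes, 0);
     * }</pre>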
2957      *
2958      * @param species species of desired vector
2959      * @param a the array
2960      * @param offset the offset into the array, may be negative if relative
2961      * indexes in the index map compensate to produce a value within the
2962      * array bounds
2963      * @param indexMap the index map
2964      * @param mapOffset the offset into the index map
2965      * @return the vector loaded from the indexed elements of the array
2966      * @throws IndexOutOfBoundsException
2967      *         if {@code mapOffset+N < 0}
2968      *         or if {@code mapOffset+N >= indexMap.length},
2969      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
2970      *         is an invalid index into {@code a},
2971      *         for any lane {@code N} in the vector
2972      * @see ShortVector#toIntArray()
2973      */
2974     @ForceInline
2975     public static
2976     ShortVector fromArray(VectorSpecies<Short> species,
2977                                    short[] a, int offset,
2978                                    int[] indexMap, int mapOffset) {
2979         ShortSpecies vsp = (ShortSpecies) species;
2980         return vsp.vOp(n -> a[offset + indexMap[mapOffset + n]]);
2981     }
2982 
2983     /**
2984      * Gathers a new vector composed of elements from an array of type
2985      * {@code short[]},
2986      * under the control of a mask, and
2987      * using indexes obtained by adding a fixed {@code offset} to a
2988      * series of secondary offsets from an <em>index map</em>.
2989      * The index map is a contiguous sequence of {@code VLENGTH}
2990      * elements in a second array of {@code int}s, starting at a given
2991      * {@code mapOffset}.
2992      * <p>
2993      * For each vector lane, where {@code N} is the vector lane index,
2994      * if the lane is set in the mask,
2995      * the lane is loaded from the array
2996      * element {@code a[f(N)]}, where {@code f(N)} is the
2997      * index mapping expression
2998      * {@code offset + indexMap[mapOffset + N]}.
2999      * Unset lanes in the resulting vector are set to zero.
3000      *
3001      * @param species species of desired vector
3002      * @param a the array
3003      * @param offset the offset into the array, may be negative if relative
3004      * indexes in the index map compensate to produce a value within the
3005      * array bounds
3006      * @param indexMap the index map
3007      * @param mapOffset the offset into the index map
3008      * @param m the mask controlling lane selection
3009      * @return the vector loaded from the indexed elements of the array
3010      * @throws IndexOutOfBoundsException
3011      *         if {@code mapOffset+N < 0}
3012      *         or if {@code mapOffset+N >= indexMap.length},
3013      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
3014      *         is an invalid index into {@code a},
3015      *         for any lane {@code N} in the vector
3016      *         where the mask is set
3017      * @see ShortVector#toIntArray()
3018      */
3019     @ForceInline
3020     public static
3021     ShortVector fromArray(VectorSpecies<Short> species,
3022                                    short[] a, int offset,
3023                                    int[] indexMap, int mapOffset,
3024                                    VectorMask<Short> m) {
3025         ShortSpecies vsp = (ShortSpecies) species;
3026         return vsp.vOp(m, n -> a[offset + indexMap[mapOffset + n]]);
3027     }
3028 
3029     /**
3030      * Loads a vector from an array of type {@code char[]}
3031      * starting at an offset.
3032      * For each vector lane, where {@code N} is the vector lane index, the
3033      * array element at index {@code offset + N}
3034      * is first cast to a {@code short} value and then
3035      * placed into the resulting vector at lane index {@code N}.
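     *
     * <p> For example, UTF-16 code units can be loaded directly as
     * {@code short} lanes, as in this sketch (illustrative only;
     * {@code SPECIES} and {@code text} are hypothetical names):
     * <pre>{@code
     * var SPECIES = ShortVector.SPECIES_PREFERRED;   // illustrative species choice
     * char[] text = new char[SPECIES.length()];      // hypothetical UTF-16 data
     * ShortVector codes = ShortVector.fromCharArray(SPECIES, text, 0);
     * }</pre>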
3036      *
3037      * @param species species of desired vector
3038      * @param a the array
3039      * @param offset the offset into the array
3040      * @return the vector loaded from an array
3041      * @throws IndexOutOfBoundsException
3042      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
3043      *         for any lane {@code N} in the vector
3044      */
3045     @ForceInline
3046     public static
3047     ShortVector fromCharArray(VectorSpecies<Short> species,
3048                                        char[] a, int offset) {
3049         offset = checkFromIndexSize(offset, species.length(), a.length);
3050         ShortSpecies vsp = (ShortSpecies) species;
3051         return vsp.dummyVector().fromCharArray0(a, offset);
3052     }
3053 
3054     /**
3055      * Loads a vector from an array of type {@code char[]}
3056      * starting at an offset and using a mask.
3057      * Lanes where the mask is unset are filled with the default
3058      * value of {@code short} (zero).
3059      * For each vector lane, where {@code N} is the vector lane index,
3060      * if the mask lane at index {@code N} is set then the array element at
3061      * index {@code offset + N}
3062      * is first cast to a {@code short} value and then
3063      * placed into the resulting vector at lane index
3064      * {@code N}, otherwise the default element value is placed into the
3065      * resulting vector at lane index {@code N}.
3066      *
3067      * @param species species of desired vector
3068      * @param a the array
3069      * @param offset the offset into the array
3070      * @param m the mask controlling lane selection
3071      * @return the vector loaded from an array
3072      * @throws IndexOutOfBoundsException
3073      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
3074      *         for any lane {@code N} in the vector
3075      *         where the mask is set
3076      */
3077     @ForceInline
3078     public static
3079     ShortVector fromCharArray(VectorSpecies<Short> species,
3080                                        char[] a, int offset,
3081                                        VectorMask<Short> m) {
3082         ShortSpecies vsp = (ShortSpecies) species;
3083         if (offset >= 0 && offset <= (a.length - species.length())) {
3084             return vsp.dummyVector().fromCharArray0(a, offset, m);
3085         }
3086 
3087         // FIXME: optimize
3088         checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
3089         return vsp.vOp(m, i -> (short) a[offset + i]);
3090     }
3091 
3092     /**
3093      * Gathers a new vector composed of elements from an array of type
3094      * {@code char[]},
3095      * using indexes obtained by adding a fixed {@code offset} to a
3096      * series of secondary offsets from an <em>index map</em>.
3097      * The index map is a contiguous sequence of {@code VLENGTH}
3098      * elements in a second array of {@code int}s, starting at a given
3099      * {@code mapOffset}.
3100      * <p>
3101      * For each vector lane, where {@code N} is the vector lane index,
3102      * the lane is loaded from the expression
3103      * {@code (short) a[f(N)]}, where {@code f(N)} is the
3104      * index mapping expression
3105      * {@code offset + indexMap[mapOffset + N]}.
3106      *
3107      * @param species species of desired vector
3108      * @param a the array
3109      * @param offset the offset into the array, may be negative if relative
3110      * indexes in the index map compensate to produce a value within the
3111      * array bounds
3112      * @param indexMap the index map
3113      * @param mapOffset the offset into the index map
3114      * @return the vector loaded from the indexed elements of the array
3115      * @throws IndexOutOfBoundsException
3116      *         if {@code mapOffset+N < 0}
3117      *         or if {@code mapOffset+N >= indexMap.length},
3118      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
3119      *         is an invalid index into {@code a},
3120      *         for any lane {@code N} in the vector
3121      * @see ShortVector#toIntArray()
3122      */
3123     @ForceInline
3124     public static
3125     ShortVector fromCharArray(VectorSpecies<Short> species,
3126                                        char[] a, int offset,
3127                                        int[] indexMap, int mapOffset) {
3128         // FIXME: optimize
3129         ShortSpecies vsp = (ShortSpecies) species;
3130         return vsp.vOp(n -> (short) a[offset + indexMap[mapOffset + n]]);
3131     }
3132 
3133     /**
3134      * Gathers a new vector composed of elements from an array of type
3135      * {@code char[]},
3136      * under the control of a mask, and
3137      * using indexes obtained by adding a fixed {@code offset} to a
3138      * series of secondary offsets from an <em>index map</em>.
3139      * The index map is a contiguous sequence of {@code VLENGTH}
3140      * elements in a second array of {@code int}s, starting at a given
3141      * {@code mapOffset}.
3142      * <p>
3143      * For each vector lane, where {@code N} is the vector lane index,
3144      * if the lane is set in the mask,
3145      * the lane is loaded from the expression
3146      * {@code (short) a[f(N)]}, where {@code f(N)} is the
3147      * index mapping expression
3148      * {@code offset + indexMap[mapOffset + N]}.
3149      * Unset lanes in the resulting vector are set to zero.
3150      *
3151      * @param species species of desired vector
3152      * @param a the array
3153      * @param offset the offset into the array, may be negative if relative
3154      * indexes in the index map compensate to produce a value within the
3155      * array bounds
3156      * @param indexMap the index map
3157      * @param mapOffset the offset into the index map
3158      * @param m the mask controlling lane selection
3159      * @return the vector loaded from the indexed elements of the array
3160      * @throws IndexOutOfBoundsException
3161      *         if {@code mapOffset+N < 0}
3162      *         or if {@code mapOffset+N >= indexMap.length},
3163      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
3164      *         is an invalid index into {@code a},
3165      *         for any lane {@code N} in the vector
3166      *         where the mask is set
3167      * @see ShortVector#toIntArray()
3168      */
3169     @ForceInline
3170     public static
3171     ShortVector fromCharArray(VectorSpecies<Short> species,
3172                                        char[] a, int offset,
3173                                        int[] indexMap, int mapOffset,
3174                                        VectorMask<Short> m) {
3175         // FIXME: optimize
3176         ShortSpecies vsp = (ShortSpecies) species;
3177         return vsp.vOp(m, n -> (short) a[offset + indexMap[mapOffset + n]]);
3178     }
3179 
3180 
3181     /**
3182      * Loads a vector from a {@linkplain ByteBuffer byte buffer}
3183      * starting at an offset into the byte buffer.
3184      * Bytes are composed into primitive lane elements according
3185      * to the specified byte order.
3186      * The vector is arranged into lanes according to
3187      * <a href="Vector.html#lane-order">memory ordering</a>.
3188      * <p>
3189      * This method behaves as if it returns the result of calling
3190      * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
3191      * fromByteBuffer()} as follows:
3192      * <pre>{@code
3193      * var m = species.maskAll(true);
3194      * return fromByteBuffer(species, bb, offset, bo, m);
3195      * }</pre>
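     *
     * <p> For instance, loading from a direct buffer in native byte order
     * might look like this sketch (illustrative only; {@code SPECIES} and
     * {@code buf} are hypothetical names, not part of this specification):
     * <pre>{@code
     * var SPECIES = ShortVector.SPECIES_PREFERRED;   // illustrative species choice
     * ByteBuffer buf = ByteBuffer.allocateDirect(SPECIES.vectorByteSize()); // hypothetical buffer
     * ShortVector v = ShortVector.fromByteBuffer(SPECIES, buf, 0,
     *                                            ByteOrder.nativeOrder());
     * }</pre>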
3196      *
3197      * @param species species of desired vector
3198      * @param bb the byte buffer
3199      * @param offset the offset into the byte buffer
3200      * @param bo the intended byte order
3201      * @return a vector loaded from a byte buffer
3202      * @throws IndexOutOfBoundsException
3203      *         if {@code offset+N*2 < 0}
3204      *         or {@code offset+N*2 >= bb.limit()}
3205      *         for any lane {@code N} in the vector
3206      */
3207     @ForceInline
3208     public static
3209     ShortVector fromByteBuffer(VectorSpecies<Short> species,
3210                                         ByteBuffer bb, int offset,
3211                                         ByteOrder bo) {
3212         offset = checkFromIndexSize(offset, species.vectorByteSize(), bb.limit());
3213         ShortSpecies vsp = (ShortSpecies) species;
3214         return vsp.dummyVector().fromByteBuffer0(bb, offset).maybeSwap(bo);
3215     }
3216 
3217     /**
3218      * Loads a vector from a {@linkplain ByteBuffer byte buffer}
3219      * starting at an offset into the byte buffer
3220      * and using a mask.
3221      * Lanes where the mask is unset are filled with the default
3222      * value of {@code short} (zero).
3223      * Bytes are composed into primitive lane elements according
3224      * to the specified byte order.
3225      * The vector is arranged into lanes according to
3226      * <a href="Vector.html#lane-order">memory ordering</a>.
3227      * <p>
3228      * The following pseudocode illustrates the behavior:
3229      * <pre>{@code
3230      * ShortBuffer eb = bb.duplicate()
3231      *     .position(offset)
3232      *     .order(bo).asShortBuffer();
3233      * short[] ar = new short[species.length()];
3234      * for (int n = 0; n < ar.length; n++) {
3235      *     if (m.laneIsSet(n)) {
3236      *         ar[n] = eb.get(n);
3237      *     }
3238      * }
3239      * ShortVector r = ShortVector.fromArray(species, ar, 0);
3240      * }</pre>
3241      * @implNote
3242      * This operation is likely to be more efficient if
3243      * the specified byte order is the same as
3244      * {@linkplain ByteOrder#nativeOrder()
3245      * the platform native order},
3246      * since this method will not need to reorder
3247      * the bytes of lane values.
3248      *
3249      * @param species species of desired vector
3250      * @param bb the byte buffer
3251      * @param offset the offset into the byte buffer
3252      * @param bo the intended byte order
3253      * @param m the mask controlling lane selection
3254      * @return a vector loaded from a byte buffer
3255      * @throws IndexOutOfBoundsException
3256      *         if {@code offset+N*2 < 0}
3257      *         or {@code offset+N*2 >= bb.limit()}
3258      *         for any lane {@code N} in the vector
3259      *         where the mask is set
3260      */
3261     @ForceInline
3262     public static
3263     ShortVector fromByteBuffer(VectorSpecies<Short> species,
3264                                         ByteBuffer bb, int offset,
3265                                         ByteOrder bo,
3266                                         VectorMask<Short> m) {
3267         ShortSpecies vsp = (ShortSpecies) species;
3268         if (offset >= 0 && offset <= (bb.limit() - species.vectorByteSize())) {
3269             return vsp.dummyVector().fromByteBuffer0(bb, offset, m).maybeSwap(bo);
3270         }
3271 
3272         // FIXME: optimize
3273         checkMaskFromIndexSize(offset, vsp, m, 2, bb.limit());
3274         ByteBuffer wb = wrapper(bb, bo);
3275         return vsp.ldOp(wb, offset, (AbstractMask<Short>)m,
3276                    (wb_, o, i)  -> wb_.getShort(o + i * 2));
3277     }
3278 
3279     // Memory store operations
3280 
3281     /**
3282      * Stores this vector into an array of type {@code short[]}
3283      * starting at an offset.
3284      * <p>
3285      * For each vector lane, where {@code N} is the vector lane index,
3286      * the lane element at index {@code N} is stored into the array
3287      * element {@code a[offset+N]}.
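     *
     * <p> For example, a simple element-wise kernel that adds 1 to every
     * element of an array, in full-width chunks, might look like this
     * sketch (illustrative only; {@code SPECIES} is a hypothetical name):
     * <pre>{@code
     * var SPECIES = ShortVector.SPECIES_PREFERRED;   // illustrative species choice
     * short[] a = new short[1000];                   // hypothetical data
     * int upper = SPECIES.loopBound(a.length);
     * for (int i = 0; i < upper; i += SPECIES.length()) {
     *     ShortVector.fromArray(SPECIES, a, i)
     *         .add((short) 1)
     *         .intoArray(a, i);
     * }
     * }</pre>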
3288      *
3289      * @param a the array, of type {@code short[]}
3290      * @param offset the offset into the array
3291      * @throws IndexOutOfBoundsException
3292      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
3293      *         for any lane {@code N} in the vector
3294      */
3295     @ForceInline
3296     public final
3297     void intoArray(short[] a, int offset) {
3298         offset = checkFromIndexSize(offset, length(), a.length);
3299         ShortSpecies vsp = vspecies();
3300         VectorSupport.store(
3301             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3302             a, arrayAddress(a, offset),
3303             this,
3304             a, offset,
3305             (arr, off, v)
3306             -> v.stOp(arr, off,
3307                       (arr_, off_, i, e) -> arr_[off_ + i] = e));
3308     }
3309 
3310     /**
3311      * Stores this vector into an array of type {@code short[]}
3312      * starting at an offset and using a mask.
3313      * <p>
3314      * For each vector lane, where {@code N} is the vector lane index,
3315      * the lane element at index {@code N} is stored into the array
3316      * element {@code a[offset+N]}.
3317      * If the mask lane at {@code N} is unset then the corresponding
3318      * array element {@code a[offset+N]} is left unchanged.
3319      * <p>
3320      * Array range checking is done for lanes where the mask is set.
3321      * Lanes where the mask is unset are not stored and do not need
3322      * to correspond to legitimate elements of {@code a}.
3323      * That is, unset lanes may correspond to array indexes less than
3324      * zero or beyond the end of the array.
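     *
     * <p> For example, the tail of a loop can be processed and stored
     * under a single mask, as in this sketch (illustrative only;
     * {@code SPECIES} is a hypothetical name):
     * <pre>{@code
     * var SPECIES = ShortVector.SPECIES_PREFERRED;   // illustrative species choice
     * short[] a = new short[1000];                   // hypothetical data
     * int i = SPECIES.loopBound(a.length);           // start of the tail
     * VectorMask<Short> m = SPECIES.indexInRange(i, a.length);
     * ShortVector.fromArray(SPECIES, a, i, m)
     *     .add((short) 1)
     *     .intoArray(a, i, m);
     * }</pre>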
3325      *
3326      * @param a the array, of type {@code short[]}
3327      * @param offset the offset into the array
3328      * @param m the mask controlling lane storage
3329      * @throws IndexOutOfBoundsException
3330      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
3331      *         for any lane {@code N} in the vector
3332      *         where the mask is set
3333      */
3334     @ForceInline
3335     public final
3336     void intoArray(short[] a, int offset,
3337                    VectorMask<Short> m) {
3338         if (m.allTrue()) {
3339             intoArray(a, offset);
3340         } else {
3341             ShortSpecies vsp = vspecies();
3342             checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
3343             intoArray0(a, offset, m);
3344         }
3345     }
3346 
3347     /**
3348      * Scatters this vector into an array of type {@code short[]}
3349      * using indexes obtained by adding a fixed {@code offset} to a
3350      * series of secondary offsets from an <em>index map</em>.
3351      * The index map is a contiguous sequence of {@code VLENGTH}
3352      * elements in a second array of {@code int}s, starting at a given
3353      * {@code mapOffset}.
3354      * <p>
3355      * For each vector lane, where {@code N} is the vector lane index,
3356      * the lane element at index {@code N} is stored into the array
3357      * element {@code a[f(N)]}, where {@code f(N)} is the
3358      * index mapping expression
3359      * {@code offset + indexMap[mapOffset + N]}.
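     *
     * <p> For example, a scatter driven by an index map might look like
     * this sketch (illustrative only; {@code SPECIES}, {@code dest}, and
     * {@code indexes} are hypothetical names):
     * <pre>{@code
     * var SPECIES = ShortVector.SPECIES_PREFERRED;   // illustrative species choice
     * short[] dest = new short[256];                 // hypothetical destination
     * int[] indexes = new int[SPECIES.length()];     // hypothetical indexes into dest
     * ShortVector v = ShortVector.zero(SPECIES);     // any vector value will do
     * v.intoArray(dest, 0, indexes, 0);
     * }</pre>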
3360      *
3361      * @param a the array
3362      * @param offset an offset to combine with the index map offsets
3363      * @param indexMap the index map
3364      * @param mapOffset the offset into the index map
3365      * @throws IndexOutOfBoundsException
3366      *         if {@code mapOffset+N < 0}
3367      *         or if {@code mapOffset+N >= indexMap.length},
3368      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
3369      *         is an invalid index into {@code a},
3370      *         for any lane {@code N} in the vector
3371      * @see ShortVector#toIntArray()
3372      */
3373     @ForceInline
3374     public final
3375     void intoArray(short[] a, int offset,
3376                    int[] indexMap, int mapOffset) {
3377         stOp(a, offset,
3378              (arr, off, i, e) -> {
3379                  int j = indexMap[mapOffset + i];
3380                  arr[off + j] = e;
3381              });
3382     }
3383 
3384     /**
3385      * Scatters this vector into an array of type {@code short[]},
3386      * under the control of a mask, and
3387      * using indexes obtained by adding a fixed {@code offset} to a
3388      * series of secondary offsets from an <em>index map</em>.
3389      * The index map is a contiguous sequence of {@code VLENGTH}
3390      * elements in a second array of {@code int}s, starting at a given
3391      * {@code mapOffset}.
3392      * <p>
3393      * For each vector lane, where {@code N} is the vector lane index,
3394      * if the mask lane at index {@code N} is set then
3395      * the lane element at index {@code N} is stored into the array
3396      * element {@code a[f(N)]}, where {@code f(N)} is the
3397      * index mapping expression
3398      * {@code offset + indexMap[mapOffset + N]}.
3399      *
3400      * @param a the array
3401      * @param offset an offset to combine with the index map offsets
3402      * @param indexMap the index map
3403      * @param mapOffset the offset into the index map
3404      * @param m the mask
3405      * @throws IndexOutOfBoundsException
3406      *         if {@code mapOffset+N < 0}
3407      *         or if {@code mapOffset+N >= indexMap.length},
3408      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
3409      *         is an invalid index into {@code a},
3410      *         for any lane {@code N} in the vector
3411      *         where the mask is set
3412      * @see ShortVector#toIntArray()
3413      */
3414     @ForceInline
3415     public final
3416     void intoArray(short[] a, int offset,
3417                    int[] indexMap, int mapOffset,
3418                    VectorMask<Short> m) {
3419         stOp(a, offset, m,
3420              (arr, off, i, e) -> {
3421                  int j = indexMap[mapOffset + i];
3422                  arr[off + j] = e;
3423              });
3424     }
3425 
3426     /**
3427      * Stores this vector into an array of type {@code char[]}
3428      * starting at an offset.
3429      * <p>
3430      * For each vector lane, where {@code N} is the vector lane index,
3431      * the lane element at index {@code N}
3432      * is first cast to a {@code char} value and then
3433      * stored into the array element {@code a[offset+N]}.
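     *
     * <p> For example, {@code short} lanes can be written back out as
     * UTF-16 code units, as in this sketch (illustrative only;
     * {@code SPECIES}, {@code codes}, and {@code text} are hypothetical names):
     * <pre>{@code
     * var SPECIES = ShortVector.SPECIES_PREFERRED;   // illustrative species choice
     * short[] codes = new short[SPECIES.length()];   // hypothetical code units
     * char[] text = new char[codes.length];
     * ShortVector.fromArray(SPECIES, codes, 0).intoCharArray(text, 0);
     * }</pre>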
3434      *
3435      * @param a the array, of type {@code char[]}
3436      * @param offset the offset into the array
3437      * @throws IndexOutOfBoundsException
3438      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
3439      *         for any lane {@code N} in the vector
3440      */
3441     @ForceInline
3442     public final
3443     void intoCharArray(char[] a, int offset) {
3444         offset = checkFromIndexSize(offset, length(), a.length);
3445         ShortSpecies vsp = vspecies();
3446         VectorSupport.store(
3447             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3448             a, charArrayAddress(a, offset),
3449             this,
3450             a, offset,
3451             (arr, off, v)
3452             -> v.stOp(arr, off,
3453                       (arr_, off_, i, e) -> arr_[off_ + i] = (char) e));
3454     }
3455 
3456     /**
3457      * Stores this vector into an array of type {@code char[]}
3458      * starting at an offset and using a mask.
3459      * <p>
3460      * For each vector lane, where {@code N} is the vector lane index,
3461      * the lane element at index {@code N}
3462      * is first cast to a {@code char} value and then
3463      * stored into the array element {@code a[offset+N]}.
3464      * If the mask lane at {@code N} is unset then the corresponding
3465      * array element {@code a[offset+N]} is left unchanged.
3466      * <p>
3467      * Array range checking is done for lanes where the mask is set.
3468      * Lanes where the mask is unset are not stored and do not need
3469      * to correspond to legitimate elements of {@code a}.
3470      * That is, unset lanes may correspond to array indexes less than
3471      * zero or beyond the end of the array.
3472      *
3473      * @param a the array, of type {@code char[]}
3474      * @param offset the offset into the array
3475      * @param m the mask controlling lane storage
3476      * @throws IndexOutOfBoundsException
3477      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
3478      *         for any lane {@code N} in the vector
3479      *         where the mask is set
3480      */
3481     @ForceInline
3482     public final
3483     void intoCharArray(char[] a, int offset,
3484                        VectorMask<Short> m) {
3485         if (m.allTrue()) {
3486             intoCharArray(a, offset);
3487         } else {
3488             ShortSpecies vsp = vspecies();
3489             checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
3490             intoCharArray0(a, offset, m);
3491         }
3492     }
3493 
3494     /**
3495      * Scatters this vector into an array of type {@code char[]}
3496      * using indexes obtained by adding a fixed {@code offset} to a
3497      * series of secondary offsets from an <em>index map</em>.
3498      * The index map is a contiguous sequence of {@code VLENGTH}
3499      * elements in a second array of {@code int}s, starting at a given
3500      * {@code mapOffset}.
3501      * <p>
3502      * For each vector lane, where {@code N} is the vector lane index,
3503      * the lane element at index {@code N}
3504      * is first cast to a {@code char} value and then
3505      * stored into the array
3506      * element {@code a[f(N)]}, where {@code f(N)} is the
3507      * index mapping expression
3508      * {@code offset + indexMap[mapOffset + N]}.
3509      *
3510      * @param a the array
3511      * @param offset an offset to combine with the index map offsets
3512      * @param indexMap the index map
3513      * @param mapOffset the offset into the index map
3514      * @throws IndexOutOfBoundsException
3515      *         if {@code mapOffset+N < 0}
3516      *         or if {@code mapOffset+N >= indexMap.length},
3517      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
3518      *         is an invalid index into {@code a},
3519      *         for any lane {@code N} in the vector
3520      * @see ShortVector#toIntArray()
3521      */
3522     @ForceInline
3523     public final
3524     void intoCharArray(char[] a, int offset,
3525                        int[] indexMap, int mapOffset) {
3526         // FIXME: optimize
3527         stOp(a, offset,
3528              (arr, off, i, e) -> {
3529                  int j = indexMap[mapOffset + i];
3530                  arr[off + j] = (char) e;
3531              });
3532     }
3533 
3534     /**
3535      * Scatters this vector into an array of type {@code char[]},
3536      * under the control of a mask, and
3537      * using indexes obtained by adding a fixed {@code offset} to a
3538      * series of secondary offsets from an <em>index map</em>.
3539      * The index map is a contiguous sequence of {@code VLENGTH}
3540      * elements in a second array of {@code int}s, starting at a given
3541      * {@code mapOffset}.
3542      * <p>
3543      * For each vector lane, where {@code N} is the vector lane index,
3544      * if the mask lane at index {@code N} is set then
3545      * the lane element at index {@code N}
3546      * is first cast to a {@code char} value and then
3547      * stored into the array
3548      * element {@code a[f(N)]}, where {@code f(N)} is the
3549      * index mapping expression
3550      * {@code offset + indexMap[mapOffset + N]}.
3551      *
3552      * @param a the array
3553      * @param offset an offset to combine with the index map offsets
3554      * @param indexMap the index map
3555      * @param mapOffset the offset into the index map
3556      * @param m the mask
3557      * @throws IndexOutOfBoundsException
3558      *         if {@code mapOffset+N < 0}
3559      *         or if {@code mapOffset+N >= indexMap.length},
3560      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
3561      *         is an invalid index into {@code a},
3562      *         for any lane {@code N} in the vector
3563      *         where the mask is set
3564      * @see ShortVector#toIntArray()
3565      */
3566     @ForceInline
3567     public final
3568     void intoCharArray(char[] a, int offset,
3569                        int[] indexMap, int mapOffset,
3570                        VectorMask<Short> m) {
3571         // FIXME: optimize
3572         stOp(a, offset, m,
3573              (arr, off, i, e) -> {
3574                  int j = indexMap[mapOffset + i];
3575                  arr[off + j] = (char) e;
3576              });
3577     }
3578 
3579 
3580     /**
3581      * {@inheritDoc} <!--workaround-->
3582      */
3583     @Override
3584     @ForceInline
3585     public final
3586     void intoByteArray(byte[] a, int offset,
3587                        ByteOrder bo) {
3588         offset = checkFromIndexSize(offset, byteSize(), a.length);
3589         maybeSwap(bo).intoByteArray0(a, offset);
3590     }
3591 
3592     /**
3593      * {@inheritDoc} <!--workaround-->
3594      */
3595     @Override
3596     @ForceInline
3597     public final
3598     void intoByteArray(byte[] a, int offset,
3599                        ByteOrder bo,
3600                        VectorMask<Short> m) {
3601         if (m.allTrue()) {
3602             intoByteArray(a, offset, bo);
3603         } else {
3604             ShortSpecies vsp = vspecies();
3605             checkMaskFromIndexSize(offset, vsp, m, 2, a.length);
3606             maybeSwap(bo).intoByteArray0(a, offset, m);
3607         }
3608     }
3609 
3610     /**
3611      * {@inheritDoc} <!--workaround-->
3612      */
3613     @Override
3614     @ForceInline
3615     public final
3616     void intoByteBuffer(ByteBuffer bb, int offset,
3617                         ByteOrder bo) {
3618         if (ScopedMemoryAccess.isReadOnly(bb)) {
3619             throw new ReadOnlyBufferException();
3620         }
3621         offset = checkFromIndexSize(offset, byteSize(), bb.limit());
3622         maybeSwap(bo).intoByteBuffer0(bb, offset);
3623     }
3624 
3625     /**
3626      * {@inheritDoc} <!--workaround-->
3627      */
3628     @Override
3629     @ForceInline
3630     public final
3631     void intoByteBuffer(ByteBuffer bb, int offset,
3632                         ByteOrder bo,
3633                         VectorMask<Short> m) {
3634         if (m.allTrue()) {
3635             intoByteBuffer(bb, offset, bo);
3636         } else {
3637             if (bb.isReadOnly()) {
3638                 throw new ReadOnlyBufferException();
3639             }
3640             ShortSpecies vsp = vspecies();
3641             checkMaskFromIndexSize(offset, vsp, m, 2, bb.limit());
3642             maybeSwap(bo).intoByteBuffer0(bb, offset, m);
3643         }
3644     }
3645 
3646     // ================================================
3647 
3648     // Low-level memory operations.
3649     //
3650     // Note that all of these operations *must* inline into a context
3651     // where the exact species of the involved vector is a
3652     // compile-time constant.  Otherwise, the intrinsic generation
3653     // will fail and performance will suffer.
3654     //
3655     // In many cases this is achieved by re-deriving a version of the
3656     // method in each concrete subclass (per species).  The re-derived
3657     // method simply calls one of these generic methods, with exact
3658     // parameters for the controlling metadata, which is either a
3659     // typed vector or constant species instance.
3660 
3661     // Unchecked loading operations in native byte order.
3662     // Caller is responsible for applying index checks, masking, and
3663     // byte swapping.
3664 
3665     /*package-private*/
3666     abstract
3667     ShortVector fromArray0(short[] a, int offset);
3668     @ForceInline
3669     final
3670     ShortVector fromArray0Template(short[] a, int offset) {
3671         ShortSpecies vsp = vspecies();
3672         return VectorSupport.load(
3673             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3674             a, arrayAddress(a, offset),
3675             a, offset, vsp,
3676             (arr, off, s) -> s.ldOp(arr, off,
3677                                     (arr_, off_, i) -> arr_[off_ + i]));
3678     }
3679 
3680     /*package-private*/
3681     abstract
3682     ShortVector fromArray0(short[] a, int offset, VectorMask<Short> m);
3683     @ForceInline
3684     final
3685     <M extends VectorMask<Short>>
3686     ShortVector fromArray0Template(Class<M> maskClass, short[] a, int offset, M m) {
3687         m.check(species());
3688         ShortSpecies vsp = vspecies();
3689         return VectorSupport.loadMasked(
3690             vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
3691             a, arrayAddress(a, offset), m,
3692             a, offset, vsp,
3693             (arr, off, s, vm) -> s.ldOp(arr, off, vm,
3694                                         (arr_, off_, i) -> arr_[off_ + i]));
3695     }
3696 
3697 
3698     /*package-private*/
3699     abstract
3700     ShortVector fromCharArray0(char[] a, int offset);
3701     @ForceInline
3702     final
3703     ShortVector fromCharArray0Template(char[] a, int offset) {
3704         ShortSpecies vsp = vspecies();
3705         return VectorSupport.load(
3706             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3707             a, charArrayAddress(a, offset),
3708             a, offset, vsp,
3709             (arr, off, s) -> s.ldOp(arr, off,
3710                                     (arr_, off_, i) -> (short) arr_[off_ + i]));
3711     }
3712 
3713     /*package-private*/
3714     abstract
3715     ShortVector fromCharArray0(char[] a, int offset, VectorMask<Short> m);
3716     @ForceInline
3717     final
3718     <M extends VectorMask<Short>>
3719     ShortVector fromCharArray0Template(Class<M> maskClass, char[] a, int offset, M m) {
3720         m.check(species());
3721         ShortSpecies vsp = vspecies();
3722         return VectorSupport.loadMasked(
3723                 vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
3724                 a, charArrayAddress(a, offset), m,
3725                 a, offset, vsp,
3726                 (arr, off, s, vm) -> s.ldOp(arr, off, vm,
3727                                             (arr_, off_, i) -> (short) arr_[off_ + i]));
3728     }
3729 
3730 
3731     @Override
3732     abstract
3733     ShortVector fromByteArray0(byte[] a, int offset);
3734     @ForceInline
3735     final
3736     ShortVector fromByteArray0Template(byte[] a, int offset) {
3737         ShortSpecies vsp = vspecies();
3738         return VectorSupport.load(
3739             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3740             a, byteArrayAddress(a, offset),
3741             a, offset, vsp,
3742             (arr, off, s) -> {
3743                 ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
3744                 return s.ldOp(wb, off,
3745                         (wb_, o, i) -> wb_.getShort(o + i * 2));
3746             });
3747     }
3748 
3749     abstract
3750     ShortVector fromByteArray0(byte[] a, int offset, VectorMask<Short> m);
3751     @ForceInline
3752     final
3753     <M extends VectorMask<Short>>
3754     ShortVector fromByteArray0Template(Class<M> maskClass, byte[] a, int offset, M m) {
3755         ShortSpecies vsp = vspecies();
3756         m.check(vsp);
3757         return VectorSupport.loadMasked(
3758             vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
3759             a, byteArrayAddress(a, offset), m,
3760             a, offset, vsp,
3761             (arr, off, s, vm) -> {
3762                 ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
3763                 return s.ldOp(wb, off, vm,
3764                         (wb_, o, i) -> wb_.getShort(o + i * 2));
3765             });
3766     }
3767 
3768     abstract
3769     ShortVector fromByteBuffer0(ByteBuffer bb, int offset);
3770     @ForceInline
3771     final
3772     ShortVector fromByteBuffer0Template(ByteBuffer bb, int offset) {
3773         ShortSpecies vsp = vspecies();
3774         return ScopedMemoryAccess.loadFromByteBuffer(
3775                 vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3776                 bb, offset, vsp,
3777                 (buf, off, s) -> {
3778                     ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
3779                     return s.ldOp(wb, off,
3780                             (wb_, o, i) -> wb_.getShort(o + i * 2));
3781                 });
3782     }
3783 
3784     abstract
3785     ShortVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Short> m);
3786     @ForceInline
3787     final
3788     <M extends VectorMask<Short>>
3789     ShortVector fromByteBuffer0Template(Class<M> maskClass, ByteBuffer bb, int offset, M m) {
3790         ShortSpecies vsp = vspecies();
3791         m.check(vsp);
3792         return ScopedMemoryAccess.loadFromByteBufferMasked(
3793                 vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
3794                 bb, offset, m, vsp,
3795                 (buf, off, s, vm) -> {
3796                     ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
3797                     return s.ldOp(wb, off, vm,
3798                             (wb_, o, i) -> wb_.getShort(o + i * 2));
3799                 });
3800     }
3801 
3802     // Unchecked storing operations in native byte order.
3803     // Caller is responsible for applying index checks, masking, and
3804     // byte swapping.
3805 
3806     abstract
3807     void intoArray0(short[] a, int offset);
3808     @ForceInline
3809     final
3810     void intoArray0Template(short[] a, int offset) {
3811         ShortSpecies vsp = vspecies();
3812         VectorSupport.store(
3813             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3814             a, arrayAddress(a, offset),
3815             this, a, offset,
3816             (arr, off, v)
3817             -> v.stOp(arr, off,
3818                       (arr_, off_, i, e) -> arr_[off_+i] = e));
3819     }
3820 
3821     abstract
3822     void intoArray0(short[] a, int offset, VectorMask<Short> m);
3823     @ForceInline
3824     final
3825     <M extends VectorMask<Short>>
3826     void intoArray0Template(Class<M> maskClass, short[] a, int offset, M m) {
3827         m.check(species());
3828         ShortSpecies vsp = vspecies();
3829         VectorSupport.storeMasked(
3830             vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
3831             a, arrayAddress(a, offset),
3832             this, m, a, offset,
3833             (arr, off, v, vm)
3834             -> v.stOp(arr, off, vm,
3835                       (arr_, off_, i, e) -> arr_[off_ + i] = e));
3836     }
3837 
3838 
3839 
3840     abstract
3841     void intoByteArray0(byte[] a, int offset);
3842     @ForceInline
3843     final
3844     void intoByteArray0Template(byte[] a, int offset) {
3845         ShortSpecies vsp = vspecies();
3846         VectorSupport.store(
3847             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3848             a, byteArrayAddress(a, offset),
3849             this, a, offset,
3850             (arr, off, v) -> {
3851                 ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
3852                 v.stOp(wb, off,
3853                         (tb_, o, i, e) -> tb_.putShort(o + i * 2, e));
3854             });
3855     }
3856 
3857     abstract
3858     void intoByteArray0(byte[] a, int offset, VectorMask<Short> m);
3859     @ForceInline
3860     final
3861     <M extends VectorMask<Short>>
3862     void intoByteArray0Template(Class<M> maskClass, byte[] a, int offset, M m) {
3863         ShortSpecies vsp = vspecies();
3864         m.check(vsp);
3865         VectorSupport.storeMasked(
3866             vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
3867             a, byteArrayAddress(a, offset),
3868             this, m, a, offset,
3869             (arr, off, v, vm) -> {
3870                 ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
3871                 v.stOp(wb, off, vm,
3872                         (tb_, o, i, e) -> tb_.putShort(o + i * 2, e));
3873             });
3874     }
3875 
3876     @ForceInline
3877     final
3878     void intoByteBuffer0(ByteBuffer bb, int offset) {
3879         ShortSpecies vsp = vspecies();
3880         ScopedMemoryAccess.storeIntoByteBuffer(
3881                 vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3882                 this, bb, offset,
3883                 (buf, off, v) -> {
3884                     ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
3885                     v.stOp(wb, off,
3886                             (wb_, o, i, e) -> wb_.putShort(o + i * 2, e));
3887                 });
3888     }
3889 
3890     abstract
3891     void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Short> m);
3892     @ForceInline
3893     final
3894     <M extends VectorMask<Short>>
3895     void intoByteBuffer0Template(Class<M> maskClass, ByteBuffer bb, int offset, M m) {
3896         ShortSpecies vsp = vspecies();
3897         m.check(vsp);
3898         ScopedMemoryAccess.storeIntoByteBufferMasked(
3899                 vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
3900                 this, m, bb, offset,
3901                 (buf, off, v, vm) -> {
3902                     ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
3903                     v.stOp(wb, off, vm,
3904                             (wb_, o, i, e) -> wb_.putShort(o + i * 2, e));
3905                 });
3906     }
3907 
3908     /*package-private*/
3909     abstract
3910     void intoCharArray0(char[] a, int offset, VectorMask<Short> m);
3911     @ForceInline
3912     final
3913     <M extends VectorMask<Short>>
3914     void intoCharArray0Template(Class<M> maskClass, char[] a, int offset, M m) {
3915         m.check(species());
3916         ShortSpecies vsp = vspecies();
3917         VectorSupport.storeMasked(
3918             vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
3919             a, charArrayAddress(a, offset),
3920             this, m, a, offset,
3921             (arr, off, v, vm)
3922             -> v.stOp(arr, off, vm,
3923                       (arr_, off_, i, e) -> arr_[off_ + i] = (char) e));
3924     }
3925 
3926     // End of low-level memory operations.
3927 
3928     private static
3929     void checkMaskFromIndexSize(int offset,
3930                                 ShortSpecies vsp,
3931                                 VectorMask<Short> m,
3932                                 int scale,
3933                                 int limit) {
3934         ((AbstractMask<Short>)m)
3935             .checkIndexByLane(offset, limit, vsp.iota(), scale);
3936     }
3937 
3938     @ForceInline
3939     private void conditionalStoreNYI(int offset,
3940                                      ShortSpecies vsp,
3941                                      VectorMask<Short> m,
3942                                      int scale,
3943                                      int limit) {
3944         if (offset < 0 || offset + vsp.laneCount() * scale > limit) {
3945             String msg =
3946                 String.format("unimplemented: store @%d in [0..%d), %s in %s",
3947                               offset, limit, m, vsp);
3948             throw new AssertionError(msg);
3949         }
3950     }
3951 
3952     /*package-private*/
3953     @Override
3954     @ForceInline
3955     final
3956     ShortVector maybeSwap(ByteOrder bo) {
3957         if (bo != NATIVE_ENDIAN) {
3958             return this.reinterpretAsBytes()
3959                 .rearrange(swapBytesShuffle())
3960                 .reinterpretAsShorts();
3961         }
3962         return this;
3963     }
3964 
3965     static final int ARRAY_SHIFT =
3966         31 - Integer.numberOfLeadingZeros(Unsafe.ARRAY_SHORT_INDEX_SCALE);
3967     static final long ARRAY_BASE =
3968         Unsafe.ARRAY_SHORT_BASE_OFFSET;
3969 
3970     @ForceInline
3971     static long arrayAddress(short[] a, int index) {
3972         return ARRAY_BASE + (((long)index) << ARRAY_SHIFT);
3973     }
3974 
3975     static final int ARRAY_CHAR_SHIFT =
3976             31 - Integer.numberOfLeadingZeros(Unsafe.ARRAY_CHAR_INDEX_SCALE);
3977     static final long ARRAY_CHAR_BASE =
3978             Unsafe.ARRAY_CHAR_BASE_OFFSET;
3979 
3980     @ForceInline
3981     static long charArrayAddress(char[] a, int index) {
3982         return ARRAY_CHAR_BASE + (((long)index) << ARRAY_CHAR_SHIFT);
3983     }
3984 
3985 
3986     @ForceInline
3987     static long byteArrayAddress(byte[] a, int index) {
3988         return Unsafe.ARRAY_BYTE_BASE_OFFSET + index;
3989     }
3990 
3991     // ================================================
3992 
3993     /// Reinterpreting view methods:
3994     //   lanewise reinterpret: viewAsXVector()
3995     //   keep shape, redraw lanes: reinterpretAsEs()
3996 
3997     /**
3998      * {@inheritDoc} <!--workaround-->
3999      */
4000     @ForceInline
4001     @Override
4002     public final ByteVector reinterpretAsBytes() {
4003          // Going to ByteVector, pay close attention to byte order.
4004          assert(REGISTER_ENDIAN == ByteOrder.LITTLE_ENDIAN);
4005          return asByteVectorRaw();
4006          //return asByteVectorRaw().rearrange(swapBytesShuffle());
4007     }
4008 
4009     /**
4010      * {@inheritDoc} <!--workaround-->
4011      */
4012     @ForceInline
4013     @Override
4014     public final ShortVector viewAsIntegralLanes() {
4015         return this;
4016     }
4017 
4018     /**
4019      * {@inheritDoc} <!--workaround-->
4020      *
4021      * @implNote This method always throws
4022      * {@code UnsupportedOperationException}, because there is no floating
4023      * point type of the same size as {@code short}.  The return type
4024      * of this method is arbitrarily designated as
4025      * {@code Vector<?>}.  Future versions of this API may change the return
4026      * type if additional floating point types become available.
4027      */
4028     @ForceInline
4029     @Override
4030     public final
4031     Vector<?>
4032     viewAsFloatingLanes() {
4033         LaneType flt = LaneType.SHORT.asFloating();
4034         // asFloating() will throw UnsupportedOperationException for the unsupported type short
4035         throw new AssertionError("Cannot reach here");
4036     }
4037 
4038     // ================================================
4039 
4040     /// Object methods: toString, equals, hashCode
4041     //
4042     // Object methods are defined as if via Arrays.toString, etc.,
4043     // is applied to the array of elements.  Two equal vectors
4044     // are required to have equal species and equal lane values.
4045 
4046     /**
4047      * Returns a string representation of this vector, of the form
4048      * {@code "[0,1,2...]"}, reporting the lane values of this vector,
4049      * in lane order.
4050      *
4051      * The string is produced as if by a call to {@link
4052      * java.util.Arrays#toString(short[]) Arrays.toString()},
4053      * as appropriate to the {@code short} array returned by
4054      * {@link #toArray this.toArray()}.
4055      *
4056      * @return a string of the form {@code "[0,1,2...]"}
4057      * reporting the lane values of this vector
4058      */
4059     @Override
4060     @ForceInline
4061     public final
4062     String toString() {
4063         // now that toArray is strongly typed, we can define this
4064         return Arrays.toString(toArray());
4065     }
4066 
4067     /**
4068      * {@inheritDoc} <!--workaround-->
4069      */
4070     @Override
4071     @ForceInline
4072     public final
4073     boolean equals(Object obj) {
4074         if (obj instanceof Vector) {
4075             Vector<?> that = (Vector<?>) obj;
4076             if (this.species().equals(that.species())) {
4077                 return this.eq(that.check(this.species())).allTrue();
4078             }
4079         }
4080         return false;
4081     }
4082 
4083     /**
4084      * {@inheritDoc} <!--workaround-->
4085      */
4086     @Override
4087     @ForceInline
4088     public final
4089     int hashCode() {
4090         // now that toArray is strongly typed, we can define this
4091         return Objects.hash(species(), Arrays.hashCode(toArray()));
4092     }
4093 
4094     // ================================================
4095 
4096     // Species
4097 
4098     /**
4099      * Class representing {@link ShortVector}s of the same {@link VectorShape VectorShape}.
4100      */
4101     /*package-private*/
4102     static final class ShortSpecies extends AbstractSpecies<Short> {
4103         private ShortSpecies(VectorShape shape,
4104                 Class<? extends ShortVector> vectorType,
4105                 Class<? extends AbstractMask<Short>> maskType,
4106                 Function<Object, ShortVector> vectorFactory) {
4107             super(shape, LaneType.of(short.class),
4108                   vectorType, maskType,
4109                   vectorFactory);
4110             assert(this.elementSize() == Short.SIZE);
4111         }
4112 
4113         // Specializing overrides:
4114 
4115         @Override
4116         @ForceInline
4117         public final Class<Short> elementType() {
4118             return short.class;
4119         }
4120 
4121         @Override
4122         @ForceInline
4123         final Class<Short> genericElementType() {
4124             return Short.class;
4125         }
4126 
4127         @SuppressWarnings("unchecked")
4128         @Override
4129         @ForceInline
4130         public final Class<? extends ShortVector> vectorType() {
4131             return (Class<? extends ShortVector>) vectorType;
4132         }
4133 
4134         @Override
4135         @ForceInline
4136         public final long checkValue(long e) {
4137             longToElementBits(e);  // only for exception
4138             return e;
4139         }
4140 
4141         /*package-private*/
4142         @Override
4143         @ForceInline
4144         final ShortVector broadcastBits(long bits) {
4145             return (ShortVector)
4146                 VectorSupport.broadcastCoerced(
4147                     vectorType, short.class, laneCount,
4148                     bits, this,
4149                     (bits_, s_) -> s_.rvOp(i -> bits_));
4150         }
4151 
4152         /*package-private*/
4153         @ForceInline
4154         final ShortVector broadcast(short e) {
4155             return broadcastBits(toBits(e));
4156         }
4157 
4158         @Override
4159         @ForceInline
4160         public final ShortVector broadcast(long e) {
4161             return broadcastBits(longToElementBits(e));
4162         }
4163 
4164         /*package-private*/
4165         final @Override
4166         @ForceInline
4167         long longToElementBits(long value) {
4168             // Do the conversion, and then test it for failure.
4169             short e = (short) value;
4170             if ((long) e != value) {
4171                 throw badElementBits(value, e);
4172             }
4173             return toBits(e);
4174         }
4175 
4176         /*package-private*/
4177         @ForceInline
4178         static long toIntegralChecked(short e, boolean convertToInt) {
4179             long value = convertToInt ? (int) e : (long) e;
4180             if ((short) value != e) {
4181                 throw badArrayBits(e, convertToInt, value);
4182             }
4183             return value;
4184         }
4185 
4186         /* this non-public one is for internal conversions */
4187         @Override
4188         @ForceInline
4189         final ShortVector fromIntValues(int[] values) {
4190             VectorIntrinsics.requireLength(values.length, laneCount);
4191             short[] va = new short[laneCount()];
4192             for (int i = 0; i < va.length; i++) {
4193                 int lv = values[i];
4194                 short v = (short) lv;
4195                 va[i] = v;
4196                 if ((int)v != lv) {
4197                     throw badElementBits(lv, v);
4198                 }
4199             }
4200             return dummyVector().fromArray0(va, 0);
4201         }
4202 
4203         // Virtual constructors
4204 
4205         @ForceInline
4206         @Override final
4207         public ShortVector fromArray(Object a, int offset) {
4208             // User entry point:  Be careful with inputs.
4209             return ShortVector
4210                 .fromArray(this, (short[]) a, offset);
4211         }
4212 
4213         @ForceInline
4214         @Override final
4215         ShortVector dummyVector() {
4216             return (ShortVector) super.dummyVector();
4217         }
4218 
4219         /*package-private*/
4220         @Override
4221         @ForceInline
4222         final ShortVector rvOp(RVOp f) {
4223             short[] res = new short[laneCount()];
4224             for (int i = 0; i < res.length; i++) {
4225                 short bits = (short) f.apply(i);
4226                 res[i] = fromBits(bits);
4227             }
4228             return dummyVector().vectorFactory(res);
4229         }
4230 
4231         ShortVector vOp(FVOp f) {
4232             short[] res = new short[laneCount()];
4233             for (int i = 0; i < res.length; i++) {
4234                 res[i] = f.apply(i);
4235             }
4236             return dummyVector().vectorFactory(res);
4237         }
4238 
4239         ShortVector vOp(VectorMask<Short> m, FVOp f) {
4240             short[] res = new short[laneCount()];
4241             boolean[] mbits = ((AbstractMask<Short>)m).getBits();
4242             for (int i = 0; i < res.length; i++) {
4243                 if (mbits[i]) {
4244                     res[i] = f.apply(i);
4245                 }
4246             }
4247             return dummyVector().vectorFactory(res);
4248         }
4249 
4250         /*package-private*/
4251         @ForceInline
4252         <M> ShortVector ldOp(M memory, int offset,
4253                                       FLdOp<M> f) {
4254             return dummyVector().ldOp(memory, offset, f);
4255         }
4256 
4257         /*package-private*/
4258         @ForceInline
4259         <M> ShortVector ldOp(M memory, int offset,
4260                                       VectorMask<Short> m,
4261                                       FLdOp<M> f) {
4262             return dummyVector().ldOp(memory, offset, m, f);
4263         }
4264 
4265         /*package-private*/
4266         @ForceInline
4267         <M> void stOp(M memory, int offset, FStOp<M> f) {
4268             dummyVector().stOp(memory, offset, f);
4269         }
4270 
4271         /*package-private*/
4272         @ForceInline
4273         <M> void stOp(M memory, int offset,
4274                       AbstractMask<Short> m,
4275                       FStOp<M> f) {
4276             dummyVector().stOp(memory, offset, m, f);
4277         }
4278 
4279         // N.B. Make sure these constant vectors and
4280         // masks load up correctly into registers.
4281         //
4282         // Also, see if we can avoid all that switching.
4283         // Could we cache both vectors and both masks in
4284         // this species object?
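
             // A minimal sketch of the caching idea raised above; it is NOT
             // part of the generated code, and the field and accessor names
             // below are hypothetical (safe publication across threads would
             // also need thought before adopting something like this):
             //
             //     private ShortVector cachedZero;
             //
             //     ShortVector cachedZero() {
             //         ShortVector z = cachedZero;
             //         return (z != null) ? z : (cachedZero = zero());
             //     }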
4285 
4286         // Zero and iota vector access
4287         @Override
4288         @ForceInline
4289         public final ShortVector zero() {
4290             if ((Class<?>) vectorType() == ShortMaxVector.class)
4291                 return ShortMaxVector.ZERO;
4292             switch (vectorBitSize()) {
4293                 case 64: return Short64Vector.ZERO;
4294                 case 128: return Short128Vector.ZERO;
4295                 case 256: return Short256Vector.ZERO;
4296                 case 512: return Short512Vector.ZERO;
4297             }
4298             throw new AssertionError();
4299         }
4300 
4301         @Override
4302         @ForceInline
4303         public final ShortVector iota() {
4304             if ((Class<?>) vectorType() == ShortMaxVector.class)
4305                 return ShortMaxVector.IOTA;
4306             switch (vectorBitSize()) {
4307                 case 64: return Short64Vector.IOTA;
4308                 case 128: return Short128Vector.IOTA;
4309                 case 256: return Short256Vector.IOTA;
4310                 case 512: return Short512Vector.IOTA;
4311             }
4312             throw new AssertionError();
4313         }
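
             // Illustrative sketch, not part of the generated API: the
             // per-shape constants returned by zero() and iota() above, viewed
             // through a 64-bit species (4 short lanes).  The demo method name
             // is hypothetical.
             private static void zeroAndIotaDemo() {
                 ShortSpecies s64 = (ShortSpecies) SPECIES_64;
                 short[] z = s64.zero().toArray();   // {0, 0, 0, 0}
                 short[] i = s64.iota().toArray();   // {0, 1, 2, 3}
                 assert z[0] == 0 && i[3] == 3;
             }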
4314 
4315         // Mask access
4316         @Override
4317         @ForceInline
4318         public final VectorMask<Short> maskAll(boolean bit) {
4319             if ((Class<?>) vectorType() == ShortMaxVector.class)
4320                 return ShortMaxVector.ShortMaxMask.maskAll(bit);
4321             switch (vectorBitSize()) {
4322                 case 64: return Short64Vector.Short64Mask.maskAll(bit);
4323                 case 128: return Short128Vector.Short128Mask.maskAll(bit);
4324                 case 256: return Short256Vector.Short256Mask.maskAll(bit);
4325                 case 512: return Short512Vector.Short512Mask.maskAll(bit);
4326             }
4327             throw new AssertionError();
4328         }
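
             // Illustrative sketch, not part of the generated API: maskAll(true)
             // and maskAll(false) are the all-set and all-clear masks for a
             // species, shown here for SPECIES_128 (8 short lanes).  The demo
             // method name is hypothetical.
             private static void maskAllDemo() {
                 VectorMask<Short> all  = SPECIES_128.maskAll(true);
                 VectorMask<Short> none = SPECIES_128.maskAll(false);
                 assert all.allTrue() && all.trueCount() == 8;
                 assert !none.anyTrue() && none.trueCount() == 0;
             }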
4329     }
4330 
4331     /**
4332      * Finds a species for an element type of {@code short} and the given shape.
4333      *
4334      * @param s the shape
4335      * @return a species for an element type of {@code short} and the given shape
4336      * @throws IllegalArgumentException if no such species exists for the shape
4337      */
4338     static ShortSpecies species(VectorShape s) {
4339         Objects.requireNonNull(s);
4340         switch (s) {
4341             case S_64_BIT: return (ShortSpecies) SPECIES_64;
4342             case S_128_BIT: return (ShortSpecies) SPECIES_128;
4343             case S_256_BIT: return (ShortSpecies) SPECIES_256;
4344             case S_512_BIT: return (ShortSpecies) SPECIES_512;
4345             case S_Max_BIT: return (ShortSpecies) SPECIES_MAX;
4346             default: throw new IllegalArgumentException("Bad shape: " + s);
4347         }
4348     }
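
         // Illustrative sketch, not part of the generated API: the same lookup
         // is available publicly through VectorSpecies.of, which resolves to
         // the shared species constants declared below (a 256-bit shape gives
         // 256/16 = 16 short lanes).  The demo method name is hypothetical.
         private static void speciesLookupDemo() {
             VectorSpecies<Short> s = VectorSpecies.of(short.class, VectorShape.S_256_BIT);
             assert s == SPECIES_256;    // species are shared, one per element type and shape
             assert s.length() == 16;    // 256-bit shape / 16-bit lanes
         }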
4349 
4350     /** Species representing {@link ShortVector}s of {@link VectorShape#S_64_BIT VectorShape.S_64_BIT}. */
4351     public static final VectorSpecies<Short> SPECIES_64
4352         = new ShortSpecies(VectorShape.S_64_BIT,
4353                             Short64Vector.class,
4354                             Short64Vector.Short64Mask.class,
4355                             Short64Vector::new);
4356 
4357     /** Species representing {@link ShortVector}s of {@link VectorShape#S_128_BIT VectorShape.S_128_BIT}. */
4358     public static final VectorSpecies<Short> SPECIES_128
4359         = new ShortSpecies(VectorShape.S_128_BIT,
4360                             Short128Vector.class,
4361                             Short128Vector.Short128Mask.class,
4362                             Short128Vector::new);
4363 
4364     /** Species representing {@link ShortVector}s of {@link VectorShape#S_256_BIT VectorShape.S_256_BIT}. */
4365     public static final VectorSpecies<Short> SPECIES_256
4366         = new ShortSpecies(VectorShape.S_256_BIT,
4367                             Short256Vector.class,
4368                             Short256Vector.Short256Mask.class,
4369                             Short256Vector::new);
4370 
4371     /** Species representing {@link ShortVector}s of {@link VectorShape#S_512_BIT VectorShape.S_512_BIT}. */
4372     public static final VectorSpecies<Short> SPECIES_512
4373         = new ShortSpecies(VectorShape.S_512_BIT,
4374                             Short512Vector.class,
4375                             Short512Vector.Short512Mask.class,
4376                             Short512Vector::new);
4377 
4378     /** Species representing {@link ShortVector}s of {@link VectorShape#S_Max_BIT VectorShape.S_Max_BIT}. */
4379     public static final VectorSpecies<Short> SPECIES_MAX
4380         = new ShortSpecies(VectorShape.S_Max_BIT,
4381                             ShortMaxVector.class,
4382                             ShortMaxVector.ShortMaxMask.class,
4383                             ShortMaxVector::new);
4384 
4385     /**
4386      * Preferred species for {@link ShortVector}s.
4387      * A preferred species is a species of maximal bit-size for the platform.
4388      */
4389     public static final VectorSpecies<Short> SPECIES_PREFERRED
4390         = (ShortSpecies) VectorSpecies.ofPreferred(short.class);
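
         // Illustrative sketch, not part of the generated API: a typical use
         // of SPECIES_PREFERRED for an element-wise loop, processing one full
         // vector per iteration and finishing with a scalar tail for any
         // remainder.  The demo method name is hypothetical.
         private static void addArraysDemo(short[] a, short[] b, short[] r) {
             VectorSpecies<Short> s = SPECIES_PREFERRED;
             int i = 0;
             for (int bound = s.loopBound(a.length); i < bound; i += s.length()) {
                 ShortVector va = ShortVector.fromArray(s, a, i);
                 ShortVector vb = ShortVector.fromArray(s, b, i);
                 va.add(vb).intoArray(r, i);
             }
             for (; i < a.length; i++) {
                 r[i] = (short) (a[i] + b[i]);   // scalar tail
             }
         }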
4391 }