1 /*
   2  * Copyright (c) 2017, 2022, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 package jdk.incubator.vector;
  26 
  27 import java.nio.ByteBuffer;
  28 import java.nio.ByteOrder;
  29 import java.nio.ReadOnlyBufferException;
  30 import java.util.Arrays;
  31 import java.util.Objects;
  32 import java.util.function.Function;
  33 import java.util.function.UnaryOperator;
  34 
  35 import jdk.internal.misc.ScopedMemoryAccess;
  36 import jdk.internal.misc.Unsafe;
  37 import jdk.internal.vm.annotation.ForceInline;
  38 import jdk.internal.vm.vector.VectorSupport;
  39 
  40 import static jdk.internal.vm.vector.VectorSupport.*;
  41 import static jdk.incubator.vector.VectorIntrinsics.*;
  42 
  43 import static jdk.incubator.vector.VectorOperators.*;
  44 
  45 // -- This file was mechanically generated: Do not edit! -- //
  46 
  47 /**
  48  * A specialized {@link Vector} representing an ordered immutable sequence of
  49  * {@code float} values.
  50  */
  51 @SuppressWarnings("cast")  // warning: redundant cast
  52 public abstract class FloatVector extends AbstractVector<Float> {
  53 
  54     FloatVector(float[] vec) {
  55         super(vec);
  56     }
  57 
  58     static final int FORBID_OPCODE_KIND = VO_NOFP;
  59 
  60     @ForceInline
  61     static int opCode(Operator op) {
  62         return VectorOperators.opCode(op, VO_OPCODE_VALID, FORBID_OPCODE_KIND);
  63     }
  64     @ForceInline
  65     static int opCode(Operator op, int requireKind) {
  66         requireKind |= VO_OPCODE_VALID;
  67         return VectorOperators.opCode(op, requireKind, FORBID_OPCODE_KIND);
  68     }
  69     @ForceInline
  70     static boolean opKind(Operator op, int bit) {
  71         return VectorOperators.opKind(op, bit);
  72     }
  73 
  74     // Virtualized factories and operators,
  75     // coded with portable definitions.
  76     // These are all @ForceInline in case
  77     // they need to be used performantly.
  78     // The various shape-specific subclasses
  79     // also specialize them by wrapping
  80     // them in a call like this:
    //    return (Float128Vector)
    //       super.bOp((Float128Vector) o);
  83     // The purpose of that is to forcibly inline
  84     // the generic definition from this file
  85     // into a sharply type- and size-specific
  86     // wrapper in the subclass file, so that
  87     // the JIT can specialize the code.
  88     // The code is only inlined and expanded
  89     // if it gets hot.  Think of it as a cheap
  90     // and lazy version of C++ templates.
  91 
  92     // Virtualized getter
  93 
  94     /*package-private*/
  95     abstract float[] vec();
  96 
  97     // Virtualized constructors
  98 
  99     /**
 100      * Build a vector directly using my own constructor.
 101      * It is an error if the array is aliased elsewhere.
 102      */
 103     /*package-private*/
 104     abstract FloatVector vectorFactory(float[] vec);
 105 
 106     /**
 107      * Build a mask directly using my species.
 108      * It is an error if the array is aliased elsewhere.
 109      */
 110     /*package-private*/
 111     @ForceInline
 112     final
 113     AbstractMask<Float> maskFactory(boolean[] bits) {
 114         return vspecies().maskFactory(bits);
 115     }
 116 
 117     // Constant loader (takes dummy as vector arg)
 118     interface FVOp {
 119         float apply(int i);
 120     }
 121 
 122     /*package-private*/
 123     @ForceInline
 124     final
 125     FloatVector vOp(FVOp f) {
 126         float[] res = new float[length()];
 127         for (int i = 0; i < res.length; i++) {
 128             res[i] = f.apply(i);
 129         }
 130         return vectorFactory(res);
 131     }
 132 
 133     @ForceInline
 134     final
 135     FloatVector vOp(VectorMask<Float> m, FVOp f) {
 136         float[] res = new float[length()];
 137         boolean[] mbits = ((AbstractMask<Float>)m).getBits();
 138         for (int i = 0; i < res.length; i++) {
 139             if (mbits[i]) {
 140                 res[i] = f.apply(i);
 141             }
 142         }
 143         return vectorFactory(res);
 144     }
 145 
 146     // Unary operator
 147 
 148     /*package-private*/
 149     interface FUnOp {
 150         float apply(int i, float a);
 151     }
 152 
 153     /*package-private*/
 154     abstract
 155     FloatVector uOp(FUnOp f);
 156     @ForceInline
 157     final
 158     FloatVector uOpTemplate(FUnOp f) {
 159         float[] vec = vec();
 160         float[] res = new float[length()];
 161         for (int i = 0; i < res.length; i++) {
 162             res[i] = f.apply(i, vec[i]);
 163         }
 164         return vectorFactory(res);
 165     }
 166 
 167     /*package-private*/
 168     abstract
 169     FloatVector uOp(VectorMask<Float> m,
 170                              FUnOp f);
 171     @ForceInline
 172     final
 173     FloatVector uOpTemplate(VectorMask<Float> m,
 174                                      FUnOp f) {
 175         if (m == null) {
 176             return uOpTemplate(f);
 177         }
 178         float[] vec = vec();
 179         float[] res = new float[length()];
 180         boolean[] mbits = ((AbstractMask<Float>)m).getBits();
 181         for (int i = 0; i < res.length; i++) {
 182             res[i] = mbits[i] ? f.apply(i, vec[i]) : vec[i];
 183         }
 184         return vectorFactory(res);
 185     }
 186 
 187     // Binary operator
 188 
 189     /*package-private*/
 190     interface FBinOp {
 191         float apply(int i, float a, float b);
 192     }
 193 
 194     /*package-private*/
 195     abstract
 196     FloatVector bOp(Vector<Float> o,
 197                              FBinOp f);
 198     @ForceInline
 199     final
 200     FloatVector bOpTemplate(Vector<Float> o,
 201                                      FBinOp f) {
 202         float[] res = new float[length()];
 203         float[] vec1 = this.vec();
 204         float[] vec2 = ((FloatVector)o).vec();
 205         for (int i = 0; i < res.length; i++) {
 206             res[i] = f.apply(i, vec1[i], vec2[i]);
 207         }
 208         return vectorFactory(res);
 209     }
 210 
 211     /*package-private*/
 212     abstract
 213     FloatVector bOp(Vector<Float> o,
 214                              VectorMask<Float> m,
 215                              FBinOp f);
 216     @ForceInline
 217     final
 218     FloatVector bOpTemplate(Vector<Float> o,
 219                                      VectorMask<Float> m,
 220                                      FBinOp f) {
 221         if (m == null) {
 222             return bOpTemplate(o, f);
 223         }
 224         float[] res = new float[length()];
 225         float[] vec1 = this.vec();
 226         float[] vec2 = ((FloatVector)o).vec();
 227         boolean[] mbits = ((AbstractMask<Float>)m).getBits();
 228         for (int i = 0; i < res.length; i++) {
 229             res[i] = mbits[i] ? f.apply(i, vec1[i], vec2[i]) : vec1[i];
 230         }
 231         return vectorFactory(res);
 232     }
 233 
 234     // Ternary operator
 235 
 236     /*package-private*/
 237     interface FTriOp {
 238         float apply(int i, float a, float b, float c);
 239     }
 240 
 241     /*package-private*/
 242     abstract
 243     FloatVector tOp(Vector<Float> o1,
 244                              Vector<Float> o2,
 245                              FTriOp f);
 246     @ForceInline
 247     final
 248     FloatVector tOpTemplate(Vector<Float> o1,
 249                                      Vector<Float> o2,
 250                                      FTriOp f) {
 251         float[] res = new float[length()];
 252         float[] vec1 = this.vec();
 253         float[] vec2 = ((FloatVector)o1).vec();
 254         float[] vec3 = ((FloatVector)o2).vec();
 255         for (int i = 0; i < res.length; i++) {
 256             res[i] = f.apply(i, vec1[i], vec2[i], vec3[i]);
 257         }
 258         return vectorFactory(res);
 259     }
 260 
 261     /*package-private*/
 262     abstract
 263     FloatVector tOp(Vector<Float> o1,
 264                              Vector<Float> o2,
 265                              VectorMask<Float> m,
 266                              FTriOp f);
 267     @ForceInline
 268     final
 269     FloatVector tOpTemplate(Vector<Float> o1,
 270                                      Vector<Float> o2,
 271                                      VectorMask<Float> m,
 272                                      FTriOp f) {
 273         if (m == null) {
 274             return tOpTemplate(o1, o2, f);
 275         }
 276         float[] res = new float[length()];
 277         float[] vec1 = this.vec();
 278         float[] vec2 = ((FloatVector)o1).vec();
 279         float[] vec3 = ((FloatVector)o2).vec();
 280         boolean[] mbits = ((AbstractMask<Float>)m).getBits();
 281         for (int i = 0; i < res.length; i++) {
 282             res[i] = mbits[i] ? f.apply(i, vec1[i], vec2[i], vec3[i]) : vec1[i];
 283         }
 284         return vectorFactory(res);
 285     }
 286 
 287     // Reduction operator
 288 
 289     /*package-private*/
 290     abstract
 291     float rOp(float v, VectorMask<Float> m, FBinOp f);
 292 
 293     @ForceInline
 294     final
 295     float rOpTemplate(float v, VectorMask<Float> m, FBinOp f) {
 296         if (m == null) {
 297             return rOpTemplate(v, f);
 298         }
 299         float[] vec = vec();
 300         boolean[] mbits = ((AbstractMask<Float>)m).getBits();
 301         for (int i = 0; i < vec.length; i++) {
 302             v = mbits[i] ? f.apply(i, v, vec[i]) : v;
 303         }
 304         return v;
 305     }
 306 
 307     @ForceInline
 308     final
 309     float rOpTemplate(float v, FBinOp f) {
 310         float[] vec = vec();
 311         for (int i = 0; i < vec.length; i++) {
 312             v = f.apply(i, v, vec[i]);
 313         }
 314         return v;
 315     }
 316 
 317     // Memory reference
 318 
 319     /*package-private*/
 320     interface FLdOp<M> {
 321         float apply(M memory, int offset, int i);
 322     }
 323 
 324     /*package-private*/
 325     @ForceInline
 326     final
 327     <M> FloatVector ldOp(M memory, int offset,
 328                                   FLdOp<M> f) {
 329         //dummy; no vec = vec();
 330         float[] res = new float[length()];
 331         for (int i = 0; i < res.length; i++) {
 332             res[i] = f.apply(memory, offset, i);
 333         }
 334         return vectorFactory(res);
 335     }
 336 
 337     /*package-private*/
 338     @ForceInline
 339     final
 340     <M> FloatVector ldOp(M memory, int offset,
 341                                   VectorMask<Float> m,
 342                                   FLdOp<M> f) {
 343         //float[] vec = vec();
 344         float[] res = new float[length()];
 345         boolean[] mbits = ((AbstractMask<Float>)m).getBits();
 346         for (int i = 0; i < res.length; i++) {
 347             if (mbits[i]) {
 348                 res[i] = f.apply(memory, offset, i);
 349             }
 350         }
 351         return vectorFactory(res);
 352     }
 353 
 354     interface FStOp<M> {
 355         void apply(M memory, int offset, int i, float a);
 356     }
 357 
 358     /*package-private*/
 359     @ForceInline
 360     final
 361     <M> void stOp(M memory, int offset,
 362                   FStOp<M> f) {
 363         float[] vec = vec();
 364         for (int i = 0; i < vec.length; i++) {
 365             f.apply(memory, offset, i, vec[i]);
 366         }
 367     }
 368 
 369     /*package-private*/
 370     @ForceInline
 371     final
 372     <M> void stOp(M memory, int offset,
 373                   VectorMask<Float> m,
 374                   FStOp<M> f) {
 375         float[] vec = vec();
 376         boolean[] mbits = ((AbstractMask<Float>)m).getBits();
 377         for (int i = 0; i < vec.length; i++) {
 378             if (mbits[i]) {
 379                 f.apply(memory, offset, i, vec[i]);
 380             }
 381         }
 382     }
 383 
 384     // Binary test
 385 
 386     /*package-private*/
 387     interface FBinTest {
 388         boolean apply(int cond, int i, float a, float b);
 389     }
 390 
 391     /*package-private*/
 392     @ForceInline
 393     final
 394     AbstractMask<Float> bTest(int cond,
 395                                   Vector<Float> o,
 396                                   FBinTest f) {
 397         float[] vec1 = vec();
 398         float[] vec2 = ((FloatVector)o).vec();
 399         boolean[] bits = new boolean[length()];
 400         for (int i = 0; i < length(); i++){
 401             bits[i] = f.apply(cond, i, vec1[i], vec2[i]);
 402         }
 403         return maskFactory(bits);
 404     }
 405 
 406 
 407     /*package-private*/
 408     @Override
 409     abstract FloatSpecies vspecies();
 410 
 411     /*package-private*/
 412     @ForceInline
 413     static long toBits(float e) {
 414         return  Float.floatToRawIntBits(e);
 415     }
 416 
 417     /*package-private*/
 418     @ForceInline
 419     static float fromBits(long bits) {
 420         return Float.intBitsToFloat((int)bits);
 421     }
 422 
 423     // Static factories (other than memory operations)
 424 
 425     // Note: A surprising behavior in javadoc
 426     // sometimes makes a lone /** {@inheritDoc} */
 427     // comment drop the method altogether,
    // apparently if the method mentions a
 429     // parameter or return type of Vector<Float>
 430     // instead of Vector<E> as originally specified.
 431     // Adding an empty HTML fragment appears to
 432     // nudge javadoc into providing the desired
 433     // inherited documentation.  We use the HTML
 434     // comment <!--workaround--> for this.
 435 
 436     /**
 437      * Returns a vector of the given species
 438      * where all lane elements are set to
 439      * zero, the default primitive value.
 440      *
 441      * @param species species of the desired zero vector
 442      * @return a zero vector
 443      */
 444     @ForceInline
 445     public static FloatVector zero(VectorSpecies<Float> species) {
 446         FloatSpecies vsp = (FloatSpecies) species;
 447         return VectorSupport.fromBitsCoerced(vsp.vectorType(), float.class, species.length(),
 448                         toBits(0.0f), MODE_BROADCAST, vsp,
 449                         ((bits_, s_) -> s_.rvOp(i -> bits_)));
 450     }
 451 
 452     /**
 453      * Returns a vector of the same species as this one
 454      * where all lane elements are set to
 455      * the primitive value {@code e}.
 456      *
 457      * The contents of the current vector are discarded;
 458      * only the species is relevant to this operation.
 459      *
 460      * <p> This method returns the value of this expression:
 461      * {@code FloatVector.broadcast(this.species(), e)}.
 462      *
 463      * @apiNote
 464      * Unlike the similar method named {@code broadcast()}
 465      * in the supertype {@code Vector}, this method does not
 466      * need to validate its argument, and cannot throw
 467      * {@code IllegalArgumentException}.  This method is
 468      * therefore preferable to the supertype method.
 469      *
 470      * @param e the value to broadcast
 471      * @return a vector where all lane elements are set to
 472      *         the primitive value {@code e}
 473      * @see #broadcast(VectorSpecies,long)
 474      * @see Vector#broadcast(long)
 475      * @see VectorSpecies#broadcast(long)
 476      */
 477     public abstract FloatVector broadcast(float e);
 478 
 479     /**
 480      * Returns a vector of the given species
 481      * where all lane elements are set to
 482      * the primitive value {@code e}.
 483      *
 484      * @param species species of the desired vector
 485      * @param e the value to broadcast
 486      * @return a vector where all lane elements are set to
 487      *         the primitive value {@code e}
 488      * @see #broadcast(long)
 489      * @see Vector#broadcast(long)
 490      * @see VectorSpecies#broadcast(long)
 491      */
 492     @ForceInline
 493     public static FloatVector broadcast(VectorSpecies<Float> species, float e) {
 494         FloatSpecies vsp = (FloatSpecies) species;
 495         return vsp.broadcast(e);
 496     }
 497 
 498     /*package-private*/
 499     @ForceInline
 500     final FloatVector broadcastTemplate(float e) {
 501         FloatSpecies vsp = vspecies();
 502         return vsp.broadcast(e);
 503     }
 504 
 505     /**
 506      * {@inheritDoc} <!--workaround-->
 507      * @apiNote
 508      * When working with vector subtypes like {@code FloatVector},
 509      * {@linkplain #broadcast(float) the more strongly typed method}
 510      * is typically selected.  It can be explicitly selected
 511      * using a cast: {@code v.broadcast((float)e)}.
 512      * The two expressions will produce numerically identical results.
 513      */
 514     @Override
 515     public abstract FloatVector broadcast(long e);
 516 
 517     /**
 518      * Returns a vector of the given species
 519      * where all lane elements are set to
 520      * the primitive value {@code e}.
 521      *
 522      * The {@code long} value must be accurately representable
 523      * by the {@code ETYPE} of the vector species, so that
 524      * {@code e==(long)(ETYPE)e}.
 525      *
 526      * @param species species of the desired vector
 527      * @param e the value to broadcast
 528      * @return a vector where all lane elements are set to
 529      *         the primitive value {@code e}
 530      * @throws IllegalArgumentException
 531      *         if the given {@code long} value cannot
 532      *         be represented by the vector's {@code ETYPE}
 533      * @see #broadcast(VectorSpecies,float)
 534      * @see VectorSpecies#checkValue(long)
 535      */
 536     @ForceInline
 537     public static FloatVector broadcast(VectorSpecies<Float> species, long e) {
 538         FloatSpecies vsp = (FloatSpecies) species;
 539         return vsp.broadcast(e);
 540     }
 541 
 542     /*package-private*/
 543     @ForceInline
 544     final FloatVector broadcastTemplate(long e) {
 545         return vspecies().broadcast(e);
 546     }
 547 
 548     // Unary lanewise support
 549 
 550     /**
 551      * {@inheritDoc} <!--workaround-->
 552      */
 553     public abstract
 554     FloatVector lanewise(VectorOperators.Unary op);
 555 
 556     @ForceInline
 557     final
 558     FloatVector lanewiseTemplate(VectorOperators.Unary op) {
 559         if (opKind(op, VO_SPECIAL)) {
 560             if (op == ZOMO) {
 561                 return blend(broadcast(-1), compare(NE, 0));
 562             }
 563         }
 564         int opc = opCode(op);
 565         return VectorSupport.unaryOp(
 566             opc, getClass(), null, float.class, length(),
 567             this, null,
 568             UN_IMPL.find(op, opc, FloatVector::unaryOperations));
 569     }
 570 
 571     /**
 572      * {@inheritDoc} <!--workaround-->
 573      */
 574     @Override
 575     public abstract
 576     FloatVector lanewise(VectorOperators.Unary op,
 577                                   VectorMask<Float> m);
 578     @ForceInline
 579     final
 580     FloatVector lanewiseTemplate(VectorOperators.Unary op,
 581                                           Class<? extends VectorMask<Float>> maskClass,
 582                                           VectorMask<Float> m) {
 583         m.check(maskClass, this);
 584         if (opKind(op, VO_SPECIAL)) {
 585             if (op == ZOMO) {
 586                 return blend(broadcast(-1), compare(NE, 0, m));
 587             }
 588         }
 589         int opc = opCode(op);
 590         return VectorSupport.unaryOp(
 591             opc, getClass(), maskClass, float.class, length(),
 592             this, m,
 593             UN_IMPL.find(op, opc, FloatVector::unaryOperations));
 594     }
 595 
 596     private static final
 597     ImplCache<Unary, UnaryOperation<FloatVector, VectorMask<Float>>>
 598         UN_IMPL = new ImplCache<>(Unary.class, FloatVector.class);
 599 
 600     private static UnaryOperation<FloatVector, VectorMask<Float>> unaryOperations(int opc_) {
 601         switch (opc_) {
 602             case VECTOR_OP_NEG: return (v0, m) ->
 603                     v0.uOp(m, (i, a) -> (float) -a);
 604             case VECTOR_OP_ABS: return (v0, m) ->
 605                     v0.uOp(m, (i, a) -> (float) Math.abs(a));
 606             case VECTOR_OP_SIN: return (v0, m) ->
 607                     v0.uOp(m, (i, a) -> (float) Math.sin(a));
 608             case VECTOR_OP_COS: return (v0, m) ->
 609                     v0.uOp(m, (i, a) -> (float) Math.cos(a));
 610             case VECTOR_OP_TAN: return (v0, m) ->
 611                     v0.uOp(m, (i, a) -> (float) Math.tan(a));
 612             case VECTOR_OP_ASIN: return (v0, m) ->
 613                     v0.uOp(m, (i, a) -> (float) Math.asin(a));
 614             case VECTOR_OP_ACOS: return (v0, m) ->
 615                     v0.uOp(m, (i, a) -> (float) Math.acos(a));
 616             case VECTOR_OP_ATAN: return (v0, m) ->
 617                     v0.uOp(m, (i, a) -> (float) Math.atan(a));
 618             case VECTOR_OP_EXP: return (v0, m) ->
 619                     v0.uOp(m, (i, a) -> (float) Math.exp(a));
 620             case VECTOR_OP_LOG: return (v0, m) ->
 621                     v0.uOp(m, (i, a) -> (float) Math.log(a));
 622             case VECTOR_OP_LOG10: return (v0, m) ->
 623                     v0.uOp(m, (i, a) -> (float) Math.log10(a));
 624             case VECTOR_OP_SQRT: return (v0, m) ->
 625                     v0.uOp(m, (i, a) -> (float) Math.sqrt(a));
 626             case VECTOR_OP_CBRT: return (v0, m) ->
 627                     v0.uOp(m, (i, a) -> (float) Math.cbrt(a));
 628             case VECTOR_OP_SINH: return (v0, m) ->
 629                     v0.uOp(m, (i, a) -> (float) Math.sinh(a));
 630             case VECTOR_OP_COSH: return (v0, m) ->
 631                     v0.uOp(m, (i, a) -> (float) Math.cosh(a));
 632             case VECTOR_OP_TANH: return (v0, m) ->
 633                     v0.uOp(m, (i, a) -> (float) Math.tanh(a));
 634             case VECTOR_OP_EXPM1: return (v0, m) ->
 635                     v0.uOp(m, (i, a) -> (float) Math.expm1(a));
 636             case VECTOR_OP_LOG1P: return (v0, m) ->
 637                     v0.uOp(m, (i, a) -> (float) Math.log1p(a));
 638             default: return null;
 639         }
 640     }
 641 
 642     // Binary lanewise support
 643 
 644     /**
 645      * {@inheritDoc} <!--workaround-->
 646      * @see #lanewise(VectorOperators.Binary,float)
 647      * @see #lanewise(VectorOperators.Binary,float,VectorMask)
 648      */
 649     @Override
 650     public abstract
 651     FloatVector lanewise(VectorOperators.Binary op,
 652                                   Vector<Float> v);
 653     @ForceInline
 654     final
 655     FloatVector lanewiseTemplate(VectorOperators.Binary op,
 656                                           Vector<Float> v) {
 657         FloatVector that = (FloatVector) v;
 658         that.check(this);
 659 
 660         if (opKind(op, VO_SPECIAL )) {
 661             if (op == FIRST_NONZERO) {
 662                 // FIXME: Support this in the JIT.
 663                 VectorMask<Integer> thisNZ
 664                     = this.viewAsIntegralLanes().compare(NE, (int) 0);
 665                 that = that.blend((float) 0, thisNZ.cast(vspecies()));
 666                 op = OR_UNCHECKED;
 667                 // FIXME: Support OR_UNCHECKED on float/double also!
 668                 return this.viewAsIntegralLanes()
 669                     .lanewise(op, that.viewAsIntegralLanes())
 670                     .viewAsFloatingLanes();
 671             }
 672         }
 673 
 674         int opc = opCode(op);
 675         return VectorSupport.binaryOp(
 676             opc, getClass(), null, float.class, length(),
 677             this, that, null,
 678             BIN_IMPL.find(op, opc, FloatVector::binaryOperations));
 679     }
 680 
 681     /**
 682      * {@inheritDoc} <!--workaround-->
 683      * @see #lanewise(VectorOperators.Binary,float,VectorMask)
 684      */
 685     @Override
 686     public abstract
 687     FloatVector lanewise(VectorOperators.Binary op,
 688                                   Vector<Float> v,
 689                                   VectorMask<Float> m);
 690     @ForceInline
 691     final
 692     FloatVector lanewiseTemplate(VectorOperators.Binary op,
 693                                           Class<? extends VectorMask<Float>> maskClass,
 694                                           Vector<Float> v, VectorMask<Float> m) {
 695         FloatVector that = (FloatVector) v;
 696         that.check(this);
 697         m.check(maskClass, this);
 698 
 699         if (opKind(op, VO_SPECIAL )) {
 700             if (op == FIRST_NONZERO) {
 701                 return blend(lanewise(op, v), m);
 702             }
 703         }
 704 
 705         int opc = opCode(op);
 706         return VectorSupport.binaryOp(
 707             opc, getClass(), maskClass, float.class, length(),
 708             this, that, m,
 709             BIN_IMPL.find(op, opc, FloatVector::binaryOperations));
 710     }
 711 
 712     private static final
 713     ImplCache<Binary, BinaryOperation<FloatVector, VectorMask<Float>>>
 714         BIN_IMPL = new ImplCache<>(Binary.class, FloatVector.class);
 715 
 716     private static BinaryOperation<FloatVector, VectorMask<Float>> binaryOperations(int opc_) {
 717         switch (opc_) {
 718             case VECTOR_OP_ADD: return (v0, v1, vm) ->
 719                     v0.bOp(v1, vm, (i, a, b) -> (float)(a + b));
 720             case VECTOR_OP_SUB: return (v0, v1, vm) ->
 721                     v0.bOp(v1, vm, (i, a, b) -> (float)(a - b));
 722             case VECTOR_OP_MUL: return (v0, v1, vm) ->
 723                     v0.bOp(v1, vm, (i, a, b) -> (float)(a * b));
 724             case VECTOR_OP_DIV: return (v0, v1, vm) ->
 725                     v0.bOp(v1, vm, (i, a, b) -> (float)(a / b));
 726             case VECTOR_OP_MAX: return (v0, v1, vm) ->
 727                     v0.bOp(v1, vm, (i, a, b) -> (float)Math.max(a, b));
 728             case VECTOR_OP_MIN: return (v0, v1, vm) ->
 729                     v0.bOp(v1, vm, (i, a, b) -> (float)Math.min(a, b));
 730             case VECTOR_OP_OR: return (v0, v1, vm) ->
 731                     v0.bOp(v1, vm, (i, a, b) -> fromBits(toBits(a) | toBits(b)));
 732             case VECTOR_OP_ATAN2: return (v0, v1, vm) ->
 733                     v0.bOp(v1, vm, (i, a, b) -> (float) Math.atan2(a, b));
 734             case VECTOR_OP_POW: return (v0, v1, vm) ->
 735                     v0.bOp(v1, vm, (i, a, b) -> (float) Math.pow(a, b));
 736             case VECTOR_OP_HYPOT: return (v0, v1, vm) ->
 737                     v0.bOp(v1, vm, (i, a, b) -> (float) Math.hypot(a, b));
 738             default: return null;
 739         }
 740     }
 741 
 742     // FIXME: Maybe all of the public final methods in this file (the
 743     // simple ones that just call lanewise) should be pushed down to
 744     // the X-VectorBits template.  They can't optimize properly at
 745     // this level, and must rely on inlining.  Does it work?
 746     // (If it works, of course keep the code here.)
 747 
 748     /**
 749      * Combines the lane values of this vector
 750      * with the value of a broadcast scalar.
 751      *
 752      * This is a lane-wise binary operation which applies
 753      * the selected operation to each lane.
 754      * The return value will be equal to this expression:
 755      * {@code this.lanewise(op, this.broadcast(e))}.
 756      *
 757      * @param op the operation used to process lane values
 758      * @param e the input scalar
 759      * @return the result of applying the operation lane-wise
 760      *         to the two input vectors
 761      * @throws UnsupportedOperationException if this vector does
 762      *         not support the requested operation
 763      * @see #lanewise(VectorOperators.Binary,Vector)
 764      * @see #lanewise(VectorOperators.Binary,float,VectorMask)
 765      */
 766     @ForceInline
 767     public final
 768     FloatVector lanewise(VectorOperators.Binary op,
 769                                   float e) {
 770         return lanewise(op, broadcast(e));
 771     }
 772 
 773     /**
 774      * Combines the lane values of this vector
 775      * with the value of a broadcast scalar,
 776      * with selection of lane elements controlled by a mask.
 777      *
 778      * This is a masked lane-wise binary operation which applies
 779      * the selected operation to each lane.
 780      * The return value will be equal to this expression:
 781      * {@code this.lanewise(op, this.broadcast(e), m)}.
 782      *
 783      * @param op the operation used to process lane values
 784      * @param e the input scalar
 785      * @param m the mask controlling lane selection
 786      * @return the result of applying the operation lane-wise
 787      *         to the input vector and the scalar
 788      * @throws UnsupportedOperationException if this vector does
 789      *         not support the requested operation
 790      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
 791      * @see #lanewise(VectorOperators.Binary,float)
 792      */
 793     @ForceInline
 794     public final
 795     FloatVector lanewise(VectorOperators.Binary op,
 796                                   float e,
 797                                   VectorMask<Float> m) {
 798         return lanewise(op, broadcast(e), m);
 799     }
 800 
 801     /**
 802      * {@inheritDoc} <!--workaround-->
 803      * @apiNote
 804      * When working with vector subtypes like {@code FloatVector},
 805      * {@linkplain #lanewise(VectorOperators.Binary,float)
 806      * the more strongly typed method}
 807      * is typically selected.  It can be explicitly selected
 808      * using a cast: {@code v.lanewise(op,(float)e)}.
 809      * The two expressions will produce numerically identical results.
 810      */
 811     @ForceInline
 812     public final
 813     FloatVector lanewise(VectorOperators.Binary op,
 814                                   long e) {
 815         float e1 = (float) e;
 816         if ((long)e1 != e) {
 817             vspecies().checkValue(e);  // for exception
 818         }
 819         return lanewise(op, e1);
 820     }
 821 
 822     /**
 823      * {@inheritDoc} <!--workaround-->
 824      * @apiNote
 825      * When working with vector subtypes like {@code FloatVector},
 826      * {@linkplain #lanewise(VectorOperators.Binary,float,VectorMask)
 827      * the more strongly typed method}
 828      * is typically selected.  It can be explicitly selected
 829      * using a cast: {@code v.lanewise(op,(float)e,m)}.
 830      * The two expressions will produce numerically identical results.
 831      */
 832     @ForceInline
 833     public final
 834     FloatVector lanewise(VectorOperators.Binary op,
 835                                   long e, VectorMask<Float> m) {
 836         float e1 = (float) e;
 837         if ((long)e1 != e) {
 838             vspecies().checkValue(e);  // for exception
 839         }
 840         return lanewise(op, e1, m);
 841     }
 842 
 843 
 844     // Ternary lanewise support
 845 
 846     // Ternary operators come in eight variations:
 847     //   lanewise(op, [broadcast(e1)|v1], [broadcast(e2)|v2])
 848     //   lanewise(op, [broadcast(e1)|v1], [broadcast(e2)|v2], mask)
 849 
 850     // It is annoying to support all of these variations of masking
 851     // and broadcast, but it would be more surprising not to continue
 852     // the obvious pattern started by unary and binary.
 853 
 854    /**
 855      * {@inheritDoc} <!--workaround-->
 856      * @see #lanewise(VectorOperators.Ternary,float,float,VectorMask)
 857      * @see #lanewise(VectorOperators.Ternary,Vector,float,VectorMask)
 858      * @see #lanewise(VectorOperators.Ternary,float,Vector,VectorMask)
 859      * @see #lanewise(VectorOperators.Ternary,float,float)
 860      * @see #lanewise(VectorOperators.Ternary,Vector,float)
 861      * @see #lanewise(VectorOperators.Ternary,float,Vector)
 862      */
    @Override
    // Abstract: no body is provided here, so each concrete subclass must
    // supply one.  NOTE(review): presumably the subclasses forward to
    // lanewiseTemplate(op, v1, v2) declared just below -- confirm.
    public abstract
    FloatVector lanewise(VectorOperators.Ternary op,
                                                  Vector<Float> v1,
                                                  Vector<Float> v2);
 868     @ForceInline
 869     final
 870     FloatVector lanewiseTemplate(VectorOperators.Ternary op,
 871                                           Vector<Float> v1,
 872                                           Vector<Float> v2) {
 873         FloatVector that = (FloatVector) v1;
 874         FloatVector tother = (FloatVector) v2;
 875         // It's a word: https://www.dictionary.com/browse/tother
 876         // See also Chapter 11 of Dickens, Our Mutual Friend:
 877         // "Totherest Governor," replied Mr Riderhood...
 878         that.check(this);
 879         tother.check(this);
 880         int opc = opCode(op);
 881         return VectorSupport.ternaryOp(
 882             opc, getClass(), null, float.class, length(),
 883             this, that, tother, null,
 884             TERN_IMPL.find(op, opc, FloatVector::ternaryOperations));
 885     }
 886 
 887     /**
 888      * {@inheritDoc} <!--workaround-->
 889      * @see #lanewise(VectorOperators.Ternary,float,float,VectorMask)
 890      * @see #lanewise(VectorOperators.Ternary,Vector,float,VectorMask)
 891      * @see #lanewise(VectorOperators.Ternary,float,Vector,VectorMask)
 892      */
    @Override
    // Abstract: no body is provided here, so each concrete subclass must
    // supply one.  NOTE(review): presumably the subclasses forward to
    // lanewiseTemplate(op, maskClass, v1, v2, m) declared just below -- confirm.
    public abstract
    FloatVector lanewise(VectorOperators.Ternary op,
                                  Vector<Float> v1,
                                  Vector<Float> v2,
                                  VectorMask<Float> m);
 899     @ForceInline
 900     final
 901     FloatVector lanewiseTemplate(VectorOperators.Ternary op,
 902                                           Class<? extends VectorMask<Float>> maskClass,
 903                                           Vector<Float> v1,
 904                                           Vector<Float> v2,
 905                                           VectorMask<Float> m) {
 906         FloatVector that = (FloatVector) v1;
 907         FloatVector tother = (FloatVector) v2;
 908         // It's a word: https://www.dictionary.com/browse/tother
 909         // See also Chapter 11 of Dickens, Our Mutual Friend:
 910         // "Totherest Governor," replied Mr Riderhood...
 911         that.check(this);
 912         tother.check(this);
 913         m.check(maskClass, this);
 914 
 915         int opc = opCode(op);
 916         return VectorSupport.ternaryOp(
 917             opc, getClass(), maskClass, float.class, length(),
 918             this, that, tother, m,
 919             TERN_IMPL.find(op, opc, FloatVector::ternaryOperations));
 920     }
 921 
 922     private static final
 923     ImplCache<Ternary, TernaryOperation<FloatVector, VectorMask<Float>>>
 924         TERN_IMPL = new ImplCache<>(Ternary.class, FloatVector.class);
 925 
 926     private static TernaryOperation<FloatVector, VectorMask<Float>> ternaryOperations(int opc_) {
 927         switch (opc_) {
 928             case VECTOR_OP_FMA: return (v0, v1_, v2_, m) ->
 929                     v0.tOp(v1_, v2_, m, (i, a, b, c) -> Math.fma(a, b, c));
 930             default: return null;
 931         }
 932     }
 933 
 934     /**
 935      * Combines the lane values of this vector
 936      * with the values of two broadcast scalars.
 937      *
 938      * This is a lane-wise ternary operation which applies
 939      * the selected operation to each lane.
 940      * The return value will be equal to this expression:
 941      * {@code this.lanewise(op, this.broadcast(e1), this.broadcast(e2))}.
 942      *
 943      * @param op the operation used to combine lane values
 944      * @param e1 the first input scalar
 945      * @param e2 the second input scalar
 946      * @return the result of applying the operation lane-wise
 947      *         to the input vector and the scalars
 948      * @throws UnsupportedOperationException if this vector does
 949      *         not support the requested operation
 950      * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
 951      * @see #lanewise(VectorOperators.Ternary,float,float,VectorMask)
 952      */
 953     @ForceInline
 954     public final
 955     FloatVector lanewise(VectorOperators.Ternary op, //(op,e1,e2)
 956                                   float e1,
 957                                   float e2) {
 958         return lanewise(op, broadcast(e1), broadcast(e2));
 959     }
 960 
 961     /**
 962      * Combines the lane values of this vector
 963      * with the values of two broadcast scalars,
 964      * with selection of lane elements controlled by a mask.
 965      *
 966      * This is a masked lane-wise ternary operation which applies
 967      * the selected operation to each lane.
 968      * The return value will be equal to this expression:
 969      * {@code this.lanewise(op, this.broadcast(e1), this.broadcast(e2), m)}.
 970      *
 971      * @param op the operation used to combine lane values
 972      * @param e1 the first input scalar
 973      * @param e2 the second input scalar
 974      * @param m the mask controlling lane selection
 975      * @return the result of applying the operation lane-wise
 976      *         to the input vector and the scalars
 977      * @throws UnsupportedOperationException if this vector does
 978      *         not support the requested operation
 979      * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
 980      * @see #lanewise(VectorOperators.Ternary,float,float)
 981      */
 982     @ForceInline
 983     public final
 984     FloatVector lanewise(VectorOperators.Ternary op, //(op,e1,e2,m)
 985                                   float e1,
 986                                   float e2,
 987                                   VectorMask<Float> m) {
 988         return lanewise(op, broadcast(e1), broadcast(e2), m);
 989     }
 990 
 991     /**
 992      * Combines the lane values of this vector
 993      * with the values of another vector and a broadcast scalar.
 994      *
 995      * This is a lane-wise ternary operation which applies
 996      * the selected operation to each lane.
 997      * The return value will be equal to this expression:
 998      * {@code this.lanewise(op, v1, this.broadcast(e2))}.
 999      *
1000      * @param op the operation used to combine lane values
1001      * @param v1 the other input vector
1002      * @param e2 the input scalar
1003      * @return the result of applying the operation lane-wise
1004      *         to the input vectors and the scalar
1005      * @throws UnsupportedOperationException if this vector does
1006      *         not support the requested operation
1007      * @see #lanewise(VectorOperators.Ternary,float,float)
1008      * @see #lanewise(VectorOperators.Ternary,Vector,float,VectorMask)
1009      */
1010     @ForceInline
1011     public final
1012     FloatVector lanewise(VectorOperators.Ternary op, //(op,v1,e2)
1013                                   Vector<Float> v1,
1014                                   float e2) {
1015         return lanewise(op, v1, broadcast(e2));
1016     }
1017 
1018     /**
1019      * Combines the lane values of this vector
1020      * with the values of another vector and a broadcast scalar,
1021      * with selection of lane elements controlled by a mask.
1022      *
1023      * This is a masked lane-wise ternary operation which applies
1024      * the selected operation to each lane.
1025      * The return value will be equal to this expression:
1026      * {@code this.lanewise(op, v1, this.broadcast(e2), m)}.
1027      *
1028      * @param op the operation used to combine lane values
1029      * @param v1 the other input vector
1030      * @param e2 the input scalar
1031      * @param m the mask controlling lane selection
1032      * @return the result of applying the operation lane-wise
1033      *         to the input vectors and the scalar
1034      * @throws UnsupportedOperationException if this vector does
1035      *         not support the requested operation
1036      * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
1037      * @see #lanewise(VectorOperators.Ternary,float,float,VectorMask)
1038      * @see #lanewise(VectorOperators.Ternary,Vector,float)
1039      */
1040     @ForceInline
1041     public final
1042     FloatVector lanewise(VectorOperators.Ternary op, //(op,v1,e2,m)
1043                                   Vector<Float> v1,
1044                                   float e2,
1045                                   VectorMask<Float> m) {
1046         return lanewise(op, v1, broadcast(e2), m);
1047     }
1048 
1049     /**
1050      * Combines the lane values of this vector
1051      * with the values of another vector and a broadcast scalar.
1052      *
1053      * This is a lane-wise ternary operation which applies
1054      * the selected operation to each lane.
1055      * The return value will be equal to this expression:
1056      * {@code this.lanewise(op, this.broadcast(e1), v2)}.
1057      *
1058      * @param op the operation used to combine lane values
1059      * @param e1 the input scalar
1060      * @param v2 the other input vector
1061      * @return the result of applying the operation lane-wise
1062      *         to the input vectors and the scalar
1063      * @throws UnsupportedOperationException if this vector does
1064      *         not support the requested operation
1065      * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
1066      * @see #lanewise(VectorOperators.Ternary,float,Vector,VectorMask)
1067      */
1068     @ForceInline
1069     public final
1070     FloatVector lanewise(VectorOperators.Ternary op, //(op,e1,v2)
1071                                   float e1,
1072                                   Vector<Float> v2) {
1073         return lanewise(op, broadcast(e1), v2);
1074     }
1075 
1076     /**
1077      * Combines the lane values of this vector
1078      * with the values of another vector and a broadcast scalar,
1079      * with selection of lane elements controlled by a mask.
1080      *
1081      * This is a masked lane-wise ternary operation which applies
1082      * the selected operation to each lane.
1083      * The return value will be equal to this expression:
1084      * {@code this.lanewise(op, this.broadcast(e1), v2, m)}.
1085      *
1086      * @param op the operation used to combine lane values
1087      * @param e1 the input scalar
1088      * @param v2 the other input vector
1089      * @param m the mask controlling lane selection
1090      * @return the result of applying the operation lane-wise
1091      *         to the input vectors and the scalar
1092      * @throws UnsupportedOperationException if this vector does
1093      *         not support the requested operation
1094      * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
1095      * @see #lanewise(VectorOperators.Ternary,float,Vector)
1096      */
1097     @ForceInline
1098     public final
1099     FloatVector lanewise(VectorOperators.Ternary op, //(op,e1,v2,m)
1100                                   float e1,
1101                                   Vector<Float> v2,
1102                                   VectorMask<Float> m) {
1103         return lanewise(op, broadcast(e1), v2, m);
1104     }
1105 
1106     // (Thus endeth the Great and Mighty Ternary Ogdoad.)
1107     // https://en.wikipedia.org/wiki/Ogdoad
1108 
1109     /// FULL-SERVICE BINARY METHODS: ADD, SUB, MUL, DIV
1110     //
1111     // These include masked and non-masked versions.
1112     // This subclass adds broadcast (masked or not).
1113 
1114     /**
1115      * {@inheritDoc} <!--workaround-->
1116      * @see #add(float)
1117      */
1118     @Override
1119     @ForceInline
1120     public final FloatVector add(Vector<Float> v) {
1121         return lanewise(ADD, v);
1122     }
1123 
1124     /**
1125      * Adds this vector to the broadcast of an input scalar.
1126      *
1127      * This is a lane-wise binary operation which applies
1128      * the primitive addition operation ({@code +}) to each lane.
1129      *
1130      * This method is also equivalent to the expression
1131      * {@link #lanewise(VectorOperators.Binary,float)
1132      *    lanewise}{@code (}{@link VectorOperators#ADD
1133      *    ADD}{@code , e)}.
1134      *
1135      * @param e the input scalar
1136      * @return the result of adding each lane of this vector to the scalar
1137      * @see #add(Vector)
1138      * @see #broadcast(float)
1139      * @see #add(float,VectorMask)
1140      * @see VectorOperators#ADD
1141      * @see #lanewise(VectorOperators.Binary,Vector)
1142      * @see #lanewise(VectorOperators.Binary,float)
1143      */
1144     @ForceInline
1145     public final
1146     FloatVector add(float e) {
1147         return lanewise(ADD, e);
1148     }
1149 
1150     /**
1151      * {@inheritDoc} <!--workaround-->
1152      * @see #add(float,VectorMask)
1153      */
1154     @Override
1155     @ForceInline
1156     public final FloatVector add(Vector<Float> v,
1157                                           VectorMask<Float> m) {
1158         return lanewise(ADD, v, m);
1159     }
1160 
1161     /**
1162      * Adds this vector to the broadcast of an input scalar,
1163      * selecting lane elements controlled by a mask.
1164      *
1165      * This is a masked lane-wise binary operation which applies
1166      * the primitive addition operation ({@code +}) to each lane.
1167      *
1168      * This method is also equivalent to the expression
1169      * {@link #lanewise(VectorOperators.Binary,float,VectorMask)
1170      *    lanewise}{@code (}{@link VectorOperators#ADD
     *    ADD}{@code , e, m)}.
1172      *
1173      * @param e the input scalar
1174      * @param m the mask controlling lane selection
1175      * @return the result of adding each lane of this vector to the scalar
1176      * @see #add(Vector,VectorMask)
1177      * @see #broadcast(float)
1178      * @see #add(float)
1179      * @see VectorOperators#ADD
1180      * @see #lanewise(VectorOperators.Binary,Vector)
1181      * @see #lanewise(VectorOperators.Binary,float)
1182      */
1183     @ForceInline
1184     public final FloatVector add(float e,
1185                                           VectorMask<Float> m) {
1186         return lanewise(ADD, e, m);
1187     }
1188 
1189     /**
1190      * {@inheritDoc} <!--workaround-->
1191      * @see #sub(float)
1192      */
1193     @Override
1194     @ForceInline
1195     public final FloatVector sub(Vector<Float> v) {
1196         return lanewise(SUB, v);
1197     }
1198 
1199     /**
1200      * Subtracts an input scalar from this vector.
1201      *
     * This is a lane-wise binary operation which applies
1203      * the primitive subtraction operation ({@code -}) to each lane.
1204      *
1205      * This method is also equivalent to the expression
1206      * {@link #lanewise(VectorOperators.Binary,float)
1207      *    lanewise}{@code (}{@link VectorOperators#SUB
1208      *    SUB}{@code , e)}.
1209      *
1210      * @param e the input scalar
1211      * @return the result of subtracting the scalar from each lane of this vector
1212      * @see #sub(Vector)
1213      * @see #broadcast(float)
1214      * @see #sub(float,VectorMask)
1215      * @see VectorOperators#SUB
1216      * @see #lanewise(VectorOperators.Binary,Vector)
1217      * @see #lanewise(VectorOperators.Binary,float)
1218      */
1219     @ForceInline
1220     public final FloatVector sub(float e) {
1221         return lanewise(SUB, e);
1222     }
1223 
1224     /**
1225      * {@inheritDoc} <!--workaround-->
1226      * @see #sub(float,VectorMask)
1227      */
1228     @Override
1229     @ForceInline
1230     public final FloatVector sub(Vector<Float> v,
1231                                           VectorMask<Float> m) {
1232         return lanewise(SUB, v, m);
1233     }
1234 
1235     /**
1236      * Subtracts an input scalar from this vector
1237      * under the control of a mask.
1238      *
1239      * This is a masked lane-wise binary operation which applies
1240      * the primitive subtraction operation ({@code -}) to each lane.
1241      *
1242      * This method is also equivalent to the expression
1243      * {@link #lanewise(VectorOperators.Binary,float,VectorMask)
1244      *    lanewise}{@code (}{@link VectorOperators#SUB
     *    SUB}{@code , e, m)}.
1246      *
1247      * @param e the input scalar
1248      * @param m the mask controlling lane selection
1249      * @return the result of subtracting the scalar from each lane of this vector
1250      * @see #sub(Vector,VectorMask)
1251      * @see #broadcast(float)
1252      * @see #sub(float)
1253      * @see VectorOperators#SUB
1254      * @see #lanewise(VectorOperators.Binary,Vector)
1255      * @see #lanewise(VectorOperators.Binary,float)
1256      */
1257     @ForceInline
1258     public final FloatVector sub(float e,
1259                                           VectorMask<Float> m) {
1260         return lanewise(SUB, e, m);
1261     }
1262 
1263     /**
1264      * {@inheritDoc} <!--workaround-->
1265      * @see #mul(float)
1266      */
1267     @Override
1268     @ForceInline
1269     public final FloatVector mul(Vector<Float> v) {
1270         return lanewise(MUL, v);
1271     }
1272 
1273     /**
1274      * Multiplies this vector by the broadcast of an input scalar.
1275      *
1276      * This is a lane-wise binary operation which applies
1277      * the primitive multiplication operation ({@code *}) to each lane.
1278      *
1279      * This method is also equivalent to the expression
1280      * {@link #lanewise(VectorOperators.Binary,float)
1281      *    lanewise}{@code (}{@link VectorOperators#MUL
1282      *    MUL}{@code , e)}.
1283      *
1284      * @param e the input scalar
1285      * @return the result of multiplying this vector by the given scalar
1286      * @see #mul(Vector)
1287      * @see #broadcast(float)
1288      * @see #mul(float,VectorMask)
1289      * @see VectorOperators#MUL
1290      * @see #lanewise(VectorOperators.Binary,Vector)
1291      * @see #lanewise(VectorOperators.Binary,float)
1292      */
1293     @ForceInline
1294     public final FloatVector mul(float e) {
1295         return lanewise(MUL, e);
1296     }
1297 
1298     /**
1299      * {@inheritDoc} <!--workaround-->
1300      * @see #mul(float,VectorMask)
1301      */
1302     @Override
1303     @ForceInline
1304     public final FloatVector mul(Vector<Float> v,
1305                                           VectorMask<Float> m) {
1306         return lanewise(MUL, v, m);
1307     }
1308 
1309     /**
1310      * Multiplies this vector by the broadcast of an input scalar,
1311      * selecting lane elements controlled by a mask.
1312      *
1313      * This is a masked lane-wise binary operation which applies
1314      * the primitive multiplication operation ({@code *}) to each lane.
1315      *
1316      * This method is also equivalent to the expression
1317      * {@link #lanewise(VectorOperators.Binary,float,VectorMask)
1318      *    lanewise}{@code (}{@link VectorOperators#MUL
     *    MUL}{@code , e, m)}.
1320      *
1321      * @param e the input scalar
1322      * @param m the mask controlling lane selection
     * @return the result of multiplying each lane of this vector by the scalar
1324      * @see #mul(Vector,VectorMask)
1325      * @see #broadcast(float)
1326      * @see #mul(float)
1327      * @see VectorOperators#MUL
1328      * @see #lanewise(VectorOperators.Binary,Vector)
1329      * @see #lanewise(VectorOperators.Binary,float)
1330      */
1331     @ForceInline
1332     public final FloatVector mul(float e,
1333                                           VectorMask<Float> m) {
1334         return lanewise(MUL, e, m);
1335     }
1336 
1337     /**
1338      * {@inheritDoc} <!--workaround-->
1339      * @apiNote Because the underlying scalar operator is an IEEE
1340      * floating point number, division by zero in fact will
1341      * not throw an exception, but will yield a signed
1342      * infinity or NaN.
1343      */
1344     @Override
1345     @ForceInline
1346     public final FloatVector div(Vector<Float> v) {
1347         return lanewise(DIV, v);
1348     }
1349 
1350     /**
1351      * Divides this vector by the broadcast of an input scalar.
1352      *
1353      * This is a lane-wise binary operation which applies
1354      * the primitive division operation ({@code /}) to each lane.
1355      *
1356      * This method is also equivalent to the expression
1357      * {@link #lanewise(VectorOperators.Binary,float)
1358      *    lanewise}{@code (}{@link VectorOperators#DIV
1359      *    DIV}{@code , e)}.
1360      *
1361      * @apiNote Because the underlying scalar operator is an IEEE
1362      * floating point number, division by zero in fact will
1363      * not throw an exception, but will yield a signed
1364      * infinity or NaN.
1365      *
1366      * @param e the input scalar
1367      * @return the result of dividing each lane of this vector by the scalar
1368      * @see #div(Vector)
1369      * @see #broadcast(float)
1370      * @see #div(float,VectorMask)
1371      * @see VectorOperators#DIV
1372      * @see #lanewise(VectorOperators.Binary,Vector)
1373      * @see #lanewise(VectorOperators.Binary,float)
1374      */
1375     @ForceInline
1376     public final FloatVector div(float e) {
1377         return lanewise(DIV, e);
1378     }
1379 
1380     /**
1381      * {@inheritDoc} <!--workaround-->
1382      * @see #div(float,VectorMask)
1383      * @apiNote Because the underlying scalar operator is an IEEE
1384      * floating point number, division by zero in fact will
1385      * not throw an exception, but will yield a signed
1386      * infinity or NaN.
1387      */
1388     @Override
1389     @ForceInline
1390     public final FloatVector div(Vector<Float> v,
1391                                           VectorMask<Float> m) {
1392         return lanewise(DIV, v, m);
1393     }
1394 
1395     /**
1396      * Divides this vector by the broadcast of an input scalar,
1397      * selecting lane elements controlled by a mask.
1398      *
1399      * This is a masked lane-wise binary operation which applies
1400      * the primitive division operation ({@code /}) to each lane.
1401      *
1402      * This method is also equivalent to the expression
1403      * {@link #lanewise(VectorOperators.Binary,float,VectorMask)
1404      *    lanewise}{@code (}{@link VectorOperators#DIV
     *    DIV}{@code , e, m)}.
1406      *
1407      * @apiNote Because the underlying scalar operator is an IEEE
1408      * floating point number, division by zero in fact will
1409      * not throw an exception, but will yield a signed
1410      * infinity or NaN.
1411      *
1412      * @param e the input scalar
1413      * @param m the mask controlling lane selection
1414      * @return the result of dividing each lane of this vector by the scalar
1415      * @see #div(Vector,VectorMask)
1416      * @see #broadcast(float)
1417      * @see #div(float)
1418      * @see VectorOperators#DIV
1419      * @see #lanewise(VectorOperators.Binary,Vector)
1420      * @see #lanewise(VectorOperators.Binary,float)
1421      */
1422     @ForceInline
1423     public final FloatVector div(float e,
1424                                           VectorMask<Float> m) {
1425         return lanewise(DIV, e, m);
1426     }
1427 
1428     /// END OF FULL-SERVICE BINARY METHODS
1429 
1430     /// SECOND-TIER BINARY METHODS
1431     //
1432     // There are no masked versions.
1433 
1434     /**
1435      * {@inheritDoc} <!--workaround-->
1436      * @apiNote
1437      * For this method, floating point negative
1438      * zero {@code -0.0} is treated as a value distinct from, and less
1439      * than the default value (positive zero).
1440      */
1441     @Override
1442     @ForceInline
1443     public final FloatVector min(Vector<Float> v) {
1444         return lanewise(MIN, v);
1445     }
1446 
1447     // FIXME:  "broadcast of an input scalar" is really wordy.  Reduce?
1448     /**
1449      * Computes the smaller of this vector and the broadcast of an input scalar.
1450      *
1451      * This is a lane-wise binary operation which applies the
1452      * operation {@code Math.min()} to each pair of
1453      * corresponding lane values.
1454      *
1455      * This method is also equivalent to the expression
1456      * {@link #lanewise(VectorOperators.Binary,float)
1457      *    lanewise}{@code (}{@link VectorOperators#MIN
1458      *    MIN}{@code , e)}.
1459      *
1460      * @param e the input scalar
     * @return the lane-wise minimum of this vector and the given scalar
1462      * @see #min(Vector)
1463      * @see #broadcast(float)
1464      * @see VectorOperators#MIN
1465      * @see #lanewise(VectorOperators.Binary,float,VectorMask)
1466      * @apiNote
1467      * For this method, floating point negative
1468      * zero {@code -0.0} is treated as a value distinct from, and less
1469      * than the default value (positive zero).
1470      */
1471     @ForceInline
1472     public final FloatVector min(float e) {
1473         return lanewise(MIN, e);
1474     }
1475 
1476     /**
1477      * {@inheritDoc} <!--workaround-->
1478      * @apiNote
1479      * For this method, floating point negative
1480      * zero {@code -0.0} is treated as a value distinct from, and less
1481      * than the default value (positive zero).
1482      */
1483     @Override
1484     @ForceInline
1485     public final FloatVector max(Vector<Float> v) {
1486         return lanewise(MAX, v);
1487     }
1488 
1489     /**
1490      * Computes the larger of this vector and the broadcast of an input scalar.
1491      *
1492      * This is a lane-wise binary operation which applies the
1493      * operation {@code Math.max()} to each pair of
1494      * corresponding lane values.
1495      *
1496      * This method is also equivalent to the expression
1497      * {@link #lanewise(VectorOperators.Binary,float)
1498      *    lanewise}{@code (}{@link VectorOperators#MAX
1499      *    MAX}{@code , e)}.
1500      *
1501      * @param e the input scalar
     * @return the lane-wise maximum of this vector and the given scalar
1503      * @see #max(Vector)
1504      * @see #broadcast(float)
1505      * @see VectorOperators#MAX
1506      * @see #lanewise(VectorOperators.Binary,float,VectorMask)
1507      * @apiNote
1508      * For this method, floating point negative
1509      * zero {@code -0.0} is treated as a value distinct from, and less
1510      * than the default value (positive zero).
1511      */
1512     @ForceInline
1513     public final FloatVector max(float e) {
1514         return lanewise(MAX, e);
1515     }
1516 
1517 
1518     // common FP operator: pow
1519     /**
1520      * Raises this vector to the power of a second input vector.
1521      *
1522      * This is a lane-wise binary operation which applies an operation
1523      * conforming to the specification of
1524      * {@link Math#pow Math.pow(a,b)}
1525      * to each pair of corresponding lane values.
1526      * The operation is adapted to cast the operands and the result,
1527      * specifically widening {@code float} operands to {@code double}
1528      * operands and narrowing the {@code double} result to a {@code float}
1529      * result.
1530      *
1531      * This method is also equivalent to the expression
1532      * {@link #lanewise(VectorOperators.Binary,Vector)
1533      *    lanewise}{@code (}{@link VectorOperators#POW
1534      *    POW}{@code , b)}.
1535      *
1536      * <p>
1537      * This is not a full-service named operation like
1538      * {@link #add(Vector) add}.  A masked version of
1539      * this operation is not directly available
1540      * but may be obtained via the masked version of
1541      * {@code lanewise}.
1542      *
1543      * @param b a vector exponent by which to raise this vector
1544      * @return the {@code b}-th power of this vector
1545      * @see #pow(float)
1546      * @see VectorOperators#POW
1547      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
1548      */
1549     @ForceInline
1550     public final FloatVector pow(Vector<Float> b) {
1551         return lanewise(POW, b);
1552     }
1553 
1554     /**
1555      * Raises this vector to a scalar power.
1556      *
1557      * This is a lane-wise binary operation which applies an operation
1558      * conforming to the specification of
1559      * {@link Math#pow Math.pow(a,b)}
1560      * to each pair of corresponding lane values.
1561      * The operation is adapted to cast the operands and the result,
1562      * specifically widening {@code float} operands to {@code double}
1563      * operands and narrowing the {@code double} result to a {@code float}
1564      * result.
1565      *
1566      * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Binary,float)
1568      *    lanewise}{@code (}{@link VectorOperators#POW
1569      *    POW}{@code , b)}.
1570      *
1571      * @param b a scalar exponent by which to raise this vector
1572      * @return the {@code b}-th power of this vector
1573      * @see #pow(Vector)
1574      * @see VectorOperators#POW
1575      * @see #lanewise(VectorOperators.Binary,float,VectorMask)
1576      */
1577     @ForceInline
1578     public final FloatVector pow(float b) {
1579         return lanewise(POW, b);
1580     }
1581 
1582     /// UNARY METHODS
1583 
1584     /**
1585      * {@inheritDoc} <!--workaround-->
1586      */
1587     @Override
1588     @ForceInline
1589     public final
1590     FloatVector neg() {
1591         return lanewise(NEG);
1592     }
1593 
1594     /**
1595      * {@inheritDoc} <!--workaround-->
1596      */
1597     @Override
1598     @ForceInline
1599     public final
1600     FloatVector abs() {
1601         return lanewise(ABS);
1602     }
1603 
1604 
1605     // sqrt
1606     /**
1607      * Computes the square root of this vector.
1608      *
1609      * This is a lane-wise unary operation which applies an operation
1610      * conforming to the specification of
1611      * {@link Math#sqrt Math.sqrt(a)}
1612      * to each lane value.
1613      * The operation is adapted to cast the operand and the result,
1614      * specifically widening the {@code float} operand to a {@code double}
1615      * operand and narrowing the {@code double} result to a {@code float}
1616      * result.
1617      *
1618      * This method is also equivalent to the expression
1619      * {@link #lanewise(VectorOperators.Unary)
1620      *    lanewise}{@code (}{@link VectorOperators#SQRT
1621      *    SQRT}{@code )}.
1622      *
1623      * @return the square root of this vector
1624      * @see VectorOperators#SQRT
1625      * @see #lanewise(VectorOperators.Unary,VectorMask)
1626      */
1627     @ForceInline
1628     public final FloatVector sqrt() {
1629         return lanewise(SQRT);
1630     }
1631 
1632     /// COMPARISONS
1633 
1634     /**
1635      * {@inheritDoc} <!--workaround-->
1636      */
1637     @Override
1638     @ForceInline
1639     public final
1640     VectorMask<Float> eq(Vector<Float> v) {
1641         return compare(EQ, v);
1642     }
1643 
1644     /**
1645      * Tests if this vector is equal to an input scalar.
1646      *
1647      * This is a lane-wise binary test operation which applies
1648      * the primitive equals operation ({@code ==}) to each lane.
1649      * The result is the same as {@code compare(VectorOperators.Comparison.EQ, e)}.
1650      *
1651      * @param e the input scalar
1652      * @return the result mask of testing if this vector
1653      *         is equal to {@code e}
1654      * @see #compare(VectorOperators.Comparison,float)
1655      */
1656     @ForceInline
1657     public final
1658     VectorMask<Float> eq(float e) {
1659         return compare(EQ, e);
1660     }
1661 
1662     /**
1663      * {@inheritDoc} <!--workaround-->
1664      */
1665     @Override
1666     @ForceInline
1667     public final
1668     VectorMask<Float> lt(Vector<Float> v) {
1669         return compare(LT, v);
1670     }
1671 
1672     /**
1673      * Tests if this vector is less than an input scalar.
1674      *
1675      * This is a lane-wise binary test operation which applies
1676      * the primitive less than operation ({@code <}) to each lane.
1677      * The result is the same as {@code compare(VectorOperators.LT, e)}.
1678      *
1679      * @param e the input scalar
1680      * @return the mask result of testing if this vector
1681      *         is less than the input scalar
1682      * @see #compare(VectorOperators.Comparison,float)
1683      */
1684     @ForceInline
1685     public final
1686     VectorMask<Float> lt(float e) {
1687         return compare(LT, e);
1688     }
1689 
    // Abstract here. testTemplate(Class, Test) in this class holds the shared
    // logic for IS_DEFAULT, IS_NEGATIVE, IS_FINITE, IS_NAN and IS_INFINITE.
    // NOTE(review): presumably the concrete subclasses delegate to it -- confirm.
    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    public abstract
    VectorMask<Float> test(VectorOperators.Test op);
1696 
1697     /*package-private*/
1698     @ForceInline
1699     final
1700     <M extends VectorMask<Float>>
1701     M testTemplate(Class<M> maskType, Test op) {
1702         FloatSpecies vsp = vspecies();
1703         if (opKind(op, VO_SPECIAL)) {
1704             IntVector bits = this.viewAsIntegralLanes();
1705             VectorMask<Integer> m;
1706             if (op == IS_DEFAULT) {
1707                 m = bits.compare(EQ, (int) 0);
1708             } else if (op == IS_NEGATIVE) {
1709                 m = bits.compare(LT, (int) 0);
1710             }
1711             else if (op == IS_FINITE ||
1712                      op == IS_NAN ||
1713                      op == IS_INFINITE) {
1714                 // first kill the sign:
1715                 bits = bits.and(Integer.MAX_VALUE);
1716                 // next find the bit pattern for infinity:
1717                 int infbits = (int) toBits(Float.POSITIVE_INFINITY);
1718                 // now compare:
1719                 if (op == IS_FINITE) {
1720                     m = bits.compare(LT, infbits);
1721                 } else if (op == IS_NAN) {
1722                     m = bits.compare(GT, infbits);
1723                 } else {
1724                     m = bits.compare(EQ, infbits);
1725                 }
1726             }
1727             else {
1728                 throw new AssertionError(op);
1729             }
1730             return maskType.cast(m.cast(vsp));
1731         }
1732         int opc = opCode(op);
1733         throw new AssertionError(op);
1734     }
1735 
    // Abstract here. testTemplate(Class, Test, M) in this class holds the
    // shared masked logic.
    // NOTE(review): presumably the concrete subclasses delegate to it -- confirm.
    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    public abstract
    VectorMask<Float> test(VectorOperators.Test op,
                                  VectorMask<Float> m);
1743 
1744     /*package-private*/
1745     @ForceInline
1746     final
1747     <M extends VectorMask<Float>>
1748     M testTemplate(Class<M> maskType, Test op, M mask) {
1749         FloatSpecies vsp = vspecies();
1750         mask.check(maskType, this);
1751         if (opKind(op, VO_SPECIAL)) {
1752             IntVector bits = this.viewAsIntegralLanes();
1753             VectorMask<Integer> m = mask.cast(IntVector.species(shape()));
1754             if (op == IS_DEFAULT) {
1755                 m = bits.compare(EQ, (int) 0, m);
1756             } else if (op == IS_NEGATIVE) {
1757                 m = bits.compare(LT, (int) 0, m);
1758             }
1759             else if (op == IS_FINITE ||
1760                      op == IS_NAN ||
1761                      op == IS_INFINITE) {
1762                 // first kill the sign:
1763                 bits = bits.and(Integer.MAX_VALUE);
1764                 // next find the bit pattern for infinity:
1765                 int infbits = (int) toBits(Float.POSITIVE_INFINITY);
1766                 // now compare:
1767                 if (op == IS_FINITE) {
1768                     m = bits.compare(LT, infbits, m);
1769                 } else if (op == IS_NAN) {
1770                     m = bits.compare(GT, infbits, m);
1771                 } else {
1772                     m = bits.compare(EQ, infbits, m);
1773                 }
1774             }
1775             else {
1776                 throw new AssertionError(op);
1777             }
1778             return maskType.cast(m.cast(vsp));
1779         }
1780         int opc = opCode(op);
1781         throw new AssertionError(op);
1782     }
1783 
    // Abstract here. compareTemplate(Class, Comparison, Vector) in this class
    // holds the shared logic (a call to VectorSupport.compare).
    // NOTE(review): presumably the concrete subclasses delegate to it -- confirm.
    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    public abstract
    VectorMask<Float> compare(VectorOperators.Comparison op, Vector<Float> v);
1790 
1791     /*package-private*/
1792     @ForceInline
1793     final
1794     <M extends VectorMask<Float>>
1795     M compareTemplate(Class<M> maskType, Comparison op, Vector<Float> v) {
1796         FloatVector that = (FloatVector) v;
1797         that.check(this);
1798         int opc = opCode(op);
1799         return VectorSupport.compare(
1800             opc, getClass(), maskType, float.class, length(),
1801             this, that, null,
1802             (cond, v0, v1, m1) -> {
1803                 AbstractMask<Float> m
1804                     = v0.bTest(cond, v1, (cond_, i, a, b)
1805                                -> compareWithOp(cond, a, b));
1806                 @SuppressWarnings("unchecked")
1807                 M m2 = (M) m;
1808                 return m2;
1809             });
1810     }
1811 
1812     /*package-private*/
1813     @ForceInline
1814     final
1815     <M extends VectorMask<Float>>
1816     M compareTemplate(Class<M> maskType, Comparison op, Vector<Float> v, M m) {
1817         FloatVector that = (FloatVector) v;
1818         that.check(this);
1819         m.check(maskType, this);
1820         int opc = opCode(op);
1821         return VectorSupport.compare(
1822             opc, getClass(), maskType, float.class, length(),
1823             this, that, m,
1824             (cond, v0, v1, m1) -> {
1825                 AbstractMask<Float> cmpM
1826                     = v0.bTest(cond, v1, (cond_, i, a, b)
1827                                -> compareWithOp(cond, a, b));
1828                 @SuppressWarnings("unchecked")
1829                 M m2 = (M) cmpM.and(m1);
1830                 return m2;
1831             });
1832     }
1833 
1834     @ForceInline
1835     private static boolean compareWithOp(int cond, float a, float b) {
1836         return switch (cond) {
1837             case BT_eq -> a == b;
1838             case BT_ne -> a != b;
1839             case BT_lt -> a < b;
1840             case BT_le -> a <= b;
1841             case BT_gt -> a > b;
1842             case BT_ge -> a >= b;
1843             default -> throw new AssertionError();
1844         };
1845     }
1846 
    // Abstract here. compareTemplate(Class, Comparison, float) in this class
    // broadcasts the scalar and reuses the vector comparison template.
    // NOTE(review): presumably the concrete subclasses delegate to it -- confirm.
    /**
     * Tests this vector by comparing it with an input scalar,
     * according to the given comparison operation.
     *
     * This is a lane-wise binary test operation which applies
     * the comparison operation to each lane.
     * <p>
     * The result is the same as
     * {@code compare(op, broadcast(species(), e))}.
     * That is, the scalar may be regarded as broadcast to
     * a vector of the same species, and then compared
     * against the original vector, using the selected
     * comparison operation.
     *
     * @param op the operation used to compare lane values
     * @param e the input scalar
     * @return the mask result of testing lane-wise if this vector
     *         compares to the input, according to the selected
     *         comparison operator
     * @see FloatVector#compare(VectorOperators.Comparison,Vector)
     * @see #eq(float)
     * @see #lt(float)
     */
    public abstract
    VectorMask<Float> compare(Comparison op, float e);
1872 
1873     /*package-private*/
1874     @ForceInline
1875     final
1876     <M extends VectorMask<Float>>
1877     M compareTemplate(Class<M> maskType, Comparison op, float e) {
1878         return compareTemplate(maskType, op, broadcast(e));
1879     }
1880 
1881     /**
1882      * Tests this vector by comparing it with an input scalar,
1883      * according to the given comparison operation,
1884      * in lanes selected by a mask.
1885      *
1886      * This is a masked lane-wise binary test operation which applies
1887      * to each pair of corresponding lane values.
1888      *
1889      * The returned result is equal to the expression
1890      * {@code compare(op,s).and(m)}.
1891      *
1892      * @param op the operation used to compare lane values
1893      * @param e the input scalar
1894      * @param m the mask controlling lane selection
1895      * @return the mask result of testing lane-wise if this vector
1896      *         compares to the input, according to the selected
1897      *         comparison operator,
1898      *         and only in the lanes selected by the mask
1899      * @see FloatVector#compare(VectorOperators.Comparison,Vector,VectorMask)
1900      */
1901     @ForceInline
1902     public final VectorMask<Float> compare(VectorOperators.Comparison op,
1903                                                float e,
1904                                                VectorMask<Float> m) {
1905         return compare(op, broadcast(e), m);
1906     }
1907 
    // Abstract here. compareTemplate(Class, Comparison, long) in this class
    // broadcasts the long scalar and reuses the vector comparison template.
    // NOTE(review): presumably the concrete subclasses delegate to it -- confirm.
    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    public abstract
    VectorMask<Float> compare(Comparison op, long e);
1914 
1915     /*package-private*/
1916     @ForceInline
1917     final
1918     <M extends VectorMask<Float>>
1919     M compareTemplate(Class<M> maskType, Comparison op, long e) {
1920         return compareTemplate(maskType, op, broadcast(e));
1921     }
1922 
1923     /**
1924      * {@inheritDoc} <!--workaround-->
1925      */
1926     @Override
1927     @ForceInline
1928     public final
1929     VectorMask<Float> compare(Comparison op, long e, VectorMask<Float> m) {
1930         return compare(op, broadcast(e), m);
1931     }
1932 
1933 
1934 
    // Abstract here. blendTemplate(Class, FloatVector, M) in this class holds
    // the shared logic (a call to VectorSupport.blend).
    // NOTE(review): presumably the concrete subclasses delegate to it -- confirm.
    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override public abstract
    FloatVector blend(Vector<Float> v, VectorMask<Float> m);
1940 
1941     /*package-private*/
1942     @ForceInline
1943     final
1944     <M extends VectorMask<Float>>
1945     FloatVector
1946     blendTemplate(Class<M> maskType, FloatVector v, M m) {
1947         v.check(this);
1948         return VectorSupport.blend(
1949             getClass(), maskType, float.class, length(),
1950             this, v, m,
1951             (v0, v1, m_) -> v0.bOp(v1, m_, (i, a, b) -> b));
1952     }
1953 
    // Abstract here. addIndexTemplate(int) in this class holds the shared
    // logic (a call to VectorSupport.indexVector).
    // NOTE(review): presumably the concrete subclasses delegate to it -- confirm.
    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override public abstract FloatVector addIndex(int scale);
1958 
1959     /*package-private*/
1960     @ForceInline
1961     final FloatVector addIndexTemplate(int scale) {
1962         FloatSpecies vsp = vspecies();
1963         // make sure VLENGTH*scale doesn't overflow:
1964         vsp.checkScale(scale);
1965         return VectorSupport.indexVector(
1966             getClass(), float.class, length(),
1967             this, scale, vsp,
1968             (v, scale_, s)
1969             -> {
1970                 // If the platform doesn't support an INDEX
1971                 // instruction directly, load IOTA from memory
1972                 // and multiply.
1973                 FloatVector iota = s.iota();
1974                 float sc = (float) scale_;
1975                 return v.add(sc == 1 ? iota : iota.mul(sc));
1976             });
1977     }
1978 
1979     /**
1980      * Replaces selected lanes of this vector with
1981      * a scalar value
1982      * under the control of a mask.
1983      *
1984      * This is a masked lane-wise binary operation which
1985      * selects each lane value from one or the other input.
1986      *
1987      * The returned result is equal to the expression
1988      * {@code blend(broadcast(e),m)}.
1989      *
1990      * @param e the input scalar, containing the replacement lane value
1991      * @param m the mask controlling lane selection of the scalar
1992      * @return the result of blending the lane elements of this vector with
1993      *         the scalar value
1994      */
1995     @ForceInline
1996     public final FloatVector blend(float e,
1997                                             VectorMask<Float> m) {
1998         return blend(broadcast(e), m);
1999     }
2000 
2001     /**
2002      * Replaces selected lanes of this vector with
2003      * a scalar value
2004      * under the control of a mask.
2005      *
2006      * This is a masked lane-wise binary operation which
2007      * selects each lane value from one or the other input.
2008      *
2009      * The returned result is equal to the expression
2010      * {@code blend(broadcast(e),m)}.
2011      *
2012      * @param e the input scalar, containing the replacement lane value
2013      * @param m the mask controlling lane selection of the scalar
2014      * @return the result of blending the lane elements of this vector with
2015      *         the scalar value
2016      */
2017     @ForceInline
2018     public final FloatVector blend(long e,
2019                                             VectorMask<Float> m) {
2020         return blend(broadcast(e), m);
2021     }
2022 
    // Abstract here. sliceTemplate(int, Vector) in this class holds the
    // shared logic (two rearranges combined by a blend).
    // NOTE(review): presumably the concrete subclasses delegate to it -- confirm.
    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    public abstract
    FloatVector slice(int origin, Vector<Float> v1);
2029 
2030     /*package-private*/
2031     final
2032     @ForceInline
2033     FloatVector sliceTemplate(int origin, Vector<Float> v1) {
2034         FloatVector that = (FloatVector) v1;
2035         that.check(this);
2036         Objects.checkIndex(origin, length() + 1);
2037         VectorShuffle<Float> iota = iotaShuffle();
2038         VectorMask<Float> blendMask = iota.toVector().compare(VectorOperators.LT, (broadcast((float)(length() - origin))));
2039         iota = iotaShuffle(origin, 1, true);
2040         return that.rearrange(iota).blend(this.rearrange(iota), blendMask);
2041     }
2042 
2043     /**
2044      * {@inheritDoc} <!--workaround-->
2045      */
2046     @Override
2047     @ForceInline
2048     public final
2049     FloatVector slice(int origin,
2050                                Vector<Float> w,
2051                                VectorMask<Float> m) {
2052         return broadcast(0).blend(slice(origin, w), m);
2053     }
2054 
    // Abstract here. sliceTemplate(int) in this class holds the shared logic
    // (a rearrange blended onto the species' zero vector).
    // NOTE(review): presumably the concrete subclasses delegate to it -- confirm.
    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    public abstract
    FloatVector slice(int origin);
2061 
2062     /*package-private*/
2063     final
2064     @ForceInline
2065     FloatVector sliceTemplate(int origin) {
2066         Objects.checkIndex(origin, length() + 1);
2067         VectorShuffle<Float> iota = iotaShuffle();
2068         VectorMask<Float> blendMask = iota.toVector().compare(VectorOperators.LT, (broadcast((float)(length() - origin))));
2069         iota = iotaShuffle(origin, 1, true);
2070         return vspecies().zero().blend(this.rearrange(iota), blendMask);
2071     }
2072 
    // Abstract here. unsliceTemplate(int, Vector, int) in this class holds
    // the shared logic (a rearrange of this vector blended onto w).
    // NOTE(review): presumably the concrete subclasses delegate to it -- confirm.
    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    public abstract
    FloatVector unslice(int origin, Vector<Float> w, int part);
2079 
2080     /*package-private*/
2081     final
2082     @ForceInline
2083     FloatVector
2084     unsliceTemplate(int origin, Vector<Float> w, int part) {
2085         FloatVector that = (FloatVector) w;
2086         that.check(this);
2087         Objects.checkIndex(origin, length() + 1);
2088         VectorShuffle<Float> iota = iotaShuffle();
2089         VectorMask<Float> blendMask = iota.toVector().compare((part == 0) ? VectorOperators.GE : VectorOperators.LT,
2090                                                                   (broadcast((float)(origin))));
2091         iota = iotaShuffle(-origin, 1, true);
2092         return that.blend(this.rearrange(iota), blendMask);
2093     }
2094 
2095     /*package-private*/
2096     final
2097     @ForceInline
2098     <M extends VectorMask<Float>>
2099     FloatVector
2100     unsliceTemplate(Class<M> maskType, int origin, Vector<Float> w, int part, M m) {
2101         FloatVector that = (FloatVector) w;
2102         that.check(this);
2103         FloatVector slice = that.sliceTemplate(origin, that);
2104         slice = slice.blendTemplate(maskType, this, m);
2105         return slice.unsliceTemplate(origin, w, part);
2106     }
2107 
    // Abstract here. unsliceTemplate(Class, int, Vector, int, M) in this
    // class holds the shared masked logic (slice, blend, then unslice).
    // NOTE(review): presumably the concrete subclasses delegate to it -- confirm.
    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    public abstract
    FloatVector unslice(int origin, Vector<Float> w, int part, VectorMask<Float> m);
2114 
    // Abstract here. unsliceTemplate(int) in this class holds the shared
    // logic (a rearrange blended onto the species' zero vector).
    // NOTE(review): presumably the concrete subclasses delegate to it -- confirm.
    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    public abstract
    FloatVector unslice(int origin);
2121 
2122     /*package-private*/
2123     final
2124     @ForceInline
2125     FloatVector
2126     unsliceTemplate(int origin) {
2127         Objects.checkIndex(origin, length() + 1);
2128         VectorShuffle<Float> iota = iotaShuffle();
2129         VectorMask<Float> blendMask = iota.toVector().compare(VectorOperators.GE,
2130                                                                   (broadcast((float)(origin))));
2131         iota = iotaShuffle(-origin, 1, true);
2132         return vspecies().zero().blend(this.rearrange(iota), blendMask);
2133     }
2134 
2135     private ArrayIndexOutOfBoundsException
2136     wrongPartForSlice(int part) {
2137         String msg = String.format("bad part number %d for slice operation",
2138                                    part);
2139         return new ArrayIndexOutOfBoundsException(msg);
2140     }
2141 
    // Abstract here. rearrangeTemplate(Class, S) in this class holds the
    // shared logic (index check, then VectorSupport.rearrangeOp).
    // NOTE(review): presumably the concrete subclasses delegate to it -- confirm.
    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    public abstract
    FloatVector rearrange(VectorShuffle<Float> m);
2148 
2149     /*package-private*/
2150     @ForceInline
2151     final
2152     <S extends VectorShuffle<Float>>
2153     FloatVector rearrangeTemplate(Class<S> shuffletype, S shuffle) {
2154         shuffle.checkIndexes();
2155         return VectorSupport.rearrangeOp(
2156             getClass(), shuffletype, null, float.class, length(),
2157             this, shuffle, null,
2158             (v1, s_, m_) -> v1.uOp((i, a) -> {
2159                 int ei = s_.laneSource(i);
2160                 return v1.lane(ei);
2161             }));
2162     }
2163 
    // Abstract here. rearrangeTemplate(Class, Class, S, M) in this class
    // holds the shared masked logic.
    // NOTE(review): presumably the concrete subclasses delegate to it -- confirm.
    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    public abstract
    FloatVector rearrange(VectorShuffle<Float> s,
                                   VectorMask<Float> m);
2171 
2172     /*package-private*/
2173     @ForceInline
2174     final
2175     <S extends VectorShuffle<Float>, M extends VectorMask<Float>>
2176     FloatVector rearrangeTemplate(Class<S> shuffletype,
2177                                            Class<M> masktype,
2178                                            S shuffle,
2179                                            M m) {
2180 
2181         m.check(masktype, this);
2182         VectorMask<Float> valid = shuffle.laneIsValid();
2183         if (m.andNot(valid).anyTrue()) {
2184             shuffle.checkIndexes();
2185             throw new AssertionError();
2186         }
2187         return VectorSupport.rearrangeOp(
2188                    getClass(), shuffletype, masktype, float.class, length(),
2189                    this, shuffle, m,
2190                    (v1, s_, m_) -> v1.uOp((i, a) -> {
2191                         int ei = s_.laneSource(i);
2192                         return ei < 0  || !m_.laneIsSet(i) ? 0 : v1.lane(ei);
2193                    }));
2194     }
2195 
    // Abstract here. rearrangeTemplate(Class, S, FloatVector) in this class
    // holds the shared two-vector logic.
    // NOTE(review): presumably the concrete subclasses delegate to it -- confirm.
    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    public abstract
    FloatVector rearrange(VectorShuffle<Float> s,
                                   Vector<Float> v);
2203 
2204     /*package-private*/
2205     @ForceInline
2206     final
2207     <S extends VectorShuffle<Float>>
2208     FloatVector rearrangeTemplate(Class<S> shuffletype,
2209                                            S shuffle,
2210                                            FloatVector v) {
2211         VectorMask<Float> valid = shuffle.laneIsValid();
2212         @SuppressWarnings("unchecked")
2213         S ws = (S) shuffle.wrapIndexes();
2214         FloatVector r0 =
2215             VectorSupport.rearrangeOp(
2216                 getClass(), shuffletype, null, float.class, length(),
2217                 this, ws, null,
2218                 (v0, s_, m_) -> v0.uOp((i, a) -> {
2219                     int ei = s_.laneSource(i);
2220                     return v0.lane(ei);
2221                 }));
2222         FloatVector r1 =
2223             VectorSupport.rearrangeOp(
2224                 getClass(), shuffletype, null, float.class, length(),
2225                 v, ws, null,
2226                 (v1, s_, m_) -> v1.uOp((i, a) -> {
2227                     int ei = s_.laneSource(i);
2228                     return v1.lane(ei);
2229                 }));
2230         return r1.blend(r0, valid);
2231     }
2232 
2233     @ForceInline
2234     private final
2235     VectorShuffle<Float> toShuffle0(FloatSpecies dsp) {
2236         float[] a = toArray();
2237         int[] sa = new int[a.length];
2238         for (int i = 0; i < a.length; i++) {
2239             sa[i] = (int) a[i];
2240         }
2241         return VectorShuffle.fromArray(dsp, sa, 0);
2242     }
2243 
2244     /*package-private*/
2245     @ForceInline
2246     final
2247     VectorShuffle<Float> toShuffleTemplate(Class<?> shuffleType) {
2248         FloatSpecies vsp = vspecies();
2249         return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST,
2250                                      getClass(), float.class, length(),
2251                                      shuffleType, byte.class, length(),
2252                                      this, vsp,
2253                                      FloatVector::toShuffle0);
2254     }
2255 
    // Abstract here. selectFromTemplate(FloatVector) in this class holds the
    // shared logic (v.rearrange(this.toShuffle())).
    // NOTE(review): presumably the concrete subclasses delegate to it -- confirm.
    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    public abstract
    FloatVector selectFrom(Vector<Float> v);
2262 
2263     /*package-private*/
2264     @ForceInline
2265     final FloatVector selectFromTemplate(FloatVector v) {
2266         return v.rearrange(this.toShuffle());
2267     }
2268 
    // Abstract here. selectFromTemplate(FloatVector, AbstractMask) in this
    // class holds the shared masked logic (v.rearrange(this.toShuffle(), m)).
    // NOTE(review): presumably the concrete subclasses delegate to it -- confirm.
    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    public abstract
    FloatVector selectFrom(Vector<Float> s, VectorMask<Float> m);
2275 
2276     /*package-private*/
2277     @ForceInline
2278     final FloatVector selectFromTemplate(FloatVector v,
2279                                                   AbstractMask<Float> m) {
2280         return v.rearrange(this.toShuffle(), m);
2281     }
2282 
2283     /// Ternary operations
2284 
2285 
2286     /**
2287      * Multiplies this vector by a second input vector, and sums
2288      * the result with a third.
2289      *
2290      * Extended precision is used for the intermediate result,
2291      * avoiding possible loss of precision from rounding once
2292      * for each of the two operations.
2293      * The result is numerically close to {@code this.mul(b).add(c)},
2294      * and is typically closer to the true mathematical result.
2295      *
2296      * This is a lane-wise ternary operation which applies an operation
2297      * conforming to the specification of
2298      * {@link Math#fma(float,float,float) Math.fma(a,b,c)}
2299      * to each lane.
2300      * The operation is adapted to cast the operands and the result,
2301      * specifically widening {@code float} operands to {@code double}
2302      * operands and narrowing the {@code double} result to a {@code float}
2303      * result.
2304      *
2305      * This method is also equivalent to the expression
2306      * {@link #lanewise(VectorOperators.Ternary,Vector,Vector)
2307      *    lanewise}{@code (}{@link VectorOperators#FMA
2308      *    FMA}{@code , b, c)}.
2309      *
2310      * @param b the second input vector, supplying multiplier values
2311      * @param c the third input vector, supplying addend values
2312      * @return the product of this vector and the second input vector
2313      *         summed with the third input vector, using extended precision
2314      *         for the intermediate result
2315      * @see #fma(float,float)
2316      * @see VectorOperators#FMA
2317      * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
2318      */
2319     @ForceInline
2320     public final
2321     FloatVector fma(Vector<Float> b, Vector<Float> c) {
2322         return lanewise(FMA, b, c);
2323     }
2324 
2325     /**
2326      * Multiplies this vector by a scalar multiplier, and sums
2327      * the result with a scalar addend.
2328      *
2329      * Extended precision is used for the intermediate result,
2330      * avoiding possible loss of precision from rounding once
2331      * for each of the two operations.
2332      * The result is numerically close to {@code this.mul(b).add(c)},
2333      * and is typically closer to the true mathematical result.
2334      *
2335      * This is a lane-wise ternary operation which applies an operation
2336      * conforming to the specification of
2337      * {@link Math#fma(float,float,float) Math.fma(a,b,c)}
2338      * to each lane.
2339      * The operation is adapted to cast the operands and the result,
2340      * specifically widening {@code float} operands to {@code double}
2341      * operands and narrowing the {@code double} result to a {@code float}
2342      * result.
2343      *
2344      * This method is also equivalent to the expression
2345      * {@link #lanewise(VectorOperators.Ternary,Vector,Vector)
2346      *    lanewise}{@code (}{@link VectorOperators#FMA
2347      *    FMA}{@code , b, c)}.
2348      *
2349      * @param b the scalar multiplier
2350      * @param c the scalar addend
2351      * @return the product of this vector and the scalar multiplier
2352      *         summed with scalar addend, using extended precision
2353      *         for the intermediate result
2354      * @see #fma(Vector,Vector)
2355      * @see VectorOperators#FMA
2356      * @see #lanewise(VectorOperators.Ternary,float,float,VectorMask)
2357      */
2358     @ForceInline
2359     public final
2360     FloatVector fma(float b, float c) {
2361         return lanewise(FMA, b, c);
2362     }
2363 
2364     // Don't bother with (Vector,float) and (float,Vector) overloadings.
2365 
2366     // Type specific horizontal reductions
2367 
    // Abstract here. A reduceLanesTemplate helper is declared further down
    // in this class.
    // NOTE(review): presumably the concrete subclasses route through it -- confirm.
    /**
     * Returns a value accumulated from all the lanes of this vector.
     *
     * This is an associative cross-lane reduction operation which
     * applies the specified operation to all the lane elements.
     * <p>
     * A few reduction operations do not support arbitrary reordering
     * of their operands, yet are included here because of their
     * usefulness.
     * <ul>
     * <li>
     * In the case of {@code FIRST_NONZERO}, the reduction returns
     * the value from the lowest-numbered non-zero lane.
     * (As with {@code MAX} and {@code MIN}, floating point negative
     * zero {@code -0.0} is treated as a value distinct from
     * the default value, positive zero. So a first-nonzero lane reduction
     * might return {@code -0.0} even in the presence of non-zero
     * lane values.)
     * <li>
     * In the case of {@code ADD} and {@code MUL}, the
     * precise result will reflect the choice of an arbitrary order
     * of operations, which may even vary over time.
     * For further details see the section
     * <a href="VectorOperators.html#fp_assoc">Operations on floating point vectors</a>.
     * <li>
     * All other reduction operations are fully commutative and
     * associative.  The implementation can choose any order of
     * processing, yet it will always produce the same result.
     * </ul>
     *
     * @param op the operation used to combine lane values
     * @return the accumulated result
     * @throws UnsupportedOperationException if this vector does
     *         not support the requested operation
     * @see #reduceLanes(VectorOperators.Associative,VectorMask)
     * @see #add(Vector)
     * @see #mul(Vector)
     * @see #min(Vector)
     * @see #max(Vector)
     * @see VectorOperators#FIRST_NONZERO
     */
    public abstract float reduceLanes(VectorOperators.Associative op);
2410 
2411     /**
2412      * Returns a value accumulated from selected lanes of this vector,
2413      * controlled by a mask.
2414      *
2415      * This is an associative cross-lane reduction operation which
2416      * applies the specified operation to the selected lane elements.
2417      * <p>
2418      * If no elements are selected, an operation-specific identity
2419      * value is returned.
2420      * <ul>
2421      * <li>
2422      * If the operation is
2423      *  {@code ADD}
2424      * or {@code FIRST_NONZERO},
2425      * then the identity value is positive zero, the default {@code float} value.
2426      * <li>
2427      * If the operation is {@code MUL},
2428      * then the identity value is one.
2429      * <li>
2430      * If the operation is {@code MAX},
2431      * then the identity value is {@code Float.NEGATIVE_INFINITY}.
2432      * <li>
2433      * If the operation is {@code MIN},
2434      * then the identity value is {@code Float.POSITIVE_INFINITY}.
2435      * </ul>
2436      * <p>
2437      * A few reduction operations do not support arbitrary reordering
2438      * of their operands, yet are included here because of their
2439      * usefulness.
2440      * <ul>
2441      * <li>
2442      * In the case of {@code FIRST_NONZERO}, the reduction returns
2443      * the value from the lowest-numbered non-zero lane.
2444      * (As with {@code MAX} and {@code MIN}, floating point negative
2445      * zero {@code -0.0} is treated as a value distinct from
2446      * the default value, positive zero. So a first-nonzero lane reduction
2447      * might return {@code -0.0} even in the presence of non-zero
2448      * lane values.)
2449      * <li>
2450      * In the case of {@code ADD} and {@code MUL}, the
2451      * precise result will reflect the choice of an arbitrary order
2452      * of operations, which may even vary over time.
2453      * For further details see the section
2454      * <a href="VectorOperators.html#fp_assoc">Operations on floating point vectors</a>.
2455      * <li>
2456      * All other reduction operations are fully commutative and
2457      * associative.  The implementation can choose any order of
2458      * processing, yet it will always produce the same result.
2459      * </ul>
2460      *
2461      * @param op the operation used to combine lane values
2462      * @param m the mask controlling lane selection
2463      * @return the reduced result accumulated from the selected lane values
2464      * @throws UnsupportedOperationException if this vector does
2465      *         not support the requested operation
2466      * @see #reduceLanes(VectorOperators.Associative)
2467      */
2468     public abstract float reduceLanes(VectorOperators.Associative op,
2469                                        VectorMask<Float> m); // abstract, no body here. NOTE(review): implementations presumably delegate to reduceLanesTemplate(op, maskClass, m) -- confirm in the subclasses
2470 
2471     /*package-private*/ // masked reduction helper; m is checked against maskClass and this vector first
2472     @ForceInline
2473     final
2474     float reduceLanesTemplate(VectorOperators.Associative op,
2475                                Class<? extends VectorMask<Float>> maskClass,
2476                                VectorMask<Float> m) {
2477         m.check(maskClass, this);
2478         if (op == FIRST_NONZERO) {
2479             // FIXME:  The JIT should handle this.
2480             FloatVector v = broadcast((float) 0).blend(this, m);  // zero vector blended with this under m
2481             return v.reduceLanesTemplate(op);  // the unmasked overload then finds the first non-zero lane
2482         }
2483         int opc = opCode(op);
2484         return fromBits(VectorSupport.reductionCoerced(  // result goes through fromBits to become a float
2485             opc, getClass(), maskClass, float.class, length(),
2486             this, m,
2487             REDUCE_IMPL.find(op, opc, FloatVector::reductionOperations)));  // per-operator lambda from reductionOperations
2488     }
2489 
2490     /*package-private*/ // unmasked reduction helper; the masked overload also ends here for FIRST_NONZERO
2491     @ForceInline
2492     final
2493     float reduceLanesTemplate(VectorOperators.Associative op) {
2494         if (op == FIRST_NONZERO) {
2495             // FIXME:  The JIT should handle this.
2496             VectorMask<Integer> thisNZ  // lanes whose integral view compares NE to 0
2497                 = this.viewAsIntegralLanes().compare(NE, (int) 0);
2498             int ft = thisNZ.firstTrue();
2499             return ft < length() ? this.lane(ft) : (float) 0;  // ft >= length(): no such lane, result is zero
2500         }
2501         int opc = opCode(op);
2502         return fromBits(VectorSupport.reductionCoerced(
2503             opc, getClass(), null, float.class, length(),  // null where the masked overload passes maskClass
2504             this, null,  // null where the masked overload passes m
2505             REDUCE_IMPL.find(op, opc, FloatVector::reductionOperations)));
2506     }
2507 
2508     private static final  // looked up by both reduceLanesTemplate overloads via find(op, opc, FloatVector::reductionOperations)
2509     ImplCache<Associative, ReductionOperation<FloatVector, VectorMask<Float>>>
2510         REDUCE_IMPL = new ImplCache<>(Associative.class, FloatVector.class);
2511 
2512     private static ReductionOperation<FloatVector, VectorMask<Float>> reductionOperations(int opc_) {
2513         // rOp is seeded with each operation's identity: 0 for ADD, 1 for MUL, +Inf for MIN, -Inf for MAX.
2514         if (opc_ == VECTOR_OP_ADD) {
2515             return (vec, mask) -> toBits(vec.rOp(0.0f, mask, (lane, x, y) -> (float) (x + y)));
2516         } else if (opc_ == VECTOR_OP_MUL) {
2517             return (vec, mask) -> toBits(vec.rOp(1.0f, mask, (lane, x, y) -> (float) (x * y)));
2518         } else if (opc_ == VECTOR_OP_MIN) {
2519             return (vec, mask) -> toBits(vec.rOp(MAX_OR_INF, mask, (lane, x, y) -> (float) Math.min(x, y)));
2520         } else if (opc_ == VECTOR_OP_MAX) {
2521             return (vec, mask) -> toBits(vec.rOp(MIN_OR_INF, mask, (lane, x, y) -> (float) Math.max(x, y)));
2522         }
2523         return null;  // any other opcode has no entry here
2524     }
2525 
2526     private static final float MIN_OR_INF = Float.NEGATIVE_INFINITY;  // starting value passed to rOp for VECTOR_OP_MAX
2527     private static final float MAX_OR_INF = Float.POSITIVE_INFINITY;  // starting value passed to rOp for VECTOR_OP_MIN
2528 
2529     public @Override abstract long reduceLanesToLong(VectorOperators.Associative op);  // unmasked form; overrides a supertype method and is kept abstract
2530     public @Override abstract long reduceLanesToLong(VectorOperators.Associative op,
2531                                                      VectorMask<Float> m);  // masked form, likewise abstract
2532 
2533     // Type specific accessors
2534 
2535     /**
2536      * Gets the lane element at lane index {@code i}.
2537      *
2538      * @param i the lane index
2539      * @return the lane element at lane index {@code i}
2540      * @throws IllegalArgumentException if the index is out of range
2541      * ({@code < 0 || >= length()})
2542      */
2543     public abstract float lane(int i);
2544 
2545     /**
2546      * Replaces the lane element of this vector at lane index {@code i} with
2547      * value {@code e}.
2548      *
2549      * This is a cross-lane operation and behaves as if it returns the result
2550      * of blending this vector with an input vector that is the result of
2551      * broadcasting {@code e} and a mask that has only one lane set at lane
2552      * index {@code i}.
2553      *
2554      * @param i the lane index of the lane element to be replaced
2555      * @param e the value to be placed
2556      * @return the result of replacing the lane element of this vector at lane
2557      * index {@code i} with value {@code e}.
2558      * @throws IllegalArgumentException if the index is out of range
2559      * ({@code < 0 || >= length()})
2560      */
2561     public abstract FloatVector withLane(int i, float e);
2562 
2563     // Memory load operations
2564 
2565     /**
2566      * Returns an array of type {@code float[]}
2567      * containing all the lane values.
2568      * The array length is the same as the vector length.
2569      * The array elements are stored in lane order.
2570      * <p>
2571      * This method behaves as if it stores
2572      * this vector into an allocated array
2573      * (using {@link #intoArray(float[], int) intoArray})
2574      * and returns the array as follows:
2575      * <pre>{@code
2576      *   float[] a = new float[this.length()];
2577      *   this.intoArray(a, 0);
2578      *   return a;
2579      * }</pre>
2580      *
2581      * @return an array containing the lane values of this vector
2582      */
2583     @Override
2584     @ForceInline
2585     public final float[] toArray() {
2586         final float[] lanes = new float[vspecies().laneCount()];
2587         this.intoArray(lanes, 0);  // store this vector into the new array from index 0
2588         return lanes;
2589     }
2590 
2591     /** {@inheritDoc} <!--workaround-->
2592      */
2593     @Override
2594     @ForceInline
2595     public final int[] toIntArray() {
2596         final float[] lanes = toArray();
2597         final int count = lanes.length;
2598         final int[] ints = new int[count];
2599         for (int lane = 0; lane < count; lane++) {
2600             ints[lane] = (int) FloatSpecies.toIntegralChecked(lanes[lane], true);
2601         }
2602         return ints;
2603     }
2604 
2605     /** {@inheritDoc} <!--workaround-->
2606      */
2607     @Override
2608     @ForceInline
2609     public final long[] toLongArray() {
2610         final float[] lanes = toArray();
2611         final int count = lanes.length;
2612         final long[] longs = new long[count];
2613         for (int lane = 0; lane < count; lane++) {
2614             longs[lane] = FloatSpecies.toIntegralChecked(lanes[lane], false);
2615         }
2616         return longs;
2617     }
2618 
2619     /** {@inheritDoc} <!--workaround-->
2620      * @implNote
2621      * When this method is used on vectors
2622      * of type {@code FloatVector},
2623      * there will be no loss of precision.
2624      */
2625     @Override
2626     @ForceInline
2627     public final double[] toDoubleArray() {
2628         final float[] lanes = toArray();
2629         final double[] widened = new double[lanes.length];
2630         for (int lane = 0; lane < lanes.length; lane++) {
2631             widened[lane] = lanes[lane];  // float to double is a widening conversion
2632         }
2633         return widened;
2634     }
2635 
2636     /**
2637      * Loads a vector from a byte array starting at an offset.
2638      * Bytes are composed into primitive lane elements according
2639      * to the specified byte order.
2640      * The vector is arranged into lanes according to
2641      * <a href="Vector.html#lane-order">memory ordering</a>.
2642      * <p>
2643      * This method behaves as if it returns the result of calling
2644      * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
2645      * fromByteBuffer()} as follows:
2646      * <pre>{@code
2647      * var bb = ByteBuffer.wrap(a);
2648      * var m = species.maskAll(true);
2649      * return fromByteBuffer(species, bb, offset, bo, m);
2650      * }</pre>
2651      *
2652      * @param species species of desired vector
2653      * @param a the byte array
2654      * @param offset the offset into the array
2655      * @param bo the intended byte order
2656      * @return a vector loaded from a byte array
2657      * @throws IndexOutOfBoundsException
2658      *         if {@code offset+N*ESIZE < 0}
2659      *         or {@code offset+(N+1)*ESIZE > a.length}
2660      *         for any lane {@code N} in the vector
2661      */
2662     @ForceInline
2663     public static
2664     FloatVector fromByteArray(VectorSpecies<Float> species,
2665                                        byte[] a, int offset,
2666                                        ByteOrder bo) {
2667         offset = checkFromIndexSize(offset, species.vectorByteSize(), a.length);
2668         FloatSpecies vsp = (FloatSpecies) species;
2669         return vsp.dummyVector().fromByteArray0(a, offset).maybeSwap(bo);
2670     }
2671 
2672     /**
2673      * Loads a vector from a byte array starting at an offset
2674      * and using a mask.
2675      * Lanes where the mask is unset are filled with the default
2676      * value of {@code float} (positive zero).
2677      * Bytes are composed into primitive lane elements according
2678      * to the specified byte order.
2679      * The vector is arranged into lanes according to
2680      * <a href="Vector.html#lane-order">memory ordering</a>.
2681      * <p>
2682      * This method behaves as if it returns the result of calling
2683      * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
2684      * fromByteBuffer()} as follows:
2685      * <pre>{@code
2686      * var bb = ByteBuffer.wrap(a);
2687      * return fromByteBuffer(species, bb, offset, bo, m);
2688      * }</pre>
2689      *
2690      * @param species species of desired vector
2691      * @param a the byte array
2692      * @param offset the offset into the array
2693      * @param bo the intended byte order
2694      * @param m the mask controlling lane selection
2695      * @return a vector loaded from a byte array
2696      * @throws IndexOutOfBoundsException
2697      *         if {@code offset+N*ESIZE < 0}
2698      *         or {@code offset+(N+1)*ESIZE > a.length}
2699      *         for any lane {@code N} in the vector
2700      *         where the mask is set
2701      */
2702     @ForceInline
2703     public static
2704     FloatVector fromByteArray(VectorSpecies<Float> species,
2705                                        byte[] a, int offset,
2706                                        ByteOrder bo,
2707                                        VectorMask<Float> m) {
2708         FloatSpecies vsp = (FloatSpecies) species;
2709         if (offset >= 0 && offset <= (a.length - species.vectorByteSize())) {
2710             return vsp.dummyVector().fromByteArray0(a, offset, m).maybeSwap(bo);
2711         }
2712 
2713         // FIXME: optimize
2714         checkMaskFromIndexSize(offset, vsp, m, 4, a.length);
2715         ByteBuffer wb = wrapper(a, bo);
2716         return vsp.ldOp(wb, offset, (AbstractMask<Float>)m,
2717                    (wb_, o, i)  -> wb_.getFloat(o + i * 4));
2718     }
2719 
2720     /**
2721      * Loads a vector from an array of type {@code float[]}
2722      * starting at an offset.
2723      * For each vector lane, where {@code N} is the vector lane index, the
2724      * array element at index {@code offset + N} is placed into the
2725      * resulting vector at lane index {@code N}.
2726      *
2727      * @param species species of desired vector
2728      * @param a the array
2729      * @param offset the offset into the array
2730      * @return the vector loaded from an array
2731      * @throws IndexOutOfBoundsException
2732      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
2733      *         for any lane {@code N} in the vector
2734      */
2735     @ForceInline
2736     public static
2737     FloatVector fromArray(VectorSpecies<Float> species,
2738                                    float[] a, int offset) {
2739         offset = checkFromIndexSize(offset, species.length(), a.length);
2740         FloatSpecies vsp = (FloatSpecies) species;
2741         return vsp.dummyVector().fromArray0(a, offset);
2742     }
2743 
2744     /**
2745      * Loads a vector from an array of type {@code float[]}
2746      * starting at an offset and using a mask.
2747      * Lanes where the mask is unset are filled with the default
2748      * value of {@code float} (positive zero).
2749      * For each vector lane, where {@code N} is the vector lane index,
2750      * if the mask lane at index {@code N} is set then the array element at
2751      * index {@code offset + N} is placed into the resulting vector at lane index
2752      * {@code N}, otherwise the default element value is placed into the
2753      * resulting vector at lane index {@code N}.
2754      *
2755      * @param species species of desired vector
2756      * @param a the array
2757      * @param offset the offset into the array
2758      * @param m the mask controlling lane selection
2759      * @return the vector loaded from an array
2760      * @throws IndexOutOfBoundsException
2761      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
2762      *         for any lane {@code N} in the vector
2763      *         where the mask is set
2764      */
2765     @ForceInline
2766     public static
2767     FloatVector fromArray(VectorSpecies<Float> species,
2768                                    float[] a, int offset,
2769                                    VectorMask<Float> m) {
2770         FloatSpecies vsp = (FloatSpecies) species;
2771         if (offset >= 0 && offset <= (a.length - species.length())) {
2772             return vsp.dummyVector().fromArray0(a, offset, m);
2773         }
2774 
2775         // FIXME: optimize
2776         checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
2777         return vsp.vOp(m, i -> a[offset + i]);
2778     }
2779 
2780     /**
2781      * Gathers a new vector composed of elements from an array of type
2782      * {@code float[]},
2783      * using indexes obtained by adding a fixed {@code offset} to a
2784      * series of secondary offsets from an <em>index map</em>.
2785      * The index map is a contiguous sequence of {@code VLENGTH}
2786      * elements in a second array of {@code int}s, starting at a given
2787      * {@code mapOffset}.
2788      * <p>
2789      * For each vector lane, where {@code N} is the vector lane index,
2790      * the lane is loaded from the array
2791      * element {@code a[f(N)]}, where {@code f(N)} is the
2792      * index mapping expression
2793      * {@code offset + indexMap[mapOffset + N]}.
2794      *
2795      * @param species species of desired vector
2796      * @param a the array
2797      * @param offset the offset into the array, may be negative if relative
2798      * indexes in the index map compensate to produce a value within the
2799      * array bounds
2800      * @param indexMap the index map
2801      * @param mapOffset the offset into the index map
2802      * @return the vector loaded from the indexed elements of the array
2803      * @throws IndexOutOfBoundsException
2804      *         if {@code mapOffset+N < 0}
2805      *         or if {@code mapOffset+N >= indexMap.length},
2806      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
2807      *         is an invalid index into {@code a},
2808      *         for any lane {@code N} in the vector
2809      * @see FloatVector#toIntArray()
2810      */
2811     @ForceInline
2812     public static
2813     FloatVector fromArray(VectorSpecies<Float> species,
2814                                    float[] a, int offset,
2815                                    int[] indexMap, int mapOffset) {
2816         FloatSpecies vsp = (FloatSpecies) species;
2817         IntVector.IntSpecies isp = IntVector.species(vsp.indexShape());  // int species used for the index vector below
2818         Objects.requireNonNull(a);
2819         Objects.requireNonNull(indexMap);
2820         Class<? extends FloatVector> vectorType = vsp.vectorType();
2821
2822         // Index vector: vix[0:n] = k -> offset + indexMap[mapOffset + k]
2823         IntVector vix = IntVector
2824             .fromArray(isp, indexMap, mapOffset)
2825             .add(offset);
2826         // The computed indexes are checked against a.length before the load is issued.
2827         vix = VectorIntrinsics.checkIndex(vix, a.length);
2828
2829         return VectorSupport.loadWithMap(
2830             vectorType, null, float.class, vsp.laneCount(),
2831             isp.vectorType(),
2832             a, ARRAY_BASE, vix, null,
2833             a, offset, indexMap, mapOffset, vsp,
2834             (c, idx, iMap, idy, s, vm) ->  // lambda form of the gather: lane n reads c[idx + iMap[idy + n]]
2835             s.vOp(n -> c[idx + iMap[idy+n]]));
2836     }
2837 
2838     /**
2839      * Gathers a new vector composed of elements from an array of type
2840      * {@code float[]},
2841      * under the control of a mask, and
2842      * using indexes obtained by adding a fixed {@code offset} to a
2843      * series of secondary offsets from an <em>index map</em>.
2844      * The index map is a contiguous sequence of {@code VLENGTH}
2845      * elements in a second array of {@code int}s, starting at a given
2846      * {@code mapOffset}.
2847      * <p>
2848      * For each vector lane, where {@code N} is the vector lane index,
2849      * if the lane is set in the mask,
2850      * the lane is loaded from the array
2851      * element {@code a[f(N)]}, where {@code f(N)} is the
2852      * index mapping expression
2853      * {@code offset + indexMap[mapOffset + N]}.
2854      * Unset lanes in the resulting vector are set to zero.
2855      *
2856      * @param species species of desired vector
2857      * @param a the array
2858      * @param offset the offset into the array, may be negative if relative
2859      * indexes in the index map compensate to produce a value within the
2860      * array bounds
2861      * @param indexMap the index map
2862      * @param mapOffset the offset into the index map
2863      * @param m the mask controlling lane selection
2864      * @return the vector loaded from the indexed elements of the array
2865      * @throws IndexOutOfBoundsException
2866      *         if {@code mapOffset+N < 0}
2867      *         or if {@code mapOffset+N >= indexMap.length},
2868      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
2869      *         is an invalid index into {@code a},
2870      *         for any lane {@code N} in the vector
2871      *         where the mask is set
2872      * @see FloatVector#toIntArray()
2873      */
2874     @ForceInline
2875     public static
2876     FloatVector fromArray(VectorSpecies<Float> species,
2877                                    float[] a, int offset,
2878                                    int[] indexMap, int mapOffset,
2879                                    VectorMask<Float> m) {
2880         if (m.allTrue()) {
2881             return fromArray(species, a, offset, indexMap, mapOffset);
2882         }
2883         else {
2884             FloatSpecies vsp = (FloatSpecies) species;
2885             return vsp.dummyVector().fromArray0(a, offset, indexMap, mapOffset, m);
2886         }
2887     }
2888 
2889 
2890 
2891     /**
2892      * Loads a vector from a {@linkplain ByteBuffer byte buffer}
2893      * starting at an offset into the byte buffer.
2894      * Bytes are composed into primitive lane elements according
2895      * to the specified byte order.
2896      * The vector is arranged into lanes according to
2897      * <a href="Vector.html#lane-order">memory ordering</a>.
2898      * <p>
2899      * This method behaves as if it returns the result of calling
2900      * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
2901      * fromByteBuffer()} as follows:
2902      * <pre>{@code
2903      * var m = species.maskAll(true);
2904      * return fromByteBuffer(species, bb, offset, bo, m);
2905      * }</pre>
2906      *
2907      * @param species species of desired vector
2908      * @param bb the byte buffer
2909      * @param offset the offset into the byte buffer
2910      * @param bo the intended byte order
2911      * @return a vector loaded from a byte buffer
2912      * @throws IndexOutOfBoundsException
2913      *         if {@code offset+N*4 < 0}
2914      *         or {@code offset+N*4 >= bb.limit()}
2915      *         for any lane {@code N} in the vector
2916      */
2917     @ForceInline
2918     public static
2919     FloatVector fromByteBuffer(VectorSpecies<Float> species,
2920                                         ByteBuffer bb, int offset,
2921                                         ByteOrder bo) {
2922         offset = checkFromIndexSize(offset, species.vectorByteSize(), bb.limit());
2923         FloatSpecies vsp = (FloatSpecies) species;
2924         return vsp.dummyVector().fromByteBuffer0(bb, offset).maybeSwap(bo);
2925     }
2926 
2927     /**
2928      * Loads a vector from a {@linkplain ByteBuffer byte buffer}
2929      * starting at an offset into the byte buffer
2930      * and using a mask.
2931      * Lanes where the mask is unset are filled with the default
2932      * value of {@code float} (positive zero).
2933      * Bytes are composed into primitive lane elements according
2934      * to the specified byte order.
2935      * The vector is arranged into lanes according to
2936      * <a href="Vector.html#lane-order">memory ordering</a>.
2937      * <p>
2938      * The following pseudocode illustrates the behavior:
2939      * <pre>{@code
2940      * FloatBuffer eb = bb.duplicate()
2941      *     .position(offset)
2942      *     .order(bo).asFloatBuffer();
2943      * float[] ar = new float[species.length()];
2944      * for (int n = 0; n < ar.length; n++) {
2945      *     if (m.laneIsSet(n)) {
2946      *         ar[n] = eb.get(n);
2947      *     }
2948      * }
2949      * FloatVector r = FloatVector.fromArray(species, ar, 0);
2950      * }</pre>
2951      * @implNote
2952      * This operation is likely to be more efficient if
2953      * the specified byte order is the same as
2954      * {@linkplain ByteOrder#nativeOrder()
2955      * the platform native order},
2956      * since this method will not need to reorder
2957      * the bytes of lane values.
2958      *
2959      * @param species species of desired vector
2960      * @param bb the byte buffer
2961      * @param offset the offset into the byte buffer
2962      * @param bo the intended byte order
2963      * @param m the mask controlling lane selection
2964      * @return a vector loaded from a byte buffer
2965      * @throws IndexOutOfBoundsException
2966      *         if {@code offset+N*4 < 0}
2967      *         or {@code offset+N*4 >= bb.limit()}
2968      *         for any lane {@code N} in the vector
2969      *         where the mask is set
2970      */
2971     @ForceInline
2972     public static
2973     FloatVector fromByteBuffer(VectorSpecies<Float> species,
2974                                         ByteBuffer bb, int offset,
2975                                         ByteOrder bo,
2976                                         VectorMask<Float> m) {
2977         FloatSpecies vsp = (FloatSpecies) species;
2978         if (offset >= 0 && offset <= (bb.limit() - species.vectorByteSize())) {
2979             return vsp.dummyVector().fromByteBuffer0(bb, offset, m).maybeSwap(bo);
2980         }
2981 
2982         // FIXME: optimize
2983         checkMaskFromIndexSize(offset, vsp, m, 4, bb.limit());
2984         ByteBuffer wb = wrapper(bb, bo);
2985         return vsp.ldOp(wb, offset, (AbstractMask<Float>)m,
2986                    (wb_, o, i)  -> wb_.getFloat(o + i * 4));
2987     }
2988 
2989     // Memory store operations
2990 
2991     /**
2992      * Stores this vector into an array of type {@code float[]}
2993      * starting at an offset.
2994      * <p>
2995      * For each vector lane, where {@code N} is the vector lane index,
2996      * the lane element at index {@code N} is stored into the array
2997      * element {@code a[offset+N]}.
2998      *
2999      * @param a the array, of type {@code float[]}
3000      * @param offset the offset into the array
3001      * @throws IndexOutOfBoundsException
3002      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
3003      *         for any lane {@code N} in the vector
3004      */
3005     @ForceInline
3006     public final
3007     void intoArray(float[] a, int offset) {
3008         offset = checkFromIndexSize(offset, length(), a.length);  // the checked value replaces offset from here on
3009         FloatSpecies vsp = vspecies();
3010         VectorSupport.store(
3011             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3012             a, arrayAddress(a, offset),
3013             this,
3014             a, offset,
3015             (arr, off, v)  // lambda form of the store, expressed with stOp
3016             -> v.stOp(arr, off,
3017                       (arr_, off_, i, e) -> arr_[off_ + i] = e));  // element e for index i is written to arr_[off_ + i]
3018     }
3019 
3020     /**
3021      * Stores this vector into an array of type {@code float[]}
3022      * starting at offset and using a mask.
3023      * <p>
3024      * For each vector lane, where {@code N} is the vector lane index,
3025      * the lane element at index {@code N} is stored into the array
3026      * element {@code a[offset+N]}.
3027      * If the mask lane at {@code N} is unset then the corresponding
3028      * array element {@code a[offset+N]} is left unchanged.
3029      * <p>
3030      * Array range checking is done for lanes where the mask is set.
3031      * Lanes where the mask is unset are not stored and do not need
3032      * to correspond to legitimate elements of {@code a}.
3033      * That is, unset lanes may correspond to array indexes less than
3034      * zero or beyond the end of the array.
3035      *
3036      * @param a the array, of type {@code float[]}
3037      * @param offset the offset into the array
3038      * @param m the mask controlling lane storage
3039      * @throws IndexOutOfBoundsException
3040      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
3041      *         for any lane {@code N} in the vector
3042      *         where the mask is set
3043      */
3044     @ForceInline
3045     public final
3046     void intoArray(float[] a, int offset,
3047                    VectorMask<Float> m) {
3048         if (m.allTrue()) {
3049             intoArray(a, offset);
3050         } else {
3051             FloatSpecies vsp = vspecies();
3052             checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
3053             intoArray0(a, offset, m);
3054         }
3055     }
3056 
3057     /**
3058      * Scatters this vector into an array of type {@code float[]}
3059      * using indexes obtained by adding a fixed {@code offset} to a
3060      * series of secondary offsets from an <em>index map</em>.
3061      * The index map is a contiguous sequence of {@code VLENGTH}
3062      * elements in a second array of {@code int}s, starting at a given
3063      * {@code mapOffset}.
3064      * <p>
3065      * For each vector lane, where {@code N} is the vector lane index,
3066      * the lane element at index {@code N} is stored into the array
3067      * element {@code a[f(N)]}, where {@code f(N)} is the
3068      * index mapping expression
3069      * {@code offset + indexMap[mapOffset + N]}.
3070      *
3071      * @param a the array
3072      * @param offset an offset to combine with the index map offsets
3073      * @param indexMap the index map
3074      * @param mapOffset the offset into the index map
3075      * @throws IndexOutOfBoundsException
3076      *         if {@code mapOffset+N < 0}
3077      *         or if {@code mapOffset+N >= indexMap.length},
3078      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
3079      *         is an invalid index into {@code a},
3080      *         for any lane {@code N} in the vector
3081      * @see FloatVector#toIntArray()
3082      */
3083     @ForceInline
3084     public final
3085     void intoArray(float[] a, int offset,
3086                    int[] indexMap, int mapOffset) {
3087         FloatSpecies vsp = vspecies();
3088         IntVector.IntSpecies isp = IntVector.species(vsp.indexShape());  // int species used for the index vector below
3089         // Index vector: vix[0:n] = i -> offset + indexMap[mapOffset + i]
3090         IntVector vix = IntVector
3091             .fromArray(isp, indexMap, mapOffset)
3092             .add(offset);
3093         // The computed indexes are checked against a.length before the store is issued.
3094         vix = VectorIntrinsics.checkIndex(vix, a.length);
3095
3096         VectorSupport.storeWithMap(
3097             vsp.vectorType(), null, vsp.elementType(), vsp.laneCount(),
3098             isp.vectorType(),
3099             a, arrayAddress(a, 0), vix,
3100             this, null,
3101             a, offset, indexMap, mapOffset,
3102             (arr, off, v, map, mo, vm)  // lambda form of the scatter, expressed with stOp
3103             -> v.stOp(arr, off,
3104                       (arr_, off_, i, e) -> {  // writes through the enclosing arr/off; arr_/off_ are not used
3105                           int j = map[mo + i];
3106                           arr[off + j] = e;
3107                       }));
3108     }
3109 
3110     /**
3111      * Scatters this vector into an array of type {@code float[]},
3112      * under the control of a mask, and
3113      * using indexes obtained by adding a fixed {@code offset} to a
3114      * series of secondary offsets from an <em>index map</em>.
3115      * The index map is a contiguous sequence of {@code VLENGTH}
3116      * elements in a second array of {@code int}s, starting at a given
3117      * {@code mapOffset}.
3118      * <p>
3119      * For each vector lane, where {@code N} is the vector lane index,
3120      * if the mask lane at index {@code N} is set then
3121      * the lane element at index {@code N} is stored into the array
3122      * element {@code a[f(N)]}, where {@code f(N)} is the
3123      * index mapping expression
3124      * {@code offset + indexMap[mapOffset + N]}.
3125      *
3126      * @param a the array
3127      * @param offset an offset to combine with the index map offsets
3128      * @param indexMap the index map
3129      * @param mapOffset the offset into the index map
3130      * @param m the mask
3131      * @throws IndexOutOfBoundsException
3132      *         if {@code mapOffset+N < 0}
3133      *         or if {@code mapOffset+N >= indexMap.length},
3134      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
3135      *         is an invalid index into {@code a},
3136      *         for any lane {@code N} in the vector
3137      *         where the mask is set
3138      * @see FloatVector#toIntArray()
3139      */
3140     @ForceInline
3141     public final
3142     void intoArray(float[] a, int offset,
3143                    int[] indexMap, int mapOffset,
3144                    VectorMask<Float> m) {
3145         if (m.allTrue()) {
3146             intoArray(a, offset, indexMap, mapOffset);
3147         }
3148         else {
3149             intoArray0(a, offset, indexMap, mapOffset, m);
3150         }
3151     }
3152 
3153 
3154 
3155     /**
3156      * {@inheritDoc} <!--workaround-->
3157      */
3158     @Override
3159     @ForceInline
3160     public final
3161     void intoByteArray(byte[] a, int offset,
3162                        ByteOrder bo) {
3163         offset = checkFromIndexSize(offset, byteSize(), a.length);
3164         maybeSwap(bo).intoByteArray0(a, offset);
3165     }
3166 
3167     /**
3168      * {@inheritDoc} <!--workaround-->
3169      */
3170     @Override
3171     @ForceInline
3172     public final
3173     void intoByteArray(byte[] a, int offset,
3174                        ByteOrder bo,
3175                        VectorMask<Float> m) {
3176         if (m.allTrue()) {
3177             intoByteArray(a, offset, bo);
3178         } else {
3179             FloatSpecies vsp = vspecies();
3180             checkMaskFromIndexSize(offset, vsp, m, 4, a.length);
3181             maybeSwap(bo).intoByteArray0(a, offset, m);
3182         }
3183     }
3184 
3185     /**
3186      * {@inheritDoc} <!--workaround-->
3187      */
3188     @Override
3189     @ForceInline
3190     public final
3191     void intoByteBuffer(ByteBuffer bb, int offset,
3192                         ByteOrder bo) {
3193         if (ScopedMemoryAccess.isReadOnly(bb)) {
3194             throw new ReadOnlyBufferException();
3195         }
3196         offset = checkFromIndexSize(offset, byteSize(), bb.limit());
3197         maybeSwap(bo).intoByteBuffer0(bb, offset);
3198     }
3199 
3200     /**
3201      * {@inheritDoc} <!--workaround-->
3202      */
3203     @Override
3204     @ForceInline
3205     public final
3206     void intoByteBuffer(ByteBuffer bb, int offset,
3207                         ByteOrder bo,
3208                         VectorMask<Float> m) {
3209         if (m.allTrue()) {
3210             intoByteBuffer(bb, offset, bo);
3211         } else {
3212             if (bb.isReadOnly()) {
3213                 throw new ReadOnlyBufferException();
3214             }
3215             FloatSpecies vsp = vspecies();
3216             checkMaskFromIndexSize(offset, vsp, m, 4, bb.limit());
3217             maybeSwap(bo).intoByteBuffer0(bb, offset, m);
3218         }
3219     }
3220 
3221     // ================================================
3222 
3223     // Low-level memory operations.
3224     //
3225     // Note that all of these operations *must* inline into a context
3226     // where the exact species of the involved vector is a
3227     // compile-time constant.  Otherwise, the intrinsic generation
3228     // will fail and performance will suffer.
3229     //
3230     // In many cases this is achieved by re-deriving a version of the
3231     // method in each concrete subclass (per species).  The re-derived
3232     // method simply calls one of these generic methods, with exact
3233     // parameters for the controlling metadata, which is either a
3234     // typed vector or constant species instance.
3235 
3236     // Unchecked loading operations in native byte order.
3237     // Caller is responsible for applying index checks, masking, and
3238     // byte swapping.
3239 
    // Loads a full vector from consecutive elements of a, starting at
    // a[offset].  No index check is performed here; callers validate the
    // offset first.  Declared abstract so that each concrete species class
    // supplies its own version; presumably those overrides forward to
    // fromArray0Template -- confirm in the subclasses.
    /*package-private*/
    abstract
    FloatVector fromArray0(float[] a, int offset);
    // Shared body for fromArray0(float[], int).
    // Hands VectorSupport.load the species' vector type, element type and
    // lane count, the array plus the address computed by
    // arrayAddress(a, offset), and a lambda that expresses the same load in
    // plain Java (lane i reads arr_[off_ + i]).
    // NOTE(review): the lambda is presumably the fallback used when the call
    // is not intrinsified -- confirm against VectorSupport.load.
    @ForceInline
    final
    FloatVector fromArray0Template(float[] a, int offset) {
        FloatSpecies vsp = vspecies();
        return VectorSupport.load(
            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
            a, arrayAddress(a, offset),
            a, offset, vsp,
            (arr, off, s) -> s.ldOp(arr, off,
                                    (arr_, off_, i) -> arr_[off_ + i]));
    }
3254 
    // Masked variant of the unchecked array load: loads lanes from
    // consecutive elements of a starting at a[offset], under control of
    // mask m.  No index check is performed here.  Declared abstract so that
    // each concrete species class supplies its own version.
    /*package-private*/
    abstract
    FloatVector fromArray0(float[] a, int offset, VectorMask<Float> m);
    // Shared body for the masked fromArray0.
    // maskClass is the concrete mask class passed on to
    // VectorSupport.loadMasked next to the vector type, element type and
    // lane count.  The lambda expresses the same masked load in plain Java
    // via ldOp (lane i reads arr_[off_ + i]).
    // NOTE(review): the lambda is presumably the non-intrinsic fallback --
    // confirm against VectorSupport.loadMasked.
    @ForceInline
    final
    <M extends VectorMask<Float>>
    FloatVector fromArray0Template(Class<M> maskClass, float[] a, int offset, M m) {
        // Verify that the mask belongs to this vector's species.
        m.check(species());
        FloatSpecies vsp = vspecies();
        return VectorSupport.loadMasked(
            vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
            a, arrayAddress(a, offset), m,
            a, offset, vsp,
            (arr, off, s, vm) -> s.ldOp(arr, off, vm,
                                        (arr_, off_, i) -> arr_[off_ + i]));
    }
3271 
    // Masked gather: lane N is loaded from a[offset + indexMap[mapOffset + N]]
    // under control of mask m.  Declared abstract so that each concrete
    // species class supplies its own version.
    /*package-private*/
    abstract
    FloatVector fromArray0(float[] a, int offset,
                                    int[] indexMap, int mapOffset,
                                    VectorMask<Float> m);
    // Shared body for the masked gather.
    // Builds an int index vector from the index map, range-checks it against
    // a.length, and passes it to VectorSupport.loadWithMap together with the
    // array and ARRAY_BASE.  The lambda expresses the same gather in plain
    // Java through the masked vOp.
    // NOTE(review): the lambda is presumably the non-intrinsic fallback --
    // confirm against VectorSupport.loadWithMap.
    @ForceInline
    final
    <M extends VectorMask<Float>>
    FloatVector fromArray0Template(Class<M> maskClass, float[] a, int offset,
                                            int[] indexMap, int mapOffset, M m) {
        FloatSpecies vsp = vspecies();
        // Species of the int vector that carries the per-lane indexes.
        IntVector.IntSpecies isp = IntVector.species(vsp.indexShape());
        Objects.requireNonNull(a);
        Objects.requireNonNull(indexMap);
        // Verify that the mask belongs to this vector's species.
        m.check(vsp);
        Class<? extends FloatVector> vectorType = vsp.vectorType();

        // Index vector: vix[0:n] = k -> offset + indexMap[mapOffset + k]
        IntVector vix = IntVector
            .fromArray(isp, indexMap, mapOffset)
            .add(offset);

        // FIXME: Check index under mask controlling.
        // As written, the check below is applied to the whole index vector
        // and does not take the mask m as an argument.
        vix = VectorIntrinsics.checkIndex(vix, a.length);

        return VectorSupport.loadWithMap(
            vectorType, maskClass, float.class, vsp.laneCount(),
            isp.vectorType(),
            a, ARRAY_BASE, vix, m,
            a, offset, indexMap, mapOffset, vsp,
            (c, idx, iMap, idy, s, vm) ->
            s.vOp(vm, n -> c[idx + iMap[idy+n]]));
    }
3305 
3306 
3307 
    // Loads a full vector from the bytes of a starting at byte index offset,
    // in native byte order.  No index check or byte swapping is performed
    // here.  Declared abstract so that each concrete species class supplies
    // its own version.
    @Override
    abstract
    FloatVector fromByteArray0(byte[] a, int offset);
    // Shared body for fromByteArray0(byte[], int).
    // Passes the array and the address from byteArrayAddress(a, offset) to
    // VectorSupport.load.  The lambda expresses the same load in plain Java:
    // it wraps the array in a native-order ByteBuffer and reads lane i with
    // getFloat at byte position o + i * 4.
    // NOTE(review): the lambda is presumably the non-intrinsic fallback --
    // confirm against VectorSupport.load.
    @ForceInline
    final
    FloatVector fromByteArray0Template(byte[] a, int offset) {
        FloatSpecies vsp = vspecies();
        return VectorSupport.load(
            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
            a, byteArrayAddress(a, offset),
            a, offset, vsp,
            (arr, off, s) -> {
                ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
                return s.ldOp(wb, off,
                        (wb_, o, i) -> wb_.getFloat(o + i * 4));
            });
    }
3325 
    // Masked variant of the unchecked byte-array load, in native byte order.
    // No index check or byte swapping is performed here.  Declared abstract
    // so that each concrete species class supplies its own version.
    abstract
    FloatVector fromByteArray0(byte[] a, int offset, VectorMask<Float> m);
    // Shared body for the masked fromByteArray0.
    // maskClass is the concrete mask class passed on to
    // VectorSupport.loadMasked.  The lambda expresses the same masked load
    // in plain Java: it wraps the array in a native-order ByteBuffer and
    // reads lane i with getFloat at byte position o + i * 4.
    // NOTE(review): the lambda is presumably the non-intrinsic fallback --
    // confirm against VectorSupport.loadMasked.
    @ForceInline
    final
    <M extends VectorMask<Float>>
    FloatVector fromByteArray0Template(Class<M> maskClass, byte[] a, int offset, M m) {
        FloatSpecies vsp = vspecies();
        // Verify that the mask belongs to this vector's species.
        m.check(vsp);
        return VectorSupport.loadMasked(
            vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
            a, byteArrayAddress(a, offset), m,
            a, offset, vsp,
            (arr, off, s, vm) -> {
                ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
                return s.ldOp(wb, off, vm,
                        (wb_, o, i) -> wb_.getFloat(o + i * 4));
            });
    }
3344 
    // Loads a full vector from the bytes of bb starting at byte index offset,
    // in native byte order.  No index check or byte swapping is performed
    // here.  Declared abstract so that each concrete species class supplies
    // its own version.
    abstract
    FloatVector fromByteBuffer0(ByteBuffer bb, int offset);
    // Shared body for fromByteBuffer0(ByteBuffer, int).
    // Delegates to ScopedMemoryAccess.loadFromByteBuffer with the species'
    // vector type, element type and lane count.  The lambda expresses the
    // same load in plain Java: it obtains a native-order wrapper of the
    // buffer and reads lane i with getFloat at byte position o + i * 4.
    // NOTE(review): the lambda is presumably the non-intrinsic fallback --
    // confirm against ScopedMemoryAccess.loadFromByteBuffer.
    @ForceInline
    final
    FloatVector fromByteBuffer0Template(ByteBuffer bb, int offset) {
        FloatSpecies vsp = vspecies();
        return ScopedMemoryAccess.loadFromByteBuffer(
                vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
                bb, offset, vsp,
                (buf, off, s) -> {
                    ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
                    return s.ldOp(wb, off,
                            (wb_, o, i) -> wb_.getFloat(o + i * 4));
                });
    }
3360 
    // Masked variant of the unchecked byte-buffer load, in native byte
    // order.  No index check or byte swapping is performed here.  Declared
    // abstract so that each concrete species class supplies its own version.
    abstract
    FloatVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Float> m);
    // Shared body for the masked fromByteBuffer0.
    // maskClass is the concrete mask class passed on to
    // ScopedMemoryAccess.loadFromByteBufferMasked.  The lambda expresses the
    // same masked load in plain Java: it obtains a native-order wrapper of
    // the buffer and reads lane i with getFloat at byte position o + i * 4.
    // NOTE(review): the lambda is presumably the non-intrinsic fallback --
    // confirm against ScopedMemoryAccess.loadFromByteBufferMasked.
    @ForceInline
    final
    <M extends VectorMask<Float>>
    FloatVector fromByteBuffer0Template(Class<M> maskClass, ByteBuffer bb, int offset, M m) {
        FloatSpecies vsp = vspecies();
        // Verify that the mask belongs to this vector's species.
        m.check(vsp);
        return ScopedMemoryAccess.loadFromByteBufferMasked(
                vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
                bb, offset, m, vsp,
                (buf, off, s, vm) -> {
                    ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
                    return s.ldOp(wb, off, vm,
                            (wb_, o, i) -> wb_.getFloat(o + i * 4));
                });
    }
3378 
3379     // Unchecked storing operations in native byte order.
3380     // Caller is responsible for applying index checks, masking, and
3381     // byte swapping.
3382 
    // Stores all lanes of this vector into consecutive elements of a,
    // starting at a[offset].  No index check is performed here; callers
    // validate the offset first.  Declared abstract so that each concrete
    // species class supplies its own version.
    abstract
    void intoArray0(float[] a, int offset);
    // Shared body for intoArray0(float[], int).
    // Hands VectorSupport.store the species' vector type, element type and
    // lane count, the array plus the address computed by
    // arrayAddress(a, offset), this vector, and a lambda that expresses the
    // same store in plain Java (lane i is written to arr_[off_ + i]).
    // NOTE(review): the lambda is presumably the non-intrinsic fallback --
    // confirm against VectorSupport.store.
    @ForceInline
    final
    void intoArray0Template(float[] a, int offset) {
        FloatSpecies vsp = vspecies();
        VectorSupport.store(
            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
            a, arrayAddress(a, offset),
            this, a, offset,
            (arr, off, v)
            -> v.stOp(arr, off,
                      (arr_, off_, i, e) -> arr_[off_+i] = e));
    }
3397 
    // Masked variant of the unchecked array store: writes lanes of this
    // vector into consecutive elements of a starting at a[offset], under
    // control of mask m.  No index check is performed here.  Declared
    // abstract so that each concrete species class supplies its own version.
    abstract
    void intoArray0(float[] a, int offset, VectorMask<Float> m);
    // Shared body for the masked intoArray0.
    // maskClass is the concrete mask class passed on to
    // VectorSupport.storeMasked.  The lambda expresses the same masked store
    // in plain Java via stOp (lane i is written to arr_[off_ + i]).
    // NOTE(review): the lambda is presumably the non-intrinsic fallback --
    // confirm against VectorSupport.storeMasked.
    @ForceInline
    final
    <M extends VectorMask<Float>>
    void intoArray0Template(Class<M> maskClass, float[] a, int offset, M m) {
        // Verify that the mask belongs to this vector's species.
        m.check(species());
        FloatSpecies vsp = vspecies();
        VectorSupport.storeMasked(
            vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
            a, arrayAddress(a, offset),
            this, m, a, offset,
            (arr, off, v, vm)
            -> v.stOp(arr, off, vm,
                      (arr_, off_, i, e) -> arr_[off_ + i] = e));
    }
3414 
    // Masked scatter: lane N of this vector is stored to
    // a[offset + indexMap[mapOffset + N]] under control of mask m.
    // Declared abstract so that each concrete species class supplies its
    // own version.
    abstract
    void intoArray0(float[] a, int offset,
                    int[] indexMap, int mapOffset,
                    VectorMask<Float> m);
    // Shared body for the masked scatter.
    // Builds an int index vector from the index map, range-checks it against
    // a.length, and passes it to VectorSupport.storeWithMap together with
    // the array and the address of element 0.  The lambda expresses the same
    // scatter in plain Java through the masked stOp.
    // NOTE(review): the lambda is presumably the non-intrinsic fallback --
    // confirm against VectorSupport.storeWithMap.
    @ForceInline
    final
    <M extends VectorMask<Float>>
    void intoArray0Template(Class<M> maskClass, float[] a, int offset,
                            int[] indexMap, int mapOffset, M m) {
        // Verify that the mask belongs to this vector's species.
        m.check(species());
        FloatSpecies vsp = vspecies();
        // Species of the int vector that carries the per-lane indexes.
        IntVector.IntSpecies isp = IntVector.species(vsp.indexShape());
        // Index vector: vix[0:n] = i -> offset + indexMap[mo + i]
        IntVector vix = IntVector
            .fromArray(isp, indexMap, mapOffset)
            .add(offset);

        // FIXME: Check index under mask controlling.
        // As written, the check below is applied to the whole index vector
        // and does not take the mask m as an argument.
        vix = VectorIntrinsics.checkIndex(vix, a.length);

        VectorSupport.storeWithMap(
            vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
            isp.vectorType(),
            a, arrayAddress(a, 0), vix,
            this, m,
            a, offset, indexMap, mapOffset,
            (arr, off, v, map, mo, vm)
            -> v.stOp(arr, off, vm,
                      (arr_, off_, i, e) -> {
                          // Uses the outer lambda's arr/off rather than the
                          // inner arr_/off_ parameters.
                          int j = map[mo + i];
                          arr[off + j] = e;
                      }));
    }
3448 
3449 
    // Stores all lanes of this vector into the bytes of a starting at byte
    // index offset, in native byte order.  No index check or byte swapping
    // is performed here.  Declared abstract so that each concrete species
    // class supplies its own version.
    abstract
    void intoByteArray0(byte[] a, int offset);
    // Shared body for intoByteArray0(byte[], int).
    // Passes the array and the address from byteArrayAddress(a, offset) to
    // VectorSupport.store.  The lambda expresses the same store in plain
    // Java: it wraps the array in a native-order ByteBuffer and writes lane
    // i with putFloat at byte position o + i * 4.
    // NOTE(review): the lambda is presumably the non-intrinsic fallback --
    // confirm against VectorSupport.store.
    @ForceInline
    final
    void intoByteArray0Template(byte[] a, int offset) {
        FloatSpecies vsp = vspecies();
        VectorSupport.store(
            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
            a, byteArrayAddress(a, offset),
            this, a, offset,
            (arr, off, v) -> {
                ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
                v.stOp(wb, off,
                        (tb_, o, i, e) -> tb_.putFloat(o + i * 4, e));
            });
    }
3466 
    // Masked variant of the unchecked byte-array store, in native byte
    // order.  No index check or byte swapping is performed here.  Declared
    // abstract so that each concrete species class supplies its own version.
    abstract
    void intoByteArray0(byte[] a, int offset, VectorMask<Float> m);
    // Shared body for the masked intoByteArray0.
    // maskClass is the concrete mask class passed on to
    // VectorSupport.storeMasked.  The lambda expresses the same masked store
    // in plain Java: it wraps the array in a native-order ByteBuffer and
    // writes lane i with putFloat at byte position o + i * 4.
    // NOTE(review): the lambda is presumably the non-intrinsic fallback --
    // confirm against VectorSupport.storeMasked.
    @ForceInline
    final
    <M extends VectorMask<Float>>
    void intoByteArray0Template(Class<M> maskClass, byte[] a, int offset, M m) {
        FloatSpecies vsp = vspecies();
        // Verify that the mask belongs to this vector's species.
        m.check(vsp);
        VectorSupport.storeMasked(
            vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
            a, byteArrayAddress(a, offset),
            this, m, a, offset,
            (arr, off, v, vm) -> {
                ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
                v.stOp(wb, off, vm,
                        (tb_, o, i, e) -> tb_.putFloat(o + i * 4, e));
            });
    }
3485 
    // Stores all lanes of this vector into the bytes of bb starting at byte
    // index offset, in native byte order.  No index check, read-only check
    // or byte swapping is performed here; callers do those first.
    // Delegates to ScopedMemoryAccess.storeIntoByteBuffer.  The lambda
    // expresses the same store in plain Java: it obtains a native-order
    // wrapper of the buffer and writes lane i with putFloat at byte position
    // o + i * 4.
    // NOTE(review): the lambda is presumably the non-intrinsic fallback --
    // confirm against ScopedMemoryAccess.storeIntoByteBuffer.
    @ForceInline
    final
    void intoByteBuffer0(ByteBuffer bb, int offset) {
        FloatSpecies vsp = vspecies();
        ScopedMemoryAccess.storeIntoByteBuffer(
                vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
                this, bb, offset,
                (buf, off, v) -> {
                    ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
                    v.stOp(wb, off,
                            (wb_, o, i, e) -> wb_.putFloat(o + i * 4, e));
                });
    }
3499 
    // Masked variant of the unchecked byte-buffer store, in native byte
    // order.  No index check, read-only check or byte swapping is performed
    // here.  Declared abstract so that each concrete species class supplies
    // its own version.
    abstract
    void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Float> m);
    // Shared body for the masked intoByteBuffer0.
    // maskClass is the concrete mask class passed on to
    // ScopedMemoryAccess.storeIntoByteBufferMasked.  The lambda expresses
    // the same masked store in plain Java: it obtains a native-order wrapper
    // of the buffer and writes lane i with putFloat at byte position
    // o + i * 4.
    // NOTE(review): the lambda is presumably the non-intrinsic fallback --
    // confirm against ScopedMemoryAccess.storeIntoByteBufferMasked.
    @ForceInline
    final
    <M extends VectorMask<Float>>
    void intoByteBuffer0Template(Class<M> maskClass, ByteBuffer bb, int offset, M m) {
        FloatSpecies vsp = vspecies();
        // Verify that the mask belongs to this vector's species.
        m.check(vsp);
        ScopedMemoryAccess.storeIntoByteBufferMasked(
                vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
                this, m, bb, offset,
                (buf, off, v, vm) -> {
                    ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
                    v.stOp(wb, off, vm,
                            (wb_, o, i, e) -> wb_.putFloat(o + i * 4, e));
                });
    }
3517 
3518 
3519     // End of low-level memory operations.
3520 
3521     private static
3522     void checkMaskFromIndexSize(int offset,
3523                                 FloatSpecies vsp,
3524                                 VectorMask<Float> m,
3525                                 int scale,
3526                                 int limit) {
3527         ((AbstractMask<Float>)m)
3528             .checkIndexByLane(offset, limit, vsp.iota(), scale);
3529     }
3530 
3531     @ForceInline
3532     private void conditionalStoreNYI(int offset,
3533                                      FloatSpecies vsp,
3534                                      VectorMask<Float> m,
3535                                      int scale,
3536                                      int limit) {
3537         if (offset < 0 || offset + vsp.laneCount() * scale > limit) {
3538             String msg =
3539                 String.format("unimplemented: store @%d in [0..%d), %s in %s",
3540                               offset, limit, m, vsp);
3541             throw new AssertionError(msg);
3542         }
3543     }
3544 
3545     /*package-private*/
3546     @Override
3547     @ForceInline
3548     final
3549     FloatVector maybeSwap(ByteOrder bo) {
3550         if (bo != NATIVE_ENDIAN) {
3551             return this.reinterpretAsBytes()
3552                 .rearrange(swapBytesShuffle())
3553                 .reinterpretAsFloats();
3554         }
3555         return this;
3556     }
3557 
3558     static final int ARRAY_SHIFT =
3559         31 - Integer.numberOfLeadingZeros(Unsafe.ARRAY_FLOAT_INDEX_SCALE);
3560     static final long ARRAY_BASE =
3561         Unsafe.ARRAY_FLOAT_BASE_OFFSET;
3562 
3563     @ForceInline
3564     static long arrayAddress(float[] a, int index) {
3565         return ARRAY_BASE + (((long)index) << ARRAY_SHIFT);
3566     }
3567 
3568 
3569 
3570     @ForceInline
3571     static long byteArrayAddress(byte[] a, int index) {
3572         return Unsafe.ARRAY_BYTE_BASE_OFFSET + index;
3573     }
3574 
3575     // ================================================
3576 
3577     /// Reinterpreting view methods:
3578     //   lanewise reinterpret: viewAsXVector()
3579     //   keep shape, redraw lanes: reinterpretAsEs()
3580 
3581     /**
3582      * {@inheritDoc} <!--workaround-->
3583      */
3584     @ForceInline
3585     @Override
3586     public final ByteVector reinterpretAsBytes() {
3587          // Going to ByteVector, pay close attention to byte order.
3588          assert(REGISTER_ENDIAN == ByteOrder.LITTLE_ENDIAN);
3589          return asByteVectorRaw();
3590          //return asByteVectorRaw().rearrange(swapBytesShuffle());
3591     }
3592 
3593     /**
3594      * {@inheritDoc} <!--workaround-->
3595      */
3596     @ForceInline
3597     @Override
3598     public final IntVector viewAsIntegralLanes() {
3599         LaneType ilt = LaneType.FLOAT.asIntegral();
3600         return (IntVector) asVectorRaw(ilt);
3601     }
3602 
3603     /**
3604      * {@inheritDoc} <!--workaround-->
3605      */
3606     @ForceInline
3607     @Override
3608     public final
3609     FloatVector
3610     viewAsFloatingLanes() {
3611         return this;
3612     }
3613 
3614     // ================================================
3615 
3616     /// Object methods: toString, equals, hashCode
3617     //
    // Object methods are defined as if the corresponding Arrays method
    // (Arrays.toString, etc.) were applied to the array of elements.
    // Two equal vectors are required to have equal species and equal
    // lane values.
3621 
3622     /**
3623      * Returns a string representation of this vector, of the form
3624      * {@code "[0,1,2...]"}, reporting the lane values of this vector,
3625      * in lane order.
3626      *
3627      * The string is produced as if by a call to {@link
3628      * java.util.Arrays#toString(float[]) Arrays.toString()},
3629      * as appropriate to the {@code float} array returned by
3630      * {@link #toArray this.toArray()}.
3631      *
3632      * @return a string of the form {@code "[0,1,2...]"}
3633      * reporting the lane values of this vector
3634      */
3635     @Override
3636     @ForceInline
3637     public final
3638     String toString() {
3639         // now that toArray is strongly typed, we can define this
3640         return Arrays.toString(toArray());
3641     }
3642 
3643     /**
3644      * {@inheritDoc} <!--workaround-->
3645      */
3646     @Override
3647     @ForceInline
3648     public final
3649     boolean equals(Object obj) {
3650         if (obj instanceof Vector) {
3651             Vector<?> that = (Vector<?>) obj;
3652             if (this.species().equals(that.species())) {
3653                 return this.eq(that.check(this.species())).allTrue();
3654             }
3655         }
3656         return false;
3657     }
3658 
3659     /**
3660      * {@inheritDoc} <!--workaround-->
3661      */
3662     @Override
3663     @ForceInline
3664     public final
3665     int hashCode() {
3666         // now that toArray is strongly typed, we can define this
3667         return Objects.hash(species(), Arrays.hashCode(toArray()));
3668     }
3669 
3670     // ================================================
3671 
3672     // Species
3673 
3674     /**
3675      * Class representing {@link FloatVector}'s of the same {@link VectorShape VectorShape}.
3676      */
3677     /*package-private*/
3678     static final class FloatSpecies extends AbstractSpecies<Float> {
3679         private FloatSpecies(VectorShape shape,
3680                 Class<? extends FloatVector> vectorType,
3681                 Class<? extends AbstractMask<Float>> maskType,
3682                 Function<Object, FloatVector> vectorFactory) {
3683             super(shape, LaneType.of(float.class),
3684                   vectorType, maskType,
3685                   vectorFactory);
3686             assert(this.elementSize() == Float.SIZE);
3687         }
3688 
3689         // Specializing overrides:
3690 
3691         @Override
3692         @ForceInline
3693         public final Class<Float> elementType() {
3694             return float.class;
3695         }
3696 
3697         @Override
3698         @ForceInline
3699         final Class<Float> genericElementType() {
3700             return Float.class;
3701         }
3702 
3703         @SuppressWarnings("unchecked")
3704         @Override
3705         @ForceInline
3706         public final Class<? extends FloatVector> vectorType() {
3707             return (Class<? extends FloatVector>) vectorType;
3708         }
3709 
3710         @Override
3711         @ForceInline
3712         public final long checkValue(long e) {
3713             longToElementBits(e);  // only for exception
3714             return e;
3715         }
3716 
3717         /*package-private*/
3718         @Override
3719         @ForceInline
3720         final FloatVector broadcastBits(long bits) {
3721             return (FloatVector)
3722                 VectorSupport.fromBitsCoerced(
3723                     vectorType, float.class, laneCount,
3724                     bits, MODE_BROADCAST, this,
3725                     (bits_, s_) -> s_.rvOp(i -> bits_));
3726         }
3727 
3728         /*package-private*/
3729         @ForceInline
3730         final FloatVector broadcast(float e) {
3731             return broadcastBits(toBits(e));
3732         }
3733 
3734         @Override
3735         @ForceInline
3736         public final FloatVector broadcast(long e) {
3737             return broadcastBits(longToElementBits(e));
3738         }
3739 
3740         /*package-private*/
3741         final @Override
3742         @ForceInline
3743         long longToElementBits(long value) {
3744             // Do the conversion, and then test it for failure.
3745             float e = (float) value;
3746             if ((long) e != value) {
3747                 throw badElementBits(value, e);
3748             }
3749             return toBits(e);
3750         }
3751 
3752         /*package-private*/
3753         @ForceInline
3754         static long toIntegralChecked(float e, boolean convertToInt) {
3755             long value = convertToInt ? (int) e : (long) e;
3756             if ((float) value != e) {
3757                 throw badArrayBits(e, convertToInt, value);
3758             }
3759             return value;
3760         }
3761 
3762         /* this non-public one is for internal conversions */
3763         @Override
3764         @ForceInline
3765         final FloatVector fromIntValues(int[] values) {
3766             VectorIntrinsics.requireLength(values.length, laneCount);
3767             float[] va = new float[laneCount()];
3768             for (int i = 0; i < va.length; i++) {
3769                 int lv = values[i];
3770                 float v = (float) lv;
3771                 va[i] = v;
3772                 if ((int)v != lv) {
3773                     throw badElementBits(lv, v);
3774                 }
3775             }
3776             return dummyVector().fromArray0(va, 0);
3777         }
3778 
3779         // Virtual constructors
3780 
3781         @ForceInline
3782         @Override final
3783         public FloatVector fromArray(Object a, int offset) {
3784             // User entry point:  Be careful with inputs.
3785             return FloatVector
3786                 .fromArray(this, (float[]) a, offset);
3787         }
3788 
3789         @ForceInline
3790         @Override final
3791         FloatVector dummyVector() {
3792             return (FloatVector) super.dummyVector();
3793         }
3794 
3795         /*package-private*/
3796         final @Override
3797         @ForceInline
3798         FloatVector rvOp(RVOp f) {
3799             float[] res = new float[laneCount()];
3800             for (int i = 0; i < res.length; i++) {
3801                 int bits = (int) f.apply(i);
3802                 res[i] = fromBits(bits);
3803             }
3804             return dummyVector().vectorFactory(res);
3805         }
3806 
3807         FloatVector vOp(FVOp f) {
3808             float[] res = new float[laneCount()];
3809             for (int i = 0; i < res.length; i++) {
3810                 res[i] = f.apply(i);
3811             }
3812             return dummyVector().vectorFactory(res);
3813         }
3814 
3815         FloatVector vOp(VectorMask<Float> m, FVOp f) {
3816             float[] res = new float[laneCount()];
3817             boolean[] mbits = ((AbstractMask<Float>)m).getBits();
3818             for (int i = 0; i < res.length; i++) {
3819                 if (mbits[i]) {
3820                     res[i] = f.apply(i);
3821                 }
3822             }
3823             return dummyVector().vectorFactory(res);
3824         }
3825 
3826         /*package-private*/
3827         @ForceInline
3828         <M> FloatVector ldOp(M memory, int offset,
3829                                       FLdOp<M> f) {
3830             return dummyVector().ldOp(memory, offset, f);
3831         }
3832 
3833         /*package-private*/
3834         @ForceInline
3835         <M> FloatVector ldOp(M memory, int offset,
3836                                       VectorMask<Float> m,
3837                                       FLdOp<M> f) {
3838             return dummyVector().ldOp(memory, offset, m, f);
3839         }
3840 
3841         /*package-private*/
3842         @ForceInline
3843         <M> void stOp(M memory, int offset, FStOp<M> f) {
3844             dummyVector().stOp(memory, offset, f);
3845         }
3846 
3847         /*package-private*/
3848         @ForceInline
3849         <M> void stOp(M memory, int offset,
3850                       AbstractMask<Float> m,
3851                       FStOp<M> f) {
3852             dummyVector().stOp(memory, offset, m, f);
3853         }
3854 
3855         // N.B. Make sure these constant vectors and
3856         // masks load up correctly into registers.
3857         //
3858         // Also, see if we can avoid all that switching.
3859         // Could we cache both vectors and both masks in
3860         // this species object?
3861 
3862         // Zero and iota vector access
3863         @Override
3864         @ForceInline
3865         public final FloatVector zero() {
3866             if ((Class<?>) vectorType() == FloatMaxVector.class)
3867                 return FloatMaxVector.ZERO;
3868             switch (vectorBitSize()) {
3869                 case 64: return Float64Vector.ZERO;
3870                 case 128: return Float128Vector.ZERO;
3871                 case 256: return Float256Vector.ZERO;
3872                 case 512: return Float512Vector.ZERO;
3873             }
3874             throw new AssertionError();
3875         }
3876 
3877         @Override
3878         @ForceInline
3879         public final FloatVector iota() {
3880             if ((Class<?>) vectorType() == FloatMaxVector.class)
3881                 return FloatMaxVector.IOTA;
3882             switch (vectorBitSize()) {
3883                 case 64: return Float64Vector.IOTA;
3884                 case 128: return Float128Vector.IOTA;
3885                 case 256: return Float256Vector.IOTA;
3886                 case 512: return Float512Vector.IOTA;
3887             }
3888             throw new AssertionError();
3889         }
3890 
3891         // Mask access
3892         @Override
3893         @ForceInline
3894         public final VectorMask<Float> maskAll(boolean bit) {
3895             if ((Class<?>) vectorType() == FloatMaxVector.class)
3896                 return FloatMaxVector.FloatMaxMask.maskAll(bit);
3897             switch (vectorBitSize()) {
3898                 case 64: return Float64Vector.Float64Mask.maskAll(bit);
3899                 case 128: return Float128Vector.Float128Mask.maskAll(bit);
3900                 case 256: return Float256Vector.Float256Mask.maskAll(bit);
3901                 case 512: return Float512Vector.Float512Mask.maskAll(bit);
3902             }
3903             throw new AssertionError();
3904         }
3905     }
3906 
3907     /**
3908      * Finds a species for an element type of {@code float} and shape.
3909      *
3910      * @param s the shape
3911      * @return a species for an element type of {@code float} and shape
3912      * @throws IllegalArgumentException if no such species exists for the shape
3913      */
3914     static FloatSpecies species(VectorShape s) {
3915         Objects.requireNonNull(s);
3916         switch (s.switchKey) {
3917             case VectorShape.SK_64_BIT: return (FloatSpecies) SPECIES_64;
3918             case VectorShape.SK_128_BIT: return (FloatSpecies) SPECIES_128;
3919             case VectorShape.SK_256_BIT: return (FloatSpecies) SPECIES_256;
3920             case VectorShape.SK_512_BIT: return (FloatSpecies) SPECIES_512;
3921             case VectorShape.SK_Max_BIT: return (FloatSpecies) SPECIES_MAX;
3922             default: throw new IllegalArgumentException("Bad shape: " + s);
3923         }
3924     }
3925 
    /** Species representing {@link FloatVector}s of {@link VectorShape#S_64_BIT VectorShape.S_64_BIT}. */
    // Wired to Float64Vector as the vector class, Float64Vector.Float64Mask
    // as the mask class, and the Float64Vector constructor as the vector
    // factory.
    public static final VectorSpecies<Float> SPECIES_64
        = new FloatSpecies(VectorShape.S_64_BIT,
                            Float64Vector.class,
                            Float64Vector.Float64Mask.class,
                            Float64Vector::new);

    /** Species representing {@link FloatVector}s of {@link VectorShape#S_128_BIT VectorShape.S_128_BIT}. */
    // Wired to Float128Vector, its Float128Mask, and the Float128Vector
    // constructor as the vector factory.
    public static final VectorSpecies<Float> SPECIES_128
        = new FloatSpecies(VectorShape.S_128_BIT,
                            Float128Vector.class,
                            Float128Vector.Float128Mask.class,
                            Float128Vector::new);

    /** Species representing {@link FloatVector}s of {@link VectorShape#S_256_BIT VectorShape.S_256_BIT}. */
    // Wired to Float256Vector, its Float256Mask, and the Float256Vector
    // constructor as the vector factory.
    public static final VectorSpecies<Float> SPECIES_256
        = new FloatSpecies(VectorShape.S_256_BIT,
                            Float256Vector.class,
                            Float256Vector.Float256Mask.class,
                            Float256Vector::new);

    /** Species representing {@link FloatVector}s of {@link VectorShape#S_512_BIT VectorShape.S_512_BIT}. */
    // Wired to Float512Vector, its Float512Mask, and the Float512Vector
    // constructor as the vector factory.
    public static final VectorSpecies<Float> SPECIES_512
        = new FloatSpecies(VectorShape.S_512_BIT,
                            Float512Vector.class,
                            Float512Vector.Float512Mask.class,
                            Float512Vector::new);

    /** Species representing {@link FloatVector}s of {@link VectorShape#S_Max_BIT VectorShape.S_Max_BIT}. */
    // Wired to FloatMaxVector, its FloatMaxMask, and the FloatMaxVector
    // constructor as the vector factory.
    public static final VectorSpecies<Float> SPECIES_MAX
        = new FloatSpecies(VectorShape.S_Max_BIT,
                            FloatMaxVector.class,
                            FloatMaxVector.FloatMaxMask.class,
                            FloatMaxVector::new);

    /**
     * Preferred species for {@link FloatVector}s.
     * A preferred species is a species of maximal bit-size for the platform.
     */
    // Obtained from VectorSpecies.ofPreferred(float.class); the cast asserts
    // that the result is a FloatSpecies.
    public static final VectorSpecies<Float> SPECIES_PREFERRED
        = (FloatSpecies) VectorSpecies.ofPreferred(float.class);
3967 }