1 /*
   2  * Copyright (c) 2017, 2021, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 package jdk.incubator.vector;
  26 
  27 import java.nio.ByteBuffer;
  28 import java.nio.ByteOrder;
  29 import java.nio.ReadOnlyBufferException;
  30 import java.util.Arrays;
  31 import java.util.Objects;
  32 import java.util.function.BinaryOperator;
  33 import java.util.function.Function;
  34 import java.util.function.UnaryOperator;
  35 
  36 import jdk.internal.misc.ScopedMemoryAccess;
  37 import jdk.internal.misc.Unsafe;
  38 import jdk.internal.vm.annotation.ForceInline;
  39 import jdk.internal.vm.vector.VectorSupport;
  40 
  41 import static jdk.internal.vm.vector.VectorSupport.*;
  42 import static jdk.incubator.vector.VectorIntrinsics.*;
  43 
  44 import static jdk.incubator.vector.VectorOperators.*;
  45 
  46 // -- This file was mechanically generated: Do not edit! -- //
  47 
  48 /**
  49  * A specialized {@link Vector} representing an ordered immutable sequence of
  50  * {@code float} values.
  51  */
  52 @SuppressWarnings("cast")  // warning: redundant cast
  53 public abstract class FloatVector extends AbstractVector<Float> {
  54 
    /**
     * Passes the given array straight to the {@code AbstractVector}
     * constructor; this constructor itself makes no copy.
     *
     * @param vec the array handed to the superclass constructor
     */
    FloatVector(float[] vec) {
        super(vec);
    }
  58 
    // Operator-kind bits supplied as the "forbid" argument by both opCode()
    // overloads of this class.
    // NOTE(review): presumably VO_NOFP marks operators that are not defined
    // for floating-point lanes -- confirm against VectorOperators.
    static final int FORBID_OPCODE_KIND = VO_NOFP;
  60 
  61     @ForceInline
  62     static int opCode(Operator op) {
  63         return VectorOperators.opCode(op, VO_OPCODE_VALID, FORBID_OPCODE_KIND);
  64     }
  65     @ForceInline
  66     static int opCode(Operator op, int requireKind) {
  67         requireKind |= VO_OPCODE_VALID;
  68         return VectorOperators.opCode(op, requireKind, FORBID_OPCODE_KIND);
  69     }
  70     @ForceInline
  71     static boolean opKind(Operator op, int bit) {
  72         return VectorOperators.opKind(op, bit);
  73     }
  74 
  75     // Virtualized factories and operators,
  76     // coded with portable definitions.
  77     // These are all @ForceInline in case
  78     // they need to be used performantly.
  79     // The various shape-specific subclasses
  80     // also specialize them by wrapping
  81     // them in a call like this:
  82     //    return (Byte128Vector)
  83     //       super.bOp((Byte128Vector) o);
  84     // The purpose of that is to forcibly inline
  85     // the generic definition from this file
  86     // into a sharply type- and size-specific
  87     // wrapper in the subclass file, so that
  88     // the JIT can specialize the code.
  89     // The code is only inlined and expanded
  90     // if it gets hot.  Think of it as a cheap
  91     // and lazy version of C++ templates.
  92 
  93     // Virtualized getter
  94 
    /*package-private*/
    // Returns the lane values of this vector as a float[].  The template
    // methods in this class read their input lanes through this accessor.
    // Declared abstract here, so the implementation comes from subclasses.
    abstract float[] vec();
  97 
  98     // Virtualized constructors
  99 
    /**
     * Build a vector directly using my own constructor.
     * It is an error if the array is aliased elsewhere.
     *
     * @param vec the lane values for the new vector; per the note above
     *        the caller must not keep another reference to this array
     * @return a vector built from {@code vec}
     */
    /*package-private*/
    abstract FloatVector vectorFactory(float[] vec);
 106 
 107     /**
 108      * Build a mask directly using my species.
 109      * It is an error if the array is aliased elsewhere.
 110      */
 111     /*package-private*/
 112     @ForceInline
 113     final
 114     AbstractMask<Float> maskFactory(boolean[] bits) {
 115         return vspecies().maskFactory(bits);
 116     }
 117 
    // Constant loader (takes dummy as vector arg)
    /**
     * Per-lane value supplier: given a lane index, yields the value for
     * that lane.  The {@code vOp} methods call it once per lane index
     * (the masked form only for lanes whose mask bit is set).
     */
    interface FVOp {
        /** Returns the value for lane {@code i}. */
        float apply(int i);
    }
 122 
 123     /*package-private*/
 124     @ForceInline
 125     final
 126     FloatVector vOp(FVOp f) {
 127         float[] res = new float[length()];
 128         for (int i = 0; i < res.length; i++) {
 129             res[i] = f.apply(i);
 130         }
 131         return vectorFactory(res);
 132     }
 133 
 134     @ForceInline
 135     final
 136     FloatVector vOp(VectorMask<Float> m, FVOp f) {
 137         float[] res = new float[length()];
 138         boolean[] mbits = ((AbstractMask<Float>)m).getBits();
 139         for (int i = 0; i < res.length; i++) {
 140             if (mbits[i]) {
 141                 res[i] = f.apply(i);
 142             }
 143         }
 144         return vectorFactory(res);
 145     }
 146 
 147     // Unary operator
 148 
    /*package-private*/
    /**
     * Per-lane unary function.  {@code uOpTemplate} invokes it with the
     * lane index and the current value of that lane.
     */
    interface FUnOp {
        /** Computes the result for lane {@code i} from its value {@code a}. */
        float apply(int i, float a);
    }
 153 
 154     /*package-private*/
 155     abstract
 156     FloatVector uOp(FUnOp f);
 157     @ForceInline
 158     final
 159     FloatVector uOpTemplate(FUnOp f) {
 160         float[] vec = vec();
 161         float[] res = new float[length()];
 162         for (int i = 0; i < res.length; i++) {
 163             res[i] = f.apply(i, vec[i]);
 164         }
 165         return vectorFactory(res);
 166     }
 167 
 168     /*package-private*/
 169     abstract
 170     FloatVector uOp(VectorMask<Float> m,
 171                              FUnOp f);
 172     @ForceInline
 173     final
 174     FloatVector uOpTemplate(VectorMask<Float> m,
 175                                      FUnOp f) {
 176         float[] vec = vec();
 177         float[] res = new float[length()];
 178         boolean[] mbits = ((AbstractMask<Float>)m).getBits();
 179         for (int i = 0; i < res.length; i++) {
 180             res[i] = mbits[i] ? f.apply(i, vec[i]) : vec[i];
 181         }
 182         return vectorFactory(res);
 183     }
 184 
 185     // Binary operator
 186 
    /*package-private*/
    /**
     * Per-lane binary function.  {@code bOpTemplate} invokes it with the
     * lane index and the two input lane values; {@code rOpTemplate}
     * invokes it with the lane index, the running value and the lane value.
     */
    interface FBinOp {
        /** Combines {@code a} and {@code b} for lane {@code i}. */
        float apply(int i, float a, float b);
    }
 191 
 192     /*package-private*/
 193     abstract
 194     FloatVector bOp(Vector<Float> o,
 195                              FBinOp f);
 196     @ForceInline
 197     final
 198     FloatVector bOpTemplate(Vector<Float> o,
 199                                      FBinOp f) {
 200         float[] res = new float[length()];
 201         float[] vec1 = this.vec();
 202         float[] vec2 = ((FloatVector)o).vec();
 203         for (int i = 0; i < res.length; i++) {
 204             res[i] = f.apply(i, vec1[i], vec2[i]);
 205         }
 206         return vectorFactory(res);
 207     }
 208 
 209     /*package-private*/
 210     abstract
 211     FloatVector bOp(Vector<Float> o,
 212                              VectorMask<Float> m,
 213                              FBinOp f);
 214     @ForceInline
 215     final
 216     FloatVector bOpTemplate(Vector<Float> o,
 217                                      VectorMask<Float> m,
 218                                      FBinOp f) {
 219         float[] res = new float[length()];
 220         float[] vec1 = this.vec();
 221         float[] vec2 = ((FloatVector)o).vec();
 222         boolean[] mbits = ((AbstractMask<Float>)m).getBits();
 223         for (int i = 0; i < res.length; i++) {
 224             res[i] = mbits[i] ? f.apply(i, vec1[i], vec2[i]) : vec1[i];
 225         }
 226         return vectorFactory(res);
 227     }
 228 
 229     // Ternary operator
 230 
    /*package-private*/
    /**
     * Per-lane ternary function.  {@code tOpTemplate} invokes it with the
     * lane index and the three input lane values.
     */
    interface FTriOp {
        /** Combines {@code a}, {@code b} and {@code c} for lane {@code i}. */
        float apply(int i, float a, float b, float c);
    }
 235 
 236     /*package-private*/
 237     abstract
 238     FloatVector tOp(Vector<Float> o1,
 239                              Vector<Float> o2,
 240                              FTriOp f);
 241     @ForceInline
 242     final
 243     FloatVector tOpTemplate(Vector<Float> o1,
 244                                      Vector<Float> o2,
 245                                      FTriOp f) {
 246         float[] res = new float[length()];
 247         float[] vec1 = this.vec();
 248         float[] vec2 = ((FloatVector)o1).vec();
 249         float[] vec3 = ((FloatVector)o2).vec();
 250         for (int i = 0; i < res.length; i++) {
 251             res[i] = f.apply(i, vec1[i], vec2[i], vec3[i]);
 252         }
 253         return vectorFactory(res);
 254     }
 255 
 256     /*package-private*/
 257     abstract
 258     FloatVector tOp(Vector<Float> o1,
 259                              Vector<Float> o2,
 260                              VectorMask<Float> m,
 261                              FTriOp f);
 262     @ForceInline
 263     final
 264     FloatVector tOpTemplate(Vector<Float> o1,
 265                                      Vector<Float> o2,
 266                                      VectorMask<Float> m,
 267                                      FTriOp f) {
 268         float[] res = new float[length()];
 269         float[] vec1 = this.vec();
 270         float[] vec2 = ((FloatVector)o1).vec();
 271         float[] vec3 = ((FloatVector)o2).vec();
 272         boolean[] mbits = ((AbstractMask<Float>)m).getBits();
 273         for (int i = 0; i < res.length; i++) {
 274             res[i] = mbits[i] ? f.apply(i, vec1[i], vec2[i], vec3[i]) : vec1[i];
 275         }
 276         return vectorFactory(res);
 277     }
 278 
 279     // Reduction operator
 280 
 281     /*package-private*/
 282     abstract
 283     float rOp(float v, FBinOp f);
 284     @ForceInline
 285     final
 286     float rOpTemplate(float v, FBinOp f) {
 287         float[] vec = vec();
 288         for (int i = 0; i < vec.length; i++) {
 289             v = f.apply(i, v, vec[i]);
 290         }
 291         return v;
 292     }
 293 
 294     // Memory reference
 295 
    /*package-private*/
    /**
     * Per-lane load function over a memory object of type {@code M}.
     * The {@code ldOp} methods pass {@code memory} and {@code offset}
     * through unchanged and supply the lane index as {@code i}.
     */
    interface FLdOp<M> {
        /** Reads the value for lane {@code i} from {@code memory}. */
        float apply(M memory, int offset, int i);
    }
 300 
 301     /*package-private*/
 302     @ForceInline
 303     final
 304     <M> FloatVector ldOp(M memory, int offset,
 305                                   FLdOp<M> f) {
 306         //dummy; no vec = vec();
 307         float[] res = new float[length()];
 308         for (int i = 0; i < res.length; i++) {
 309             res[i] = f.apply(memory, offset, i);
 310         }
 311         return vectorFactory(res);
 312     }
 313 
 314     /*package-private*/
 315     @ForceInline
 316     final
 317     <M> FloatVector ldOp(M memory, int offset,
 318                                   VectorMask<Float> m,
 319                                   FLdOp<M> f) {
 320         //float[] vec = vec();
 321         float[] res = new float[length()];
 322         boolean[] mbits = ((AbstractMask<Float>)m).getBits();
 323         for (int i = 0; i < res.length; i++) {
 324             if (mbits[i]) {
 325                 res[i] = f.apply(memory, offset, i);
 326             }
 327         }
 328         return vectorFactory(res);
 329     }
 330 
    /**
     * Per-lane store function over a memory object of type {@code M}.
     * The {@code stOp} methods pass {@code memory} and {@code offset}
     * through unchanged and supply the lane index and the lane value.
     */
    interface FStOp<M> {
        /** Writes the value {@code a} of lane {@code i} to {@code memory}. */
        void apply(M memory, int offset, int i, float a);
    }
 334 
 335     /*package-private*/
 336     @ForceInline
 337     final
 338     <M> void stOp(M memory, int offset,
 339                   FStOp<M> f) {
 340         float[] vec = vec();
 341         for (int i = 0; i < vec.length; i++) {
 342             f.apply(memory, offset, i, vec[i]);
 343         }
 344     }
 345 
 346     /*package-private*/
 347     @ForceInline
 348     final
 349     <M> void stOp(M memory, int offset,
 350                   VectorMask<Float> m,
 351                   FStOp<M> f) {
 352         float[] vec = vec();
 353         boolean[] mbits = ((AbstractMask<Float>)m).getBits();
 354         for (int i = 0; i < vec.length; i++) {
 355             if (mbits[i]) {
 356                 f.apply(memory, offset, i, vec[i]);
 357             }
 358         }
 359     }
 360 
 361     // Binary test
 362 
    /*package-private*/
    /**
     * Per-lane binary predicate.  {@code bTest} passes its {@code cond}
     * argument through unchanged, together with the lane index and the
     * two lane values being compared.
     */
    interface FBinTest {
        /** Tests lane {@code i} of the two operands under {@code cond}. */
        boolean apply(int cond, int i, float a, float b);
    }
 367 
 368     /*package-private*/
 369     @ForceInline
 370     final
 371     AbstractMask<Float> bTest(int cond,
 372                                   Vector<Float> o,
 373                                   FBinTest f) {
 374         float[] vec1 = vec();
 375         float[] vec2 = ((FloatVector)o).vec();
 376         boolean[] bits = new boolean[length()];
 377         for (int i = 0; i < length(); i++){
 378             bits[i] = f.apply(cond, i, vec1[i], vec2[i]);
 379         }
 380         return maskFactory(bits);
 381     }
 382 
 383 
    /*package-private*/
    // Species accessor narrowed to FloatSpecies.  Within this class it is
    // used to obtain the mask factory, to broadcast scalars and to check
    // long values.
    @Override
    abstract FloatSpecies vspecies();
 387 
 388     /*package-private*/
 389     @ForceInline
 390     static long toBits(float e) {
 391         return  Float.floatToRawIntBits(e);
 392     }
 393 
 394     /*package-private*/
 395     @ForceInline
 396     static float fromBits(long bits) {
 397         return Float.intBitsToFloat((int)bits);
 398     }
 399 
 400     // Static factories (other than memory operations)
 401 
 402     // Note: A surprising behavior in javadoc
 403     // sometimes makes a lone /** {@inheritDoc} */
 404     // comment drop the method altogether,
    // apparently if the method mentions a
    // parameter or return type of Vector<Float>
 407     // instead of Vector<E> as originally specified.
 408     // Adding an empty HTML fragment appears to
 409     // nudge javadoc into providing the desired
 410     // inherited documentation.  We use the HTML
 411     // comment <!--workaround--> for this.
 412 
 413     /**
 414      * Returns a vector of the given species
 415      * where all lane elements are set to
 416      * zero, the default primitive value.
 417      *
 418      * @param species species of the desired zero vector
 419      * @return a zero vector
 420      */
 421     @ForceInline
 422     public static FloatVector zero(VectorSpecies<Float> species) {
 423         FloatSpecies vsp = (FloatSpecies) species;
 424         return VectorSupport.broadcastCoerced(vsp.vectorType(), float.class, species.length(),
 425                         toBits(0.0f), vsp,
 426                         ((bits_, s_) -> s_.rvOp(i -> bits_)));
 427     }
 428 
    /**
     * Returns a vector of the same species as this one
     * where all lane elements are set to
     * the primitive value {@code e}.
     *
     * <p> The contents of the current vector are discarded;
     * only the species is relevant to this operation.
     *
     * <p> This method returns the value of this expression:
     * {@code FloatVector.broadcast(this.species(), e)}.
     *
     * @apiNote
     * Unlike the similar method named {@code broadcast()}
     * in the supertype {@code Vector}, this method does not
     * need to validate its argument, and cannot throw
     * {@code IllegalArgumentException}.  This method is
     * therefore preferable to the supertype method.
     *
     * @param e the value to broadcast
     * @return a vector where all lane elements are set to
     *         the primitive value {@code e}
     * @see #broadcast(VectorSpecies,long)
     * @see Vector#broadcast(long)
     * @see VectorSpecies#broadcast(long)
     */
    public abstract FloatVector broadcast(float e);
 455 
 456     /**
 457      * Returns a vector of the given species
 458      * where all lane elements are set to
 459      * the primitive value {@code e}.
 460      *
 461      * @param species species of the desired vector
 462      * @param e the value to broadcast
 463      * @return a vector where all lane elements are set to
 464      *         the primitive value {@code e}
 465      * @see #broadcast(long)
 466      * @see Vector#broadcast(long)
 467      * @see VectorSpecies#broadcast(long)
 468      */
 469     @ForceInline
 470     public static FloatVector broadcast(VectorSpecies<Float> species, float e) {
 471         FloatSpecies vsp = (FloatSpecies) species;
 472         return vsp.broadcast(e);
 473     }
 474 
 475     /*package-private*/
 476     @ForceInline
 477     final FloatVector broadcastTemplate(float e) {
 478         FloatSpecies vsp = vspecies();
 479         return vsp.broadcast(e);
 480     }
 481 
    /**
     * {@inheritDoc} <!--workaround-->
     * @apiNote
     * When working with vector subtypes like {@code FloatVector},
     * {@linkplain #broadcast(float) the more strongly typed method}
     * is typically selected.  It can be explicitly selected
     * using a cast: {@code v.broadcast((float)e)}.
     * The two expressions will produce numerically identical results.
     */
    // Declared abstract here.  broadcastTemplate(long) in this class holds
    // the shared definition, which forwards to vspecies().broadcast(e).
    @Override
    public abstract FloatVector broadcast(long e);
 493 
 494     /**
 495      * Returns a vector of the given species
 496      * where all lane elements are set to
 497      * the primitive value {@code e}.
 498      *
 499      * The {@code long} value must be accurately representable
 500      * by the {@code ETYPE} of the vector species, so that
 501      * {@code e==(long)(ETYPE)e}.
 502      *
 503      * @param species species of the desired vector
 504      * @param e the value to broadcast
 505      * @return a vector where all lane elements are set to
 506      *         the primitive value {@code e}
 507      * @throws IllegalArgumentException
 508      *         if the given {@code long} value cannot
 509      *         be represented by the vector's {@code ETYPE}
 510      * @see #broadcast(VectorSpecies,float)
 511      * @see VectorSpecies#checkValue(long)
 512      */
 513     @ForceInline
 514     public static FloatVector broadcast(VectorSpecies<Float> species, long e) {
 515         FloatSpecies vsp = (FloatSpecies) species;
 516         return vsp.broadcast(e);
 517     }
 518 
 519     /*package-private*/
 520     @ForceInline
 521     final FloatVector broadcastTemplate(long e) {
 522         return vspecies().broadcast(e);
 523     }
 524 
 525     // Unary lanewise support
 526 
    /**
     * {@inheritDoc} <!--workaround-->
     */
    // Declared abstract here.  lanewiseTemplate(VectorOperators.Unary) in
    // this class holds the shared definition.
    public abstract
    FloatVector lanewise(VectorOperators.Unary op);
 532 
 533     @ForceInline
 534     final
 535     FloatVector lanewiseTemplate(VectorOperators.Unary op) {
 536         if (opKind(op, VO_SPECIAL)) {
 537             if (op == ZOMO) {
 538                 return blend(broadcast(-1), compare(NE, 0));
 539             }
 540         }
 541         int opc = opCode(op);
 542         return VectorSupport.unaryOp(
 543             opc, getClass(), float.class, length(),
 544             this,
 545             UN_IMPL.find(op, opc, (opc_) -> {
 546               switch (opc_) {
 547                 case VECTOR_OP_NEG: return v0 ->
 548                         v0.uOp((i, a) -> (float) -a);
 549                 case VECTOR_OP_ABS: return v0 ->
 550                         v0.uOp((i, a) -> (float) Math.abs(a));
 551                 case VECTOR_OP_SIN: return v0 ->
 552                         v0.uOp((i, a) -> (float) Math.sin(a));
 553                 case VECTOR_OP_COS: return v0 ->
 554                         v0.uOp((i, a) -> (float) Math.cos(a));
 555                 case VECTOR_OP_TAN: return v0 ->
 556                         v0.uOp((i, a) -> (float) Math.tan(a));
 557                 case VECTOR_OP_ASIN: return v0 ->
 558                         v0.uOp((i, a) -> (float) Math.asin(a));
 559                 case VECTOR_OP_ACOS: return v0 ->
 560                         v0.uOp((i, a) -> (float) Math.acos(a));
 561                 case VECTOR_OP_ATAN: return v0 ->
 562                         v0.uOp((i, a) -> (float) Math.atan(a));
 563                 case VECTOR_OP_EXP: return v0 ->
 564                         v0.uOp((i, a) -> (float) Math.exp(a));
 565                 case VECTOR_OP_LOG: return v0 ->
 566                         v0.uOp((i, a) -> (float) Math.log(a));
 567                 case VECTOR_OP_LOG10: return v0 ->
 568                         v0.uOp((i, a) -> (float) Math.log10(a));
 569                 case VECTOR_OP_SQRT: return v0 ->
 570                         v0.uOp((i, a) -> (float) Math.sqrt(a));
 571                 case VECTOR_OP_CBRT: return v0 ->
 572                         v0.uOp((i, a) -> (float) Math.cbrt(a));
 573                 case VECTOR_OP_SINH: return v0 ->
 574                         v0.uOp((i, a) -> (float) Math.sinh(a));
 575                 case VECTOR_OP_COSH: return v0 ->
 576                         v0.uOp((i, a) -> (float) Math.cosh(a));
 577                 case VECTOR_OP_TANH: return v0 ->
 578                         v0.uOp((i, a) -> (float) Math.tanh(a));
 579                 case VECTOR_OP_EXPM1: return v0 ->
 580                         v0.uOp((i, a) -> (float) Math.expm1(a));
 581                 case VECTOR_OP_LOG1P: return v0 ->
 582                         v0.uOp((i, a) -> (float) Math.log1p(a));
 583                 default: return null;
 584               }}));
 585     }
 586     private static final
 587     ImplCache<Unary,UnaryOperator<FloatVector>> UN_IMPL
 588         = new ImplCache<>(Unary.class, FloatVector.class);
 589 
 590     /**
 591      * {@inheritDoc} <!--workaround-->
 592      */
 593     @ForceInline
 594     public final
 595     FloatVector lanewise(VectorOperators.Unary op,
 596                                   VectorMask<Float> m) {
 597         return blend(lanewise(op), m);
 598     }
 599 
 600     // Binary lanewise support
 601 
    /**
     * {@inheritDoc} <!--workaround-->
     * @see #lanewise(VectorOperators.Binary,float)
     * @see #lanewise(VectorOperators.Binary,float,VectorMask)
     */
    // Declared abstract here.  lanewiseTemplate(VectorOperators.Binary,
    // Vector) in this class holds the shared definition.
    @Override
    public abstract
    FloatVector lanewise(VectorOperators.Binary op,
                                  Vector<Float> v);
 611     @ForceInline
 612     final
 613     FloatVector lanewiseTemplate(VectorOperators.Binary op,
 614                                           Vector<Float> v) {
 615         FloatVector that = (FloatVector) v;
 616         that.check(this);
 617         if (opKind(op, VO_SPECIAL )) {
 618             if (op == FIRST_NONZERO) {
 619                 // FIXME: Support this in the JIT.
 620                 VectorMask<Integer> thisNZ
 621                     = this.viewAsIntegralLanes().compare(NE, (int) 0);
 622                 that = that.blend((float) 0, thisNZ.cast(vspecies()));
 623                 op = OR_UNCHECKED;
 624                 // FIXME: Support OR_UNCHECKED on float/double also!
 625                 return this.viewAsIntegralLanes()
 626                     .lanewise(op, that.viewAsIntegralLanes())
 627                     .viewAsFloatingLanes();
 628             }
 629         }
 630         int opc = opCode(op);
 631         return VectorSupport.binaryOp(
 632             opc, getClass(), float.class, length(),
 633             this, that,
 634             BIN_IMPL.find(op, opc, (opc_) -> {
 635               switch (opc_) {
 636                 case VECTOR_OP_ADD: return (v0, v1) ->
 637                         v0.bOp(v1, (i, a, b) -> (float)(a + b));
 638                 case VECTOR_OP_SUB: return (v0, v1) ->
 639                         v0.bOp(v1, (i, a, b) -> (float)(a - b));
 640                 case VECTOR_OP_MUL: return (v0, v1) ->
 641                         v0.bOp(v1, (i, a, b) -> (float)(a * b));
 642                 case VECTOR_OP_DIV: return (v0, v1) ->
 643                         v0.bOp(v1, (i, a, b) -> (float)(a / b));
 644                 case VECTOR_OP_MAX: return (v0, v1) ->
 645                         v0.bOp(v1, (i, a, b) -> (float)Math.max(a, b));
 646                 case VECTOR_OP_MIN: return (v0, v1) ->
 647                         v0.bOp(v1, (i, a, b) -> (float)Math.min(a, b));
 648                 case VECTOR_OP_ATAN2: return (v0, v1) ->
 649                         v0.bOp(v1, (i, a, b) -> (float) Math.atan2(a, b));
 650                 case VECTOR_OP_POW: return (v0, v1) ->
 651                         v0.bOp(v1, (i, a, b) -> (float) Math.pow(a, b));
 652                 case VECTOR_OP_HYPOT: return (v0, v1) ->
 653                         v0.bOp(v1, (i, a, b) -> (float) Math.hypot(a, b));
 654                 default: return null;
 655                 }}));
 656     }
 657     private static final
 658     ImplCache<Binary,BinaryOperator<FloatVector>> BIN_IMPL
 659         = new ImplCache<>(Binary.class, FloatVector.class);
 660 
 661     /**
 662      * {@inheritDoc} <!--workaround-->
 663      * @see #lanewise(VectorOperators.Binary,float,VectorMask)
 664      */
 665     @ForceInline
 666     public final
 667     FloatVector lanewise(VectorOperators.Binary op,
 668                                   Vector<Float> v,
 669                                   VectorMask<Float> m) {
 670         return blend(lanewise(op, v), m);
 671     }
 672     // FIXME: Maybe all of the public final methods in this file (the
 673     // simple ones that just call lanewise) should be pushed down to
 674     // the X-VectorBits template.  They can't optimize properly at
 675     // this level, and must rely on inlining.  Does it work?
 676     // (If it works, of course keep the code here.)
 677 
 678     /**
 679      * Combines the lane values of this vector
 680      * with the value of a broadcast scalar.
 681      *
 682      * This is a lane-wise binary operation which applies
 683      * the selected operation to each lane.
 684      * The return value will be equal to this expression:
 685      * {@code this.lanewise(op, this.broadcast(e))}.
 686      *
 687      * @param op the operation used to process lane values
 688      * @param e the input scalar
 689      * @return the result of applying the operation lane-wise
 690      *         to the two input vectors
 691      * @throws UnsupportedOperationException if this vector does
 692      *         not support the requested operation
 693      * @see #lanewise(VectorOperators.Binary,Vector)
 694      * @see #lanewise(VectorOperators.Binary,float,VectorMask)
 695      */
 696     @ForceInline
 697     public final
 698     FloatVector lanewise(VectorOperators.Binary op,
 699                                   float e) {
 700         return lanewise(op, broadcast(e));
 701     }
 702 
 703     /**
 704      * Combines the lane values of this vector
 705      * with the value of a broadcast scalar,
 706      * with selection of lane elements controlled by a mask.
 707      *
 708      * This is a masked lane-wise binary operation which applies
 709      * the selected operation to each lane.
 710      * The return value will be equal to this expression:
 711      * {@code this.lanewise(op, this.broadcast(e), m)}.
 712      *
 713      * @param op the operation used to process lane values
 714      * @param e the input scalar
 715      * @param m the mask controlling lane selection
 716      * @return the result of applying the operation lane-wise
 717      *         to the input vector and the scalar
 718      * @throws UnsupportedOperationException if this vector does
 719      *         not support the requested operation
 720      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
 721      * @see #lanewise(VectorOperators.Binary,float)
 722      */
 723     @ForceInline
 724     public final
 725     FloatVector lanewise(VectorOperators.Binary op,
 726                                   float e,
 727                                   VectorMask<Float> m) {
 728         return blend(lanewise(op, e), m);
 729     }
 730 
 731     /**
 732      * {@inheritDoc} <!--workaround-->
 733      * @apiNote
 734      * When working with vector subtypes like {@code FloatVector},
 735      * {@linkplain #lanewise(VectorOperators.Binary,float)
 736      * the more strongly typed method}
 737      * is typically selected.  It can be explicitly selected
 738      * using a cast: {@code v.lanewise(op,(float)e)}.
 739      * The two expressions will produce numerically identical results.
 740      */
 741     @ForceInline
 742     public final
 743     FloatVector lanewise(VectorOperators.Binary op,
 744                                   long e) {
 745         float e1 = (float) e;
 746         if ((long)e1 != e
 747             ) {
 748             vspecies().checkValue(e);  // for exception
 749         }
 750         return lanewise(op, e1);
 751     }
 752 
 753     /**
 754      * {@inheritDoc} <!--workaround-->
 755      * @apiNote
 756      * When working with vector subtypes like {@code FloatVector},
 757      * {@linkplain #lanewise(VectorOperators.Binary,float,VectorMask)
 758      * the more strongly typed method}
 759      * is typically selected.  It can be explicitly selected
 760      * using a cast: {@code v.lanewise(op,(float)e,m)}.
 761      * The two expressions will produce numerically identical results.
 762      */
 763     @ForceInline
 764     public final
 765     FloatVector lanewise(VectorOperators.Binary op,
 766                                   long e, VectorMask<Float> m) {
 767         return blend(lanewise(op, e), m);
 768     }
 769 
 770 
 771     // Ternary lanewise support
 772 
 773     // Ternary operators come in eight variations:
 774     //   lanewise(op, [broadcast(e1)|v1], [broadcast(e2)|v2])
 775     //   lanewise(op, [broadcast(e1)|v1], [broadcast(e2)|v2], mask)
 776 
 777     // It is annoying to support all of these variations of masking
 778     // and broadcast, but it would be more surprising not to continue
 779     // the obvious pattern started by unary and binary.
 780 
    /**
     * {@inheritDoc} <!--workaround-->
     * @see #lanewise(VectorOperators.Ternary,float,float,VectorMask)
     * @see #lanewise(VectorOperators.Ternary,Vector,float,VectorMask)
     * @see #lanewise(VectorOperators.Ternary,float,Vector,VectorMask)
     * @see #lanewise(VectorOperators.Ternary,float,float)
     * @see #lanewise(VectorOperators.Ternary,Vector,float)
     * @see #lanewise(VectorOperators.Ternary,float,Vector)
     */
    // Declared abstract here.  lanewiseTemplate(VectorOperators.Ternary,
    // Vector, Vector) in this class holds the shared definition.
    @Override
    public abstract
    FloatVector lanewise(VectorOperators.Ternary op,
                                                  Vector<Float> v1,
                                                  Vector<Float> v2);
 795     @ForceInline
 796     final
 797     FloatVector lanewiseTemplate(VectorOperators.Ternary op,
 798                                           Vector<Float> v1,
 799                                           Vector<Float> v2) {
 800         FloatVector that = (FloatVector) v1;
 801         FloatVector tother = (FloatVector) v2;
 802         // It's a word: https://www.dictionary.com/browse/tother
 803         // See also Chapter 11 of Dickens, Our Mutual Friend:
 804         // "Totherest Governor," replied Mr Riderhood...
 805         that.check(this);
 806         tother.check(this);
 807         int opc = opCode(op);
 808         return VectorSupport.ternaryOp(
 809             opc, getClass(), float.class, length(),
 810             this, that, tother,
 811             TERN_IMPL.find(op, opc, (opc_) -> {
 812               switch (opc_) {
 813                 case VECTOR_OP_FMA: return (v0, v1_, v2_) ->
 814                         v0.tOp(v1_, v2_, (i, a, b, c) -> Math.fma(a, b, c));
 815                 default: return null;
 816                 }}));
 817     }
 818     private static final
 819     ImplCache<Ternary,TernaryOperation<FloatVector>> TERN_IMPL
 820         = new ImplCache<>(Ternary.class, FloatVector.class);
 821 
 822     /**
 823      * {@inheritDoc} <!--workaround-->
 824      * @see #lanewise(VectorOperators.Ternary,float,float,VectorMask)
 825      * @see #lanewise(VectorOperators.Ternary,Vector,float,VectorMask)
 826      * @see #lanewise(VectorOperators.Ternary,float,Vector,VectorMask)
 827      */
 828     @ForceInline
 829     public final
 830     FloatVector lanewise(VectorOperators.Ternary op,
 831                                   Vector<Float> v1,
 832                                   Vector<Float> v2,
 833                                   VectorMask<Float> m) {
 834         return blend(lanewise(op, v1, v2), m);
 835     }
 836 
 837     /**
 838      * Combines the lane values of this vector
 839      * with the values of two broadcast scalars.
 840      *
 841      * This is a lane-wise ternary operation which applies
 842      * the selected operation to each lane.
 843      * The return value will be equal to this expression:
 844      * {@code this.lanewise(op, this.broadcast(e1), this.broadcast(e2))}.
 845      *
 846      * @param op the operation used to combine lane values
 847      * @param e1 the first input scalar
 848      * @param e2 the second input scalar
 849      * @return the result of applying the operation lane-wise
 850      *         to the input vector and the scalars
 851      * @throws UnsupportedOperationException if this vector does
 852      *         not support the requested operation
 853      * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
 854      * @see #lanewise(VectorOperators.Ternary,float,float,VectorMask)
 855      */
 856     @ForceInline
 857     public final
 858     FloatVector lanewise(VectorOperators.Ternary op, //(op,e1,e2)
 859                                   float e1,
 860                                   float e2) {
 861         return lanewise(op, broadcast(e1), broadcast(e2));
 862     }
 863 
 864     /**
 865      * Combines the lane values of this vector
 866      * with the values of two broadcast scalars,
 867      * with selection of lane elements controlled by a mask.
 868      *
 869      * This is a masked lane-wise ternary operation which applies
 870      * the selected operation to each lane.
 871      * The return value will be equal to this expression:
 872      * {@code this.lanewise(op, this.broadcast(e1), this.broadcast(e2), m)}.
 873      *
 874      * @param op the operation used to combine lane values
 875      * @param e1 the first input scalar
 876      * @param e2 the second input scalar
 877      * @param m the mask controlling lane selection
 878      * @return the result of applying the operation lane-wise
 879      *         to the input vector and the scalars
 880      * @throws UnsupportedOperationException if this vector does
 881      *         not support the requested operation
 882      * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
 883      * @see #lanewise(VectorOperators.Ternary,float,float)
 884      */
 885     @ForceInline
 886     public final
 887     FloatVector lanewise(VectorOperators.Ternary op, //(op,e1,e2,m)
 888                                   float e1,
 889                                   float e2,
 890                                   VectorMask<Float> m) {
 891         return blend(lanewise(op, e1, e2), m);
 892     }
 893 
 894     /**
 895      * Combines the lane values of this vector
 896      * with the values of another vector and a broadcast scalar.
 897      *
 898      * This is a lane-wise ternary operation which applies
 899      * the selected operation to each lane.
 900      * The return value will be equal to this expression:
 901      * {@code this.lanewise(op, v1, this.broadcast(e2))}.
 902      *
 903      * @param op the operation used to combine lane values
 904      * @param v1 the other input vector
 905      * @param e2 the input scalar
 906      * @return the result of applying the operation lane-wise
 907      *         to the input vectors and the scalar
 908      * @throws UnsupportedOperationException if this vector does
 909      *         not support the requested operation
 910      * @see #lanewise(VectorOperators.Ternary,float,float)
 911      * @see #lanewise(VectorOperators.Ternary,Vector,float,VectorMask)
 912      */
 913     @ForceInline
 914     public final
 915     FloatVector lanewise(VectorOperators.Ternary op, //(op,v1,e2)
 916                                   Vector<Float> v1,
 917                                   float e2) {
 918         return lanewise(op, v1, broadcast(e2));
 919     }
 920 
 921     /**
 922      * Combines the lane values of this vector
 923      * with the values of another vector and a broadcast scalar,
 924      * with selection of lane elements controlled by a mask.
 925      *
 926      * This is a masked lane-wise ternary operation which applies
 927      * the selected operation to each lane.
 928      * The return value will be equal to this expression:
 929      * {@code this.lanewise(op, v1, this.broadcast(e2), m)}.
 930      *
 931      * @param op the operation used to combine lane values
 932      * @param v1 the other input vector
 933      * @param e2 the input scalar
 934      * @param m the mask controlling lane selection
 935      * @return the result of applying the operation lane-wise
 936      *         to the input vectors and the scalar
 937      * @throws UnsupportedOperationException if this vector does
 938      *         not support the requested operation
 939      * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
 940      * @see #lanewise(VectorOperators.Ternary,float,float,VectorMask)
 941      * @see #lanewise(VectorOperators.Ternary,Vector,float)
 942      */
 943     @ForceInline
 944     public final
 945     FloatVector lanewise(VectorOperators.Ternary op, //(op,v1,e2,m)
 946                                   Vector<Float> v1,
 947                                   float e2,
 948                                   VectorMask<Float> m) {
 949         return blend(lanewise(op, v1, e2), m);
 950     }
 951 
 952     /**
 953      * Combines the lane values of this vector
 954      * with the values of another vector and a broadcast scalar.
 955      *
 956      * This is a lane-wise ternary operation which applies
 957      * the selected operation to each lane.
 958      * The return value will be equal to this expression:
 959      * {@code this.lanewise(op, this.broadcast(e1), v2)}.
 960      *
 961      * @param op the operation used to combine lane values
 962      * @param e1 the input scalar
 963      * @param v2 the other input vector
 964      * @return the result of applying the operation lane-wise
 965      *         to the input vectors and the scalar
 966      * @throws UnsupportedOperationException if this vector does
 967      *         not support the requested operation
 968      * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
 969      * @see #lanewise(VectorOperators.Ternary,float,Vector,VectorMask)
 970      */
 971     @ForceInline
 972     public final
 973     FloatVector lanewise(VectorOperators.Ternary op, //(op,e1,v2)
 974                                   float e1,
 975                                   Vector<Float> v2) {
 976         return lanewise(op, broadcast(e1), v2);
 977     }
 978 
 979     /**
 980      * Combines the lane values of this vector
 981      * with the values of another vector and a broadcast scalar,
 982      * with selection of lane elements controlled by a mask.
 983      *
 984      * This is a masked lane-wise ternary operation which applies
 985      * the selected operation to each lane.
 986      * The return value will be equal to this expression:
 987      * {@code this.lanewise(op, this.broadcast(e1), v2, m)}.
 988      *
 989      * @param op the operation used to combine lane values
 990      * @param e1 the input scalar
 991      * @param v2 the other input vector
 992      * @param m the mask controlling lane selection
 993      * @return the result of applying the operation lane-wise
 994      *         to the input vectors and the scalar
 995      * @throws UnsupportedOperationException if this vector does
 996      *         not support the requested operation
 997      * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
 998      * @see #lanewise(VectorOperators.Ternary,float,Vector)
 999      */
1000     @ForceInline
1001     public final
1002     FloatVector lanewise(VectorOperators.Ternary op, //(op,e1,v2,m)
1003                                   float e1,
1004                                   Vector<Float> v2,
1005                                   VectorMask<Float> m) {
1006         return blend(lanewise(op, e1, v2), m);
1007     }
1008 
1009     // (Thus endeth the Great and Mighty Ternary Ogdoad.)
1010     // https://en.wikipedia.org/wiki/Ogdoad
1011 
1012     /// FULL-SERVICE BINARY METHODS: ADD, SUB, MUL, DIV
1013     //
1014     // These include masked and non-masked versions.
1015     // This subclass adds broadcast (masked or not).
1016 
1017     /**
1018      * {@inheritDoc} <!--workaround-->
1019      * @see #add(float)
1020      */
1021     @Override
1022     @ForceInline
1023     public final FloatVector add(Vector<Float> v) {
1024         return lanewise(ADD, v);
1025     }
1026 
1027     /**
1028      * Adds this vector to the broadcast of an input scalar.
1029      *
1030      * This is a lane-wise binary operation which applies
1031      * the primitive addition operation ({@code +}) to each lane.
1032      *
1033      * This method is also equivalent to the expression
1034      * {@link #lanewise(VectorOperators.Binary,float)
1035      *    lanewise}{@code (}{@link VectorOperators#ADD
1036      *    ADD}{@code , e)}.
1037      *
1038      * @param e the input scalar
1039      * @return the result of adding each lane of this vector to the scalar
1040      * @see #add(Vector)
1041      * @see #broadcast(float)
1042      * @see #add(float,VectorMask)
1043      * @see VectorOperators#ADD
1044      * @see #lanewise(VectorOperators.Binary,Vector)
1045      * @see #lanewise(VectorOperators.Binary,float)
1046      */
1047     @ForceInline
1048     public final
1049     FloatVector add(float e) {
1050         return lanewise(ADD, e);
1051     }
1052 
1053     /**
1054      * {@inheritDoc} <!--workaround-->
1055      * @see #add(float,VectorMask)
1056      */
1057     @Override
1058     @ForceInline
1059     public final FloatVector add(Vector<Float> v,
1060                                           VectorMask<Float> m) {
1061         return lanewise(ADD, v, m);
1062     }
1063 
1064     /**
1065      * Adds this vector to the broadcast of an input scalar,
1066      * selecting lane elements controlled by a mask.
1067      *
1068      * This is a masked lane-wise binary operation which applies
1069      * the primitive addition operation ({@code +}) to each lane.
1070      *
1071      * This method is also equivalent to the expression
1072      * {@link #lanewise(VectorOperators.Binary,float,VectorMask)
1073      *    lanewise}{@code (}{@link VectorOperators#ADD
1074      *    ADD}{@code , s, m)}.
1075      *
1076      * @param e the input scalar
1077      * @param m the mask controlling lane selection
1078      * @return the result of adding each lane of this vector to the scalar
1079      * @see #add(Vector,VectorMask)
1080      * @see #broadcast(float)
1081      * @see #add(float)
1082      * @see VectorOperators#ADD
1083      * @see #lanewise(VectorOperators.Binary,Vector)
1084      * @see #lanewise(VectorOperators.Binary,float)
1085      */
1086     @ForceInline
1087     public final FloatVector add(float e,
1088                                           VectorMask<Float> m) {
1089         return lanewise(ADD, e, m);
1090     }
1091 
1092     /**
1093      * {@inheritDoc} <!--workaround-->
1094      * @see #sub(float)
1095      */
1096     @Override
1097     @ForceInline
1098     public final FloatVector sub(Vector<Float> v) {
1099         return lanewise(SUB, v);
1100     }
1101 
1102     /**
1103      * Subtracts an input scalar from this vector.
1104      *
1105      * This is a masked lane-wise binary operation which applies
1106      * the primitive subtraction operation ({@code -}) to each lane.
1107      *
1108      * This method is also equivalent to the expression
1109      * {@link #lanewise(VectorOperators.Binary,float)
1110      *    lanewise}{@code (}{@link VectorOperators#SUB
1111      *    SUB}{@code , e)}.
1112      *
1113      * @param e the input scalar
1114      * @return the result of subtracting the scalar from each lane of this vector
1115      * @see #sub(Vector)
1116      * @see #broadcast(float)
1117      * @see #sub(float,VectorMask)
1118      * @see VectorOperators#SUB
1119      * @see #lanewise(VectorOperators.Binary,Vector)
1120      * @see #lanewise(VectorOperators.Binary,float)
1121      */
1122     @ForceInline
1123     public final FloatVector sub(float e) {
1124         return lanewise(SUB, e);
1125     }
1126 
1127     /**
1128      * {@inheritDoc} <!--workaround-->
1129      * @see #sub(float,VectorMask)
1130      */
1131     @Override
1132     @ForceInline
1133     public final FloatVector sub(Vector<Float> v,
1134                                           VectorMask<Float> m) {
1135         return lanewise(SUB, v, m);
1136     }
1137 
1138     /**
1139      * Subtracts an input scalar from this vector
1140      * under the control of a mask.
1141      *
1142      * This is a masked lane-wise binary operation which applies
1143      * the primitive subtraction operation ({@code -}) to each lane.
1144      *
1145      * This method is also equivalent to the expression
1146      * {@link #lanewise(VectorOperators.Binary,float,VectorMask)
1147      *    lanewise}{@code (}{@link VectorOperators#SUB
1148      *    SUB}{@code , s, m)}.
1149      *
1150      * @param e the input scalar
1151      * @param m the mask controlling lane selection
1152      * @return the result of subtracting the scalar from each lane of this vector
1153      * @see #sub(Vector,VectorMask)
1154      * @see #broadcast(float)
1155      * @see #sub(float)
1156      * @see VectorOperators#SUB
1157      * @see #lanewise(VectorOperators.Binary,Vector)
1158      * @see #lanewise(VectorOperators.Binary,float)
1159      */
1160     @ForceInline
1161     public final FloatVector sub(float e,
1162                                           VectorMask<Float> m) {
1163         return lanewise(SUB, e, m);
1164     }
1165 
1166     /**
1167      * {@inheritDoc} <!--workaround-->
1168      * @see #mul(float)
1169      */
1170     @Override
1171     @ForceInline
1172     public final FloatVector mul(Vector<Float> v) {
1173         return lanewise(MUL, v);
1174     }
1175 
1176     /**
1177      * Multiplies this vector by the broadcast of an input scalar.
1178      *
1179      * This is a lane-wise binary operation which applies
1180      * the primitive multiplication operation ({@code *}) to each lane.
1181      *
1182      * This method is also equivalent to the expression
1183      * {@link #lanewise(VectorOperators.Binary,float)
1184      *    lanewise}{@code (}{@link VectorOperators#MUL
1185      *    MUL}{@code , e)}.
1186      *
1187      * @param e the input scalar
1188      * @return the result of multiplying this vector by the given scalar
1189      * @see #mul(Vector)
1190      * @see #broadcast(float)
1191      * @see #mul(float,VectorMask)
1192      * @see VectorOperators#MUL
1193      * @see #lanewise(VectorOperators.Binary,Vector)
1194      * @see #lanewise(VectorOperators.Binary,float)
1195      */
1196     @ForceInline
1197     public final FloatVector mul(float e) {
1198         return lanewise(MUL, e);
1199     }
1200 
1201     /**
1202      * {@inheritDoc} <!--workaround-->
1203      * @see #mul(float,VectorMask)
1204      */
1205     @Override
1206     @ForceInline
1207     public final FloatVector mul(Vector<Float> v,
1208                                           VectorMask<Float> m) {
1209         return lanewise(MUL, v, m);
1210     }
1211 
1212     /**
1213      * Multiplies this vector by the broadcast of an input scalar,
1214      * selecting lane elements controlled by a mask.
1215      *
1216      * This is a masked lane-wise binary operation which applies
1217      * the primitive multiplication operation ({@code *}) to each lane.
1218      *
1219      * This method is also equivalent to the expression
1220      * {@link #lanewise(VectorOperators.Binary,float,VectorMask)
1221      *    lanewise}{@code (}{@link VectorOperators#MUL
1222      *    MUL}{@code , s, m)}.
1223      *
1224      * @param e the input scalar
1225      * @param m the mask controlling lane selection
1226      * @return the result of muling each lane of this vector to the scalar
1227      * @see #mul(Vector,VectorMask)
1228      * @see #broadcast(float)
1229      * @see #mul(float)
1230      * @see VectorOperators#MUL
1231      * @see #lanewise(VectorOperators.Binary,Vector)
1232      * @see #lanewise(VectorOperators.Binary,float)
1233      */
1234     @ForceInline
1235     public final FloatVector mul(float e,
1236                                           VectorMask<Float> m) {
1237         return lanewise(MUL, e, m);
1238     }
1239 
1240     /**
1241      * {@inheritDoc} <!--workaround-->
1242      * @apiNote Because the underlying scalar operator is an IEEE
1243      * floating point number, division by zero in fact will
1244      * not throw an exception, but will yield a signed
1245      * infinity or NaN.
1246      */
1247     @Override
1248     @ForceInline
1249     public final FloatVector div(Vector<Float> v) {
1250         return lanewise(DIV, v);
1251     }
1252 
1253     /**
1254      * Divides this vector by the broadcast of an input scalar.
1255      *
1256      * This is a lane-wise binary operation which applies
1257      * the primitive division operation ({@code /}) to each lane.
1258      *
1259      * This method is also equivalent to the expression
1260      * {@link #lanewise(VectorOperators.Binary,float)
1261      *    lanewise}{@code (}{@link VectorOperators#DIV
1262      *    DIV}{@code , e)}.
1263      *
1264      * @apiNote Because the underlying scalar operator is an IEEE
1265      * floating point number, division by zero in fact will
1266      * not throw an exception, but will yield a signed
1267      * infinity or NaN.
1268      *
1269      * @param e the input scalar
1270      * @return the result of dividing each lane of this vector by the scalar
1271      * @see #div(Vector)
1272      * @see #broadcast(float)
1273      * @see #div(float,VectorMask)
1274      * @see VectorOperators#DIV
1275      * @see #lanewise(VectorOperators.Binary,Vector)
1276      * @see #lanewise(VectorOperators.Binary,float)
1277      */
1278     @ForceInline
1279     public final FloatVector div(float e) {
1280         return lanewise(DIV, e);
1281     }
1282 
1283     /**
1284      * {@inheritDoc} <!--workaround-->
1285      * @see #div(float,VectorMask)
1286      * @apiNote Because the underlying scalar operator is an IEEE
1287      * floating point number, division by zero in fact will
1288      * not throw an exception, but will yield a signed
1289      * infinity or NaN.
1290      */
1291     @Override
1292     @ForceInline
1293     public final FloatVector div(Vector<Float> v,
1294                                           VectorMask<Float> m) {
1295         return lanewise(DIV, v, m);
1296     }
1297 
1298     /**
1299      * Divides this vector by the broadcast of an input scalar,
1300      * selecting lane elements controlled by a mask.
1301      *
1302      * This is a masked lane-wise binary operation which applies
1303      * the primitive division operation ({@code /}) to each lane.
1304      *
1305      * This method is also equivalent to the expression
1306      * {@link #lanewise(VectorOperators.Binary,float,VectorMask)
1307      *    lanewise}{@code (}{@link VectorOperators#DIV
1308      *    DIV}{@code , s, m)}.
1309      *
1310      * @apiNote Because the underlying scalar operator is an IEEE
1311      * floating point number, division by zero in fact will
1312      * not throw an exception, but will yield a signed
1313      * infinity or NaN.
1314      *
1315      * @param e the input scalar
1316      * @param m the mask controlling lane selection
1317      * @return the result of dividing each lane of this vector by the scalar
1318      * @see #div(Vector,VectorMask)
1319      * @see #broadcast(float)
1320      * @see #div(float)
1321      * @see VectorOperators#DIV
1322      * @see #lanewise(VectorOperators.Binary,Vector)
1323      * @see #lanewise(VectorOperators.Binary,float)
1324      */
1325     @ForceInline
1326     public final FloatVector div(float e,
1327                                           VectorMask<Float> m) {
1328         return lanewise(DIV, e, m);
1329     }
1330 
1331     /// END OF FULL-SERVICE BINARY METHODS
1332 
1333     /// SECOND-TIER BINARY METHODS
1334     //
1335     // There are no masked versions.
1336 
1337     /**
1338      * {@inheritDoc} <!--workaround-->
1339      * @apiNote
1340      * For this method, floating point negative
1341      * zero {@code -0.0} is treated as a value distinct from, and less
1342      * than the default value (positive zero).
1343      */
1344     @Override
1345     @ForceInline
1346     public final FloatVector min(Vector<Float> v) {
1347         return lanewise(MIN, v);
1348     }
1349 
1350     // FIXME:  "broadcast of an input scalar" is really wordy.  Reduce?
1351     /**
1352      * Computes the smaller of this vector and the broadcast of an input scalar.
1353      *
1354      * This is a lane-wise binary operation which applies the
1355      * operation {@code Math.min()} to each pair of
1356      * corresponding lane values.
1357      *
1358      * This method is also equivalent to the expression
1359      * {@link #lanewise(VectorOperators.Binary,float)
1360      *    lanewise}{@code (}{@link VectorOperators#MIN
1361      *    MIN}{@code , e)}.
1362      *
1363      * @param e the input scalar
1364      * @return the result of multiplying this vector by the given scalar
1365      * @see #min(Vector)
1366      * @see #broadcast(float)
1367      * @see VectorOperators#MIN
1368      * @see #lanewise(VectorOperators.Binary,float,VectorMask)
1369      * @apiNote
1370      * For this method, floating point negative
1371      * zero {@code -0.0} is treated as a value distinct from, and less
1372      * than the default value (positive zero).
1373      */
1374     @ForceInline
1375     public final FloatVector min(float e) {
1376         return lanewise(MIN, e);
1377     }
1378 
1379     /**
1380      * {@inheritDoc} <!--workaround-->
1381      * @apiNote
1382      * For this method, floating point negative
1383      * zero {@code -0.0} is treated as a value distinct from, and less
1384      * than the default value (positive zero).
1385      */
1386     @Override
1387     @ForceInline
1388     public final FloatVector max(Vector<Float> v) {
1389         return lanewise(MAX, v);
1390     }
1391 
1392     /**
1393      * Computes the larger of this vector and the broadcast of an input scalar.
1394      *
1395      * This is a lane-wise binary operation which applies the
1396      * operation {@code Math.max()} to each pair of
1397      * corresponding lane values.
1398      *
1399      * This method is also equivalent to the expression
1400      * {@link #lanewise(VectorOperators.Binary,float)
1401      *    lanewise}{@code (}{@link VectorOperators#MAX
1402      *    MAX}{@code , e)}.
1403      *
1404      * @param e the input scalar
1405      * @return the result of multiplying this vector by the given scalar
1406      * @see #max(Vector)
1407      * @see #broadcast(float)
1408      * @see VectorOperators#MAX
1409      * @see #lanewise(VectorOperators.Binary,float,VectorMask)
1410      * @apiNote
1411      * For this method, floating point negative
1412      * zero {@code -0.0} is treated as a value distinct from, and less
1413      * than the default value (positive zero).
1414      */
1415     @ForceInline
1416     public final FloatVector max(float e) {
1417         return lanewise(MAX, e);
1418     }
1419 
1420 
1421     // common FP operator: pow
1422     /**
1423      * Raises this vector to the power of a second input vector.
1424      *
1425      * This is a lane-wise binary operation which applies an operation
1426      * conforming to the specification of
1427      * {@link Math#pow Math.pow(a,b)}
1428      * to each pair of corresponding lane values.
1429      * The operation is adapted to cast the operands and the result,
1430      * specifically widening {@code float} operands to {@code double}
1431      * operands and narrowing the {@code double} result to a {@code float}
1432      * result.
1433      *
1434      * This method is also equivalent to the expression
1435      * {@link #lanewise(VectorOperators.Binary,Vector)
1436      *    lanewise}{@code (}{@link VectorOperators#POW
1437      *    POW}{@code , b)}.
1438      *
1439      * <p>
1440      * This is not a full-service named operation like
1441      * {@link #add(Vector) add}.  A masked version of
1442      * this operation is not directly available
1443      * but may be obtained via the masked version of
1444      * {@code lanewise}.
1445      *
1446      * @param b a vector exponent by which to raise this vector
1447      * @return the {@code b}-th power of this vector
1448      * @see #pow(float)
1449      * @see VectorOperators#POW
1450      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
1451      */
1452     @ForceInline
1453     public final FloatVector pow(Vector<Float> b) {
1454         return lanewise(POW, b);
1455     }
1456 
1457     /**
1458      * Raises this vector to a scalar power.
1459      *
1460      * This is a lane-wise binary operation which applies an operation
1461      * conforming to the specification of
1462      * {@link Math#pow Math.pow(a,b)}
1463      * to each pair of corresponding lane values.
1464      * The operation is adapted to cast the operands and the result,
1465      * specifically widening {@code float} operands to {@code double}
1466      * operands and narrowing the {@code double} result to a {@code float}
1467      * result.
1468      *
1469      * This method is also equivalent to the expression
1470      * {@link #lanewise(VectorOperators.Binary,Vector)
1471      *    lanewise}{@code (}{@link VectorOperators#POW
1472      *    POW}{@code , b)}.
1473      *
1474      * @param b a scalar exponent by which to raise this vector
1475      * @return the {@code b}-th power of this vector
1476      * @see #pow(Vector)
1477      * @see VectorOperators#POW
1478      * @see #lanewise(VectorOperators.Binary,float,VectorMask)
1479      */
1480     @ForceInline
1481     public final FloatVector pow(float b) {
1482         return lanewise(POW, b);
1483     }
1484 
1485     /// UNARY METHODS
1486 
1487     /**
1488      * {@inheritDoc} <!--workaround-->
1489      */
1490     @Override
1491     @ForceInline
1492     public final
1493     FloatVector neg() {
1494         return lanewise(NEG);
1495     }
1496 
1497     /**
1498      * {@inheritDoc} <!--workaround-->
1499      */
1500     @Override
1501     @ForceInline
1502     public final
1503     FloatVector abs() {
1504         return lanewise(ABS);
1505     }
1506 
1507 
1508     // sqrt
1509     /**
1510      * Computes the square root of this vector.
1511      *
1512      * This is a lane-wise unary operation which applies an operation
1513      * conforming to the specification of
1514      * {@link Math#sqrt Math.sqrt(a)}
1515      * to each lane value.
1516      * The operation is adapted to cast the operand and the result,
1517      * specifically widening the {@code float} operand to a {@code double}
1518      * operand and narrowing the {@code double} result to a {@code float}
1519      * result.
1520      *
1521      * This method is also equivalent to the expression
1522      * {@link #lanewise(VectorOperators.Unary)
1523      *    lanewise}{@code (}{@link VectorOperators#SQRT
1524      *    SQRT}{@code )}.
1525      *
1526      * @return the square root of this vector
1527      * @see VectorOperators#SQRT
1528      * @see #lanewise(VectorOperators.Unary,VectorMask)
1529      */
1530     @ForceInline
1531     public final FloatVector sqrt() {
1532         return lanewise(SQRT);
1533     }
1534 
1535     /// COMPARISONS
1536 
1537     /**
1538      * {@inheritDoc} <!--workaround-->
1539      */
1540     @Override
1541     @ForceInline
1542     public final
1543     VectorMask<Float> eq(Vector<Float> v) {
1544         return compare(EQ, v);
1545     }
1546 
1547     /**
1548      * Tests if this vector is equal to an input scalar.
1549      *
1550      * This is a lane-wise binary test operation which applies
1551      * the primitive equals operation ({@code ==}) to each lane.
1552      * The result is the same as {@code compare(VectorOperators.Comparison.EQ, e)}.
1553      *
1554      * @param e the input scalar
1555      * @return the result mask of testing if this vector
1556      *         is equal to {@code e}
1557      * @see #compare(VectorOperators.Comparison,float)
1558      */
1559     @ForceInline
1560     public final
1561     VectorMask<Float> eq(float e) {
1562         return compare(EQ, e);
1563     }
1564 
1565     /**
1566      * {@inheritDoc} <!--workaround-->
1567      */
1568     @Override
1569     @ForceInline
1570     public final
1571     VectorMask<Float> lt(Vector<Float> v) {
1572         return compare(LT, v);
1573     }
1574 
1575     /**
1576      * Tests if this vector is less than an input scalar.
1577      *
1578      * This is a lane-wise binary test operation which applies
1579      * the primitive less than operation ({@code <}) to each lane.
1580      * The result is the same as {@code compare(VectorOperators.LT, e)}.
1581      *
1582      * @param e the input scalar
1583      * @return the mask result of testing if this vector
1584      *         is less than the input scalar
1585      * @see #compare(VectorOperators.Comparison,float)
1586      */
1587     @ForceInline
1588     public final
1589     VectorMask<Float> lt(float e) {
1590         return compare(LT, e);
1591     }
1592 
1593     /**
1594      * {@inheritDoc} <!--workaround-->
1595      */
1596     @Override
1597     public abstract
1598     VectorMask<Float> test(VectorOperators.Test op);
1599 
1600     /*package-private*/
1601     @ForceInline
1602     final
1603     <M extends VectorMask<Float>>
1604     M testTemplate(Class<M> maskType, Test op) {
1605         FloatSpecies vsp = vspecies();
1606         if (opKind(op, VO_SPECIAL)) {
1607             IntVector bits = this.viewAsIntegralLanes();
1608             VectorMask<Integer> m;
1609             if (op == IS_DEFAULT) {
1610                 m = bits.compare(EQ, (int) 0);
1611             } else if (op == IS_NEGATIVE) {
1612                 m = bits.compare(LT, (int) 0);
1613             }
1614             else if (op == IS_FINITE ||
1615                      op == IS_NAN ||
1616                      op == IS_INFINITE) {
1617                 // first kill the sign:
1618                 bits = bits.and(Integer.MAX_VALUE);
1619                 // next find the bit pattern for infinity:
1620                 int infbits = (int) toBits(Float.POSITIVE_INFINITY);
1621                 // now compare:
1622                 if (op == IS_FINITE) {
1623                     m = bits.compare(LT, infbits);
1624                 } else if (op == IS_NAN) {
1625                     m = bits.compare(GT, infbits);
1626                 } else {
1627                     m = bits.compare(EQ, infbits);
1628                 }
1629             }
1630             else {
1631                 throw new AssertionError(op);
1632             }
1633             return maskType.cast(m.cast(this.vspecies()));
1634         }
1635         int opc = opCode(op);
1636         throw new AssertionError(op);
1637     }
1638 
1639     /**
1640      * {@inheritDoc} <!--workaround-->
1641      */
1642     @Override
1643     @ForceInline
1644     public final
1645     VectorMask<Float> test(VectorOperators.Test op,
1646                                   VectorMask<Float> m) {
1647         return test(op).and(m);
1648     }
1649 
1650     /**
1651      * {@inheritDoc} <!--workaround-->
1652      */
    // Left abstract in this class.  compareTemplate(Class, Comparison, Vector)
    // below carries the shared logic and takes the concrete mask class as an
    // argument; presumably subclasses forward to it -- confirm in subclasses.
    @Override
    public abstract
    VectorMask<Float> compare(VectorOperators.Comparison op, Vector<Float> v);
1656 
1657     /*package-private*/
1658     @ForceInline
1659     final
1660     <M extends VectorMask<Float>>
1661     M compareTemplate(Class<M> maskType, Comparison op, Vector<Float> v) {
1662         Objects.requireNonNull(v);
1663         FloatSpecies vsp = vspecies();
1664         FloatVector that = (FloatVector) v;
1665         that.check(this);
1666         int opc = opCode(op);
1667         return VectorSupport.compare(
1668             opc, getClass(), maskType, float.class, length(),
1669             this, that,
1670             (cond, v0, v1) -> {
1671                 AbstractMask<Float> m
1672                     = v0.bTest(cond, v1, (cond_, i, a, b)
1673                                -> compareWithOp(cond, a, b));
1674                 @SuppressWarnings("unchecked")
1675                 M m2 = (M) m;
1676                 return m2;
1677             });
1678     }
1679 
1680     @ForceInline
1681     private static boolean compareWithOp(int cond, float a, float b) {
1682         return switch (cond) {
1683             case BT_eq -> a == b;
1684             case BT_ne -> a != b;
1685             case BT_lt -> a < b;
1686             case BT_le -> a <= b;
1687             case BT_gt -> a > b;
1688             case BT_ge -> a >= b;
1689             default -> throw new AssertionError();
1690         };
1691     }
1692 
1693     /**
1694      * {@inheritDoc} <!--workaround-->
1695      */
1696     @Override
1697     @ForceInline
1698     public final
1699     VectorMask<Float> compare(VectorOperators.Comparison op,
1700                                   Vector<Float> v,
1701                                   VectorMask<Float> m) {
1702         return compare(op, v).and(m);
1703     }
1704 
1705     /**
1706      * Tests this vector by comparing it with an input scalar,
1707      * according to the given comparison operation.
1708      *
1709      * This is a lane-wise binary test operation which applies
1710      * the comparison operation to each lane.
1711      * <p>
1712      * The result is the same as
1713      * {@code compare(op, broadcast(species(), e))}.
1714      * That is, the scalar may be regarded as broadcast to
1715      * a vector of the same species, and then compared
1716      * against the original vector, using the selected
1717      * comparison operation.
1718      *
1719      * @param op the operation used to compare lane values
1720      * @param e the input scalar
1721      * @return the mask result of testing lane-wise if this vector
1722      *         compares to the input, according to the selected
1723      *         comparison operator
1724      * @see FloatVector#compare(VectorOperators.Comparison,Vector)
1725      * @see #eq(float)
1726      * @see #lt(float)
1727      */
    // Left abstract in this class; compareTemplate(Class, Comparison, float)
    // below implements the broadcast-then-compare behavior described above.
    public abstract
    VectorMask<Float> compare(Comparison op, float e);
1730 
1731     /*package-private*/
1732     @ForceInline
1733     final
1734     <M extends VectorMask<Float>>
1735     M compareTemplate(Class<M> maskType, Comparison op, float e) {
1736         return compareTemplate(maskType, op, broadcast(e));
1737     }
1738 
    /**
     * Tests this vector by comparing it with an input scalar,
     * according to the given comparison operation,
     * in lanes selected by a mask.
     *
     * This is a masked lane-wise binary test operation which applies
     * to each pair of corresponding lane values.
     *
     * The returned result is equal to the expression
     * {@code compare(op,e).and(m)}.
     *
     * @param op the operation used to compare lane values
     * @param e the input scalar
     * @param m the mask controlling lane selection
     * @return the mask result of testing lane-wise if this vector
     *         compares to the input, according to the selected
     *         comparison operator,
     *         and only in the lanes selected by the mask
     * @see FloatVector#compare(VectorOperators.Comparison,Vector,VectorMask)
     */
    @ForceInline
    public final VectorMask<Float> compare(VectorOperators.Comparison op,
                                               float e,
                                               VectorMask<Float> m) {
        return compare(op, e).and(m);
    }
1765 
1766     /**
1767      * {@inheritDoc} <!--workaround-->
1768      */
    // Left abstract in this class; compareTemplate(Class, Comparison, long)
    // below broadcasts the long and reuses the vector comparison.
    @Override
    public abstract
    VectorMask<Float> compare(Comparison op, long e);
1772 
1773     /*package-private*/
1774     @ForceInline
1775     final
1776     <M extends VectorMask<Float>>
1777     M compareTemplate(Class<M> maskType, Comparison op, long e) {
1778         return compareTemplate(maskType, op, broadcast(e));
1779     }
1780 
1781     /**
1782      * {@inheritDoc} <!--workaround-->
1783      */
1784     @Override
1785     @ForceInline
1786     public final
1787     VectorMask<Float> compare(Comparison op, long e, VectorMask<Float> m) {
1788         return compare(op, broadcast(e), m);
1789     }
1790 
1791 
1792 
1793     /**
1794      * {@inheritDoc} <!--workaround-->
1795      */
    // Left abstract in this class; blendTemplate(Class, FloatVector, M) below
    // is the shared implementation and needs the concrete mask class.
    @Override public abstract
    FloatVector blend(Vector<Float> v, VectorMask<Float> m);
1798 
1799     /*package-private*/
1800     @ForceInline
1801     final
1802     <M extends VectorMask<Float>>
1803     FloatVector
1804     blendTemplate(Class<M> maskType, FloatVector v, M m) {
1805         v.check(this);
1806         return VectorSupport.blend(
1807             getClass(), maskType, float.class, length(),
1808             this, v, m,
1809             (v0, v1, m_) -> v0.bOp(v1, m_, (i, a, b) -> b));
1810     }
1811 
1812     /**
1813      * {@inheritDoc} <!--workaround-->
1814      */
    // Left abstract in this class; addIndexTemplate(int) below is the shared
    // implementation.
    @Override public abstract FloatVector addIndex(int scale);
1816 
1817     /*package-private*/
1818     @ForceInline
1819     final FloatVector addIndexTemplate(int scale) {
1820         FloatSpecies vsp = vspecies();
1821         // make sure VLENGTH*scale doesn't overflow:
1822         vsp.checkScale(scale);
1823         return VectorSupport.indexVector(
1824             getClass(), float.class, length(),
1825             this, scale, vsp,
1826             (v, scale_, s)
1827             -> {
1828                 // If the platform doesn't support an INDEX
1829                 // instruction directly, load IOTA from memory
1830                 // and multiply.
1831                 FloatVector iota = s.iota();
1832                 float sc = (float) scale_;
1833                 return v.add(sc == 1 ? iota : iota.mul(sc));
1834             });
1835     }
1836 
1837     /**
1838      * Replaces selected lanes of this vector with
1839      * a scalar value
1840      * under the control of a mask.
1841      *
1842      * This is a masked lane-wise binary operation which
1843      * selects each lane value from one or the other input.
1844      *
1845      * The returned result is equal to the expression
1846      * {@code blend(broadcast(e),m)}.
1847      *
1848      * @param e the input scalar, containing the replacement lane value
1849      * @param m the mask controlling lane selection of the scalar
1850      * @return the result of blending the lane elements of this vector with
1851      *         the scalar value
1852      */
1853     @ForceInline
1854     public final FloatVector blend(float e,
1855                                             VectorMask<Float> m) {
1856         return blend(broadcast(e), m);
1857     }
1858 
1859     /**
1860      * Replaces selected lanes of this vector with
1861      * a scalar value
1862      * under the control of a mask.
1863      *
1864      * This is a masked lane-wise binary operation which
1865      * selects each lane value from one or the other input.
1866      *
1867      * The returned result is equal to the expression
1868      * {@code blend(broadcast(e),m)}.
1869      *
1870      * @param e the input scalar, containing the replacement lane value
1871      * @param m the mask controlling lane selection of the scalar
1872      * @return the result of blending the lane elements of this vector with
1873      *         the scalar value
1874      */
1875     @ForceInline
1876     public final FloatVector blend(long e,
1877                                             VectorMask<Float> m) {
1878         return blend(broadcast(e), m);
1879     }
1880 
1881     /**
1882      * {@inheritDoc} <!--workaround-->
1883      */
    // Left abstract in this class; sliceTemplate(int, Vector) below is the
    // shared implementation.
    @Override
    public abstract
    FloatVector slice(int origin, Vector<Float> v1);
1887 
1888     /*package-private*/
1889     final
1890     @ForceInline
1891     FloatVector sliceTemplate(int origin, Vector<Float> v1) {
1892         FloatVector that = (FloatVector) v1;
1893         that.check(this);
1894         Objects.checkIndex(origin, length() + 1);
1895         VectorShuffle<Float> iota = iotaShuffle();
1896         VectorMask<Float> blendMask = iota.toVector().compare(VectorOperators.LT, (broadcast((float)(length() - origin))));
1897         iota = iotaShuffle(origin, 1, true);
1898         return that.rearrange(iota).blend(this.rearrange(iota), blendMask);
1899     }
1900 
1901     /**
1902      * {@inheritDoc} <!--workaround-->
1903      */
1904     @Override
1905     @ForceInline
1906     public final
1907     FloatVector slice(int origin,
1908                                Vector<Float> w,
1909                                VectorMask<Float> m) {
1910         return broadcast(0).blend(slice(origin, w), m);
1911     }
1912 
1913     /**
1914      * {@inheritDoc} <!--workaround-->
1915      */
    // Left abstract in this class; sliceTemplate(int) below is the shared
    // implementation.
    @Override
    public abstract
    FloatVector slice(int origin);
1919 
1920     /*package-private*/
1921     final
1922     @ForceInline
1923     FloatVector sliceTemplate(int origin) {
1924         Objects.checkIndex(origin, length() + 1);
1925         VectorShuffle<Float> iota = iotaShuffle();
1926         VectorMask<Float> blendMask = iota.toVector().compare(VectorOperators.LT, (broadcast((float)(length() - origin))));
1927         iota = iotaShuffle(origin, 1, true);
1928         return vspecies().zero().blend(this.rearrange(iota), blendMask);
1929     }
1930 
1931     /**
1932      * {@inheritDoc} <!--workaround-->
1933      */
    // Left abstract in this class; unsliceTemplate(int, Vector, int) below is
    // the shared implementation.
    @Override
    public abstract
    FloatVector unslice(int origin, Vector<Float> w, int part);
1937 
1938     /*package-private*/
1939     final
1940     @ForceInline
1941     FloatVector
1942     unsliceTemplate(int origin, Vector<Float> w, int part) {
1943         FloatVector that = (FloatVector) w;
1944         that.check(this);
1945         Objects.checkIndex(origin, length() + 1);
1946         VectorShuffle<Float> iota = iotaShuffle();
1947         VectorMask<Float> blendMask = iota.toVector().compare((part == 0) ? VectorOperators.GE : VectorOperators.LT,
1948                                                                   (broadcast((float)(origin))));
1949         iota = iotaShuffle(-origin, 1, true);
1950         return that.blend(this.rearrange(iota), blendMask);
1951     }
1952 
1953     /*package-private*/
1954     final
1955     @ForceInline
1956     <M extends VectorMask<Float>>
1957     FloatVector
1958     unsliceTemplate(Class<M> maskType, int origin, Vector<Float> w, int part, M m) {
1959         FloatVector that = (FloatVector) w;
1960         that.check(this);
1961         FloatVector slice = that.sliceTemplate(origin, that);
1962         slice = slice.blendTemplate(maskType, this, m);
1963         return slice.unsliceTemplate(origin, w, part);
1964     }
1965 
1966     /**
1967      * {@inheritDoc} <!--workaround-->
1968      */
    // Left abstract in this class; the unsliceTemplate overload above that
    // takes a mask class and a mask is the shared implementation.
    @Override
    public abstract
    FloatVector unslice(int origin, Vector<Float> w, int part, VectorMask<Float> m);
1972 
1973     /**
1974      * {@inheritDoc} <!--workaround-->
1975      */
    // Left abstract in this class; unsliceTemplate(int) below is the shared
    // implementation.
    @Override
    public abstract
    FloatVector unslice(int origin);
1979 
1980     /*package-private*/
1981     final
1982     @ForceInline
1983     FloatVector
1984     unsliceTemplate(int origin) {
1985         Objects.checkIndex(origin, length() + 1);
1986         VectorShuffle<Float> iota = iotaShuffle();
1987         VectorMask<Float> blendMask = iota.toVector().compare(VectorOperators.GE,
1988                                                                   (broadcast((float)(origin))));
1989         iota = iotaShuffle(-origin, 1, true);
1990         return vspecies().zero().blend(this.rearrange(iota), blendMask);
1991     }
1992 
1993     private ArrayIndexOutOfBoundsException
1994     wrongPartForSlice(int part) {
1995         String msg = String.format("bad part number %d for slice operation",
1996                                    part);
1997         return new ArrayIndexOutOfBoundsException(msg);
1998     }
1999 
2000     /**
2001      * {@inheritDoc} <!--workaround-->
2002      */
    // Left abstract in this class; rearrangeTemplate(Class, S) below is the
    // shared implementation.  Note that the parameter is a shuffle, despite
    // being named m.
    @Override
    public abstract
    FloatVector rearrange(VectorShuffle<Float> m);
2006 
2007     /*package-private*/
2008     @ForceInline
2009     final
2010     <S extends VectorShuffle<Float>>
2011     FloatVector rearrangeTemplate(Class<S> shuffletype, S shuffle) {
2012         shuffle.checkIndexes();
2013         return VectorSupport.rearrangeOp(
2014             getClass(), shuffletype, float.class, length(),
2015             this, shuffle,
2016             (v1, s_) -> v1.uOp((i, a) -> {
2017                 int ei = s_.laneSource(i);
2018                 return v1.lane(ei);
2019             }));
2020     }
2021 
2022     /**
2023      * {@inheritDoc} <!--workaround-->
2024      */
    // Left abstract in this class; rearrangeTemplate(Class, S, VectorMask)
    // below is the shared implementation.
    @Override
    public abstract
    FloatVector rearrange(VectorShuffle<Float> s,
                                   VectorMask<Float> m);
2029 
2030     /*package-private*/
2031     @ForceInline
2032     final
2033     <S extends VectorShuffle<Float>>
2034     FloatVector rearrangeTemplate(Class<S> shuffletype,
2035                                            S shuffle,
2036                                            VectorMask<Float> m) {
2037         FloatVector unmasked =
2038             VectorSupport.rearrangeOp(
2039                 getClass(), shuffletype, float.class, length(),
2040                 this, shuffle,
2041                 (v1, s_) -> v1.uOp((i, a) -> {
2042                     int ei = s_.laneSource(i);
2043                     return ei < 0 ? 0 : v1.lane(ei);
2044                 }));
2045         VectorMask<Float> valid = shuffle.laneIsValid();
2046         if (m.andNot(valid).anyTrue()) {
2047             shuffle.checkIndexes();
2048             throw new AssertionError();
2049         }
2050         return broadcast((float)0).blend(unmasked, m);
2051     }
2052 
2053     /**
2054      * {@inheritDoc} <!--workaround-->
2055      */
    // Left abstract in this class; rearrangeTemplate(Class, S, FloatVector)
    // below is the shared implementation.
    @Override
    public abstract
    FloatVector rearrange(VectorShuffle<Float> s,
                                   Vector<Float> v);
2060 
2061     /*package-private*/
2062     @ForceInline
2063     final
2064     <S extends VectorShuffle<Float>>
2065     FloatVector rearrangeTemplate(Class<S> shuffletype,
2066                                            S shuffle,
2067                                            FloatVector v) {
2068         VectorMask<Float> valid = shuffle.laneIsValid();
2069         @SuppressWarnings("unchecked")
2070         S ws = (S) shuffle.wrapIndexes();
2071         FloatVector r0 =
2072             VectorSupport.rearrangeOp(
2073                 getClass(), shuffletype, float.class, length(),
2074                 this, ws,
2075                 (v0, s_) -> v0.uOp((i, a) -> {
2076                     int ei = s_.laneSource(i);
2077                     return v0.lane(ei);
2078                 }));
2079         FloatVector r1 =
2080             VectorSupport.rearrangeOp(
2081                 getClass(), shuffletype, float.class, length(),
2082                 v, ws,
2083                 (v1, s_) -> v1.uOp((i, a) -> {
2084                     int ei = s_.laneSource(i);
2085                     return v1.lane(ei);
2086                 }));
2087         return r1.blend(r0, valid);
2088     }
2089 
2090     @ForceInline
2091     private final
2092     VectorShuffle<Float> toShuffle0(FloatSpecies dsp) {
2093         float[] a = toArray();
2094         int[] sa = new int[a.length];
2095         for (int i = 0; i < a.length; i++) {
2096             sa[i] = (int) a[i];
2097         }
2098         return VectorShuffle.fromArray(dsp, sa, 0);
2099     }
2100 
2101     /*package-private*/
2102     @ForceInline
2103     final
2104     VectorShuffle<Float> toShuffleTemplate(Class<?> shuffleType) {
2105         FloatSpecies vsp = vspecies();
2106         return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST,
2107                                      getClass(), float.class, length(),
2108                                      shuffleType, byte.class, length(),
2109                                      this, vsp,
2110                                      FloatVector::toShuffle0);
2111     }
2112 
2113     /**
2114      * {@inheritDoc} <!--workaround-->
2115      */
    // Left abstract in this class; selectFromTemplate(FloatVector) below is
    // the shared implementation.
    @Override
    public abstract
    FloatVector selectFrom(Vector<Float> v);
2119 
2120     /*package-private*/
2121     @ForceInline
2122     final FloatVector selectFromTemplate(FloatVector v) {
2123         return v.rearrange(this.toShuffle());
2124     }
2125 
2126     /**
2127      * {@inheritDoc} <!--workaround-->
2128      */
    // Left abstract in this class; selectFromTemplate(FloatVector,
    // AbstractMask) below is the shared implementation.
    @Override
    public abstract
    FloatVector selectFrom(Vector<Float> s, VectorMask<Float> m);
2132 
2133     /*package-private*/
2134     @ForceInline
2135     final FloatVector selectFromTemplate(FloatVector v,
2136                                                   AbstractMask<Float> m) {
2137         return v.rearrange(this.toShuffle(), m);
2138     }
2139 
2140     /// Ternary operations
2141 
2142 
2143     /**
2144      * Multiplies this vector by a second input vector, and sums
2145      * the result with a third.
2146      *
2147      * Extended precision is used for the intermediate result,
2148      * avoiding possible loss of precision from rounding once
2149      * for each of the two operations.
2150      * The result is numerically close to {@code this.mul(b).add(c)},
2151      * and is typically closer to the true mathematical result.
2152      *
2153      * This is a lane-wise ternary operation which applies an operation
2154      * conforming to the specification of
2155      * {@link Math#fma(float,float,float) Math.fma(a,b,c)}
2156      * to each lane.
2157      * The operation is adapted to cast the operands and the result,
2158      * specifically widening {@code float} operands to {@code double}
2159      * operands and narrowing the {@code double} result to a {@code float}
2160      * result.
2161      *
2162      * This method is also equivalent to the expression
2163      * {@link #lanewise(VectorOperators.Ternary,Vector,Vector)
2164      *    lanewise}{@code (}{@link VectorOperators#FMA
2165      *    FMA}{@code , b, c)}.
2166      *
2167      * @param b the second input vector, supplying multiplier values
2168      * @param c the third input vector, supplying addend values
2169      * @return the product of this vector and the second input vector
2170      *         summed with the third input vector, using extended precision
2171      *         for the intermediate result
2172      * @see #fma(float,float)
2173      * @see VectorOperators#FMA
2174      * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
2175      */
2176     @ForceInline
2177     public final
2178     FloatVector fma(Vector<Float> b, Vector<Float> c) {
2179         return lanewise(FMA, b, c);
2180     }
2181 
2182     /**
2183      * Multiplies this vector by a scalar multiplier, and sums
2184      * the result with a scalar addend.
2185      *
2186      * Extended precision is used for the intermediate result,
2187      * avoiding possible loss of precision from rounding once
2188      * for each of the two operations.
2189      * The result is numerically close to {@code this.mul(b).add(c)},
2190      * and is typically closer to the true mathematical result.
2191      *
2192      * This is a lane-wise ternary operation which applies an operation
2193      * conforming to the specification of
2194      * {@link Math#fma(float,float,float) Math.fma(a,b,c)}
2195      * to each lane.
2196      * The operation is adapted to cast the operands and the result,
2197      * specifically widening {@code float} operands to {@code double}
2198      * operands and narrowing the {@code double} result to a {@code float}
2199      * result.
2200      *
2201      * This method is also equivalent to the expression
2202      * {@link #lanewise(VectorOperators.Ternary,Vector,Vector)
2203      *    lanewise}{@code (}{@link VectorOperators#FMA
2204      *    FMA}{@code , b, c)}.
2205      *
2206      * @param b the scalar multiplier
2207      * @param c the scalar addend
2208      * @return the product of this vector and the scalar multiplier
2209      *         summed with scalar addend, using extended precision
2210      *         for the intermediate result
2211      * @see #fma(Vector,Vector)
2212      * @see VectorOperators#FMA
2213      * @see #lanewise(VectorOperators.Ternary,float,float,VectorMask)
2214      */
2215     @ForceInline
2216     public final
2217     FloatVector fma(float b, float c) {
2218         return lanewise(FMA, b, c);
2219     }
2220 
2221     // Don't bother with (Vector,float) and (float,Vector) overloadings.
2222 
2223     // Type specific horizontal reductions
2224 
2225     /**
2226      * Returns a value accumulated from all the lanes of this vector.
2227      *
2228      * This is an associative cross-lane reduction operation which
2229      * applies the specified operation to all the lane elements.
2230      * <p>
2231      * A few reduction operations do not support arbitrary reordering
2232      * of their operands, yet are included here because of their
2233      * usefulness.
2234      * <ul>
2235      * <li>
2236      * In the case of {@code FIRST_NONZERO}, the reduction returns
2237      * the value from the lowest-numbered non-zero lane.
2238      * (As with {@code MAX} and {@code MIN}, floating point negative
2239      * zero {@code -0.0} is treated as a value distinct from
2240      * the default value, positive zero. So a first-nonzero lane reduction
2241      * might return {@code -0.0} even in the presence of non-zero
2242      * lane values.)
2243      * <li>
2244      * In the case of {@code ADD} and {@code MUL}, the
2245      * precise result will reflect the choice of an arbitrary order
2246      * of operations, which may even vary over time.
2247      * For further details see the section
2248      * <a href="VectorOperators.html#fp_assoc">Operations on floating point vectors</a>.
2249      * <li>
2250      * All other reduction operations are fully commutative and
2251      * associative.  The implementation can choose any order of
2252      * processing, yet it will always produce the same result.
2253      * </ul>
2254      *
2255      * @param op the operation used to combine lane values
2256      * @return the accumulated result
2257      * @throws UnsupportedOperationException if this vector does
2258      *         not support the requested operation
2259      * @see #reduceLanes(VectorOperators.Associative,VectorMask)
2260      * @see #add(Vector)
2261      * @see #mul(Vector)
2262      * @see #min(Vector)
2263      * @see #max(Vector)
2264      * @see VectorOperators#FIRST_NONZERO
2265      */
    // Left abstract in this class; reduceLanesTemplate(Associative) below is
    // the shared implementation.
    public abstract float reduceLanes(VectorOperators.Associative op);
2267 
2268     /**
2269      * Returns a value accumulated from selected lanes of this vector,
2270      * controlled by a mask.
2271      *
2272      * This is an associative cross-lane reduction operation which
2273      * applies the specified operation to the selected lane elements.
2274      * <p>
2275      * If no elements are selected, an operation-specific identity
2276      * value is returned.
2277      * <ul>
2278      * <li>
2279      * If the operation is
2280      *  {@code ADD}
2281      * or {@code FIRST_NONZERO},
2282      * then the identity value is positive zero, the default {@code float} value.
2283      * <li>
2284      * If the operation is {@code MUL},
2285      * then the identity value is one.
2286      * <li>
2287      * If the operation is {@code MAX},
2288      * then the identity value is {@code Float.NEGATIVE_INFINITY}.
2289      * <li>
2290      * If the operation is {@code MIN},
2291      * then the identity value is {@code Float.POSITIVE_INFINITY}.
2292      * </ul>
2293      * <p>
2294      * A few reduction operations do not support arbitrary reordering
2295      * of their operands, yet are included here because of their
2296      * usefulness.
2297      * <ul>
2298      * <li>
2299      * In the case of {@code FIRST_NONZERO}, the reduction returns
2300      * the value from the lowest-numbered non-zero lane.
2301      * (As with {@code MAX} and {@code MIN}, floating point negative
2302      * zero {@code -0.0} is treated as a value distinct from
2303      * the default value, positive zero. So a first-nonzero lane reduction
2304      * might return {@code -0.0} even in the presence of non-zero
2305      * lane values.)
2306      * <li>
2307      * In the case of {@code ADD} and {@code MUL}, the
2308      * precise result will reflect the choice of an arbitrary order
2309      * of operations, which may even vary over time.
2310      * For further details see the section
2311      * <a href="VectorOperators.html#fp_assoc">Operations on floating point vectors</a>.
2312      * <li>
2313      * All other reduction operations are fully commutative and
2314      * associative.  The implementation can choose any order of
2315      * processing, yet it will always produce the same result.
2316      * </ul>
2317      *
2318      * @param op the operation used to combine lane values
2319      * @param m the mask controlling lane selection
2320      * @return the reduced result accumulated from the selected lane values
2321      * @throws UnsupportedOperationException if this vector does
2322      *         not support the requested operation
2323      * @see #reduceLanes(VectorOperators.Associative)
2324      */
    // Left abstract in this class; reduceLanesTemplate(Associative,
    // VectorMask) below is the shared implementation.
    public abstract float reduceLanes(VectorOperators.Associative op,
                                       VectorMask<Float> m);
2327 
2328     /*package-private*/
2329     @ForceInline
2330     final
2331     float reduceLanesTemplate(VectorOperators.Associative op,
2332                                VectorMask<Float> m) {
2333         FloatVector v = reduceIdentityVector(op).blend(this, m);
2334         return v.reduceLanesTemplate(op);
2335     }
2336 
2337     /*package-private*/
2338     @ForceInline
2339     final
2340     float reduceLanesTemplate(VectorOperators.Associative op) {
2341         if (op == FIRST_NONZERO) {
2342             // FIXME:  The JIT should handle this, and other scan ops alos.
2343             VectorMask<Integer> thisNZ
2344                 = this.viewAsIntegralLanes().compare(NE, (int) 0);
2345             return this.lane(thisNZ.firstTrue());
2346         }
2347         int opc = opCode(op);
2348         return fromBits(VectorSupport.reductionCoerced(
2349             opc, getClass(), float.class, length(),
2350             this,
2351             REDUCE_IMPL.find(op, opc, (opc_) -> {
2352               switch (opc_) {
2353               case VECTOR_OP_ADD: return v ->
2354                       toBits(v.rOp((float)0, (i, a, b) -> (float)(a + b)));
2355               case VECTOR_OP_MUL: return v ->
2356                       toBits(v.rOp((float)1, (i, a, b) -> (float)(a * b)));
2357               case VECTOR_OP_MIN: return v ->
2358                       toBits(v.rOp(MAX_OR_INF, (i, a, b) -> (float) Math.min(a, b)));
2359               case VECTOR_OP_MAX: return v ->
2360                       toBits(v.rOp(MIN_OR_INF, (i, a, b) -> (float) Math.max(a, b)));
2361               default: return null;
2362               }})));
2363     }
    // Lookup table for the per-operation reduction functions created by the
    // factory lambda in reduceLanesTemplate(Associative).  Each function maps
    // a vector to the raw bits (as a Long) of its reduced value.
    private static final
    ImplCache<Associative,Function<FloatVector,Long>> REDUCE_IMPL
        = new ImplCache<>(Associative.class, FloatVector.class);
2367 
2368     private
2369     @ForceInline
2370     FloatVector reduceIdentityVector(VectorOperators.Associative op) {
2371         int opc = opCode(op);
2372         UnaryOperator<FloatVector> fn
2373             = REDUCE_ID_IMPL.find(op, opc, (opc_) -> {
2374                 switch (opc_) {
2375                 case VECTOR_OP_ADD:
2376                     return v -> v.broadcast(0);
2377                 case VECTOR_OP_MUL:
2378                     return v -> v.broadcast(1);
2379                 case VECTOR_OP_MIN:
2380                     return v -> v.broadcast(MAX_OR_INF);
2381                 case VECTOR_OP_MAX:
2382                     return v -> v.broadcast(MIN_OR_INF);
2383                 default: return null;
2384                 }
2385             });
2386         return fn.apply(this);
2387     }
    // Lookup table for the per-operation identity-vector builders created by
    // the factory lambda in reduceIdentityVector.
    private static final
    ImplCache<Associative,UnaryOperator<FloatVector>> REDUCE_ID_IMPL
        = new ImplCache<>(Associative.class, FloatVector.class);
2391 
    // Identity values for the MAX and MIN reductions respectively: negative
    // infinity is the starting value when reducing with MAX, positive
    // infinity when reducing with MIN.
    private static final float MIN_OR_INF = Float.NEGATIVE_INFINITY;
    private static final float MAX_OR_INF = Float.POSITIVE_INFINITY;
2394 
    // Declared abstract here; the implementation is not visible next to this
    // declaration.
    public @Override abstract long reduceLanesToLong(VectorOperators.Associative op);
    // Masked counterpart of reduceLanesToLong(Associative); also declared
    // abstract here.
    public @Override abstract long reduceLanesToLong(VectorOperators.Associative op,
                                                     VectorMask<Float> m);
2398 
2399     // Type specific accessors
2400 
    /**
     * Gets the lane element at lane index {@code i}.
     *
     * @param i the lane index
     * @return the lane element at lane index {@code i}
     * @throws IllegalArgumentException if the index is out of range
     * ({@code < 0 || >= length()})
     */
    public abstract float lane(int i);
2410 
    /**
     * Replaces the lane element of this vector at lane index {@code i} with
     * value {@code e}.
     *
     * This is a cross-lane operation and behaves as if it returns the result
     * of blending this vector with an input vector that is the result of
     * broadcasting {@code e} and a mask that has only one lane set at lane
     * index {@code i}.
     *
     * @param i the lane index of the lane element to be replaced
     * @param e the value to be placed
     * @return the result of replacing the lane element of this vector at lane
     * index {@code i} with value {@code e}.
     * @throws IllegalArgumentException if the index is out of range
     * ({@code < 0 || >= length()})
     */
    public abstract FloatVector withLane(int i, float e);
2428 
2429     // Memory load operations
2430 
2431     /**
2432      * Returns an array of type {@code float[]}
2433      * containing all the lane values.
2434      * The array length is the same as the vector length.
2435      * The array elements are stored in lane order.
2436      * <p>
2437      * This method behaves as if it stores
2438      * this vector into an allocated array
2439      * (using {@link #intoArray(float[], int) intoArray})
2440      * and returns the array as follows:
2441      * <pre>{@code
2442      *   float[] a = new float[this.length()];
2443      *   this.intoArray(a, 0);
2444      *   return a;
2445      * }</pre>
2446      *
2447      * @return an array containing the lane values of this vector
2448      */
2449     @ForceInline
2450     @Override
2451     public final float[] toArray() {
2452         float[] a = new float[vspecies().laneCount()];
2453         intoArray(a, 0);
2454         return a;
2455     }
2456 
2457     /** {@inheritDoc} <!--workaround-->
2458      */
2459     @ForceInline
2460     @Override
2461     public final int[] toIntArray() {
2462         float[] a = toArray();
2463         int[] res = new int[a.length];
2464         for (int i = 0; i < a.length; i++) {
2465             float e = a[i];
2466             res[i] = (int) FloatSpecies.toIntegralChecked(e, true);
2467         }
2468         return res;
2469     }
2470 
2471     /** {@inheritDoc} <!--workaround-->
2472      */
2473     @ForceInline
2474     @Override
2475     public final long[] toLongArray() {
2476         float[] a = toArray();
2477         long[] res = new long[a.length];
2478         for (int i = 0; i < a.length; i++) {
2479             float e = a[i];
2480             res[i] = FloatSpecies.toIntegralChecked(e, false);
2481         }
2482         return res;
2483     }
2484 
2485     /** {@inheritDoc} <!--workaround-->
2486      * @implNote
2487      * When this method is used on used on vectors
2488      * of type {@code FloatVector},
2489      * there will be no loss of precision.
2490      */
2491     @ForceInline
2492     @Override
2493     public final double[] toDoubleArray() {
2494         float[] a = toArray();
2495         double[] res = new double[a.length];
2496         for (int i = 0; i < a.length; i++) {
2497             res[i] = (double) a[i];
2498         }
2499         return res;
2500     }
2501 
2502     /**
2503      * Loads a vector from a byte array starting at an offset.
2504      * Bytes are composed into primitive lane elements according
2505      * to the specified byte order.
2506      * The vector is arranged into lanes according to
2507      * <a href="Vector.html#lane-order">memory ordering</a>.
2508      * <p>
2509      * This method behaves as if it returns the result of calling
2510      * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
2511      * fromByteBuffer()} as follows:
2512      * <pre>{@code
2513      * var bb = ByteBuffer.wrap(a);
2514      * var m = species.maskAll(true);
2515      * return fromByteBuffer(species, bb, offset, bo, m);
2516      * }</pre>
2517      *
2518      * @param species species of desired vector
2519      * @param a the byte array
2520      * @param offset the offset into the array
2521      * @param bo the intended byte order
2522      * @return a vector loaded from a byte array
2523      * @throws IndexOutOfBoundsException
2524      *         if {@code offset+N*ESIZE < 0}
2525      *         or {@code offset+(N+1)*ESIZE > a.length}
2526      *         for any lane {@code N} in the vector
2527      */
2528     @ForceInline
2529     public static
2530     FloatVector fromByteArray(VectorSpecies<Float> species,
2531                                        byte[] a, int offset,
2532                                        ByteOrder bo) {
2533         offset = checkFromIndexSize(offset, species.vectorByteSize(), a.length);
2534         FloatSpecies vsp = (FloatSpecies) species;
2535         return vsp.dummyVector().fromByteArray0(a, offset).maybeSwap(bo);
2536     }
2537 
2538     /**
2539      * Loads a vector from a byte array starting at an offset
2540      * and using a mask.
2541      * Lanes where the mask is unset are filled with the default
2542      * value of {@code float} (positive zero).
2543      * Bytes are composed into primitive lane elements according
2544      * to the specified byte order.
2545      * The vector is arranged into lanes according to
2546      * <a href="Vector.html#lane-order">memory ordering</a>.
2547      * <p>
2548      * This method behaves as if it returns the result of calling
2549      * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
2550      * fromByteBuffer()} as follows:
2551      * <pre>{@code
2552      * var bb = ByteBuffer.wrap(a);
2553      * return fromByteBuffer(species, bb, offset, bo, m);
2554      * }</pre>
2555      *
2556      * @param species species of desired vector
2557      * @param a the byte array
2558      * @param offset the offset into the array
2559      * @param bo the intended byte order
2560      * @param m the mask controlling lane selection
2561      * @return a vector loaded from a byte array
2562      * @throws IndexOutOfBoundsException
2563      *         if {@code offset+N*ESIZE < 0}
2564      *         or {@code offset+(N+1)*ESIZE > a.length}
2565      *         for any lane {@code N} in the vector
2566      *         where the mask is set
2567      */
2568     @ForceInline
2569     public static
2570     FloatVector fromByteArray(VectorSpecies<Float> species,
2571                                        byte[] a, int offset,
2572                                        ByteOrder bo,
2573                                        VectorMask<Float> m) {
2574         FloatSpecies vsp = (FloatSpecies) species;
2575         if (offset >= 0 && offset <= (a.length - species.vectorByteSize())) {
2576             FloatVector zero = vsp.zero();
2577             FloatVector v = zero.fromByteArray0(a, offset);
2578             return zero.blend(v.maybeSwap(bo), m);
2579         }
2580 
2581         // FIXME: optimize
2582         checkMaskFromIndexSize(offset, vsp, m, 4, a.length);
2583         ByteBuffer wb = wrapper(a, bo);
2584         return vsp.ldOp(wb, offset, (AbstractMask<Float>)m,
2585                    (wb_, o, i)  -> wb_.getFloat(o + i * 4));
2586     }
2587 
2588     /**
2589      * Loads a vector from an array of type {@code float[]}
2590      * starting at an offset.
2591      * For each vector lane, where {@code N} is the vector lane index, the
2592      * array element at index {@code offset + N} is placed into the
2593      * resulting vector at lane index {@code N}.
2594      *
2595      * @param species species of desired vector
2596      * @param a the array
2597      * @param offset the offset into the array
2598      * @return the vector loaded from an array
2599      * @throws IndexOutOfBoundsException
2600      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
2601      *         for any lane {@code N} in the vector
2602      */
2603     @ForceInline
2604     public static
2605     FloatVector fromArray(VectorSpecies<Float> species,
2606                                    float[] a, int offset) {
2607         offset = checkFromIndexSize(offset, species.length(), a.length);
2608         FloatSpecies vsp = (FloatSpecies) species;
2609         return vsp.dummyVector().fromArray0(a, offset);
2610     }
2611 
2612     /**
2613      * Loads a vector from an array of type {@code float[]}
2614      * starting at an offset and using a mask.
2615      * Lanes where the mask is unset are filled with the default
2616      * value of {@code float} (positive zero).
2617      * For each vector lane, where {@code N} is the vector lane index,
2618      * if the mask lane at index {@code N} is set then the array element at
2619      * index {@code offset + N} is placed into the resulting vector at lane index
2620      * {@code N}, otherwise the default element value is placed into the
2621      * resulting vector at lane index {@code N}.
2622      *
2623      * @param species species of desired vector
2624      * @param a the array
2625      * @param offset the offset into the array
2626      * @param m the mask controlling lane selection
2627      * @return the vector loaded from an array
2628      * @throws IndexOutOfBoundsException
2629      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
2630      *         for any lane {@code N} in the vector
2631      *         where the mask is set
2632      */
2633     @ForceInline
2634     public static
2635     FloatVector fromArray(VectorSpecies<Float> species,
2636                                    float[] a, int offset,
2637                                    VectorMask<Float> m) {
2638         FloatSpecies vsp = (FloatSpecies) species;
2639         if (offset >= 0 && offset <= (a.length - species.length())) {
2640             FloatVector zero = vsp.zero();
2641             return zero.blend(zero.fromArray0(a, offset), m);
2642         }
2643 
2644         // FIXME: optimize
2645         checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
2646         return vsp.vOp(m, i -> a[offset + i]);
2647     }
2648 
2649     /**
2650      * Gathers a new vector composed of elements from an array of type
2651      * {@code float[]},
2652      * using indexes obtained by adding a fixed {@code offset} to a
2653      * series of secondary offsets from an <em>index map</em>.
2654      * The index map is a contiguous sequence of {@code VLENGTH}
2655      * elements in a second array of {@code int}s, starting at a given
2656      * {@code mapOffset}.
2657      * <p>
2658      * For each vector lane, where {@code N} is the vector lane index,
2659      * the lane is loaded from the array
2660      * element {@code a[f(N)]}, where {@code f(N)} is the
2661      * index mapping expression
2662      * {@code offset + indexMap[mapOffset + N]]}.
2663      *
2664      * @param species species of desired vector
2665      * @param a the array
2666      * @param offset the offset into the array, may be negative if relative
2667      * indexes in the index map compensate to produce a value within the
2668      * array bounds
2669      * @param indexMap the index map
2670      * @param mapOffset the offset into the index map
2671      * @return the vector loaded from the indexed elements of the array
2672      * @throws IndexOutOfBoundsException
2673      *         if {@code mapOffset+N < 0}
2674      *         or if {@code mapOffset+N >= indexMap.length},
2675      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
2676      *         is an invalid index into {@code a},
2677      *         for any lane {@code N} in the vector
2678      * @see FloatVector#toIntArray()
2679      */
2680     @ForceInline
2681     public static
2682     FloatVector fromArray(VectorSpecies<Float> species,
2683                                    float[] a, int offset,
2684                                    int[] indexMap, int mapOffset) {
2685         FloatSpecies vsp = (FloatSpecies) species;
2686         IntVector.IntSpecies isp = IntVector.species(vsp.indexShape());
2687         Objects.requireNonNull(a);
2688         Objects.requireNonNull(indexMap);
2689         Class<? extends FloatVector> vectorType = vsp.vectorType();
2690 
2691         // Index vector: vix[0:n] = k -> offset + indexMap[mapOffset + k]
2692         IntVector vix = IntVector
2693             .fromArray(isp, indexMap, mapOffset)
2694             .add(offset);
2695 
2696         vix = VectorIntrinsics.checkIndex(vix, a.length);
2697 
2698         return VectorSupport.loadWithMap(
2699             vectorType, float.class, vsp.laneCount(),
2700             IntVector.species(vsp.indexShape()).vectorType(),
2701             a, ARRAY_BASE, vix,
2702             a, offset, indexMap, mapOffset, vsp,
2703             (float[] c, int idx, int[] iMap, int idy, FloatSpecies s) ->
2704             s.vOp(n -> c[idx + iMap[idy+n]]));
2705         }
2706 
2707     /**
2708      * Gathers a new vector composed of elements from an array of type
2709      * {@code float[]},
2710      * under the control of a mask, and
2711      * using indexes obtained by adding a fixed {@code offset} to a
2712      * series of secondary offsets from an <em>index map</em>.
2713      * The index map is a contiguous sequence of {@code VLENGTH}
2714      * elements in a second array of {@code int}s, starting at a given
2715      * {@code mapOffset}.
2716      * <p>
2717      * For each vector lane, where {@code N} is the vector lane index,
2718      * if the lane is set in the mask,
2719      * the lane is loaded from the array
2720      * element {@code a[f(N)]}, where {@code f(N)} is the
2721      * index mapping expression
2722      * {@code offset + indexMap[mapOffset + N]]}.
2723      * Unset lanes in the resulting vector are set to zero.
2724      *
2725      * @param species species of desired vector
2726      * @param a the array
2727      * @param offset the offset into the array, may be negative if relative
2728      * indexes in the index map compensate to produce a value within the
2729      * array bounds
2730      * @param indexMap the index map
2731      * @param mapOffset the offset into the index map
2732      * @param m the mask controlling lane selection
2733      * @return the vector loaded from the indexed elements of the array
2734      * @throws IndexOutOfBoundsException
2735      *         if {@code mapOffset+N < 0}
2736      *         or if {@code mapOffset+N >= indexMap.length},
2737      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
2738      *         is an invalid index into {@code a},
2739      *         for any lane {@code N} in the vector
2740      *         where the mask is set
2741      * @see FloatVector#toIntArray()
2742      */
2743     @ForceInline
2744     public static
2745     FloatVector fromArray(VectorSpecies<Float> species,
2746                                    float[] a, int offset,
2747                                    int[] indexMap, int mapOffset,
2748                                    VectorMask<Float> m) {
2749         if (m.allTrue()) {
2750             return fromArray(species, a, offset, indexMap, mapOffset);
2751         }
2752         else {
2753             // FIXME: Cannot vectorize yet, if there's a mask.
2754             FloatSpecies vsp = (FloatSpecies) species;
2755             return vsp.vOp(m, n -> a[offset + indexMap[mapOffset + n]]);
2756         }
2757     }
2758 
2759 
2760 
2761     /**
2762      * Loads a vector from a {@linkplain ByteBuffer byte buffer}
2763      * starting at an offset into the byte buffer.
2764      * Bytes are composed into primitive lane elements according
2765      * to the specified byte order.
2766      * The vector is arranged into lanes according to
2767      * <a href="Vector.html#lane-order">memory ordering</a>.
2768      * <p>
2769      * This method behaves as if it returns the result of calling
2770      * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
2771      * fromByteBuffer()} as follows:
2772      * <pre>{@code
2773      * var m = species.maskAll(true);
2774      * return fromByteBuffer(species, bb, offset, bo, m);
2775      * }</pre>
2776      *
2777      * @param species species of desired vector
2778      * @param bb the byte buffer
2779      * @param offset the offset into the byte buffer
2780      * @param bo the intended byte order
2781      * @return a vector loaded from a byte buffer
2782      * @throws IndexOutOfBoundsException
2783      *         if {@code offset+N*4 < 0}
2784      *         or {@code offset+N*4 >= bb.limit()}
2785      *         for any lane {@code N} in the vector
2786      */
2787     @ForceInline
2788     public static
2789     FloatVector fromByteBuffer(VectorSpecies<Float> species,
2790                                         ByteBuffer bb, int offset,
2791                                         ByteOrder bo) {
2792         offset = checkFromIndexSize(offset, species.vectorByteSize(), bb.limit());
2793         FloatSpecies vsp = (FloatSpecies) species;
2794         return vsp.dummyVector().fromByteBuffer0(bb, offset).maybeSwap(bo);
2795     }
2796 
2797     /**
2798      * Loads a vector from a {@linkplain ByteBuffer byte buffer}
2799      * starting at an offset into the byte buffer
2800      * and using a mask.
2801      * Lanes where the mask is unset are filled with the default
2802      * value of {@code float} (positive zero).
2803      * Bytes are composed into primitive lane elements according
2804      * to the specified byte order.
2805      * The vector is arranged into lanes according to
2806      * <a href="Vector.html#lane-order">memory ordering</a>.
2807      * <p>
2808      * The following pseudocode illustrates the behavior:
2809      * <pre>{@code
2810      * FloatBuffer eb = bb.duplicate()
2811      *     .position(offset)
2812      *     .order(bo).asFloatBuffer();
2813      * float[] ar = new float[species.length()];
2814      * for (int n = 0; n < ar.length; n++) {
2815      *     if (m.laneIsSet(n)) {
2816      *         ar[n] = eb.get(n);
2817      *     }
2818      * }
2819      * FloatVector r = FloatVector.fromArray(species, ar, 0);
2820      * }</pre>
2821      * @implNote
2822      * This operation is likely to be more efficient if
2823      * the specified byte order is the same as
2824      * {@linkplain ByteOrder#nativeOrder()
2825      * the platform native order},
2826      * since this method will not need to reorder
2827      * the bytes of lane values.
2828      *
2829      * @param species species of desired vector
2830      * @param bb the byte buffer
2831      * @param offset the offset into the byte buffer
2832      * @param bo the intended byte order
2833      * @param m the mask controlling lane selection
2834      * @return a vector loaded from a byte buffer
2835      * @throws IndexOutOfBoundsException
2836      *         if {@code offset+N*4 < 0}
2837      *         or {@code offset+N*4 >= bb.limit()}
2838      *         for any lane {@code N} in the vector
2839      *         where the mask is set
2840      */
2841     @ForceInline
2842     public static
2843     FloatVector fromByteBuffer(VectorSpecies<Float> species,
2844                                         ByteBuffer bb, int offset,
2845                                         ByteOrder bo,
2846                                         VectorMask<Float> m) {
2847         FloatSpecies vsp = (FloatSpecies) species;
2848         if (offset >= 0 && offset <= (bb.limit() - species.vectorByteSize())) {
2849             FloatVector zero = vsp.zero();
2850             FloatVector v = zero.fromByteBuffer0(bb, offset);
2851             return zero.blend(v.maybeSwap(bo), m);
2852         }
2853 
2854         // FIXME: optimize
2855         checkMaskFromIndexSize(offset, vsp, m, 4, bb.limit());
2856         ByteBuffer wb = wrapper(bb, bo);
2857         return vsp.ldOp(wb, offset, (AbstractMask<Float>)m,
2858                    (wb_, o, i)  -> wb_.getFloat(o + i * 4));
2859     }
2860 
2861     // Memory store operations
2862 
    /**
     * Stores this vector into an array of type {@code float[]}
     * starting at an offset.
     * <p>
     * For each vector lane, where {@code N} is the vector lane index,
     * the lane element at index {@code N} is stored into the array
     * element {@code a[offset+N]}.
     *
     * @param a the array, of type {@code float[]}
     * @param offset the offset into the array
     * @throws IndexOutOfBoundsException
     *         if {@code offset+N < 0} or {@code offset+N >= a.length}
     *         for any lane {@code N} in the vector
     */
    @ForceInline
    public final
    void intoArray(float[] a, int offset) {
        // Validate the range of length() elements starting at offset against a.length.
        offset = checkFromIndexSize(offset, length(), a.length);
        FloatSpecies vsp = vspecies();
        VectorSupport.store(
            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
            a, arrayAddress(a, offset),
            this,
            a, offset,
            // Lane-by-lane store expressed through stOp.
            // NOTE(review): presumably the fallback used when the store is
            // not intrinsified -- confirm against VectorSupport.store.
            (arr, off, v)
            -> v.stOp(arr, off,
                      (arr_, off_, i, e) -> arr_[off_ + i] = e));
    }
2891 
2892     /**
2893      * Stores this vector into an array of type {@code float[]}
2894      * starting at offset and using a mask.
2895      * <p>
2896      * For each vector lane, where {@code N} is the vector lane index,
2897      * the lane element at index {@code N} is stored into the array
2898      * element {@code a[offset+N]}.
2899      * If the mask lane at {@code N} is unset then the corresponding
2900      * array element {@code a[offset+N]} is left unchanged.
2901      * <p>
2902      * Array range checking is done for lanes where the mask is set.
2903      * Lanes where the mask is unset are not stored and do not need
2904      * to correspond to legitimate elements of {@code a}.
2905      * That is, unset lanes may correspond to array indexes less than
2906      * zero or beyond the end of the array.
2907      *
2908      * @param a the array, of type {@code float[]}
2909      * @param offset the offset into the array
2910      * @param m the mask controlling lane storage
2911      * @throws IndexOutOfBoundsException
2912      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
2913      *         for any lane {@code N} in the vector
2914      *         where the mask is set
2915      */
2916     @ForceInline
2917     public final
2918     void intoArray(float[] a, int offset,
2919                    VectorMask<Float> m) {
2920         if (m.allTrue()) {
2921             intoArray(a, offset);
2922         } else {
2923             // FIXME: optimize
2924             FloatSpecies vsp = vspecies();
2925             checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
2926             stOp(a, offset, m, (arr, off, i, v) -> arr[off+i] = v);
2927         }
2928     }
2929 
    /**
     * Scatters this vector into an array of type {@code float[]}
     * using indexes obtained by adding a fixed {@code offset} to a
     * series of secondary offsets from an <em>index map</em>.
     * The index map is a contiguous sequence of {@code VLENGTH}
     * elements in a second array of {@code int}s, starting at a given
     * {@code mapOffset}.
     * <p>
     * For each vector lane, where {@code N} is the vector lane index,
     * the lane element at index {@code N} is stored into the array
     * element {@code a[f(N)]}, where {@code f(N)} is the
     * index mapping expression
     * {@code offset + indexMap[mapOffset + N]}.
     *
     * @param a the array
     * @param offset an offset to combine with the index map offsets
     * @param indexMap the index map
     * @param mapOffset the offset into the index map
     * @throws IndexOutOfBoundsException
     *         if {@code mapOffset+N < 0}
     *         or if {@code mapOffset+N >= indexMap.length},
     *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
     *         is an invalid index into {@code a},
     *         for any lane {@code N} in the vector
     * @see FloatVector#toIntArray()
     */
    @ForceInline
    public final
    void intoArray(float[] a, int offset,
                   int[] indexMap, int mapOffset) {
        FloatSpecies vsp = vspecies();
        // Species of the int vector that carries the per-lane indexes.
        IntVector.IntSpecies isp = IntVector.species(vsp.indexShape());
        // Index vector: vix[0:n] = i -> offset + indexMap[mapOffset + i]
        IntVector vix = IntVector
            .fromArray(isp, indexMap, mapOffset)
            .add(offset);

        // Check every computed index against the bounds of a.
        vix = VectorIntrinsics.checkIndex(vix, a.length);

        VectorSupport.storeWithMap(
            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
            isp.vectorType(),
            a, arrayAddress(a, 0), vix,
            this,
            a, offset, indexMap, mapOffset,
            // Lane-by-lane scatter expressed through stOp.
            // NOTE(review): the inner lambda writes through the outer arr/off
            // rather than its own arr_/off_ parameters; presumably stOp passes
            // the same values through, so the result is the same -- confirm.
            (arr, off, v, map, mo)
            -> v.stOp(arr, off,
                      (arr_, off_, i, e) -> {
                          int j = map[mo + i];
                          arr[off + j] = e;
                      }));
    }
2982 
2983     /**
2984      * Scatters this vector into an array of type {@code float[]},
2985      * under the control of a mask, and
2986      * using indexes obtained by adding a fixed {@code offset} to a
2987      * series of secondary offsets from an <em>index map</em>.
2988      * The index map is a contiguous sequence of {@code VLENGTH}
2989      * elements in a second array of {@code int}s, starting at a given
2990      * {@code mapOffset}.
2991      * <p>
2992      * For each vector lane, where {@code N} is the vector lane index,
2993      * if the mask lane at index {@code N} is set then
2994      * the lane element at index {@code N} is stored into the array
2995      * element {@code a[f(N)]}, where {@code f(N)} is the
2996      * index mapping expression
2997      * {@code offset + indexMap[mapOffset + N]]}.
2998      *
2999      * @param a the array
3000      * @param offset an offset to combine with the index map offsets
3001      * @param indexMap the index map
3002      * @param mapOffset the offset into the index map
3003      * @param m the mask
3004      * @throws IndexOutOfBoundsException
3005      *         if {@code mapOffset+N < 0}
3006      *         or if {@code mapOffset+N >= indexMap.length},
3007      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
3008      *         is an invalid index into {@code a},
3009      *         for any lane {@code N} in the vector
3010      *         where the mask is set
3011      * @see FloatVector#toIntArray()
3012      */
3013     @ForceInline
3014     public final
3015     void intoArray(float[] a, int offset,
3016                    int[] indexMap, int mapOffset,
3017                    VectorMask<Float> m) {
3018         if (m.allTrue()) {
3019             intoArray(a, offset, indexMap, mapOffset);
3020         }
3021         else {
3022             // FIXME: Cannot vectorize yet, if there's a mask.
3023             stOp(a, offset, m,
3024                  (arr, off, i, e) -> {
3025                      int j = indexMap[mapOffset + i];
3026                      arr[off + j] = e;
3027                  });
3028         }
3029     }
3030 
3031 
3032 
3033     /**
3034      * {@inheritDoc} <!--workaround-->
3035      */
3036     @Override
3037     @ForceInline
3038     public final
3039     void intoByteArray(byte[] a, int offset,
3040                        ByteOrder bo) {
3041         offset = checkFromIndexSize(offset, byteSize(), a.length);
3042         maybeSwap(bo).intoByteArray0(a, offset);
3043     }
3044 
3045     /**
3046      * {@inheritDoc} <!--workaround-->
3047      */
3048     @Override
3049     @ForceInline
3050     public final
3051     void intoByteArray(byte[] a, int offset,
3052                        ByteOrder bo,
3053                        VectorMask<Float> m) {
3054         if (m.allTrue()) {
3055             intoByteArray(a, offset, bo);
3056         } else {
3057             // FIXME: optimize
3058             FloatSpecies vsp = vspecies();
3059             checkMaskFromIndexSize(offset, vsp, m, 4, a.length);
3060             ByteBuffer wb = wrapper(a, bo);
3061             this.stOp(wb, offset, m,
3062                     (wb_, o, i, e) -> wb_.putFloat(o + i * 4, e));
3063         }
3064     }
3065 
3066     /**
3067      * {@inheritDoc} <!--workaround-->
3068      */
3069     @Override
3070     @ForceInline
3071     public final
3072     void intoByteBuffer(ByteBuffer bb, int offset,
3073                         ByteOrder bo) {
3074         if (bb.isReadOnly()) {
3075             throw new ReadOnlyBufferException();
3076         }
3077         offset = checkFromIndexSize(offset, byteSize(), bb.limit());
3078         maybeSwap(bo).intoByteBuffer0(bb, offset);
3079     }
3080 
3081     /**
3082      * {@inheritDoc} <!--workaround-->
3083      */
3084     @Override
3085     @ForceInline
3086     public final
3087     void intoByteBuffer(ByteBuffer bb, int offset,
3088                         ByteOrder bo,
3089                         VectorMask<Float> m) {
3090         if (m.allTrue()) {
3091             intoByteBuffer(bb, offset, bo);
3092         } else {
3093             // FIXME: optimize
3094             if (bb.isReadOnly()) {
3095                 throw new ReadOnlyBufferException();
3096             }
3097             FloatSpecies vsp = vspecies();
3098             checkMaskFromIndexSize(offset, vsp, m, 4, bb.limit());
3099             ByteBuffer wb = wrapper(bb, bo);
3100             this.stOp(wb, offset, m,
3101                     (wb_, o, i, e) -> wb_.putFloat(o + i * 4, e));
3102         }
3103     }
3104 
3105     // ================================================
3106 
3107     // Low-level memory operations.
3108     //
3109     // Note that all of these operations *must* inline into a context
3110     // where the exact species of the involved vector is a
3111     // compile-time constant.  Otherwise, the intrinsic generation
3112     // will fail and performance will suffer.
3113     //
3114     // In many cases this is achieved by re-deriving a version of the
3115     // method in each concrete subclass (per species).  The re-derived
3116     // method simply calls one of these generic methods, with exact
3117     // parameters for the controlling metadata, which is either a
3118     // typed vector or constant species instance.
3119 
3120     // Unchecked loading operations in native byte order.
3121     // Caller is responsible for applying index checks, masking, and
3122     // byte swapping.
3123 
    // Unchecked load from a float[]; left abstract here.
    // NOTE(review): presumably each concrete per-species subclass implements
    // this by calling fromArray0Template, per the section note above -- confirm.
    /*package-private*/
    abstract
    FloatVector fromArray0(float[] a, int offset);

    // Generic body for fromArray0: passes this vector's species metadata, the
    // array and arrayAddress(a, offset) to VectorSupport.load. No index check
    // is performed here.
    @ForceInline
    final
    FloatVector fromArray0Template(float[] a, int offset) {
        FloatSpecies vsp = vspecies();
        return VectorSupport.load(
            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
            a, arrayAddress(a, offset),
            a, offset, vsp,
            // Lane-by-lane load expressed through ldOp: lane i reads arr_[off_ + i].
            (arr, off, s) -> s.ldOp(arr, off,
                                    (arr_, off_, i) -> arr_[off_ + i]));
    }
3138 
3139 
3140 
    // Unchecked load from a byte[]; an abstract override left to subclasses.
    @Override
    abstract
    FloatVector fromByteArray0(byte[] a, int offset);

    // Generic body for fromByteArray0: passes this vector's species metadata,
    // the array and byteArrayAddress(a, offset) to VectorSupport.load.
    // No index check or byte swapping is performed here.
    @ForceInline
    final
    FloatVector fromByteArray0Template(byte[] a, int offset) {
        FloatSpecies vsp = vspecies();
        return VectorSupport.load(
            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
            a, byteArrayAddress(a, offset),
            a, offset, vsp,
            // Lane-by-lane load through a NATIVE_ENDIAN wrapper of the array:
            // lane i reads a float at byte position o + i * 4.
            (arr, off, s) -> {
                ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
                return s.ldOp(wb, off,
                        (wb_, o, i) -> wb_.getFloat(o + i * 4));
            });
    }
3158 
    // Unchecked load from a ByteBuffer; left abstract here.
    abstract
    FloatVector fromByteBuffer0(ByteBuffer bb, int offset);

    // Generic body for fromByteBuffer0: passes this vector's species metadata,
    // the buffer and the offset to ScopedMemoryAccess.loadFromByteBuffer.
    // No index check or byte swapping is performed here.
    @ForceInline
    final
    FloatVector fromByteBuffer0Template(ByteBuffer bb, int offset) {
        FloatSpecies vsp = vspecies();
        return ScopedMemoryAccess.loadFromByteBuffer(
                vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
                bb, offset, vsp,
                // Lane-by-lane load through a NATIVE_ENDIAN wrapper of the buffer:
                // lane i reads a float at byte position o + i * 4.
                (buf, off, s) -> {
                    ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
                    return s.ldOp(wb, off,
                            (wb_, o, i) -> wb_.getFloat(o + i * 4));
                });
    }
3174 
3175     // Unchecked storing operations in native byte order.
3176     // Caller is responsible for applying index checks, masking, and
3177     // byte swapping.
3178 
3179     abstract
3180     void intoArray0(float[] a, int offset);
3181     @ForceInline
3182     final
3183     void intoArray0Template(float[] a, int offset) {
3184         FloatSpecies vsp = vspecies();
3185         VectorSupport.store(
3186             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3187             a, arrayAddress(a, offset),
3188             this, a, offset,
3189             (arr, off, v)
3190             -> v.stOp(arr, off,
3191                       (arr_, off_, i, e) -> arr_[off_+i] = e));
3192     }
3193 
3194     abstract
3195     void intoByteArray0(byte[] a, int offset);
3196     @ForceInline
3197     final
3198     void intoByteArray0Template(byte[] a, int offset) {
3199         FloatSpecies vsp = vspecies();
3200         VectorSupport.store(
3201             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3202             a, byteArrayAddress(a, offset),
3203             this, a, offset,
3204             (arr, off, v) -> {
3205                 ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
3206                 v.stOp(wb, off,
3207                         (tb_, o, i, e) -> tb_.putFloat(o + i * 4, e));
3208             });
3209     }
3210 
3211     @ForceInline
3212     final
3213     void intoByteBuffer0(ByteBuffer bb, int offset) {
3214         FloatSpecies vsp = vspecies();
3215         ScopedMemoryAccess.storeIntoByteBuffer(
3216                 vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3217                 this, bb, offset,
3218                 (buf, off, v) -> {
3219                     ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
3220                     v.stOp(wb, off,
3221                             (wb_, o, i, e) -> wb_.putFloat(o + i * 4, e));
3222                 });
3223     }
3224 
3225     // End of low-level memory operations.
3226 
3227     private static
3228     void checkMaskFromIndexSize(int offset,
3229                                 FloatSpecies vsp,
3230                                 VectorMask<Float> m,
3231                                 int scale,
3232                                 int limit) {
3233         ((AbstractMask<Float>)m)
3234             .checkIndexByLane(offset, limit, vsp.iota(), scale);
3235     }
3236 
3237     @ForceInline
3238     private void conditionalStoreNYI(int offset,
3239                                      FloatSpecies vsp,
3240                                      VectorMask<Float> m,
3241                                      int scale,
3242                                      int limit) {
3243         if (offset < 0 || offset + vsp.laneCount() * scale > limit) {
3244             String msg =
3245                 String.format("unimplemented: store @%d in [0..%d), %s in %s",
3246                               offset, limit, m, vsp);
3247             throw new AssertionError(msg);
3248         }
3249     }
3250 
3251     /*package-private*/
3252     @Override
3253     @ForceInline
3254     final
3255     FloatVector maybeSwap(ByteOrder bo) {
3256         if (bo != NATIVE_ENDIAN) {
3257             return this.reinterpretAsBytes()
3258                 .rearrange(swapBytesShuffle())
3259                 .reinterpretAsFloats();
3260         }
3261         return this;
3262     }
3263 
3264     static final int ARRAY_SHIFT =
3265         31 - Integer.numberOfLeadingZeros(Unsafe.ARRAY_FLOAT_INDEX_SCALE);
3266     static final long ARRAY_BASE =
3267         Unsafe.ARRAY_FLOAT_BASE_OFFSET;
3268 
3269     @ForceInline
3270     static long arrayAddress(float[] a, int index) {
3271         return ARRAY_BASE + (((long)index) << ARRAY_SHIFT);
3272     }
3273 
3274 
3275 
3276     @ForceInline
3277     static long byteArrayAddress(byte[] a, int index) {
3278         return Unsafe.ARRAY_BYTE_BASE_OFFSET + index;
3279     }
3280 
3281     // ================================================
3282 
3283     /// Reinterpreting view methods:
3284     //   lanewise reinterpret: viewAsXVector()
3285     //   keep shape, redraw lanes: reinterpretAsEs()
3286 
3287     /**
3288      * {@inheritDoc} <!--workaround-->
3289      */
3290     @ForceInline
3291     @Override
3292     public final ByteVector reinterpretAsBytes() {
3293          // Going to ByteVector, pay close attention to byte order.
3294          assert(REGISTER_ENDIAN == ByteOrder.LITTLE_ENDIAN);
3295          return asByteVectorRaw();
3296          //return asByteVectorRaw().rearrange(swapBytesShuffle());
3297     }
3298 
3299     /**
3300      * {@inheritDoc} <!--workaround-->
3301      */
3302     @ForceInline
3303     @Override
3304     public final IntVector viewAsIntegralLanes() {
3305         LaneType ilt = LaneType.FLOAT.asIntegral();
3306         return (IntVector) asVectorRaw(ilt);
3307     }
3308 
3309     /**
3310      * {@inheritDoc} <!--workaround-->
3311      */
3312     @ForceInline
3313     @Override
3314     public final
3315     FloatVector
3316     viewAsFloatingLanes() {
3317         return this;
3318     }
3319 
3320     // ================================================
3321 
3322     /// Object methods: toString, equals, hashCode
3323     //
    // Object methods are defined as if the corresponding Arrays method
    // (Arrays.toString, etc.) were applied to the array of elements.
    // Two equal vectors are required to have equal species and equal
    // lane values.
3327 
3328     /**
3329      * Returns a string representation of this vector, of the form
3330      * {@code "[0,1,2...]"}, reporting the lane values of this vector,
3331      * in lane order.
3332      *
3333      * The string is produced as if by a call to {@link
3334      * java.util.Arrays#toString(float[]) Arrays.toString()},
3335      * as appropriate to the {@code float} array returned by
3336      * {@link #toArray this.toArray()}.
3337      *
3338      * @return a string of the form {@code "[0,1,2...]"}
3339      * reporting the lane values of this vector
3340      */
3341     @Override
3342     @ForceInline
3343     public final
3344     String toString() {
3345         // now that toArray is strongly typed, we can define this
3346         return Arrays.toString(toArray());
3347     }
3348 
3349     /**
3350      * {@inheritDoc} <!--workaround-->
3351      */
3352     @Override
3353     @ForceInline
3354     public final
3355     boolean equals(Object obj) {
3356         if (obj instanceof Vector) {
3357             Vector<?> that = (Vector<?>) obj;
3358             if (this.species().equals(that.species())) {
3359                 return this.eq(that.check(this.species())).allTrue();
3360             }
3361         }
3362         return false;
3363     }
3364 
3365     /**
3366      * {@inheritDoc} <!--workaround-->
3367      */
3368     @Override
3369     @ForceInline
3370     public final
3371     int hashCode() {
3372         // now that toArray is strongly typed, we can define this
3373         return Objects.hash(species(), Arrays.hashCode(toArray()));
3374     }
3375 
3376     // ================================================
3377 
3378     // Species
3379 
3380     /**
3381      * Class representing {@link FloatVector}'s of the same {@link VectorShape VectorShape}.
3382      */
3383     /*package-private*/
3384     static final class FloatSpecies extends AbstractSpecies<Float> {
3385         private FloatSpecies(VectorShape shape,
3386                 Class<? extends FloatVector> vectorType,
3387                 Class<? extends AbstractMask<Float>> maskType,
3388                 Function<Object, FloatVector> vectorFactory) {
3389             super(shape, LaneType.of(float.class),
3390                   vectorType, maskType,
3391                   vectorFactory);
3392             assert(this.elementSize() == Float.SIZE);
3393         }
3394 
3395         // Specializing overrides:
3396 
3397         @Override
3398         @ForceInline
3399         public final Class<Float> elementType() {
3400             return float.class;
3401         }
3402 
3403         @Override
3404         @ForceInline
3405         final Class<Float> genericElementType() {
3406             return Float.class;
3407         }
3408 
3409         @SuppressWarnings("unchecked")
3410         @Override
3411         @ForceInline
3412         public final Class<? extends FloatVector> vectorType() {
3413             return (Class<? extends FloatVector>) vectorType;
3414         }
3415 
3416         @Override
3417         @ForceInline
3418         public final long checkValue(long e) {
3419             longToElementBits(e);  // only for exception
3420             return e;
3421         }
3422 
3423         /*package-private*/
3424         @Override
3425         @ForceInline
3426         final FloatVector broadcastBits(long bits) {
3427             return (FloatVector)
3428                 VectorSupport.broadcastCoerced(
3429                     vectorType, float.class, laneCount,
3430                     bits, this,
3431                     (bits_, s_) -> s_.rvOp(i -> bits_));
3432         }
3433 
3434         /*package-private*/
3435         @ForceInline
3436         final FloatVector broadcast(float e) {
3437             return broadcastBits(toBits(e));
3438         }
3439 
3440         @Override
3441         @ForceInline
3442         public final FloatVector broadcast(long e) {
3443             return broadcastBits(longToElementBits(e));
3444         }
3445 
3446         /*package-private*/
3447         final @Override
3448         @ForceInline
3449         long longToElementBits(long value) {
3450             // Do the conversion, and then test it for failure.
3451             float e = (float) value;
3452             if ((long) e != value) {
3453                 throw badElementBits(value, e);
3454             }
3455             return toBits(e);
3456         }
3457 
3458         /*package-private*/
3459         @ForceInline
3460         static long toIntegralChecked(float e, boolean convertToInt) {
3461             long value = convertToInt ? (int) e : (long) e;
3462             if ((float) value != e) {
3463                 throw badArrayBits(e, convertToInt, value);
3464             }
3465             return value;
3466         }
3467 
3468         /* this non-public one is for internal conversions */
3469         @Override
3470         @ForceInline
3471         final FloatVector fromIntValues(int[] values) {
3472             VectorIntrinsics.requireLength(values.length, laneCount);
3473             float[] va = new float[laneCount()];
3474             for (int i = 0; i < va.length; i++) {
3475                 int lv = values[i];
3476                 float v = (float) lv;
3477                 va[i] = v;
3478                 if ((int)v != lv) {
3479                     throw badElementBits(lv, v);
3480                 }
3481             }
3482             return dummyVector().fromArray0(va, 0);
3483         }
3484 
3485         // Virtual constructors
3486 
3487         @ForceInline
3488         @Override final
3489         public FloatVector fromArray(Object a, int offset) {
3490             // User entry point:  Be careful with inputs.
3491             return FloatVector
3492                 .fromArray(this, (float[]) a, offset);
3493         }
3494 
3495         @ForceInline
3496         @Override final
3497         FloatVector dummyVector() {
3498             return (FloatVector) super.dummyVector();
3499         }
3500 
3501         /*package-private*/
3502         final @Override
3503         @ForceInline
3504         FloatVector rvOp(RVOp f) {
3505             float[] res = new float[laneCount()];
3506             for (int i = 0; i < res.length; i++) {
3507                 int bits = (int) f.apply(i);
3508                 res[i] = fromBits(bits);
3509             }
3510             return dummyVector().vectorFactory(res);
3511         }
3512 
3513         FloatVector vOp(FVOp f) {
3514             float[] res = new float[laneCount()];
3515             for (int i = 0; i < res.length; i++) {
3516                 res[i] = f.apply(i);
3517             }
3518             return dummyVector().vectorFactory(res);
3519         }
3520 
3521         FloatVector vOp(VectorMask<Float> m, FVOp f) {
3522             float[] res = new float[laneCount()];
3523             boolean[] mbits = ((AbstractMask<Float>)m).getBits();
3524             for (int i = 0; i < res.length; i++) {
3525                 if (mbits[i]) {
3526                     res[i] = f.apply(i);
3527                 }
3528             }
3529             return dummyVector().vectorFactory(res);
3530         }
3531 
3532         /*package-private*/
3533         @ForceInline
3534         <M> FloatVector ldOp(M memory, int offset,
3535                                       FLdOp<M> f) {
3536             return dummyVector().ldOp(memory, offset, f);
3537         }
3538 
3539         /*package-private*/
3540         @ForceInline
3541         <M> FloatVector ldOp(M memory, int offset,
3542                                       AbstractMask<Float> m,
3543                                       FLdOp<M> f) {
3544             return dummyVector().ldOp(memory, offset, m, f);
3545         }
3546 
3547         /*package-private*/
3548         @ForceInline
3549         <M> void stOp(M memory, int offset, FStOp<M> f) {
3550             dummyVector().stOp(memory, offset, f);
3551         }
3552 
3553         /*package-private*/
3554         @ForceInline
3555         <M> void stOp(M memory, int offset,
3556                       AbstractMask<Float> m,
3557                       FStOp<M> f) {
3558             dummyVector().stOp(memory, offset, m, f);
3559         }
3560 
3561         // N.B. Make sure these constant vectors and
3562         // masks load up correctly into registers.
3563         //
3564         // Also, see if we can avoid all that switching.
3565         // Could we cache both vectors and both masks in
3566         // this species object?
3567 
3568         // Zero and iota vector access
3569         @Override
3570         @ForceInline
3571         public final FloatVector zero() {
3572             if ((Class<?>) vectorType() == FloatMaxVector.class)
3573                 return FloatMaxVector.ZERO;
3574             switch (vectorBitSize()) {
3575                 case 64: return Float64Vector.ZERO;
3576                 case 128: return Float128Vector.ZERO;
3577                 case 256: return Float256Vector.ZERO;
3578                 case 512: return Float512Vector.ZERO;
3579             }
3580             throw new AssertionError();
3581         }
3582 
3583         @Override
3584         @ForceInline
3585         public final FloatVector iota() {
3586             if ((Class<?>) vectorType() == FloatMaxVector.class)
3587                 return FloatMaxVector.IOTA;
3588             switch (vectorBitSize()) {
3589                 case 64: return Float64Vector.IOTA;
3590                 case 128: return Float128Vector.IOTA;
3591                 case 256: return Float256Vector.IOTA;
3592                 case 512: return Float512Vector.IOTA;
3593             }
3594             throw new AssertionError();
3595         }
3596 
3597         // Mask access
3598         @Override
3599         @ForceInline
3600         public final VectorMask<Float> maskAll(boolean bit) {
3601             if ((Class<?>) vectorType() == FloatMaxVector.class)
3602                 return FloatMaxVector.FloatMaxMask.maskAll(bit);
3603             switch (vectorBitSize()) {
3604                 case 64: return Float64Vector.Float64Mask.maskAll(bit);
3605                 case 128: return Float128Vector.Float128Mask.maskAll(bit);
3606                 case 256: return Float256Vector.Float256Mask.maskAll(bit);
3607                 case 512: return Float512Vector.Float512Mask.maskAll(bit);
3608             }
3609             throw new AssertionError();
3610         }
3611     }
3612 
3613     /**
3614      * Finds a species for an element type of {@code float} and shape.
3615      *
3616      * @param s the shape
3617      * @return a species for an element type of {@code float} and shape
3618      * @throws IllegalArgumentException if no such species exists for the shape
3619      */
3620     static FloatSpecies species(VectorShape s) {
3621         Objects.requireNonNull(s);
3622         switch (s) {
3623             case S_64_BIT: return (FloatSpecies) SPECIES_64;
3624             case S_128_BIT: return (FloatSpecies) SPECIES_128;
3625             case S_256_BIT: return (FloatSpecies) SPECIES_256;
3626             case S_512_BIT: return (FloatSpecies) SPECIES_512;
3627             case S_Max_BIT: return (FloatSpecies) SPECIES_MAX;
3628             default: throw new IllegalArgumentException("Bad shape: " + s);
3629         }
3630     }
3631 
3632     /** Species representing {@link FloatVector}s of {@link VectorShape#S_64_BIT VectorShape.S_64_BIT}. */
3633     public static final VectorSpecies<Float> SPECIES_64
3634         = new FloatSpecies(VectorShape.S_64_BIT,
3635                             Float64Vector.class,
3636                             Float64Vector.Float64Mask.class,
3637                             Float64Vector::new);
3638 
3639     /** Species representing {@link FloatVector}s of {@link VectorShape#S_128_BIT VectorShape.S_128_BIT}. */
3640     public static final VectorSpecies<Float> SPECIES_128
3641         = new FloatSpecies(VectorShape.S_128_BIT,
3642                             Float128Vector.class,
3643                             Float128Vector.Float128Mask.class,
3644                             Float128Vector::new);
3645 
3646     /** Species representing {@link FloatVector}s of {@link VectorShape#S_256_BIT VectorShape.S_256_BIT}. */
3647     public static final VectorSpecies<Float> SPECIES_256
3648         = new FloatSpecies(VectorShape.S_256_BIT,
3649                             Float256Vector.class,
3650                             Float256Vector.Float256Mask.class,
3651                             Float256Vector::new);
3652 
3653     /** Species representing {@link FloatVector}s of {@link VectorShape#S_512_BIT VectorShape.S_512_BIT}. */
3654     public static final VectorSpecies<Float> SPECIES_512
3655         = new FloatSpecies(VectorShape.S_512_BIT,
3656                             Float512Vector.class,
3657                             Float512Vector.Float512Mask.class,
3658                             Float512Vector::new);
3659 
3660     /** Species representing {@link FloatVector}s of {@link VectorShape#S_Max_BIT VectorShape.S_Max_BIT}. */
3661     public static final VectorSpecies<Float> SPECIES_MAX
3662         = new FloatSpecies(VectorShape.S_Max_BIT,
3663                             FloatMaxVector.class,
3664                             FloatMaxVector.FloatMaxMask.class,
3665                             FloatMaxVector::new);
3666 
3667     /**
3668      * Preferred species for {@link FloatVector}s.
3669      * A preferred species is a species of maximal bit-size for the platform.
3670      */
3671     public static final VectorSpecies<Float> SPECIES_PREFERRED
3672         = (FloatSpecies) VectorSpecies.ofPreferred(float.class);
3673 }