1 /*
   2  * Copyright (c) 2017, 2021, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 package jdk.incubator.vector;
  26 
  27 import java.nio.ByteBuffer;
  28 import java.nio.ByteOrder;
  29 import java.util.Arrays;
  30 
  31 /**
  32  * A
  33  *
  34  * <!-- The following paragraphs are shared verbatim
  35  *   -- between Vector.java and package-info.java -->
  36  * sequence of a fixed number of <em>lanes</em>,
  37  * all of some fixed
  38  * {@linkplain Vector#elementType() <em>element type</em>}
  39  * such as {@code byte}, {@code long}, or {@code float}.
  40  * Each lane contains an independent value of the element type.
  41  * Operations on vectors are typically
  42  * <a href="Vector.html#lane-wise"><em>lane-wise</em></a>,
  43  * distributing some scalar operator (such as
  44  * {@linkplain Vector#add(Vector) addition})
  45  * across the lanes of the participating vectors,
  46  * usually generating a vector result whose lanes contain the various
  47  * scalar results.  When run on a supporting platform, lane-wise
  48  * operations can be executed in parallel by the hardware.  This style
  49  * of parallelism is called <em>Single Instruction Multiple Data</em>
  50  * (SIMD) parallelism.
  51  *
  52  * <p> In the SIMD style of programming, most of the operations within
  53  * a vector lane are unconditional, but the effect of conditional
  54  * execution may be achieved using
  55  * <a href="Vector.html#masking"><em>masked operations</em></a>
  56  * such as {@link Vector#blend(Vector,VectorMask) blend()},
  57  * under the control of an associated {@link VectorMask}.
  58  * Data motion other than strictly lane-wise flow is achieved using
  59  * <a href="Vector.html#cross-lane"><em>cross-lane</em></a>
  60  * operations, often under the control of an associated
  61  * {@link VectorShuffle}.
  62  * Lane data and/or whole vectors can be reformatted using various
  63  * kinds of lane-wise
  64  * {@linkplain Vector#convert(VectorOperators.Conversion,int) conversions},
  65  * and byte-wise reformatting
  66  * {@linkplain Vector#reinterpretShape(VectorSpecies,int) reinterpretations},
  67  * often under the control of a reflective {@link VectorSpecies}
  68  * object which selects an alternative vector format different
  69  * from that of the input vector.
  70  *
  71  * <p> {@code Vector<E>} declares a set of vector operations (methods)
  72  * that are common to all element types.  These common operations
  73  * include generic access to lane values, data selection and movement,
  74  * reformatting, and certain arithmetic and logical operations (such as addition
  75  * or comparison) that are common to all primitive types.
  76  *
  77  * <p> <a href="Vector.html#subtypes">Public subtypes of {@code Vector}</a>
  78  * correspond to specific
  79  * element types.  These declare further operations that are specific
  80  * to that element type, including unboxed access to lane values,
  81  * bitwise operations on values of integral element types, or
  82  * transcendental operations on values of floating point element
  83  * types.
  84  *
  85  * <p> Some lane-wise operations, such as the {@code add} operator, are defined as
  86  * a full-service named operation, where a corresponding method on {@code Vector}
  87  * comes in masked and unmasked overloadings, and (in subclasses) also comes in
  88  * covariant overrides (returning the subclass) and additional scalar-broadcast
  89  * overloadings (both masked and unmasked).
  90  *
  91  * Other lane-wise operations, such as the {@code min} operator, are defined as a
  92  * partially serviced (not a full-service) named operation, where a corresponding
  93  * method on {@code Vector} and/or a subclass provide some but not all possible
  94  * overloadings and overrides (commonly the unmasked variant with scalar-broadcast
  95  * overloadings).
  96  *
  97  * Finally, all lane-wise operations (those named as previously described,
  98  * or otherwise unnamed method-wise) have a corresponding
  99  * {@link VectorOperators.Operator operator token}
 100  * declared as a static constant on {@link VectorOperators}.
 101  * Each operator token defines a symbolic Java expression for the operation,
 102  * such as {@code a + b} for the
 103  * {@link VectorOperators#ADD ADD} operator token.
 104  * General lane-wise operation-token accepting methods, such as for a
 105  * {@linkplain Vector#lanewise(VectorOperators.Unary) unary lane-wise}
 106  * operation, are provided on {@code Vector} and come in the same variants as
 107  * a full-service named operation.
 108  *
 109  * <p>This package contains a public subtype of {@link Vector}
 110  * corresponding to each supported element type:
 111  * {@link ByteVector}, {@link ShortVector},
 112  * {@link IntVector}, {@link LongVector},
 113  * {@link FloatVector}, and {@link DoubleVector}.
 114  *
 115  * <!-- The preceding paragraphs are shared verbatim
 116  *   -- between Vector.java and package-info.java -->
 117  *
 118  * <p><a id="ETYPE"></a> The {@linkplain #elementType element type} of a vector,
 119  * referred to as {@code ETYPE}, is one of the primitive types
 120  * {@code byte}, {@code short}, {@code int}, {@code long}, {@code
 121  * float}, or {@code double}.
 122  *
 123  * <p> The type {@code E} in {@code Vector<E>} is the <em>boxed</em> version
 124  * of {@code ETYPE}. For example, in the type {@code Vector<Integer>}, the {@code E}
 125  * parameter is {@code Integer} and the {@code ETYPE} is {@code int}.  In such a
 126  * vector, each lane carries a primitive {@code int} value.  This pattern continues
 127  * for the other primitive types as well. (See also sections {@jls 5.1.7} and
 128  * {@jls 5.1.8} of <cite>The Java Language Specification</cite>.)
 129  *
 130  * <p><a id="VLENGTH"></a> The {@linkplain #length() length} of a vector
 131  * is the lane count, the number of lanes it contains.
 132  *
 133  * This number is also called {@code VLENGTH} when the context makes
 134  * clear which vector it belongs to.  Each vector has its own fixed
 135  * {@code VLENGTH} but different instances of vectors may have
 136  * different lengths.  {@code VLENGTH} is an important number, because
 137  * it estimates the SIMD performance gain of a single vector operation
 138  * as compared to scalar execution of the {@code VLENGTH} scalar
 139  * operators which underlie the vector operation.
 140  *
 141  * <h2><a id="species"></a>Shapes and species</h2>
 142  *
 143  * The information capacity of a vector is determined by its
 144  * {@linkplain #shape() <em>vector shape</em>}, also called its
 145  * {@code VSHAPE}.  Each possible {@code VSHAPE} is represented by
 146  * a member of the {@link VectorShape} enumeration, and represents
 147  * an implementation format shared in common by all vectors of
 148  * that shape.  Thus, the {@linkplain #bitSize() size in bits} of
 149  * a vector is determined by appealing to its vector shape.
 150  *
 151  * <p> Some Java platforms give special support to only one shape,
 152  * while others support several.  A typical platform is not likely
 153  * to support all the shapes described by this API.  For this reason,
 154  * most vector operations work on a single input shape and
 155  * produce the same shape on output.  Operations which change
 156  * shape are clearly documented as such <em>shape-changing</em>,
 157  * while the majority of operations are <em>shape-invariant</em>,
 158  * to avoid disadvantaging platforms which support only one shape.
 159  * There are queries to discover, for the current Java platform,
 160  * the {@linkplain VectorShape#preferredShape() preferred shape}
 161  * for general SIMD computation, or the
 162  * {@linkplain VectorShape#largestShapeFor(Class) largest
 163  * available shape} for any given lane type.  To be portable,
 164  * code using this API should start by querying a supported
 165  * shape, and then process all data with shape-invariant
 166  * operations, within the selected shape.
 167  *
 168  * <p> Each unique combination of element type and vector shape
 169  * determines a unique
 170  * {@linkplain #species() <em>vector species</em>}.
 171  * A vector species is represented by a fixed instance of
 172  * {@link VectorSpecies VectorSpecies&lt;E&gt;}
 173  * shared in common by all vectors of the same shape and
 174  * {@code ETYPE}.
 175  *
 176  * <p> Unless otherwise documented, lane-wise vector operations
 177  * require that all vector inputs have exactly the same {@code VSHAPE}
 178  * and {@code VLENGTH}, which is to say that they must have exactly
 179  * the same species.  This allows corresponding lanes to be paired
 180  * unambiguously.  The {@link #check(VectorSpecies) check()} method
 181  * provides an easy way to perform this check explicitly.
 182  *
 183  * <p> Vector shape, {@code VLENGTH}, and {@code ETYPE} are all
 184  * mutually constrained, so that {@code VLENGTH} times the
 185  * {@linkplain #elementSize() bit-size of each lane}
 186  * must always match the bit-size of the vector's shape.
 187  *
 188  * Thus, {@linkplain #reinterpretShape(VectorSpecies,int) reinterpreting} a
 189  * vector may double its length if and only if it either halves the lane size,
 190  * or else changes the shape.  Likewise, reinterpreting a vector may double the
 191  * lane size if and only if it either halves the length, or else changes the
 192  * shape of the vector.
 193  *
 194  * <h2><a id="subtypes"></a>Vector subtypes</h2>
 195  *
 196  * Vector declares a set of vector operations (methods) that are common to all
 197  * element types (such as addition).  Sub-classes of Vector with a concrete
 198  * element type declare further operations that are specific to that
 199  * element type (such as access to element values in lanes, logical operations
 200  * on values of integral element types, or transcendental operations on values
 201  * of floating point element types).
 202  * There are six abstract sub-classes of Vector corresponding to the supported set
 203  * of element types, {@link ByteVector}, {@link ShortVector},
 204  * {@link IntVector}, {@link LongVector}, {@link FloatVector}, and
 205  * {@link DoubleVector}. Along with type-specific operations these classes
 206  * support creation of vector values (instances of Vector).
 207  * They expose static constants corresponding to the supported species,
 208  * and static methods on these types generally take a species as a parameter.
 209  * For example,
 210  * {@link FloatVector#fromArray(VectorSpecies, float[], int) FloatVector.fromArray}
 211  * creates and returns a float vector of the specified species, with elements
 212  * loaded from the specified float array.
 213  * It is recommended that Species instances be held in {@code static final}
 214  * fields for optimal creation and usage of Vector values by the runtime compiler.
 215  *
 216  * <p> As an example of static constants defined by the typed vector classes,
 217  * constant {@link FloatVector#SPECIES_256 FloatVector.SPECIES_256}
 218  * is the unique species whose lanes are {@code float}s and whose
 219  * vector size is 256 bits.  Again, the constant
 220  * {@link FloatVector#SPECIES_PREFERRED} is the species which
 221  * best supports processing of {@code float} vector lanes on
 222  * the currently running Java platform.
 223  *
 224  * <p> As another example, a broadcast scalar value of
 225  * {@code (double)0.5} can be obtained by calling
 226  * {@link DoubleVector#broadcast(VectorSpecies,double)
 227  * DoubleVector.broadcast(dsp, 0.5)}, but the argument {@code dsp} is
 228  * required to select the species (and hence the shape and length) of
 229  * the resulting vector.
 230  *
 231  * <h2><a id="lane-wise"></a>Lane-wise operations</h2>
 232  *
 233  * We use the term <em>lanes</em> when defining operations on
 234  * vectors. The number of lanes in a vector is the number of scalar
 235  * elements it holds. For example, a vector of type {@code float} and
 236  * shape {@code S_256_BIT} has eight lanes, since {@code 32*8=256}.
 237  *
 238  * <p> Most operations on vectors are lane-wise, which means the operation
 239  * is composed of an underlying scalar operator, which is repeated for
 240  * each distinct lane of the input vector.  If there are additional
 241  * vector arguments of the same type, their lanes are aligned with the
 242  * lanes of the first input vector.  (They must all have a common
 243  * {@code VLENGTH}.)  For most lane-wise operations, the output resulting
 244  * from a lane-wise operation will have a {@code VLENGTH} which is equal to
 245  * the {@code VLENGTH} of the input(s) to the operation.  Thus, such lane-wise
 246  * operations are <em>length-invariant</em>, in their basic definitions.
 247  *
 248  * <p> The principle of length-invariance is combined with another
 249  * basic principle, that most length-invariant lane-wise operations are also
 250  * <em>shape-invariant</em>, meaning that the inputs and the output of
 251  * a lane-wise operation will have a common {@code VSHAPE}.  When the
 252  * principles conflict, because a logical result (with an invariant
 253  * {@code VLENGTH}), does not fit into the invariant {@code VSHAPE},
 254  * the resulting expansions and contractions are handled explicitly
 255  * with
 256  * <a href="Vector.html#expansion">special conventions</a>.
 257  *
 258  * <p> Vector operations can be grouped into various categories and
 259  * their behavior can be generally specified in terms of underlying
 260  * scalar operators.  In the examples below, {@code ETYPE} is the
 261  * element type of the operation (such as {@code int.class}) and
 262  * {@code EVector} is the corresponding concrete vector type (such as
 263  * {@code IntVector.class}).
 264  *
 265  * <ul>
 266  * <li>
 267  * A <em>lane-wise unary</em> operation, such as
 268  * {@code w = v0.}{@link Vector#neg() neg}{@code ()},
 269  * takes one input vector,
 270  * distributing a unary scalar operator across the lanes,
 271  * and produces a result vector of the same type and shape.
 272  *
 273  * For each lane of the input vector {@code a},
 274  * the underlying scalar operator is applied to the lane value.
 275  * The result is placed into the vector result in the same lane.
 276  * The following pseudocode illustrates the behavior of this operation
 277  * category:
 278  *
 279  * <pre>{@code
 280  * ETYPE scalar_unary_op(ETYPE s);
 281  * EVector a = ...;
 282  * VectorSpecies<E> species = a.species();
 283  * ETYPE[] ar = new ETYPE[a.length()];
 284  * for (int i = 0; i < ar.length; i++) {
 285  *     ar[i] = scalar_unary_op(a.lane(i));
 286  * }
 287  * EVector r = EVector.fromArray(species, ar, 0);
 288  * }</pre>
 289  *
 290  * <li>
 291  * A <em>lane-wise binary</em> operation, such as
 292  * {@code w = v0.}{@link Vector#add(Vector) add}{@code (v1)},
 293  * takes two input vectors,
 294  * distributing a binary scalar operator across the lanes,
 295  * and produces a result vector of the same type and shape.
 296  *
 297  * For each lane of the two input vectors {@code a} and {@code b},
 298  * the underlying scalar operator is applied to the lane values.
 299  * The result is placed into the vector result in the same lane.
 300  * The following pseudocode illustrates the behavior of this operation
 301  * category:
 302  *
 303  * <pre>{@code
 304  * ETYPE scalar_binary_op(ETYPE s, ETYPE t);
 305  * EVector a = ...;
 306  * VectorSpecies<E> species = a.species();
 307  * EVector b = ...;
 308  * b.check(species);  // must have same species
 309  * ETYPE[] ar = new ETYPE[a.length()];
 310  * for (int i = 0; i < ar.length; i++) {
 311  *     ar[i] = scalar_binary_op(a.lane(i), b.lane(i));
 312  * }
 313  * EVector r = EVector.fromArray(species, ar, 0);
 314  * }</pre>
 315  * </li>
 316  *
 317  * <li>
 318  * Generalizing from unary and binary operations,
 319  * a <em>lane-wise n-ary</em> operation takes {@code N} input vectors {@code v[j]},
 320  * distributing an n-ary scalar operator across the lanes,
 321  * and produces a result vector of the same type and shape.
 322  * Except for a few ternary operations, such as
 323  * {@code w = v0.}{@link FloatVector#fma(Vector,Vector) fma}{@code (v1,v2)},
 324  * this API has no support for
 325  * lane-wise n-ary operations.
 326  *
 327  * For each lane of all of the input vectors {@code v[j]},
 328  * the underlying scalar operator is applied to the lane values.
 329  * The result is placed into the vector result in the same lane.
 330  * The following pseudocode illustrates the behavior of this operation
 331  * category:
 332  *
 333  * <pre>{@code
 334  * ETYPE scalar_nary_op(ETYPE... args);
 335  * EVector[] v = ...;
 336  * int N = v.length;
 337  * VectorSpecies<E> species = v[0].species();
 338  * for (EVector arg : v) {
 339  *     arg.check(species);  // all must have same species
 340  * }
 341  * ETYPE[] ar = new ETYPE[v[0].length()];
 342  * for (int i = 0; i < ar.length; i++) {
 343  *     ETYPE[] args = new ETYPE[N];
 344  *     for (int j = 0; j < N; j++) {
 345  *         args[j] = v[j].lane(i);
 346  *     }
 347  *     ar[i] = scalar_nary_op(args);
 348  * }
 349  * EVector r = EVector.fromArray(species, ar, 0);
 350  * }</pre>
 351  * </li>
 352  *
 353  * <li>
 354  * A <em>lane-wise conversion</em> operation, such as
 355  * {@code w0 = v0.}{@link
 356  * Vector#convert(VectorOperators.Conversion,int)
 357  * convert}{@code (VectorOperators.I2D, 0)},
 358  * takes one input vector,
 359  * distributing a unary scalar conversion operator across the lanes,
 360  * and produces a logical result of the converted values.  The logical
 361  * result (or at least a part of it) is presented in a vector of the
 362  * same shape as the input vector.
 363  *
 364  * <p> Unlike other lane-wise operations, conversions can change lane
 365  * type, from the input (domain) type to the output (range) type.  The
 366  * lane size may change along with the type.  In order to manage the
 367  * size changes, lane-wise conversion methods can produce <em>partial
 368  * results</em>, under the control of a {@code part} parameter, which
 369  * is <a href="Vector.html#expansion">explained elsewhere</a>.
 370  * (Following the example above, the second group of converted lane
 371  * values could be obtained as
 372  * {@code w1 = v0.convert(VectorOperators.I2D, 1)}.)
 373  *
 374  * <p> The following pseudocode illustrates the behavior of this
 375  * operation category in the specific example of a conversion from
 376  * {@code int} to {@code double}, retaining either lower or upper
 377  * lanes (depending on {@code part}) to maintain shape-invariance:
 378  *
 379  * <pre>{@code
 380  * IntVector a = ...;
 381  * int VLENGTH = a.length();
 382  * int part = ...;  // 0 or 1
 383  * VectorShape VSHAPE = a.shape();
 384  * double[] arlogical = new double[VLENGTH];
 385  * for (int i = 0; i < VLENGTH; i++) {
 386  *     int e = a.lane(i);
 387  *     arlogical[i] = (double) e;
 388  * }
 389  * VectorSpecies<Double> rs = VSHAPE.withLanes(double.class);
 390  * int M = Double.SIZE / Integer.SIZE;  // expansion factor
 391  * int offset = part * (VLENGTH / M);
 392  * DoubleVector r = DoubleVector.fromArray(rs, arlogical, offset);
 393  * assert r.length() == VLENGTH / M;
 394  * }</pre>
 395  * </li>
 396  *
 397  * <li>
 398  * A <em>cross-lane reduction</em> operation, such as
 399  * {@code e = v0.}{@link
 400  * IntVector#reduceLanes(VectorOperators.Associative)
 401  * reduceLanes}{@code (VectorOperators.ADD)},
 402  * operates on all
 403  * the lane elements of an input vector.
 404  * An accumulation function is applied to all the
 405  * lane elements to produce a scalar result.
 406  * If the reduction operation is associative then the result may be accumulated
 407  * by operating on the lane elements in any order using a specified associative
 408  * scalar binary operation and identity value.  Otherwise, the reduction
 409  * operation specifies the order of accumulation.
 410  * The following pseudocode illustrates the behavior of this operation category
 411  * if it is associative:
 412  * <pre>{@code
 413  * ETYPE assoc_scalar_binary_op(ETYPE s, ETYPE t);
 414  * EVector a = ...;
 415  * ETYPE r = <identity value>;
 416  * for (int i = 0; i < a.length(); i++) {
 417  *     r = assoc_scalar_binary_op(r, a.lane(i));
 418  * }
 419  * }</pre>
 420  * </li>
 421  *
 422  * <li>
 423  * A <em>cross-lane movement</em> operation, such as
 424  * {@code w = v0.}{@link
 425  * Vector#rearrange(VectorShuffle) rearrange}{@code (shuffle)}
 426  * operates on all
 427  * the lane elements of an input vector and moves them
 428  * in a data-dependent manner into <em>different lanes</em>
 429  * in an output vector.
 430  * The movement is steered by an auxiliary datum, such as
 431  * a {@link VectorShuffle} or a scalar index defining the
 432  * origin of the movement.
 433  * The following pseudocode illustrates the behavior of this
 434  * operation category, in the case of a shuffle:
 435  * <pre>{@code
 436  * EVector a = ...;
 437  * VectorShuffle<E> s = ...;
 438  * ETYPE[] ar = new ETYPE[a.length()];
 439  * for (int i = 0; i < ar.length; i++) {
 440  *     int source = s.laneSource(i);
 441  *     ar[i] = a.lane(source);
 442  * }
 443  * EVector r = EVector.fromArray(a.species(), ar, 0);
 444  * }</pre>
 445  * </li>
 446  *
 447  * <li>
 448  * A <em>masked operation</em> is one which is a variation on one of the
 449  * previous operations (either lane-wise or cross-lane), where
 450  * the operation takes an extra trailing {@link VectorMask} argument.
 451  * In lanes where the mask is set, the operation behaves as if the mask
 452  * argument were absent, but in lanes where the mask is unset, the
 453  * underlying scalar operation is suppressed.
 454  * Masked operations are explained in
 455  * <a href="Vector.html#masking">greater detail elsewhere</a>.
 456  * </li>
 457  *
 458  * <li>
 459  * A very special case of a masked lane-wise binary operation is a
 460  * {@linkplain #blend(Vector,VectorMask) blend}, which operates
 461  * lane-wise on two input vectors {@code a} and {@code b}, selecting lane
 462  * values from one input or the other depending on a mask {@code m}.
 463  * In lanes where {@code m} is set, the corresponding value from
 464  * {@code b} is selected into the result; otherwise the value from
 465  * {@code a} is selected.  Thus, a blend acts as a vectorized version
 466  * of Java's ternary selection expression {@code m?b:a}:
 467  * <pre>{@code
 468  * ETYPE[] ar = new ETYPE[a.length()];
 469  * for (int i = 0; i < ar.length; i++) {
 470  *     boolean isSet = m.laneIsSet(i);
 471  *     ar[i] = isSet ? b.lane(i) : a.lane(i);
 472  * }
 473  * EVector r = EVector.fromArray(a.species(), ar, 0);
 474  * }</pre>
 475  * </li>
 476  *
 477  * <li>
 478  * A <em>lane-wise binary test</em> operation, such as
 479  * {@code m = v0.}{@link Vector#lt(Vector) lt}{@code (v1)},
 480  * takes two input vectors,
 481  * distributing a binary scalar comparison across the lanes,
 482  * and produces, not a vector of booleans, but rather a
 483  * {@linkplain VectorMask vector mask}.
 484  *
 485  * For each lane of the two input vectors {@code a} and {@code b},
 486  * the underlying scalar comparison operator is applied to the lane values.
 487  * The resulting boolean is placed into the vector mask result in the same lane.
 488  * The following pseudocode illustrates the behavior of this operation
 489  * category:
 490  * <pre>{@code
 491  * boolean scalar_binary_test_op(ETYPE s, ETYPE t);
 492  * EVector a = ...;
 493  * VectorSpecies<E> species = a.species();
 494  * EVector b = ...;
 495  * b.check(species);  // must have same species
 496  * boolean[] mr = new boolean[a.length()];
 497  * for (int i = 0; i < mr.length; i++) {
 498  *     mr[i] = scalar_binary_test_op(a.lane(i), b.lane(i));
 499  * }
 500  * VectorMask<E> m = VectorMask.fromArray(species, mr, 0);
 501  * }</pre>
 502  * </li>
 503  *
 504  * <li>
 505  * Similarly to a binary comparison, a <em>lane-wise unary test</em>
 506  * operation, such as
 507  * {@code m = v0.}{@link Vector#test(VectorOperators.Test)
 508  * test}{@code (IS_FINITE)},
 509  * takes one input vector, distributing a scalar predicate
 510  * (a test function) across the lanes, and produces a
 511  * {@linkplain VectorMask vector mask}.
 512  * </li>
 513  *
 514  * </ul>
 515  *
 516  * <p>
 517  * If a vector operation does not belong to one of the above categories then
 518  * the method documentation explicitly specifies how it processes the lanes of
 519  * input vectors, and where appropriate illustrates the behavior using
 520  * pseudocode.
 521  *
 522  * <p>
 523  * Most lane-wise binary and comparison operations offer convenience
 524  * overloadings which accept a scalar as the second input, in place of a
 525  * vector.  In this case the scalar value is promoted to a vector by
 526  * {@linkplain Vector#broadcast(long) broadcasting it}
 527  * into the same lane structure as the first input.
 528  *
 529  * For example, to multiply all lanes of a {@code double} vector by
 530  * a scalar value {@code 1.1}, the expression {@code v.mul(1.1)} is
 531  * easier to work with than an equivalent expression with an explicit
 532  * broadcast operation, such as {@code v.mul(v.broadcast(1.1))}
 533  * or {@code v.mul(DoubleVector.broadcast(v.species(), 1.1))}.
 534  *
 535  * Unless otherwise specified the scalar variant always behaves as if
 536  * each scalar value is first transformed to a vector of the same
 537  * species as the first vector input, using the appropriate
 538  * {@code broadcast} operation.
 539  *
 540  * <h2><a id="masking"></a>Masked operations</h2>
 541  *
 542  * <p> Many vector operations accept an optional
 543  * {@link VectorMask mask} argument, selecting which lanes participate
 544  * in the underlying scalar operator.  If present, the mask argument
 545  * appears at the end of the method argument list.
 546  *
 547  * <p> Each lane of the mask argument is a boolean which is either in
 548  * the <em>set</em> or <em>unset</em> state.  For lanes where the mask
 549  * argument is unset, the underlying scalar operator is suppressed.
 550  * In this way, masks allow vector operations to emulate scalar
 551  * control flow operations, without losing SIMD parallelism, except
 552  * where the mask lane is unset.
 553  *
 554  * <p> An operation suppressed by a mask will never cause an exception
 555  * or side effect of any sort, even if the underlying scalar operator
 556  * can potentially do so.  For example, an unset lane that seems to
 557  * access an out of bounds array element or divide an integral value
 558  * by zero will simply be ignored.  Values in suppressed lanes never
 559  * participate or appear in the result of the overall operation.
 560  *
 561  * <p> Result lanes corresponding to a suppressed operation will be
 562  * filled with a default value which depends on the specific
 563  * operation, as follows:
 564  *
 565  * <ul>
 566  *
 567  * <li>If the masked operation is a unary, binary, or n-ary arithmetic or
 568  * logical operation, suppressed lanes are filled from the first
 569  * vector operand (i.e., the vector receiving the method call), as if
 570  * by a {@linkplain #blend(Vector,VectorMask) blend}.</li>
 571  *
 572  * <li>If the masked operation is a memory load or a {@code slice()} from
 573  * another vector, suppressed lanes are not loaded, and are filled
 574  * with the default value for the {@code ETYPE}, which in every case
 575  * consists of all zero bits.  An unset lane can never cause an
 576  * exception, even if the hypothetical corresponding memory location
 577  * does not exist (because it is out of an array's index range).</li>
 578  *
 579  * <li>If the operation is a cross-lane operation with an operand
 580  * which supplies lane indexes (of type {@code VectorShuffle} or
 581  * {@code Vector}), suppressed lanes are not computed, and are filled
 582  * with the zero default value.  Normally, invalid lane indexes elicit
 583  * an {@code IndexOutOfBoundsException}, but if a lane is unset, the
 584  * zero value is quietly substituted, regardless of the index.  This
 585  * rule is similar to the previous rule, for masked memory loads.</li>
 586  *
 587  * <li>If the masked operation is a memory store or an {@code unslice()} into
 588  * another vector, suppressed lanes are not stored, and the
 589  * corresponding memory or vector locations (if any) are unchanged.
 590  *
 591  * <p> (Note: Memory effects such as race conditions never occur for
 592  * suppressed lanes.  That is, implementations will not secretly
 593  * re-write the existing value for unset lanes.  In the Java Memory
 594  * Model, reassigning a memory variable to its current value is not a
 595  * no-op; it may quietly undo a racing store from another
 596  * thread.)</p>
 597  * </li>
 598  *
 599  * <li>If the masked operation is a reduction, suppressed lanes are ignored
 600  * in the reduction.  If all lanes are suppressed, a suitable neutral
 601  * value is returned, depending on the specific reduction operation,
 602  * and documented by the masked variant of that method.  (This means
 603  * that users can obtain the neutral value programmatically by
 604  * executing the reduction on a dummy vector with an all-unset mask.)
 605  *
 606  * <li>If the masked operation is a comparison operation, suppressed output
 607  * lanes in the resulting mask are themselves unset, as if the
 608  * suppressed comparison operation returned {@code false} regardless
 609  * of the suppressed input values.  In effect, it is as if the
 610  * comparison operation were performed unmasked, and then the
 611  * result intersected with the controlling mask.</li>
 612  *
 613  * <li>In other cases, such as masked
 614  * <a href="Vector.html#cross-lane"><em>cross-lane movements</em></a>,
 615  * the specific effects of masking are documented by the masked
 616  * variant of the method.
 617  *
 618  * </ul>
 619  *
 620  * <p> As an example, a masked binary operation on two input vectors
 621  * {@code a} and {@code b} suppresses the binary operation for lanes
 622  * where the mask is unset, and retains the original lane value from
 623  * {@code a}.  The following pseudocode illustrates this behavior:
 624  * <pre>{@code
 625  * ETYPE scalar_binary_op(ETYPE s, ETYPE t);
 626  * EVector a = ...;
 627  * VectorSpecies<E> species = a.species();
 628  * EVector b = ...;
 629  * b.check(species);  // must have same species
 630  * VectorMask<E> m = ...;
 631  * m.check(species);  // must have same species
 632  * ETYPE[] ar = new ETYPE[a.length()];
 633  * for (int i = 0; i < ar.length; i++) {
 634  *     if (m.laneIsSet(i)) {
 635  *         ar[i] = scalar_binary_op(a.lane(i), b.lane(i));
 636  *     } else {
 637  *         ar[i] = a.lane(i);  // from first input
 638  *     }
 639  * }
 640  * EVector r = EVector.fromArray(species, ar, 0);
 641  * }</pre>
 642  *
 643  * <h2><a id="lane-order"></a>Lane order and byte order</h2>
 644  *
 645  * The number of lane values stored in a given vector is referred to
 646  * as its {@linkplain #length() vector length} or {@code VLENGTH}.
 647  *
 648  * It is useful to consider vector lanes as ordered
 649  * <em>sequentially</em> from first to last, with the first lane
 650  * numbered {@code 0}, the next lane numbered {@code 1}, and so on to
 651  * the last lane numbered {@code VLENGTH-1}.  This is a temporal
 652  * order, where lower-numbered lanes are considered earlier than
 653  * higher-numbered (later) lanes.  This API uses these terms
 654  * in preference to spatial terms such as "left", "right", "high",
 655  * and "low".
 656  *
 657  * <p> Temporal terminology works well for vectors because they
 658  * (usually) represent small fixed-sized segments in a long sequence
 659  * of workload elements, where the workload is conceptually traversed
 660  * in time order from beginning to end.  (This is a mental model: it
 661  * does not exclude multicore divide-and-conquer techniques.)  Thus,
 662  * when a scalar loop is transformed into a vector loop, adjacent
 663  * scalar items (one earlier, one later) in the workload end up as
 664  * adjacent lanes in a single vector (again, one earlier, one later).
 665  * At a vector boundary, the last lane item in the earlier vector is
 666  * adjacent to (and just before) the first lane item in the
 667  * immediately following vector.
 668  *
 669  * <p> Vectors are also sometimes thought of in spatial terms, where
 670  * the first lane is placed at an edge of some virtual paper, and
 671  * subsequent lanes are presented in order next to it.  When using
 672  * spatial terms, all directions are equally plausible: Some vector
 673  * notations present lanes from left to right, and others from right
 674  * to left; still others present from top to bottom or vice versa.
 675  * Using the language of time (before, after, first, last) instead of
 676  * space (left, right, high, low) is often more likely to avoid
 677  * misunderstandings.
 678  *
 679  * <p> A second reason to prefer temporal to spatial language about
 680  * vector lanes is the fact that the terms "left", "right", "high" and
 681  * "low" are widely used to describe the relations between bits in
 682  * scalar values.  The leftmost or highest bit in a given type is
 683  * likely to be a sign bit, while the rightmost or lowest bit is
 684  * likely to be the arithmetically least significant, and so on.
 685  * Applying these terms to vector lanes risks confusion, however,
 686  * because it is relatively rare to find algorithms where, given two
 687  * adjacent vector lanes, one lane is somehow more arithmetically
 688  * significant than its neighbor, and even in those cases, there is no
 689  * general way to know which neighbor is the more significant.
 690  *
 691  * <p> Putting the terms together, we view the information structure
 692  * of a vector as a temporal sequence of lanes ("first", "next",
 693  * "earlier", "later", "last", etc.)  of bit-strings which are
 694  * internally ordered spatially (either "low" to "high" or "right" to
 695  * "left").  The primitive values in the lanes are decoded from these
 696  * bit-strings, in the usual way.  Most vector operations, like most
 697  * Java scalar operators, treat primitive values as atomic values, but
 698  * some operations reveal the internal bit-string structure.
 699  *
 700  * <p> When a vector is loaded from or stored into memory, the order
 701  * of vector lanes is <em>always consistent</em> with the inherent
 702  * ordering of the memory container.  This is true whether or not
 703  * individual lane elements are subject to "byte swapping" due to
 704  * details of byte order.  Thus, while the scalar lane elements of a
 705  * vector might be "byte swapped", the lanes themselves are never
 706  * reordered, except by an explicit method call that performs
 707  * cross-lane reordering.
 708  *
 709  * <p> When vector lane values are stored to Java variables of the
 710  * same type, byte swapping is performed if and only if the
 711  * implementation of the vector hardware requires such swapping.  It
 712  * is therefore unconditional and invisible.
 713  *
 714  * <p> As a useful fiction, this API presents a consistent illusion
 715  * that vector lane bytes are composed into larger lane scalars in
 716  * <em>little endian order</em>.  This means that storing a vector
 717  * into a Java byte array will reveal the successive bytes of the
 718  * vector lane values in little-endian order on all platforms,
 719  * regardless of native memory order, and also regardless of byte
 720  * order (if any) within vector unit registers.
 721  *
 722  * <p> This hypothetical little-endian ordering also appears when a
 723  * {@linkplain #reinterpretShape(VectorSpecies,int) reinterpretation cast} is
 724  * applied in such a way that lane boundaries are discarded and
 725  * redrawn differently, while maintaining vector bits unchanged.  In
 726  * such an operation, two adjacent lanes will contribute bytes to a
 727  * single new lane (or vice versa), and the sequential order of the
 728  * two lanes will determine the arithmetic order of the bytes in the
 729  * single lane.  In this case, the little-endian convention provides
 730  * portable results, so that on all platforms earlier lanes tend to
 731  * contribute lower (rightward) bits, and later lanes tend to
 732  * contribute higher (leftward) bits.  The {@linkplain #reinterpretAsBytes()
 733  * reinterpretation casts} between {@link ByteVector}s and the
 734  * other non-byte vectors use this convention to clarify their
 735  * portable semantics.
 736  *
 737  * <p> The little-endian fiction for relating lane order to per-lane
 738  * byte order is slightly preferable to an equivalent big-endian
 739  * fiction, because some related formulas are much simpler,
 740  * specifically those which renumber bytes after lane structure
 741  * changes.  The earliest byte is invariantly earliest across all lane
 742  * structure changes, but only if little-endian conventions are used.
 743  * The root cause of this is that bytes in scalars are numbered from
 744  * the least significant (rightmost) to the most significant
 745  * (leftmost), and almost never vice-versa.  If we habitually numbered
 746  * sign bits as zero (as on some computers) then this API would reach
 747  * for big-endian fictions to create unified addressing of vector
 748  * bytes.
 749  *
 750  * <h2><a id="memory"></a>Memory operations</h2>
 751  *
 752  * As was already mentioned, vectors can be loaded from memory and
 753  * stored back.  An optional mask can control which individual memory
 754  * locations are read from or written to.  The shape of a vector
 755  * determines how much memory it will occupy.
 756  *
 757  * An implementation typically has the property, in the absence of
 758  * masking, that lanes are stored as a dense sequence of back-to-back
 759  * values in memory, the same as a dense (gap-free) series of single
 760  * scalar values in an array of the scalar type.
 761  *
 762  * In such cases memory order corresponds exactly to lane order.  The
 763  * first vector lane value occupies the first position in memory, and so on,
 764  * up to the length of the vector. Further, the memory order of stored
 765  * vector lanes corresponds to increasing index values in a Java array or
 766  * in a {@link java.nio.ByteBuffer}.
 767  *
 768  * <p> Byte order for lane storage is chosen such that the stored
 769  * vector values can be read or written as single primitive values,
 770  * within the array or buffer that holds the vector, producing the
 771  * same values as the lane-wise values within the vector.
 772  * This fact is independent of the convenient fiction that lane values
 773  * inside of vectors are stored in little-endian order.
 774  *
 775  * <p> For example,
 776  * {@link FloatVector#fromArray(VectorSpecies, float[], int)
 777  *        FloatVector.fromArray(fsp,fa,i)}
 778  * creates and returns a float vector of some particular species {@code fsp},
 779  * with elements loaded from some float array {@code fa}.
 780  * The first lane is loaded from {@code fa[i]} and the last lane
 781  * is loaded from {@code fa[i+VL-1]}, where {@code VL}
 782  * is the length of the vector as derived from the species {@code fsp}.
 783  * Then, {@link FloatVector#add(Vector) fv=fv.add(fv2)}
 784  * will produce another float vector of that species {@code fsp},
 785  * given a vector {@code fv2} of the same species {@code fsp}.
 786  * Next, {@link FloatVector#compare(VectorOperators.Comparison,float)
 787  * mnz=fv.compare(NE, 0.0f)} tests which lanes of the result are non-zero,
 788  * yielding a mask {@code mnz}.  The non-zero lanes (and only those
 789  * lanes) can then be stored back into the original array elements
 790  * using the statement
 791  * {@link FloatVector#intoArray(float[],int,VectorMask) fv.intoArray(fa,i,mnz)}.
 792  *
 793  * <h2><a id="expansion"></a>Expansions, contractions, and partial results</h2>
 794  *
 795  * Since vectors are fixed in size, occasions often arise where the
 796  * logical result of an operation is not the same as the physical size
 797  * of the proposed output vector.  To encourage user code that is as
 798  * portable and predictable as possible, this API has a systematic
 799  * approach to the design of such <em>resizing</em> vector operations.
 800  *
 801  * <p> As a basic principle, lane-wise operations are
 802  * <em>length-invariant</em>, unless clearly marked otherwise.
 803  * Length-invariance simply means that
 804  * if {@code VLENGTH} lanes go into an operation, the same number
 805  * of lanes come out, with nothing discarded and no extra padding.
 806  *
 807  * <p> As a second principle, sometimes in tension with the first,
 808  * lane-wise operations are also <em>shape-invariant</em>, unless
 809  * clearly marked otherwise.
 810  *
 811  * Shape-invariance means that {@code VSHAPE} is constant for typical
 812  * computations.  Keeping the same shape throughout a computation
 813  * helps ensure that scarce vector resources are efficiently used.
 814  * (On some hardware platforms shape changes could cause unwanted
 815  * effects like extra data movement instructions, round trips through
 816  * memory, or pipeline bubbles.)
 817  *
 818  * <p> Tension between these principles arises when an operation
 819  * produces a <em>logical result</em> that is too large for the
 820  * required output {@code VSHAPE}.  In other cases, when a logical
 821  * result is smaller than the capacity of the output {@code VSHAPE},
 822  * the positioning of the logical result is open to question, since
 823  * the physical output vector must contain a mix of logical result and
 824  * padding.
 825  *
 826  * <p> In the first case, of a too-large logical result being crammed
 827  * into a too-small output {@code VSHAPE}, we say that data has
 828  * <em>expanded</em>.  In other words, an <em>expansion operation</em>
 829  * has caused the output shape to overflow.  Symmetrically, in the
 830  * second case of a small logical result fitting into a roomy output
 831  * {@code VSHAPE}, the data has <em>contracted</em>, and the
 832  * <em>contraction operation</em> has required the output shape to pad
 833  * itself with extra zero lanes.
 834  *
 835  * <p> In both cases we can speak of a parameter {@code M} which
 836  * measures the <em>expansion ratio</em> or <em>contraction ratio</em>
 837  * between the logical result size (in bits) and the bit-size of the
 838  * actual output shape.  When vector shapes are changed, and lane
 839  * sizes are not, {@code M} is just the integral ratio of the output
 840  * shape to the logical result.  (With the possible exception of
 841  * the {@linkplain VectorShape#S_Max_BIT maximum shape}, all vector
 842  * sizes are powers of two, and so the ratio {@code M} is always
 843  * an integer.  In the hypothetical case of a non-integral ratio,
 844  * the value {@code M} would be rounded up to the next integer,
 845  * and then the same general considerations would apply.)
 846  *
 847  * <p> If the logical result is larger than the physical output shape,
 848  * such a shape change must inevitably drop result lanes (all but
 849  * {@code 1/M} of the logical result).  If the logical size is smaller
 850  * than the output, the shape change must introduce zero-filled lanes
 851  * of padding (all but {@code 1/M} of the physical output).  The first
 852  * case, with dropped lanes, is an expansion, while the second, with
 853  * padding lanes added, is a contraction.
 854  *
 855  * <p> Similarly, consider a lane-wise conversion operation which
 856  * leaves the shape invariant but changes the lane size by a ratio of
 857  * {@code M}.  If the logical result is larger than the output (or
 858  * input), this conversion must reduce the {@code VLENGTH} lanes of the
 859  * output by {@code M}, dropping all but {@code 1/M} of the logical
 860  * result lanes.  As before, the dropping of lanes is the hallmark of
 861  * an expansion.  A lane-wise operation which contracts lane size by a
 862  * ratio of {@code M} must increase the {@code VLENGTH} by the same
 863  * factor {@code M}, filling the extra lanes with a zero padding
 864  * value; because padding must be added this is a contraction.
 865  *
 866  * <p> It is also possible (though somewhat confusing) to change both
 867  * lane size and container size in one operation which performs both
 868  * lane conversion <em>and</em> reshaping.  If this is done, the same
 869  * rules apply, but the logical result size is the product of the
 870  * input size times any expansion or contraction ratio from the lane
 871  * change size.
 872  *
 873  * <p> For completeness, we can also speak of <em>in-place
 874  * operations</em> for the frequent case when resizing does not occur.
 875  * With an in-place operation, the data is simply copied from logical
 876  * output to its physical container with no truncation or padding.
 877  * The ratio parameter {@code M} in this case is unity.
 878  *
 879  * <p> Note that the classification of contraction vs. expansion
 880  * depends on the relative sizes of the logical result and the
 881  * physical output container.  The size of the input container may be
 882  * larger or smaller than either of the other two values, without
 883  * changing the classification.  For example, a conversion from a
 884  * 128-bit shape to a 256-bit shape will be a contraction in many
 885  * cases, but it would be an expansion if it were combined with a
 886  * conversion from {@code byte} to {@code long}, since in that case
 887  * the logical result would be 1024 bits in size.  This example also
 888  * illustrates that a logical result does not need to correspond to
 889  * any particular platform-supported vector shape.
 890  *
 891  * <p> Although lane-wise masked operations can be viewed as producing
 892  * partial operations, they are not classified (in this API) as
 893  * expansions or contractions.  A masked load from an array surely
 894  * produces a partial vector, but there is no meaningful "logical
 895  * output vector" that this partial result was contracted from.
 896  *
 897  * <p> Some care is required with these terms, because it is the
 898  * <em>data</em>, not the <em>container size</em>, that is expanding
 899  * or contracting, relative to the size of its output container.
 900  * Thus, resizing a 128-bit input into a 512-bit vector has the effect
 901  * of a <em>contraction</em>.  Though the 128 bits of payload hasn't
 902  * changed in size, we can say it "looks smaller" in its new 512-bit
 903  * home, and this will capture the practical details of the situation.
 904  *
 905  * <p> If a vector method might expand its data, it accepts an extra
 906  * {@code int} parameter called {@code part}, or the "part number".
 907  * The part number must be in the range {@code [0..M-1]}, where
 908  * {@code M} is the expansion ratio.  The part number selects one
 909  * of {@code M} contiguous disjoint equally-sized blocks of lanes
 910  * from the logical result and fills the physical output vector
 911  * with this block of lanes.
 912  *
 913  * <p> Specifically, the lanes selected from the logical result of an
 914  * expansion are numbered in the range {@code [R..R+L-1]}, where
 915  * {@code L} is the {@code VLENGTH} of the physical output vector, and
 916  * the origin of the block, {@code R}, is {@code part*L}.
 917  *
 918  * <p> A similar convention applies to any vector method that might
 919  * contract its data.  Such a method also accepts an extra part number
 920  * parameter (again called {@code part}) which steers the contracted
 921  * data lanes into one of {@code M} contiguous disjoint equally-sized
 922  * blocks of lanes in the physical output vector.  The remaining lanes
 923  * are filled with zero, or as specified by the method.
 924  *
 925  * <p> Specifically, the data is steered into the lanes numbered in the
 926  * range {@code [R..R+L-1]}, where {@code L} is the {@code VLENGTH} of
 927  * the logical result vector, and the origin of the block, {@code R},
 928  * is again a multiple of {@code L} selected by the part number,
 929  * specifically {@code |part|*L}.
 930  *
 931  * <p> In the case of a contraction, the part number must be in the
 932  * non-positive range {@code [-M+1..0]}.  This convention is adopted
 933  * because some methods can perform both expansions and contractions,
 934  * in a data-dependent manner, and the extra sign on the part number
 935  * serves as an error check.  If a vector method takes a part number and
 936  * is invoked to perform an in-place operation (neither contracting
 937  * nor expanding), the {@code part} parameter must be exactly zero.
 938  * Part numbers outside the allowed ranges will elicit an indexing
 939  * exception.  Note that in all cases a zero part number is valid, and
 940  * corresponds to an operation which preserves as many lanes as
 941  * possible from the beginning of the logical result, and places them
 942  * into the beginning of the physical output container.  This is
 943  * often a desirable default, so a part number of zero is safe
 944  * in all cases and useful in most cases.
 945  *
 946  * <p> The various resizing operations of this API contract or expand
 947  * their data as follows:
 948  * <ul>
 949  *
 950  * <li>
 951  * {@link Vector#convert(VectorOperators.Conversion,int) Vector.convert()}
 952  * will expand (respectively, contract) its operand by ratio
 953  * {@code M} if the
 954  * {@linkplain #elementSize() element size} of its output is
 955  * larger (respectively, smaller) by a factor of {@code M}.
 956  * If the element sizes of input and output are the same,
 957  * then {@code convert()} is an in-place operation.
 958  *
 959  * <li>
 960  * {@link Vector#convertShape(VectorOperators.Conversion,VectorSpecies,int) Vector.convertShape()}
 961  * will expand (respectively, contract) its operand by ratio
 962  * {@code M} if the bit-size of its logical result is
 963  * larger (respectively, smaller) than the bit-size of its
 964  * output shape.
 965  * The size of the logical result is defined as the
 966  * {@linkplain #elementSize() element size} of the output,
 967  * times the {@code VLENGTH} of its input.
 968  *
 969  * Depending on the ratio of the changed lane sizes, the logical size
 970  * may be (in various cases) either larger or smaller than the input
 971  * vector, independently of whether the operation is an expansion
 972  * or contraction.
 973  *
 974  * <li>
 975  * Since {@link Vector#castShape(VectorSpecies,int) Vector.castShape()}
 976  * is a convenience method for {@code convertShape()}, its classification
 977  * as an expansion or contraction is the same as for {@code convertShape()}.
 978  *
 979  * <li>
 980  * {@link Vector#reinterpretShape(VectorSpecies,int) Vector.reinterpretShape()}
 981  * is an expansion (respectively, contraction) by ratio {@code M} if the
 982  * {@linkplain #bitSize() vector bit-size} of its input is
 983  * crammed into a smaller (respectively, dropped into a larger)
 984  * output container by a factor of {@code M}.
 985  * Otherwise it is an in-place operation.
 986  *
 987  * Since this method is a reinterpretation cast that can erase and
 988  * redraw lane boundaries as well as modify shape, the input vector's
 989  * lane size and lane count are irrelevant to its classification as
 990  * expanding or contracting.
 991  *
 992  * <li>
 993  * The {@link #unslice(int,Vector,int) unslice()} methods expand
 994  * by a ratio of {@code M=2}, because the single input slice is
 995  * positioned and inserted somewhere within two consecutive background
 996  * vectors.  The part number selects the first or second background
 997  * vector, as updated by the inserted slice.
 998  * Note that the corresponding
 999  * {@link #slice(int,Vector) slice()} methods, although inverse
1000  * to the {@code unslice()} methods, do not contract their data
1001  * and thus require no part number.  This is because
1002  * {@code slice()} delivers a slice of exactly {@code VLENGTH}
1003  * lanes extracted from two input vectors.
1004  * </ul>
1005  *
1006  * The method {@link VectorSpecies#partLimit(VectorSpecies,boolean)
1007  * partLimit()} on {@link VectorSpecies} can be used, before any
1008  * expanding or contracting operation is performed, to query the
1009  * limiting value on a part parameter for a proposed expansion
1010  * or contraction.  The value returned from {@code partLimit()} is
1011  * positive for expansions, negative for contractions, and zero for
1012  * in-place operations.  Its absolute value is the parameter {@code
1013  * M}, and so it serves as an exclusive limit on valid part number
1014  * arguments for the relevant methods.  Thus, for expansions, the
1015  * {@code partLimit()} value {@code M} is the exclusive upper limit
1016  * for part numbers, while for contractions the {@code partLimit()}
1017  * value {@code -M} is the exclusive <em>lower</em> limit.
1018  *
1019  * <h2><a id="cross-lane"></a>Moving data across lane boundaries</h2>
1020  * The cross-lane methods which do not redraw lanes or change species
1021  * are more regularly structured and easier to reason about.
1022  * These operations are:
1023  * <ul>
1024  *
1025  * <li>The {@link #slice(int,Vector) slice()} family of methods,
1026  * which extract a contiguous slice of {@code VLENGTH} fields from
1027  * a given origin point within a concatenated pair of vectors.
1028  *
1029  * <li>The {@link #unslice(int,Vector,int) unslice()} family of
1030  * methods, which insert a contiguous slice of {@code VLENGTH} fields
1031  * into a concatenated pair of vectors at a given origin point.
1032  *
1033  * <li>The {@link #rearrange(VectorShuffle) rearrange()} family of
1034  * methods, which select an arbitrary set of {@code VLENGTH} lanes
1035  * from one or two input vectors, and assemble them in an arbitrary
1036  * order.  The selection and order of lanes is controlled by a
1037  * {@code VectorShuffle} object, which acts as a routing table
1038  * mapping source lanes to destination lanes.  A {@code VectorShuffle}
1039  * can encode a mathematical permutation as well as many other
1040  * patterns of data movement.
1041  *
1042  * <li>The {@link #compress(VectorMask)} and {@link #expand(VectorMask)}
1043  * methods, which select up to {@code VLENGTH} lanes from an
1044  * input vector, and assemble them in lane order.  The selection of lanes
1045  * is controlled by a {@code VectorMask}, with set lane elements mapping, by
1046  * compression or expansion in lane order, source lanes to destination lanes.
1047  *
1048  * </ul>
1049  * <p> Some vector operations are not lane-wise, but rather move data
1050  * across lane boundaries.  Such operations are typically rare in SIMD
1051  * code, though they are sometimes necessary for specific algorithms
1052  * that manipulate data formats at a low level, and/or require SIMD
1053  * data to move in complex local patterns.  (Local movement in a small
1054  * window of a large array of data is relatively unusual, although
1055  * some highly patterned algorithms call for it.)  In this API such
1056  * methods are always clearly recognizable, so that simpler lane-wise
1057  * reasoning can be confidently applied to the rest of the code.
1058  *
1059  * <p> In some cases, vector lane boundaries are discarded and
1060  * "redrawn from scratch", so that data in a given input lane might
1061  * appear (in several parts) distributed through several output lanes,
1062  * or (conversely) data from several input lanes might be consolidated
1063  * into a single output lane.  The fundamental method which can redraw
1064  * lane boundaries is
1065  * {@link #reinterpretShape(VectorSpecies,int) reinterpretShape()}.
1066  * Built on top of this method, certain convenience methods such
1067  * as {@link #reinterpretAsBytes() reinterpretAsBytes()} or
1068  * {@link #reinterpretAsInts() reinterpretAsInts()} will
1069  * (potentially) redraw lane boundaries, while retaining the
1070  * same overall vector shape.
1071  *
1072  * <p> Operations which produce or consume a scalar result can be
1073  * viewed as very simple cross-lane operations.  Methods in the
1074  * {@link #reduceLanesToLong(VectorOperators.Associative)
1075  * reduceLanes()} family fold together all lanes (or mask-selected
1076  * lanes) of a vector and return a single result.  As an inverse, the
1077  * {@link #broadcast(long) broadcast} family of methods can be thought
1078  * of as crossing lanes in the other direction, from a scalar to all
1079  * lanes of the output vector.  Single-lane access methods such as
1080  * {@code lane(I)} or {@code withLane(I,E)} might also be regarded as
1081  * very simple cross-lane operations.
1082  *
1083  * <p> Likewise, a method which moves a non-byte vector to or from a
1084  * byte array could be viewed as a cross-lane operation, because the
1085  * vector lanes must be distributed into separate bytes, or (in the
1086  * other direction) consolidated from array bytes.
1087  *
1088  * @implNote
1089  *
1090  * <h2>Hardware platform dependencies and limitations</h2>
1091  *
1092  * The Vector API is intended to accelerate computations in the style of Single
1093  * Instruction Multiple Data (SIMD), using available hardware
1094  * resources such as vector hardware registers and vector hardware
1095  * instructions.  The API is designed to make effective use of
1096  * multiple SIMD hardware platforms.
1097  *
1098  * <p> This API will also work correctly even on Java platforms which
1099  * do not include specialized hardware support for SIMD computations.
1100  * The Vector API is not likely to provide any special performance
1101  * benefit on such platforms.
1102  *
1103  * <p> Currently the implementation is optimized to work best on:
1104  *
1105  * <ul>
1106  *
1107  * <li> Intel x64 platforms supporting at least AVX2 up to AVX-512.
1108  * Masking using mask registers and mask accepting hardware
1109  * instructions on AVX-512 are not currently supported.
1110  *
1111  * <li> ARM AArch64 platforms supporting NEON.  Although the API has
1112  * been designed to ensure ARM SVE instructions can be supported
1113  * (vector sizes between 128 and 2048 bits) there is currently no
1114  * implementation of such instructions and the general masking
1115  * capability.
1116  *
1117  * </ul>
1118  * The implementation currently supports masked lane-wise operations
1119  * in a cross-platform manner by composing the unmasked lane-wise
1120  * operation with {@link #blend(Vector, VectorMask) blend} as in
1121  * the expression {@code a.blend(a.lanewise(op, b), m)}, where
1122  * {@code a} and {@code b} are vectors, {@code op} is the vector
1123  * operation, and {@code m} is the mask.
1124  *
1125  * <p> The implementation does not currently support optimal
1126  * vectorized instructions for floating point transcendental
1127  * functions (such as operators {@link VectorOperators#SIN SIN}
1128  * and {@link VectorOperators#LOG LOG}).
1129  *
1130  * <h2>No boxing of primitives</h2>
1131  *
1132  * Although a vector type like {@code Vector<Integer>} may seem to
1133  * work with boxed {@code Integer} values, the overheads associated
1134  * with boxing are avoided by having each vector subtype work
1135  * internally on lane values of the actual {@code ETYPE}, such as
1136  * {@code int}.
1137  *
1138  * <h2>Value-based classes and identity operations</h2>
1139  *
1140  * {@code Vector}, along with all of its subtypes and many of its
1141  * helper types like {@code VectorMask} and {@code VectorShuffle}, is a
1142  * <a href="{@docRoot}/java.base/java/lang/doc-files/ValueBased.html">value-based</a>
1143  * class.
1144  *
1145  * <p> Once created, a vector is never mutated, not even if only
1146  * {@linkplain IntVector#withLane(int,int) a single lane is changed}.
1147  * A new vector is always created to hold a new configuration
1148  * of lane values.  The unavailability of mutative methods is a
1149  * necessary consequence of suppressing the object identity of
1150  * all vectors, as value-based classes.
1151  *
1152  * <p> With {@code Vector},
1153  *
1154  * <!-- The following paragraph is shared verbatim
1155  *   -- between Vector.java and package-info.java -->
1156  * identity-sensitive operations such as {@code ==} may yield
1157  * unpredictable results, or reduced performance.  Oddly enough,
1158  * {@link Vector#equals(Object) v.equals(w)} is likely to be faster
1159  * than {@code v==w}, since {@code equals} is <em>not</em> an identity
1160  * sensitive method.
1161  *
1162  * Also, these objects can be stored in locals and parameters and as
1163  * {@code static final} constants, but storing them in other Java
1164  * fields or in array elements, while semantically valid, may incur
1165  * performance penalties.
1166  * <!-- The preceding paragraph is shared verbatim
1167  *   -- between Vector.java and package-info.java -->
1168  *
1169  * @param <E> the boxed version of {@code ETYPE},
1170  *           the element type of a vector
1171  *
1172  */
1173 @SuppressWarnings("exports")
1174 public abstract class Vector<E> extends jdk.internal.vm.vector.VectorSupport.Vector<E> {
1175 
1176     // Package-private constructor: this type is sealed within its package,
1177     // so users cannot roll their own vector types.  Passes bits to the superclass.
1178     Vector(Object bits) {
1179         super(bits);
1180     }
1181 
1182     /**
1183      * Returns the species of this vector.  The element type, lane size,
1184      * and shape of this vector agree with those reported by its species.
1185      * @return the species of this vector
1186      */
1187     public abstract VectorSpecies<E> species();
1188 
1189     /**
1190      * Returns the primitive <a href="Vector.html#ETYPE">element type</a>
1191      * ({@code ETYPE}) of this vector.
1192      *
1193      * @implSpec
1194      * This is the same value as {@code this.species().elementType()}.
1195      *
1196      * @return the primitive element type ({@code ETYPE}) of this vector
1197      */
1198     public abstract Class<E> elementType();
1199 
1200     /**
1201      * Returns the size, in bits, of each lane of this vector.
1202      *
1203      * @implSpec
1204      * This is the same value as {@code this.species().elementSize()}.
1205      *
1206      * @return the size, in bits, of a single lane of this vector
1207      */
1208     public abstract int elementSize();
1209 
1210     /**
1211      * Returns the shape of this vector.
1212      *
1213      * @implSpec
1214      * This is the same value as {@code this.species().vectorShape()}.
1215      *
1216      * @return the shape of this vector
1217      */
1218     public abstract VectorShape shape();
1219 
1220     /**
1221      * Returns the lane count, or <a href="Vector.html#VLENGTH">vector length</a>
1222      * ({@code VLENGTH}), of this vector.
1223      *
1224      * @return the lane count of this vector
1225      */
1226     public abstract int length();
1227 
1228     /**
1229      * Returns the total size, in bits, of this vector.
1230      *
1231      * @implSpec
1232      * This is the same value as {@code this.shape().vectorBitSize()}.
1233      *
1234      * @return the total size, in bits, of this vector
1235      */
1236     public abstract int bitSize();
1237 
1238     /**
1239      * Returns the total size, in bytes, of this vector.
1240      *
1241      * @implSpec
1242      * This is the same value as {@code this.bitSize()/Byte.SIZE}.
1243      *
1244      * @return the total size, in bytes, of this vector
1245      */
1246     public abstract int byteSize();
1247 
1248     /// Arithmetic
1249 
1250     /**
1251      * Operates on the lane values of this vector.
1252      *
1253      * This is a <a href="Vector.html#lane-wise">lane-wise</a>
1254      * unary operation which applies
1255      * the selected operation to each lane.
1256      *
1257      * @apiNote
1258      * Subtypes improve on this method by sharpening
1259      * the method return type.
1260      *
1261      * @param op the operation used to process lane values
1262      * @return the result of applying the operation lane-wise
1263      *         to the input vector
1264      * @throws UnsupportedOperationException if this vector does
1265      *         not support the requested operation
1266      * @see VectorOperators#NEG
1267      * @see VectorOperators#NOT
1268      * @see VectorOperators#SIN
1269      * @see #lanewise(VectorOperators.Unary,VectorMask)
1270      * @see #lanewise(VectorOperators.Binary,Vector)
1271      * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
1272      */
1273     public abstract Vector<E> lanewise(VectorOperators.Unary op);
1274 
1275     /**
1276      * Operates on the lane values of this vector,
1277      * with selection of lane elements controlled by a mask.
1278      *
1279      * This is a lane-wise unary operation which applies
1280      * the selected operation to each lane.
1281      *
1282      * @apiNote
1283      * Subtypes improve on this method by sharpening
1284      * the method return type.
1285      *
1286      * @param op the operation used to process lane values
1287      * @param m the mask controlling lane selection
1288      * @return the result of applying the operation lane-wise
1289      *         to the input vector
1290      * @throws UnsupportedOperationException if this vector does
1291      *         not support the requested operation
1292      * @see #lanewise(VectorOperators.Unary)
1293      */
1294     public abstract Vector<E> lanewise(VectorOperators.Unary op,
1295                                        VectorMask<E> m);
1296 
1297     /**
1298      * Combines the corresponding lane values of this vector
1299      * with those of a second input vector.
1300      *
1301      * This is a <a href="Vector.html#lane-wise">lane-wise</a>
1302      * binary operation which applies
1303      * the selected operation to each lane.
1304      *
1305      * @apiNote
1306      * Subtypes improve on this method by sharpening
1307      * the method return type.
1308      *
1309      * @param op the operation used to combine lane values
1310      * @param v the second input vector
1311      * @return the result of applying the operation lane-wise
1312      *         to the two input vectors
1313      * @throws UnsupportedOperationException if this vector does
1314      *         not support the requested operation
1315      * @see VectorOperators#ADD
1316      * @see VectorOperators#XOR
1317      * @see VectorOperators#ATAN2
1318      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
1319      * @see #lanewise(VectorOperators.Unary)
1320      * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
1321      */
1322     public abstract Vector<E> lanewise(VectorOperators.Binary op,
1323                                        Vector<E> v);
1324 
1325     /**
1326      * Combines the corresponding lane values of this vector
1327      * with those of a second input vector,
1328      * with selection of lane elements controlled by a mask.
1329      *
1330      * This is a lane-wise binary operation which applies
1331      * the selected operation to each lane.
1332      *
1333      * @apiNote
1334      * Subtypes improve on this method by sharpening
1335      * the method return type.
1336      *
1337      * @param op the operation used to combine lane values
1338      * @param v the second input vector
1339      * @param m the mask controlling lane selection
1340      * @return the result of applying the operation lane-wise
1341      *         to the two input vectors
1342      * @throws UnsupportedOperationException if this vector does
1343      *         not support the requested operation
1344      * @see #lanewise(VectorOperators.Binary,Vector)
1345      */
1346     public abstract Vector<E> lanewise(VectorOperators.Binary op,
1347                                        Vector<E> v, VectorMask<E> m);
1348 
1349     /**
1350      * Combines the lane values of this vector
1351      * with the value of a broadcast scalar.
1352      *
1353      * This is a lane-wise binary operation which applies
1354      * the selected operation to each lane.
1355      * The return value will be equal to this expression:
1356      * {@code this.lanewise(op, this.broadcast(e))}.
1357      *
1358      * @apiNote
1359      * The {@code long} value {@code e} must be accurately
1360      * representable by the {@code ETYPE} of this vector's species,
1361      * so that {@code e==(long)(ETYPE)e}.  This rule is enforced
1362      * by the implicit call to {@code broadcast()}.
1363      * <p>
1364      * Subtypes improve on this method by sharpening
1365      * the method return type and
1366      * the type of the scalar parameter {@code e}.
1367      *
1368      * @param op the operation used to combine lane values
1369      * @param e the input scalar
1370      * @return the result of applying the operation lane-wise
1371      *         to the input vector and the scalar
1372      * @throws UnsupportedOperationException if this vector does
1373      *         not support the requested operation
1374      * @throws IllegalArgumentException
1375      *         if the given {@code long} value cannot
1376      *         be represented by the right operand type
1377      *         of the vector operation
1378      * @see #broadcast(long)
1379      * @see #lanewise(VectorOperators.Binary,long,VectorMask)
1380      */
1381     public abstract Vector<E> lanewise(VectorOperators.Binary op,
1382                                        long e);
1383 
1384     /**
1385      * Combines the lane values of this vector
1386      * with the value of a broadcast scalar,
1387      * with selection of lane elements controlled by a mask.
1388      *
1389      * This is a lane-wise binary operation which applies
1390      * the selected operation to each lane.
1391      * The second operand is a broadcast integral value.
1392      * The return value will be equal to this expression:
1393      * {@code this.lanewise(op, this.broadcast(e), m)}.
1394      *
1395      * @apiNote
1396      * The {@code long} value {@code e} must be accurately
1397      * representable by the {@code ETYPE} of this vector's species,
1398      * so that {@code e==(long)(ETYPE)e}.  This rule is enforced
1399      * by the implicit call to {@code broadcast()}.
1400      * <p>
1401      * Subtypes improve on this method by sharpening
1402      * the method return type and
1403      * the type of the scalar parameter {@code e}.
1404      *
1405      * @param op the operation used to combine lane values
1406      * @param e the input scalar
1407      * @param m the mask controlling lane selection
1408      * @return the result of applying the operation lane-wise
1409      *         to the input vector and the scalar
1410      * @throws UnsupportedOperationException if this vector does
1411      *         not support the requested operation
1412      * @throws IllegalArgumentException
1413      *         if the given {@code long} value cannot
1414      *         be represented by the right operand type
1415      *         of the vector operation
1416      * @see #broadcast(long)
1417      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
1418      */
1419     public abstract Vector<E> lanewise(VectorOperators.Binary op,
1420                                        long e, VectorMask<E> m);
1421 
1422     /**
1423      * Combines the corresponding lane values of this vector
1424      * with the lanes of a second and a third input vector.
1425      *
1426      * This is a <a href="Vector.html#lane-wise">lane-wise</a>
1427      * ternary operation which applies
1428      * the selected operation to each lane.
1429      *
1430      * @apiNote
1431      * Subtypes improve on this method by sharpening
1432      * the method return type.
1433      *
1434      * @param op the operation used to combine lane values
1435      * @param v1 the second input vector
1436      * @param v2 the third input vector
1437      * @return the result of applying the operation lane-wise
1438      *         to the three input vectors
1439      * @throws UnsupportedOperationException if this vector does
1440      *         not support the requested operation
1441      * @see VectorOperators#BITWISE_BLEND
1442      * @see VectorOperators#FMA
1443      * @see #lanewise(VectorOperators.Unary)
1444      * @see #lanewise(VectorOperators.Binary,Vector)
1445      * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
1446      */
1447     public abstract Vector<E> lanewise(VectorOperators.Ternary op,
1448                                        Vector<E> v1,
1449                                        Vector<E> v2);
1450 
1451     /**
1452      * Combines the corresponding lane values of this vector
1453      * with the lanes of a second and a third input vector,
1454      * with selection of lane elements controlled by a mask.
1455      *
1456      * This is a lane-wise ternary operation which applies
1457      * the selected operation to each lane.
1458      *
1459      * @apiNote
1460      * Subtypes improve on this method by sharpening
1461      * the method return type.
1462      *
1463      * @param op the operation used to combine lane values
1464      * @param v1 the second input vector
1465      * @param v2 the third input vector
1466      * @param m the mask controlling lane selection
1467      * @return the result of applying the operation lane-wise
1468      *         to the three input vectors
1469      * @throws UnsupportedOperationException if this vector does
1470      *         not support the requested operation
1471      * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
1472      */
1473     public abstract Vector<E> lanewise(VectorOperators.Ternary op,
1474                                        Vector<E> v1, Vector<E> v2,
1475                                        VectorMask<E> m);
1476 
1477     // Note:  lanewise(Binary) has two rudimentary broadcast
1478     // operations from an approximate scalar type (long).
1479     // We do not bother with that, here, for lanewise(Ternary).
1480     // The vector subtypes supply a full suite of
1481     // broadcasting and masked lanewise operations
1482     // for their specific ETYPEs:
1483     //   lanewise(Unary, [mask])
1484     //   lanewise(Binary, [e | v], [mask])
1485     //   lanewise(Ternary, [e1 | v1], [e2 | v2], [mask])
1486 
1487     /// Full-service binary ops: ADD, SUB, MUL, DIV
1488 
1489     // Full-service functions support all four variations
1490     // of vector vs. broadcast scalar, and mask vs. not.
1491     // The lanewise generic operator is (by this definition)
1492     // also a full-service function.
1493 
1494     // Other named functions handle just the one named
1495     // variation.  Most lanewise operations are *not* named,
1496     // and are reached only by lanewise.
1497 
1498     /**
1499      * Adds this vector to a second input vector.
1500      *
1501      * This is a lane-wise binary operation which applies
1502      * the primitive addition operation ({@code +})
1503      * to each pair of corresponding lane values.
1504      *
1505      * This method is also equivalent to the expression
1506      * {@link #lanewise(VectorOperators.Binary,Vector)
1507      *    lanewise}{@code (}{@link VectorOperators#ADD
1508      *    ADD}{@code , v)}.
1509      *
1510      * <p>
1511      * As a full-service named operation, this method
1512      * comes in masked and unmasked overloadings, and
1513      * (in subclasses) also comes in scalar-broadcast
1514      * overloadings (both masked and unmasked).
1515      *
1516      * @param v a second input vector
1517      * @return the result of adding this vector to the second input vector
1518      * @see #add(Vector,VectorMask)
1519      * @see IntVector#add(int)
1520      * @see VectorOperators#ADD
1521      * @see #lanewise(VectorOperators.Binary,Vector)
1522      * @see IntVector#lanewise(VectorOperators.Binary,int)
1523      */
1524     public abstract Vector<E> add(Vector<E> v);
1525 
1526     /**
1527      * Adds this vector to a second input vector, selecting lanes
1528      * under the control of a mask.
1529      *
1530      * This is a masked lane-wise binary operation which applies
1531      * the primitive addition operation ({@code +})
1532      * to each pair of corresponding lane values.
1533      *
1534      * For any lane unset in the mask, the primitive operation is
1535      * suppressed and this vector retains the original value stored in
1536      * that lane.
1537      *
1538      * This method is also equivalent to the expression
1539      * {@link #lanewise(VectorOperators.Binary,Vector,VectorMask)
1540      *    lanewise}{@code (}{@link VectorOperators#ADD
1541      *    ADD}{@code , v, m)}.
1542      *
1543      * <p>
1544      * As a full-service named operation, this method
1545      * comes in masked and unmasked overloadings, and
1546      * (in subclasses) also comes in scalar-broadcast
1547      * overloadings (both masked and unmasked).
1548      *
1549      * @param v the second input vector
1550      * @param m the mask controlling lane selection
1551      * @return the result of adding this vector to the given vector
1552      * @see #add(Vector)
1553      * @see IntVector#add(int,VectorMask)
1554      * @see VectorOperators#ADD
1555      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
1556      * @see IntVector#lanewise(VectorOperators.Binary,int,VectorMask)
1557      */
1558     public abstract Vector<E> add(Vector<E> v, VectorMask<E> m);
1559 
1560     /**
1561      * Subtracts a second input vector from this vector.
1562      *
1563      * This is a lane-wise binary operation which applies
1564      * the primitive subtraction operation ({@code -})
1565      * to each pair of corresponding lane values.
1566      *
1567      * This method is also equivalent to the expression
1568      * {@link #lanewise(VectorOperators.Binary,Vector)
1569      *    lanewise}{@code (}{@link VectorOperators#SUB
1570      *    SUB}{@code , v)}.
1571      *
1572      * <p>
1573      * As a full-service named operation, this method
1574      * comes in masked and unmasked overloadings, and
1575      * (in subclasses) also comes in scalar-broadcast
1576      * overloadings (both masked and unmasked).
1577      *
1578      * @param v a second input vector
1579      * @return the result of subtracting the second input vector from this vector
1580      * @see #sub(Vector,VectorMask)
1581      * @see IntVector#sub(int)
1582      * @see VectorOperators#SUB
1583      * @see #lanewise(VectorOperators.Binary,Vector)
1584      * @see IntVector#lanewise(VectorOperators.Binary,int)
1585      */
1586     public abstract Vector<E> sub(Vector<E> v);
1587 
1588     /**
1589      * Subtracts a second input vector from this vector
1590      * under the control of a mask.
1591      *
1592      * This is a masked lane-wise binary operation which applies
1593      * the primitive subtraction operation ({@code -})
1594      * to each pair of corresponding lane values.
1595      *
1596      * For any lane unset in the mask, the primitive operation is
1597      * suppressed and this vector retains the original value stored in
1598      * that lane.
1599      *
1600      * This method is also equivalent to the expression
1601      * {@link #lanewise(VectorOperators.Binary,Vector,VectorMask)
1602      *    lanewise}{@code (}{@link VectorOperators#SUB
1603      *    SUB}{@code , v, m)}.
1604      *
1605      * <p>
1606      * As a full-service named operation, this method
1607      * comes in masked and unmasked overloadings, and
1608      * (in subclasses) also comes in scalar-broadcast
1609      * overloadings (both masked and unmasked).
1610      *
1611      * @param v the second input vector
1612      * @param m the mask controlling lane selection
1613      * @return the result of subtracting the second input vector from this vector
1614      * @see #sub(Vector)
1615      * @see IntVector#sub(int,VectorMask)
1616      * @see VectorOperators#SUB
1617      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
1618      * @see IntVector#lanewise(VectorOperators.Binary,int,VectorMask)
1619      */
1620     public abstract Vector<E> sub(Vector<E> v, VectorMask<E> m);
1621 
1622     /**
1623      * Multiplies this vector by a second input vector.
1624      *
1625      * This is a lane-wise binary operation which applies
1626      * the primitive multiplication operation ({@code *})
1627      * to each pair of corresponding lane values.
1628      *
1629      * This method is also equivalent to the expression
1630      * {@link #lanewise(VectorOperators.Binary,Vector)
1631      *    lanewise}{@code (}{@link VectorOperators#MUL
1632      *    MUL}{@code , v)}.
1633      *
1634      * <p>
1635      * As a full-service named operation, this method
1636      * comes in masked and unmasked overloadings, and
1637      * (in subclasses) also comes in scalar-broadcast
1638      * overloadings (both masked and unmasked).
1639      *
1640      * @param v a second input vector
1641      * @return the result of multiplying this vector by the second input vector
1642      * @see #mul(Vector,VectorMask)
1643      * @see IntVector#mul(int)
1644      * @see VectorOperators#MUL
1645      * @see #lanewise(VectorOperators.Binary,Vector)
1646      * @see IntVector#lanewise(VectorOperators.Binary,int)
1647      */
1648     public abstract Vector<E> mul(Vector<E> v);
1649 
1650     /**
1651      * Multiplies this vector by a second input vector
1652      * under the control of a mask.
1653      *
1654      * This is a masked lane-wise binary operation which applies
1655      * the primitive multiplication operation ({@code *})
1656      * to each pair of corresponding lane values.
1657      *
1658      * For any lane unset in the mask, the primitive operation is
1659      * suppressed and this vector retains the original value stored in
1660      * that lane.
1661      *
1662      * This method is also equivalent to the expression
1663      * {@link #lanewise(VectorOperators.Binary,Vector,VectorMask)
1664      *    lanewise}{@code (}{@link VectorOperators#MUL
1665      *    MUL}{@code , v, m)}.
1666      *
1667      * <p>
1668      * As a full-service named operation, this method
1669      * comes in masked and unmasked overloadings, and
1670      * (in subclasses) also comes in scalar-broadcast
1671      * overloadings (both masked and unmasked).
1672      *
1673      * @param v the second input vector
1674      * @param m the mask controlling lane selection
1675      * @return the result of multiplying this vector by the given vector
1676      * @see #mul(Vector)
1677      * @see IntVector#mul(int,VectorMask)
1678      * @see VectorOperators#MUL
1679      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
1680      * @see IntVector#lanewise(VectorOperators.Binary,int,VectorMask)
1681      */
1682     public abstract Vector<E> mul(Vector<E> v, VectorMask<E> m);
1683 
1684     /**
1685      * Divides this vector by a second input vector.
1686      *
1687      * This is a lane-wise binary operation which applies
1688      * the primitive division operation ({@code /})
1689      * to each pair of corresponding lane values.
1690      *
1691      * This method is also equivalent to the expression
1692      * {@link #lanewise(VectorOperators.Binary,Vector)
1693      *    lanewise}{@code (}{@link VectorOperators#DIV
1694      *    DIV}{@code , v)}.
1695      *
1696      * <p>
1697      * As a full-service named operation, this method
1698      * comes in masked and unmasked overloadings, and
1699      * (in subclasses) also comes in scalar-broadcast
1700      * overloadings (both masked and unmasked).
1701      *
1702      * @apiNote If the underlying scalar operator does not support
1703      * division by zero, but is presented with a zero divisor,
1704      * an {@code ArithmeticException} will be thrown.
1705      *
1706      * @param v a second input vector
1707      * @return the result of dividing this vector by the second input vector
1708      * @throws ArithmeticException if any lane
1709      *         in {@code v} is zero
1710      *         and {@code ETYPE} is not {@code float} or {@code double}.
1711      * @see #div(Vector,VectorMask)
1712      * @see DoubleVector#div(double)
1713      * @see VectorOperators#DIV
1714      * @see #lanewise(VectorOperators.Binary,Vector)
1715      * @see IntVector#lanewise(VectorOperators.Binary,int)
1716      */
1717     public abstract Vector<E> div(Vector<E> v);
1718 
1719     /**
1720      * Divides this vector by a second input vector
1721      * under the control of a mask.
1722      *
1723      * This is a masked lane-wise binary operation which applies
1724      * the primitive division operation ({@code /})
1725      * to each pair of corresponding lane values.
1726      *
1727      * For any lane unset in the mask, the primitive operation is
1728      * suppressed and this vector retains the original value stored in
1729      * that lane.
1730      *
1731      * This method is also equivalent to the expression
1732      * {@link #lanewise(VectorOperators.Binary,Vector,VectorMask)
1733      *    lanewise}{@code (}{@link VectorOperators#DIV
1734      *    DIV}{@code , v, m)}.
1735      *
1736      * <p>
1737      * As a full-service named operation, this method
1738      * comes in masked and unmasked overloadings, and
1739      * (in subclasses) also comes in scalar-broadcast
1740      * overloadings (both masked and unmasked).
1741      *
1742      * @apiNote If the underlying scalar operator does not support
1743      * division by zero, but is presented with a zero divisor,
1744      * an {@code ArithmeticException} will be thrown.
1745      *
1746      * @param v the second input vector
1747      * @param m the mask controlling lane selection
1748      * @return the result of dividing this vector by the second input vector
1749      * @throws ArithmeticException if any lane selected by {@code m}
1750      *         in {@code v} is zero
1751      *         and {@code ETYPE} is not {@code float} or {@code double}.
1752      * @see #div(Vector)
1753      * @see DoubleVector#div(double,VectorMask)
1754      * @see VectorOperators#DIV
1755      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
1756      * @see DoubleVector#lanewise(VectorOperators.Binary,double,VectorMask)
1757      */
1758     public abstract Vector<E> div(Vector<E> v, VectorMask<E> m);
1759 
1760     /// END OF FULL-SERVICE BINARY METHODS
1761 
1762     /// Non-full-service unary ops: NEG, ABS
1763 
1764     /**
1765      * Negates this vector.
1766      *
1767      * This is a lane-wise unary operation which applies
1768      * the primitive negation operation ({@code -x})
1769      * to each input lane.
1770      *
1771      * This method is also equivalent to the expression
1772      * {@link #lanewise(VectorOperators.Unary)
1773      *    lanewise}{@code (}{@link VectorOperators#NEG
1774      *    NEG}{@code )}.
1775      *
1776      * @apiNote
1777      * This method has no masked variant, but the corresponding
1778      * masked operation can be obtained from the
1779      * {@linkplain #lanewise(VectorOperators.Unary,VectorMask)
1780      * lanewise method}.
1781      *
1782      * @return the negation of this vector
1783      * @see VectorOperators#NEG
1784      * @see #lanewise(VectorOperators.Unary)
1785      * @see #lanewise(VectorOperators.Unary,VectorMask)
1786      */
1787     public abstract Vector<E> neg();
1788 
1789     /**
1790      * Returns the absolute value of this vector.
1791      *
1792      * This is a lane-wise unary operation which applies
1793      * the method {@code Math.abs}
1794      * to each input lane.
1795      *
1796      * This method is also equivalent to the expression
1797      * {@link #lanewise(VectorOperators.Unary)
1798      *    lanewise}{@code (}{@link VectorOperators#ABS
1799      *    ABS}{@code )}.
1800      *
1801      * @apiNote
1802      * This method has no masked variant, but the corresponding
1803      * masked operation can be obtained from the
1804      * {@linkplain #lanewise(VectorOperators.Unary,VectorMask)
1805      * lanewise method}.
1806      *
1807      * @return the absolute value of this vector
1808      * @see VectorOperators#ABS
1809      * @see #lanewise(VectorOperators.Unary)
1810      * @see #lanewise(VectorOperators.Unary,VectorMask)
1811      */
1812     public abstract Vector<E> abs();
1813 
1814     /// Non-full-service binary ops: MIN, MAX
1815 
1816     /**
1817      * Computes the smaller of this vector and a second input vector.
1818      *
1819      * This is a lane-wise binary operation which applies the
1820      * operation {@code Math.min()} to each pair of
1821      * corresponding lane values.
1822      *
1823      * This method is also equivalent to the expression
1824      * {@link #lanewise(VectorOperators.Binary,Vector)
1825      *    lanewise}{@code (}{@link VectorOperators#MIN
1826      *    MIN}{@code , v)}.
1827      *
1828      * @apiNote
1829      * This is not a full-service named operation like
1830      * {@link #add(Vector) add()}.  A masked version of
1831      * this operation is not directly available
1832      * but may be obtained via the masked version of
1833      * {@code lanewise}.  Subclasses define an additional
1834      * scalar-broadcast overloading of this method.
1835      *
1836      * @param v a second input vector
1837      * @return the lanewise minimum of this vector and the second input vector
1838      * @see IntVector#min(int)
1839      * @see VectorOperators#MIN
1840      * @see #lanewise(VectorOperators.Binary,Vector)
1841      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
1842      */
1843     public abstract Vector<E> min(Vector<E> v);
1844 
1845     /**
1846      * Computes the larger of this vector and a second input vector.
1847      *
1848      * This is a lane-wise binary operation which applies the
1849      * operation {@code Math.max()} to each pair of
1850      * corresponding lane values.
1851      *
1852      * This method is also equivalent to the expression
1853      * {@link #lanewise(VectorOperators.Binary,Vector)
1854      *    lanewise}{@code (}{@link VectorOperators#MAX
1855      *    MAX}{@code , v)}.
1856      *
1857      * @apiNote
1858      * This is not a full-service named operation like
1859      * {@link #add(Vector) add()}.  A masked version of
1860      * this operation is not directly available
1861      * but may be obtained via the masked version of
1862      * {@code lanewise}.  Subclasses define an additional
1863      * scalar-broadcast overloading of this method.
1864      *
1865      * @param v a second input vector
1866      * @return the lanewise maximum of this vector and the second input vector
1867      * @see IntVector#max(int)
1868      * @see VectorOperators#MAX
1869      * @see #lanewise(VectorOperators.Binary,Vector)
1870      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
1871      */
1872     public abstract Vector<E> max(Vector<E> v);
1873 
1874     // Reductions
1875 
1876     /**
1877      * Returns a value accumulated from all the lanes of this vector.
1878      *
1879      * This is an associative cross-lane reduction operation which
1880      * applies the specified operation to all the lane elements.
1881      * The return value will be equal to this expression:
1882      * {@code (long) ((EVector)this).reduceLanes(op)}, where {@code EVector}
1883      * is the vector class specific to this vector's element type
1884      * {@code ETYPE}.
1885      * <p>
1886      * In the case of operations {@code ADD} and {@code MUL},
1887      * when {@code ETYPE} is {@code float} or {@code double},
1888      * the precise result, before casting, will reflect the choice
1889      * of an arbitrary order of operations, which may even vary over time.
1890      * For further details see the section
1891      * <a href="VectorOperators.html#fp_assoc">Operations on floating point vectors</a>.
1892      *
1893      * @apiNote
1894      * If the {@code ETYPE} is {@code float} or {@code double},
1895      * this operation can lose precision and/or range, as a
1896      * normal part of casting the result down to {@code long}.
1897      *
1898      * Usually
1899      * {@linkplain IntVector#reduceLanes(VectorOperators.Associative)
1900      * strongly typed access}
1901      * is preferable, if you are working with a vector
1902      * subtype that has a known element type.
1903      *
1904      * @param op the operation used to combine lane values
1905      * @return the accumulated result, cast to {@code long}
1906      * @throws UnsupportedOperationException if this vector does
1907      *         not support the requested operation
1908      * @see #reduceLanesToLong(VectorOperators.Associative,VectorMask)
1909      * @see IntVector#reduceLanes(VectorOperators.Associative)
1910      * @see FloatVector#reduceLanes(VectorOperators.Associative)
1911      */
1912     public abstract long reduceLanesToLong(VectorOperators.Associative op);
1913 
1914     /**
1915      * Returns a value accumulated from selected lanes of this vector,
1916      * controlled by a mask.
1917      *
1918      * This is an associative cross-lane reduction operation which
1919      * applies the specified operation to the selected lane elements.
1920      * The return value will be equal to this expression:
1921      * {@code (long) ((EVector)this).reduceLanes(op, m)}, where {@code EVector}
1922      * is the vector class specific to this vector's element type
1923      * {@code ETYPE}.
1924      * <p>
1925      * If no elements are selected, an operation-specific identity
1926      * value is returned.
1927      * <ul>
1928      * <li>
1929      * If the operation is {@code ADD}, {@code XOR}, or {@code OR},
1930      * then the identity value is zero.
1931      * <li>
1932      * If the operation is {@code MUL},
1933      * then the identity value is one.
1934      * <li>
1935      * If the operation is {@code AND},
1936      * then the identity value is minus one (all bits set).
1937      * <li>
1938      * If the operation is {@code MAX},
1939      * then the identity value is the {@code MIN_VALUE}
1940      * of the vector's native {@code ETYPE}.
1941      * (In the case of floating point types, the value
1942      * {@code NEGATIVE_INFINITY} is used, and will appear
1943      * after casting as {@code Long.MIN_VALUE}.)
1944      * <li>
1945      * If the operation is {@code MIN},
1946      * then the identity value is the {@code MAX_VALUE}
1947      * of the vector's native {@code ETYPE}.
1948      * (In the case of floating point types, the value
1949      * {@code POSITIVE_INFINITY} is used, and will appear
1950      * after casting as {@code Long.MAX_VALUE}.)
1951      * </ul>
1952      * <p>
1953      * In the case of operations {@code ADD} and {@code MUL},
1954      * when {@code ETYPE} is {@code float} or {@code double},
1955      * the precise result, before casting, will reflect the choice
1956      * of an arbitrary order of operations, which may even vary over time.
1957      * For further details see the section
1958      * <a href="VectorOperators.html#fp_assoc">Operations on floating point vectors</a>.
1959      *
1960      * @apiNote
1961      * If the {@code ETYPE} is {@code float} or {@code double},
1962      * this operation can lose precision and/or range, as a
1963      * normal part of casting the result down to {@code long}.
1964      *
1965      * Usually
1966      * {@linkplain IntVector#reduceLanes(VectorOperators.Associative,VectorMask)
1967      * strongly typed access}
1968      * is preferable, if you are working with a vector
1969      * subtype that has a known element type.
1970      *
1971      * @param op the operation used to combine lane values
1972      * @param m the mask controlling lane selection
1973      * @return the result accumulated from the selected lanes, cast to {@code long}
1974      * @throws UnsupportedOperationException if this vector does
1975      *         not support the requested operation
1976      * @see #reduceLanesToLong(VectorOperators.Associative)
1977      * @see IntVector#reduceLanes(VectorOperators.Associative,VectorMask)
1978      * @see FloatVector#reduceLanes(VectorOperators.Associative,VectorMask)
1979      */
1980     public abstract long reduceLanesToLong(VectorOperators.Associative op,
1981                                            VectorMask<E> m);
1982 
1983     // Lanewise unary tests
1984 
1985     /**
1986      * Tests the lanes of this vector
1987      * according to the given operation.
1988      *
1989      * This is a lane-wise unary test operation which applies
1990      * the given test operation
1991      * to each lane value of this vector.
1992      * @param op the operation used to test lane values
1993      * @return the mask result of testing the lanes of this vector,
1994      *         according to the selected test operator
1995      * @see VectorOperators.Comparison
1996      * @see #test(VectorOperators.Test, VectorMask)
1997      * @see #compare(VectorOperators.Comparison, Vector)
1998      */
1999     public abstract VectorMask<E> test(VectorOperators.Test op);
2000 
2001     /**
2002      * Tests selected lanes of this vector,
2003      * according to the given operation.
2004      *
2005      * This is a masked lane-wise unary test operation which applies
2006      * the given test operation
2007      * to each lane value.
2008      *
2009      * The returned result is equal to the expression
2010      * {@code test(op).and(m)}.
2011      *
2012      * @param op the operation used to test lane values
2013      * @param m the mask controlling lane selection
2014      * @return the mask result of testing the lanes of this vector,
2015      *         according to the selected test operator,
2016      *         and only in the lanes selected by the mask
2017      * @see #test(VectorOperators.Test)
2018      */
2019     public abstract VectorMask<E> test(VectorOperators.Test op,
2020                                        VectorMask<E> m);
2021 
2022     // Comparisons
2023 
2024     /**
2025      * Tests if this vector is equal to another input vector.
2026      *
2027      * This is a lane-wise binary test operation which applies
2028      * the primitive equals operation ({@code ==})
2029      * to each pair of corresponding lane values.
2030      * The result is the same as {@code compare(VectorOperators.EQ, v)}.
2031      *
2032      * @param v a second input vector
2033      * @return the mask result of testing lane-wise if this vector
2034      *         is equal to the second input vector
2035      * @see #compare(VectorOperators.Comparison,Vector)
2036      * @see VectorOperators#EQ
2037      * @see #equals
2038      */
2039     public abstract VectorMask<E> eq(Vector<E> v);
2040 
2041     /**
2042      * Tests if this vector is less than another input vector.
2043      *
2044      * This is a lane-wise binary test operation which applies the primitive
2045      * less-than operation ({@code <}) to each pair of corresponding lane values.
2046      * The result is the same as {@code compare(VectorOperators.LT, v)}.
2047      *
2048      * @param v a second input vector
2049      * @return the mask result of testing lane-wise if this vector
2050      *         is less than the second input vector
2051      * @see #compare(VectorOperators.Comparison,Vector)
2052      * @see VectorOperators#LT
2053      */
2054     public abstract VectorMask<E> lt(Vector<E> v);
2055 
2056     /**
2057      * Tests this vector by comparing it with another input vector,
2058      * according to the given comparison operation.
2059      *
2060      * This is a lane-wise binary test operation which applies
2061      * the given comparison operation
2062      * to each pair of corresponding lane values.
2063      *
2064      * @param op the operation used to compare lane values
2065      * @param v a second input vector
2066      * @return the mask result of testing lane-wise if this vector
2067      *         compares to the second input vector, according to the
2068      *         selected comparison operator
2069      * @see #eq(Vector)
2070      * @see #lt(Vector)
2071      * @see VectorOperators.Comparison
2072      * @see #compare(VectorOperators.Comparison, Vector, VectorMask)
2073      * @see #test(VectorOperators.Test)
2074      */
2075     public abstract VectorMask<E> compare(VectorOperators.Comparison op,
2076                                           Vector<E> v);
2077 
2078     /**
2079      * Tests this vector by comparing it with another input vector,
2080      * according to the given comparison operation,
2081      * in lanes selected by a mask.
2082      *
2083      * This is a masked lane-wise binary test operation which applies
2084      * the given comparison operation
2085      * to each pair of corresponding lane values.
2086      *
2087      * The returned result is equal to the expression
2088      * {@code compare(op,v).and(m)}.
2089      *
2090      * @param op the operation used to compare lane values
2091      * @param v a second input vector
2092      * @param m the mask controlling lane selection
2093      * @return the mask result of testing lane-wise if this vector
2094      *         compares to the second input vector, according to the
2095      *         selected comparison operator,
2096      *         and only in the lanes selected by the mask
2097      * @see #compare(VectorOperators.Comparison, Vector)
2098      */
2099     public abstract VectorMask<E> compare(VectorOperators.Comparison op,
2100                                           Vector<E> v,
2101                                           VectorMask<E> m);
2102 
2103     /**
2104      * Tests this vector by comparing it with an input scalar,
2105      * according to the given comparison operation.
2106      *
2107      * This is a lane-wise binary test operation which applies
2108      * the given comparison operation
2109      * to each lane value, paired with the broadcast value.
2110      *
2111      * <p>
2112      * The result is the same as
2113      * {@code this.compare(op, this.broadcast(e))}.
2114      * That is, the scalar may be regarded as broadcast to
2115      * a vector of the same species, and then compared
2116      * against the original vector, using the selected
2117      * comparison operation.
2118      *
2119      * @apiNote
2120      * The {@code long} value {@code e} must be accurately
2121      * representable by the {@code ETYPE} of this vector's species,
2122      * so that {@code e==(long)(ETYPE)e}.  This rule is enforced
2123      * by the implicit call to {@code broadcast()}.
2124      * <p>
2125      * Subtypes improve on this method by sharpening
2126      * the type of the scalar parameter {@code e}.
2127      *
2128      * @param op the operation used to compare lane values
2129      * @param e the input scalar
2130      * @return the mask result of testing lane-wise if this vector
2131      *         compares to the input scalar, according to the selected
2132      *         comparison operator
2133      * @throws IllegalArgumentException
2134      *         if the given {@code long} value cannot
2135      *         be represented by the vector's {@code ETYPE}
2136      * @see #broadcast(long)
2137      * @see #compare(VectorOperators.Comparison,Vector)
2138      */
2139     public abstract VectorMask<E> compare(VectorOperators.Comparison op,
2140                                           long e);
2141 
2142     /**
2143      * Tests this vector by comparing it with an input scalar,
2144      * according to the given comparison operation,
2145      * in lanes selected by a mask.
2146      *
2147      * This is a masked lane-wise binary test operation which applies
2148      * the given comparison operation
2149      * to each lane value, paired with the broadcast value.
2150      *
2151      * The returned result is equal to the expression
2152      * {@code compare(op,e).and(m)}.
2153      *
2154      * @apiNote
2155      * The {@code long} value {@code e} must be accurately
2156      * representable by the {@code ETYPE} of this vector's species,
2157      * so that {@code e==(long)(ETYPE)e}.  This rule is enforced
2158      * by the implicit call to {@code broadcast()}.
2159      * <p>
2160      * Subtypes improve on this method by sharpening
2161      * the type of the scalar parameter {@code e}.
2162      *
2163      * @param op the operation used to compare lane values
2164      * @param e the input scalar
2165      * @param m the mask controlling lane selection
2166      * @return the mask result of testing lane-wise if this vector
2167      *         compares to the input scalar, according to the selected
2168      *         comparison operator,
2169      *         and only in the lanes selected by the mask
2170      * @throws IllegalArgumentException
2171      *         if the given {@code long} value cannot
2172      *         be represented by the vector's {@code ETYPE}
2173      * @see #broadcast(long)
2174      * @see #compare(VectorOperators.Comparison,Vector)
2175      */
2176     public abstract VectorMask<E> compare(VectorOperators.Comparison op,
2177                                           long e,
2178                                           VectorMask<E> m);
2179 
2180     /**
2181      * Replaces selected lanes of this vector with
2182      * corresponding lanes from a second input vector
2183      * under the control of a mask.
2184      *
2185      * This is a masked lane-wise binary operation which
2186      * selects each lane value from one or the other input.
2187      *
2188      * <ul>
2189      * <li>
2190      * For any lane <em>set</em> in the mask, the new lane value
2191      * is taken from the second input vector, and replaces
2192      * whatever value was in that lane of this vector.
2193      * <li>
2194      * For any lane <em>unset</em> in the mask, the replacement is
2195      * suppressed and this vector retains the original value stored in
2196      * that lane.
2197      * </ul>
2198      *
2199      * The following pseudocode illustrates this behavior:
2200      * <pre>{@code
2201      * Vector<E> a = ...;
2202      * VectorSpecies<E> species = a.species();
2203      * Vector<E> b = ...;
2204      * b.check(species);
2205      * VectorMask<E> m = ...;
2206      * ETYPE[] ar = a.toArray();
2207      * for (int i = 0; i < ar.length; i++) {
2208      *     if (m.laneIsSet(i)) {
2209      *         ar[i] = b.lane(i);
2210      *     }
2211      * }
2212      * return EVector.fromArray(species, ar, 0);
2213      * }</pre>
2214      *
2215      * @param v the second input vector, containing replacement lane values
2216      * @param m the mask controlling lane selection from the second input vector
2217      * @return the result of blending the lane elements of this vector with
2218      *         those of the second input vector
2219      */
2220     public abstract Vector<E> blend(Vector<E> v, VectorMask<E> m);
2221 
2222     /**
2223      * Replaces selected lanes of this vector with
2224      * a scalar value
2225      * under the control of a mask.
2226      *
2227      * This is a masked lane-wise binary operation which
2228      * selects each lane value from one or the other input.
2229      *
2230      * The returned result is equal to the expression
2231      * {@code blend(broadcast(e),m)}.
2232      *
2233      * @apiNote
2234      * The {@code long} value {@code e} must be accurately
2235      * representable by the {@code ETYPE} of this vector's species,
2236      * so that {@code e==(long)(ETYPE)e}.  The implicit call to {@code broadcast()}
2237      * enforces this rule by throwing {@code IllegalArgumentException}.
2238      * <p>
2239      * Subtypes improve on this method by sharpening
2240      * the type of the scalar parameter {@code e}.
2241      *
2242      * @param e the input scalar, containing the replacement lane value
2243      * @param m the mask controlling lane selection of the scalar
2244      * @return the result of blending the lane elements of this vector with
2245      *         the scalar value
2246      */
2247     public abstract Vector<E> blend(long e, VectorMask<E> m);
2248 
2249     /**
2250      * Adds the lanes of this vector to their corresponding
2251      * lane numbers, scaled by a given constant.
2252      *
2253      * This is a lane-wise unary operation which, for
2254      * each lane {@code N}, computes the scaled index value
2255      * {@code N*scale} and adds it to the value already
2256      * in lane {@code N} of the current vector.
2257      *
2258      * <p> The scale must not be so large, and the element size must
2259      * not be so small, that there would be an overflow when
2260      * computing any of the {@code N*scale} or {@code VLENGTH*scale},
2261      * when the result is represented using the vector
2262      * lane type {@code ETYPE}.
2263      *
2264      * <p>
2265      * The following pseudocode illustrates this behavior:
2266      * <pre>{@code
2267      * Vector<E> a = ...;
2268      * VectorSpecies<E> species = a.species();
2269      * ETYPE[] ar = a.toArray();
2270      * for (int i = 0; i < ar.length; i++) {
2271      *     long d = (long)i * scale;
2272      *     if (d != (ETYPE) d)  throw ...;
2273      *     ar[i] += (ETYPE) d;
2274      * }
2275      * long d = (long)ar.length * scale;
2276      * if (d != (ETYPE) d)  throw ...;
2277      * return EVector.fromArray(species, ar, 0);
2278      * }</pre>
2279      *
2280      * @param scale the number to multiply by each lane index
2281      *        {@code N}, typically {@code 1}
2282      * @return the result of incrementing each lane element by its
2283      *         corresponding lane index {@code N}, scaled by {@code scale}
2284      * @throws IllegalArgumentException
2285      *         if the values in the interval
2286      *         {@code [0..VLENGTH*scale]}
2287      *         are not representable by the {@code ETYPE}
2288      */
2289     public abstract Vector<E> addIndex(int scale);
2290 
2291     // Slicing segments of adjacent lanes
2292 
2293     /**
2294      * Slices a segment of adjacent lanes, starting at a given
2295      * {@code origin} lane in the current vector, and continuing (as
2296      * needed) into an immediately following vector.  The block of
2297      * {@code VLENGTH} lanes is extracted into its own vector and
2298      * returned.
2299      *
2300      * <p> This is a cross-lane operation that shifts lane elements
2301      * to the front, from the current vector and the second vector.
2302      * Both vectors can be viewed as a combined "background" of length
2303      * {@code 2*VLENGTH}, from which a slice is extracted.
2304      *
2305      * The lane numbered {@code N} in the output vector is copied
2306      * from lane {@code origin+N} of the input vector, if that
2307      * lane exists, else from lane {@code origin+N-VLENGTH} of
2308      * the second vector (which is guaranteed to exist).
2309      *
2310      * <p> The {@code origin} value must be in the inclusive range
2311      * {@code 0..VLENGTH}.  As limiting cases, {@code v.slice(0,w)}
2312      * and {@code v.slice(VLENGTH,w)} return {@code v} and {@code w},
2313      * respectively.
2314      *
2315      * @apiNote
2316      *
2317      * This method may be regarded as the inverse of
2318      * {@link #unslice(int,Vector,int) unslice()},
2319      * in that the sliced value could be unsliced back into its
2320      * original position in the two input vectors, without
2321      * disturbing unrelated elements, as in the following
2322      * pseudocode:
2323      * <pre>{@code
2324      * EVector slice = v1.slice(origin, v2);
2325      * EVector w1 = slice.unslice(origin, v1, 0);
2326      * EVector w2 = slice.unslice(origin, v2, 1);
2327      * assert v1.equals(w1);
2328      * assert v2.equals(w2);
2329      * }</pre>
2330      *
2331      * <p> This method also supports a variety of cross-lane shifts and
2332      * rotates as follows:
2333      * <ul>
2334      *
2335      * <li>To shift lanes forward to the front of the vector, supply a
2336      * zero vector for the second operand and specify the shift count
2337      * as the origin.  For example: {@code v.slice(shift, v.broadcast(0))}.
2338      *
2339      * <li>To shift lanes backward to the back of the vector, supply a
2340      * zero vector for the <em>first</em> operand, and specify the
2341      * negative shift count as the origin (modulo {@code VLENGTH}).
2342      * For example: {@code v.broadcast(0).slice(v.length()-shift, v)}.
2343      *
2344      * <li>To rotate lanes forward toward the front end of the vector,
2345      * cycling the earliest lanes around to the back, supply the same
2346      * vector for both operands and specify the rotate count as the
2347      * origin.  For example: {@code v.slice(rotate, v)}.
2348      *
2349      * <li>To rotate lanes backward toward the back end of the vector,
2350      * cycling the latest lanes around to the front, supply the same
2351      * vector for both operands and specify the negative of the rotate
2352      * count (modulo {@code VLENGTH}) as the origin.  For example:
2353      * {@code v.slice(v.length() - rotate, v)}.
2354      *
2355      * <li>
2356      * Since {@code origin} values less than zero or more than
2357      * {@code VLENGTH} will be rejected, if you need to rotate
2358      * by an unpredictable multiple of {@code VLENGTH}, be sure
2359      * to reduce the origin value into the required range.
2360      * The {@link VectorSpecies#loopBound(int) loopBound()}
2361      * method can help with this.  For example:
2362      * {@code v.slice(rotate - v.species().loopBound(rotate), v)}.
2363      *
2364      * </ul>
2365      *
2366      * @param origin the first input lane to transfer into the slice
2367      * @param v1 a second vector logically concatenated with the first,
2368      *        before the slice is taken (if omitted it defaults to zero)
2369      * @return a contiguous slice of {@code VLENGTH} lanes, taken from
2370      *         this vector starting at the indicated origin, and
2371      *         continuing (as needed) into the second vector
2372      * @throws ArrayIndexOutOfBoundsException if {@code origin}
2373      *         is negative or greater than {@code VLENGTH}
2374      * @see #slice(int,Vector,VectorMask)
2375      * @see #slice(int)
2376      * @see #unslice(int,Vector,int)
2377      */
2378     public abstract Vector<E> slice(int origin, Vector<E> v1);
2379 
2380     /**
2381      * Slices a segment of adjacent lanes
2382      * under the control of a mask,
2383      * starting at a given
2384      * {@code origin} lane in the current vector, and continuing (as
2385      * needed) into an immediately following vector.  The block of
2386      * {@code VLENGTH} lanes is extracted into its own vector and
2387      * returned.
2388      *
2389      * The resulting vector will be zero in all lanes unset in the
2390      * given mask.  Lanes set in the mask will contain data copied
2391      * from selected lanes of {@code this} or {@code v1}.
2392      *
2393      * <p> This is a cross-lane operation that shifts lane elements
2394      * to the front, from the current vector and the second vector.
2395      * Both vectors can be viewed as a combined "background" of length
2396      * {@code 2*VLENGTH}, from which a slice is extracted.
2397      *
2398      * The returned result is equal to the expression
2399      * {@code broadcast(0).blend(slice(origin,v1),m)}.
2400      *
2401      * @apiNote
2402      * This method may be regarded as the inverse of
2403      * {@link #unslice(int,Vector,int,VectorMask) unslice()},
2404      * in that the sliced value could be unsliced back into its
2405      * original position in the two input vectors, without
2406      * disturbing unrelated elements, as in the following
2407      * pseudocode:
2408      * <pre>{@code
2409      * EVector slice = v1.slice(origin, v2, m);
2410      * EVector w1 = slice.unslice(origin, v1, 0, m);
2411      * EVector w2 = slice.unslice(origin, v2, 1, m);
2412      * assert v1.equals(w1);
2413      * assert v2.equals(w2);
2414      * }</pre>
2415      *
2416      * @param origin the first input lane to transfer into the slice
2417      * @param v1 a second vector logically concatenated with the first,
2418      *        before the slice is taken (if omitted it defaults to zero)
2419      * @param m the mask controlling lane selection into the resulting vector
2420      * @return a contiguous slice of {@code VLENGTH} lanes, taken from
2421      *         this vector starting at the indicated origin, and
2422      *         continuing (as needed) into the second vector
2423      * @throws ArrayIndexOutOfBoundsException if {@code origin}
2424      *         is negative or greater than {@code VLENGTH}
2425      * @see #slice(int,Vector)
2426      * @see #unslice(int,Vector,int,VectorMask)
2427      */
2428     // This doesn't pull its weight, but it's symmetrical with
2429     // masked unslice, and might cause questions if missing.
2430     // It could make for clearer code.
2431     public abstract Vector<E> slice(int origin, Vector<E> v1, VectorMask<E> m);
2432 
2433     /**
2434      * Slices a segment of adjacent lanes, starting at a given
2435      * {@code origin} lane in the current vector.  A block of
2436      * {@code VLENGTH} lanes, possibly padded with zero lanes, is
2437      * extracted into its own vector and returned.
2438      *
2439      * This is a convenience method which slices from a single
2440      * vector against an extended background of zero lanes.
2441      * It is equivalent to
2442      * {@link #slice(int,Vector) slice}{@code
2443      * (origin, }{@link #broadcast(long) broadcast}{@code (0))}.
2444      * It may also be viewed simply as a cross-lane shift
2445      * from later to earlier lanes, with zeroes filling
2446      * in the vacated lanes at the end of the vector.
2447      * In this view, the shift count is {@code origin}.
2448      *
2449      * @param origin the first input lane to transfer into the slice
2450      * @return the last {@code VLENGTH-origin} input lanes,
2451      *         placed starting in the first lane of the output,
2452      *         padded at the end with zeroes
2453      * @throws ArrayIndexOutOfBoundsException if {@code origin}
2454      *         is negative or greater than {@code VLENGTH}
2455      * @see #slice(int,Vector)
2456      * @see #unslice(int,Vector,int)
2457      */
2458     // This API point pulls its weight as a teaching aid,
2459     // though it's a one-off and broadcast(0) is easy.
2460     public abstract Vector<E> slice(int origin);
2461 
2462     /**
2463      * Reverses a {@linkplain #slice(int,Vector) slice()}, inserting
2464      * the current vector as a slice within another "background" input
2465      * vector, which is regarded as one or the other input to a
2466      * hypothetical subsequent {@code slice()} operation.
2467      *
2468      * <p> This is a cross-lane operation that permutes the lane
2469      * elements of the current vector toward the back and inserts them
2470      * into a logical pair of background vectors.  Only one of the
2471      * pair will be returned, however.  The background is formed by
2472      * duplicating the second input vector.  (However, the output will
2473      * never contain two duplicates from the same input lane.)
2474      *
2475      * The lane numbered {@code N} in the input vector is copied into
2476      * lane {@code origin+N} of the first background vector, if that
2477      * lane exists, else into lane {@code origin+N-VLENGTH} of the
2478      * second background vector (which is guaranteed to exist).
2479      *
2480      * The first or second background vector, updated with the
2481      * inserted slice, is returned.  The {@code part} number of zero
2482      * or one selects the first or second updated background vector.
2483      *
2484      * <p> The {@code origin} value must be in the inclusive range
2485      * {@code 0..VLENGTH}.  As limiting cases, {@code v.unslice(0,w,0)}
2486      * and {@code v.unslice(VLENGTH,w,1)} both return {@code v}, while
2487      * {@code v.unslice(0,w,1)} and {@code v.unslice(VLENGTH,w,0)}
2488      * both return {@code w}.
2489      *
2490      * @apiNote
2491      * This method supports a variety of cross-lane insertion
2492      * operations as follows:
2493      * <ul>
2494      *
2495      * <li>To insert near the end of a background vector {@code w}
2496      * at some offset, specify the offset as the origin and
2497      * select part zero. For example: {@code v.unslice(offset, w, 0)}.
2498      *
2499      * <li>To insert near the end of a background vector {@code w},
2500      * but capturing the overflow into the next vector {@code x},
2501      * specify the offset as the origin and select part one.
2502      * For example: {@code v.unslice(offset, x, 1)}.
2503      *
2504      * <li>To insert the last {@code N} items near the beginning
2505      * of a background vector {@code w}, supply {@code VLENGTH-N}
2506      * as the origin and select part one.
2507      * For example: {@code v.unslice(v.length()-N, w, 1)}.
2508      *
2509      * </ul>
2510      *
2511      * @param origin the first output lane to receive the slice
2512      * @param w the background vector that (as two copies) will receive
2513      *        the inserted slice
2514      * @param part the part number of the result (either zero or one)
2515      * @return either the first or second part of a pair of
2516      *         background vectors {@code w}, updated by inserting
2517      *         this vector at the indicated origin
2518      * @throws ArrayIndexOutOfBoundsException if {@code origin}
2519      *         is negative or greater than {@code VLENGTH},
2520      *         or if {@code part} is not zero or one
2521      * @see #slice(int,Vector)
2522      * @see #unslice(int,Vector,int,VectorMask)
2523      */
2524     public abstract Vector<E> unslice(int origin, Vector<E> w, int part);
2525 
2526     /**
2527      * Reverses a {@linkplain #slice(int,Vector) slice()}, inserting
2528      * (under the control of a mask)
2529      * the current vector as a slice within another "background" input
2530      * vector, which is regarded as one or the other input to a
2531      * hypothetical subsequent {@code slice()} operation.
2532      *
2533      * <p> This is a cross-lane operation that permutes the lane
2534      * elements of the current vector toward the back and inserts its lanes
2535      * (when selected by the mask) into a logical pair of background
2536      * vectors.  As with the
2537      * {@linkplain #unslice(int,Vector,int) unmasked version} of this method,
2538      * only one of the pair will be returned, as selected by the
2539      * {@code part} number.
2540      *
2541      * For each lane {@code N} selected by the mask, the lane value
2542      * is copied into
2543      * lane {@code origin+N} of the first background vector, if that
2544      * lane exists, else into lane {@code origin+N-VLENGTH} of the
2545      * second background vector (which is guaranteed to exist).
2546      * Background lanes retain their original values if the
2547      * corresponding input lanes {@code N} are unset in the mask.
2548      *
2549      * The first or second background vector, updated with set lanes
2550      * of the inserted slice, is returned.  The {@code part} number of
2551      * zero or one selects the first or second updated background
2552      * vector.
2553      *
2554      * @param origin the first output lane to receive the slice
2555      * @param w the background vector that (as two copies) will receive
2556      *        the lanes of the inserted slice that are set in {@code m}
2557      * @param part the part number of the result (either zero or one)
2558      * @param m the mask controlling lane selection from the current vector
2559      * @return either the first or second part of a pair of
2560      *         background vectors {@code w}, updated by inserting
2561      *         selected lanes of this vector at the indicated origin
2562      * @throws ArrayIndexOutOfBoundsException if {@code origin}
2563      *         is negative or greater than {@code VLENGTH},
2564      *         or if {@code part} is not zero or one
2565      * @see #unslice(int,Vector,int)
2566      * @see #slice(int,Vector)
2567      */
2568     public abstract Vector<E> unslice(int origin, Vector<E> w, int part, VectorMask<E> m);
2569 
2570     /**
2571      * Reverses a {@linkplain #slice(int) slice()}, inserting
2572      * the current vector as a slice within a "background" input
2573      * of zero lane values.  Unlike the other {@code unslice()}
2574      * methods, this method returns only the first of the
2575      * pair of background vectors.
2576      *
2577      * This is a convenience method which returns the result of
2578      * {@link #unslice(int,Vector,int) unslice}{@code
2579      * (origin, }{@link #broadcast(long) broadcast}{@code (0), 0)}.
2580      * It may also be viewed simply as a cross-lane shift
2581      * from earlier to later lanes, with zeroes filling
2582      * in the vacated lanes at the beginning of the vector.
2583      * In this view, the shift count is {@code origin}.
2584      *
2585      * @param origin the first output lane to receive the slice
2586      * @return the first {@code VLENGTH-origin} input lanes,
2587      *         placed starting at the given origin,
2588      *         padded at the beginning with zeroes
2589      * @throws ArrayIndexOutOfBoundsException if {@code origin}
2590      *         is negative or greater than {@code VLENGTH}
2591      * @see #unslice(int,Vector,int)
2592      * @see #slice(int)
2593      */
2594     // This API point pulls its weight as a teaching aid,
2595     // though it's a one-off and broadcast(0) is easy.
2596     public abstract Vector<E> unslice(int origin);
2597 
2598     // ISSUE: Add a slice which uses a mask instead of an origin?
2599     //public abstract Vector<E> slice(VectorMask<E> support);
2600 
2601     // ISSUE: Add some more options for questionable edge conditions?
2602     // We might define enum EdgeOption { ERROR, ZERO, WRAP } for the
2603     // default of throwing AIOOBE, or substituting zeroes, or just
2604     // reducing the out-of-bounds index modulo VLENGTH.  Similar
2605     // concerns also apply to general Shuffle operations.  For now,
2606     // just support ERROR, since that is safest.
2607 
2608     /**
2609      * Rearranges the lane elements of this vector, selecting lanes
2610      * under the control of a specific shuffle.
2611      *
2612      * This is a cross-lane operation that rearranges the lane
2613      * elements of this vector.
2614      *
2615      * For each lane {@code N} of the shuffle, with the corresponding
2616      * lane source index {@code I=s.laneSource(N)} in the shuffle,
2617      * the output lane {@code N} obtains the value from
2618      * the input vector at lane {@code I}.
2619      *
2620      * @param s the shuffle controlling lane index selection
2621      * @return the rearrangement of the lane elements of this vector
2622      * @throws IndexOutOfBoundsException if there are any exceptional
2623      *        source indexes in the shuffle
2624      * @see #rearrange(VectorShuffle,VectorMask)
2625      * @see #rearrange(VectorShuffle,Vector)
2626      * @see VectorShuffle#laneIsValid()
2627      */
2628     public abstract Vector<E> rearrange(VectorShuffle<E> s);
2629 
2630     /**
2631      * Rearranges the lane elements of this vector, selecting lanes
2632      * under the control of a specific shuffle and a mask.
2633      *
2634      * This is a cross-lane operation that rearranges the lane
2635      * elements of this vector.
2636      *
2637      * For each lane {@code N} of the shuffle, with the corresponding
2638      * lane source index {@code I=s.laneSource(N)} in the shuffle,
2639      * the output lane {@code N} obtains the value from the input
2640      * vector at lane {@code I} if the mask is set at lane {@code N}.
2641      * Otherwise the output lane {@code N} is set to zero.
2642      *
2643      * <p> This method returns the value of this pseudocode:
2644      * <pre>{@code
2645      * Vector<E> r = this.rearrange(s.wrapIndexes());
2646      * VectorMask<E> valid = s.laneIsValid();
2647      * if (m.andNot(valid).anyTrue()) throw ...;
2648      * return broadcast(0).blend(r, m);
2649      * }</pre>
2650      *
2651      * @param s the shuffle controlling lane index selection
2652      * @param m the mask controlling application of the shuffle
2653      * @return the rearrangement of the lane elements of this vector
2654      * @throws IndexOutOfBoundsException if there are any exceptional
2655      *        source indexes in the shuffle where the mask is set
2656      * @see #rearrange(VectorShuffle)
2657      * @see #rearrange(VectorShuffle,Vector)
2658      * @see VectorShuffle#laneIsValid()
2659      */
2660     public abstract Vector<E> rearrange(VectorShuffle<E> s, VectorMask<E> m);
2661 
2662     /**
2663      * Rearranges the lane elements of two vectors, selecting lanes
2664      * under the control of a specific shuffle, using both normal and
2665      * exceptional indexes in the shuffle to steer data.
2666      *
2667      * This is a cross-lane operation that rearranges the lane
2668      * elements of the two input vectors (the current vector
2669      * and a second vector {@code v}).
2670      *
2671      * For each lane {@code N} of the shuffle, with the corresponding
2672      * lane source index {@code I=s.laneSource(N)} in the shuffle,
2673      * the output lane {@code N} obtains the value from
2674      * the first vector at lane {@code I} if {@code I>=0}.
2675      * Otherwise, the exceptional index {@code I} is wrapped
2676      * by adding {@code VLENGTH} to it and used to index
2677      * the <em>second</em> vector, at index {@code I+VLENGTH}.
2678      *
2679      * <p> This method returns the value of this pseudocode:
2680      * <pre>{@code
2681      * Vector<E> r1 = this.rearrange(s.wrapIndexes());
2682      * // or else: r1 = this.rearrange(s, s.laneIsValid());
2683      * Vector<E> r2 = v.rearrange(s.wrapIndexes());
2684      * return r2.blend(r1,s.laneIsValid());
2685      * }</pre>
2686      *
2687      * @param s the shuffle controlling lane selection from both input vectors
2688      * @param v the second input vector
2689      * @return the rearrangement of lane elements of this vector and
2690      *         a second input vector
2691      * @see #rearrange(VectorShuffle)
2692      * @see #rearrange(VectorShuffle,VectorMask)
2693      * @see VectorShuffle#laneIsValid()
2694      * @see #slice(int,Vector)
2695      */
2696     public abstract Vector<E> rearrange(VectorShuffle<E> s, Vector<E> v);
2697 
2698     /**
2699      * Compresses the lane elements of this vector, selecting lanes
2700      * under the control of a specific mask.
2701      *
2702      * This is a cross-lane operation that compresses the lane
2703      * elements of this vector as selected by the specified mask.
2704      *
2705      * For each lane {@code N} of the mask, if the mask at
2706      * lane {@code N} is set, the element at lane {@code N}
2707      * of the input vector is selected and stored into the output
2708      * vector contiguously, starting from lane {@code 0}.
2709      * All the upper remaining lanes, if any, of the output
2710      * vector are set to zero.
2711      *
2712      * @param m the mask controlling the compression
2713      * @return the compressed lane elements of this vector
2714      */
2715     public abstract Vector<E> compress(VectorMask<E> m);
2716 
2717     /**
2718      * Expands the lane elements of this vector
2719      * under the control of a specific mask.
2720      *
2721      * This is a cross-lane operation that expands the contiguous lane
2722      * elements of this vector into lanes of an output vector
2723      * as selected by the specified mask.
2724      *
2725      * For each lane {@code N} of the mask, if the mask at
2726      * lane {@code N} is set, the next contiguous element of the input vector
2727      * starting from lane {@code 0} is selected and stored into the output
2728      * vector at lane {@code N}.
2729      * All the remaining lanes, if any, of the output vector are set to zero.
2730      *
2731      * @param m the mask controlling the expansion
2732      * @return the expanded lane elements of this vector
2733      */
2734     public abstract Vector<E> expand(VectorMask<E> m);
2735 
2736     /**
2737      * Using index values stored in the lanes of this vector,
2738      * assemble values stored in a second vector {@code v}.
2739      * The second vector thus serves as a table, whose
2740      * elements are selected by indexes in the current vector.
2741      *
2742      * This is a cross-lane operation that rearranges the lane
2743      * elements of the argument vector, under the control of
2744      * this vector.
2745      *
2746      * For each lane {@code N} of this vector, with the corresponding
2747      * lane value {@code I=this.lane(N)} in this vector,
2748      * the output lane {@code N} obtains the value from
2749      * the argument vector at lane {@code I}.
2750      *
2751      * In this way, the result contains only values stored in the
2752      * argument vector {@code v}, but presented in an order which
2753      * depends on the index values in {@code this}.
2754      *
2755      * The result is the same as the expression
2756      * {@code v.rearrange(this.toShuffle())}.
2757      *
2758      * @param v the vector supplying the result values
2759      * @return the rearrangement of the lane elements of {@code v}
2760      * @throws IndexOutOfBoundsException if any invalid
2761      *         source indexes are found in {@code this}
2762      * @see #rearrange(VectorShuffle)
2763      */
2764     public abstract Vector<E> selectFrom(Vector<E> v);
2765 
2766     /**
2767      * Using index values stored in the lanes of this vector,
2768      * assemble values stored in a second vector {@code v},
2769      * under the control of a mask.
2770      *
2771      * As with the unmasked {@link #selectFrom(Vector) selectFrom},
2772      * the second vector serves as a table, whose
2773      * elements are selected by indexes in the current vector.
2774      * Lanes that are unset in the mask receive a
2775      * zero rather than a value from the table.
2776      *
2777      * This is a cross-lane operation that rearranges the lane
2778      * elements of the argument vector, under the control of
2779      * this vector and the mask.
2780      *
2781      * The result is the same as the expression
2782      * {@code v.rearrange(this.toShuffle(), m)}.
2783      *
2784      * @param v the vector supplying the result values
2785      * @param m the mask controlling selection from {@code v}
2786      * @return the rearrangement of the lane elements of {@code v}
2787      * @throws IndexOutOfBoundsException if any invalid
2788      *         source indexes are found in {@code this},
2789      *         in a lane which is set in the mask
2790      * @see #selectFrom(Vector)
2791      * @see #rearrange(VectorShuffle,VectorMask)
2792      */
2793     public abstract Vector<E> selectFrom(Vector<E> v, VectorMask<E> m);
2794 
2795     // Conversions
2796 
2797     /**
2798      * Returns a vector of the same species as this one
2799      * where all lane elements are set to
2800      * the primitive value {@code e}.
2801      *
2802      * The contents of the current vector are discarded;
2803      * only the species is relevant to this operation.
2804      *
2805      * <p> This method returns the value of this expression:
2806      * {@code EVector.broadcast(this.species(), (ETYPE)e)}, where
2807      * {@code EVector} is the vector class specific to this
2808      * vector's element type {@code ETYPE}.
2809      *
2810      * <p>
2811      * The {@code long} value {@code e} must be accurately
2812      * representable by the {@code ETYPE} of this vector's species,
2813      * so that {@code e==(long)(ETYPE)e}.
2814      *
2815      * If this rule is violated the problem is not detected
2816      * statically, but an {@code IllegalArgumentException} is thrown
2817      * at run-time.  Thus, this method somewhat weakens the static
2818      * type checking of immediate constants and other scalars, but it
2819      * makes up for this by improving the expressiveness of the
2820      * generic API.  Note that an {@code e} value in the range
2821      * {@code [-128..127]} is always acceptable, since every
2822      * {@code ETYPE} will accept every {@code byte} value.
2823      *
2824      * @apiNote
2825      * Subtypes improve on this method by sharpening
2826      * the method return type and
2827      * the type of the scalar parameter {@code e}.
2828      *
2829      * @param e the value to broadcast
2830      * @return a vector where all lane elements are set to
2831      *         the primitive value {@code e}
2832      * @throws IllegalArgumentException
2833      *         if the given {@code long} value cannot
2834      *         be represented by the vector's {@code ETYPE}
2835      * @see VectorSpecies#broadcast(long)
2836      * @see IntVector#broadcast(int)
2837      * @see FloatVector#broadcast(float)
2838      */
2839     public abstract Vector<E> broadcast(long e);
2840 
2841     /**
2842      * Returns a mask of the same species as this vector,
2843      * where each lane is set or unset according to the given
2844      * single boolean, which is broadcast to all lanes.
2845      * <p>
2846      * This method returns the value of this expression:
2847      * {@code species().maskAll(bit)}.
2848      *
2849      * @param bit the given mask bit to be replicated
2850      * @return a mask where each lane is set or unset according to
2851      *         the given bit
2852      * @see VectorSpecies#maskAll(boolean)
2853      */
2854     public abstract VectorMask<E> maskAll(boolean bit);
2855 
2856     /**
2857      * Converts this vector into a shuffle, converting the lane values
2858      * to {@code int} and regarding them as source indexes.
2859      * <p>
2860      * This method behaves as if it returns the result of creating a shuffle
2861      * given an array of the vector elements cast to {@code int}, as follows:
2862      * <pre>{@code
2863      * long[] a = this.toLongArray();
2864      * int[] sa = new int[a.length];
2865      * for (int i = 0; i < a.length; i++) {
2866      *     sa[i] = (int) a[i];
2867      * }
2868      * return VectorShuffle.fromValues(this.species(), sa);
2869      * }</pre>
2870      *
2871      * @return a shuffle representation of this vector
2872      * @see VectorShuffle#fromValues(VectorSpecies,int...)
2873      */
2874     public abstract VectorShuffle<E> toShuffle();
2875 
2876     // Bitwise preserving
2877 
2878     /**
2879      * Transforms this vector to a vector of the given species of
2880      * element type {@code F}, reinterpreting the bytes of this
2881      * vector without performing any value conversions.
2882      *
2883      * <p> Depending on the selected species, this operation may
2884      * either <a href="Vector.html#expansion">expand or contract</a>
2885      * its logical result, in which case a non-zero {@code part}
2886      * number can further control the selection and steering of the
2887      * logical result into the physical output vector.
2888      *
2889      * <p>
2890      * The underlying bits of this vector are copied to the resulting
2891      * vector without modification, but those bits, before copying,
2892      * may be truncated if this vector's bit-size is greater than the
2893      * desired vector's bit-size, or filled with zero bits if this
2894      * vector's bit-size is less than the desired vector's bit-size.
2895      *
2896      * <p> If the old and new species have different shape, this is a
2897      * <em>shape-changing</em> operation, and may have special
2898      * implementation costs.
2899      *
2900      * <p> The method behaves as if this vector is stored into a byte
2901      * buffer or array using little-endian byte ordering and then the
2902      * desired vector is loaded from the same byte buffer or array
2903      * using the same ordering.
2904      *
2905      * <p> The following pseudocode illustrates the behavior:
2906      * <pre>{@code
2907      * int domSize = this.byteSize();
2908      * int ranSize = species.vectorByteSize();
2909      * int M = (domSize > ranSize ? domSize / ranSize : ranSize / domSize);
2910      * assert Math.abs(part) < M;
2911      * assert (part == 0) || (part > 0) == (domSize > ranSize);
2912      * byte[] ra = new byte[Math.max(domSize, ranSize)];
2913      * if (domSize > ranSize) {  // expansion
2914      *     this.intoByteArray(ra, 0, ByteOrder.native());
2915      *     int origin = part * ranSize;
2916      *     return species.fromByteArray(ra, origin, ByteOrder.native());
2917      * } else {  // contraction or size-invariant
2918      *     int origin = (-part) * domSize;
2919      *     this.intoByteArray(ra, origin, ByteOrder.native());
2920      *     return species.fromByteArray(ra, 0, ByteOrder.native());
2921      * }
2922      * }</pre>
2923      *
2924      * @apiNote Although this method is defined as if the vectors in
2925      * question were loaded or stored into memory, memory semantics
2926      * has little or nothing to do with the actual implementation.
2927      * The appeal to little-endian ordering is simply a shorthand
2928      * for what could otherwise be a large number of detailed rules
2929      * concerning the mapping between lane-structured vectors and
2930      * byte-structured vectors.
2931      *
2932      * @param species the desired vector species
2933      * @param part the <a href="Vector.html#expansion">part number</a>
2934      *        of the result, or zero if neither expanding nor contracting
2935      * @param <F> the boxed element type of the species
2936      * @return a vector transformed, by shape and element type, from this vector
2937      * @see Vector#convertShape(VectorOperators.Conversion,VectorSpecies,int)
2938      * @see Vector#castShape(VectorSpecies,int)
2939      * @see VectorSpecies#partLimit(VectorSpecies,boolean)
2940      */
2941     public abstract <F> Vector<F> reinterpretShape(VectorSpecies<F> species, int part);
2942 
2943     /**
2944      * Views this vector as a vector of the same shape
2945      * and contents but a lane type of {@code byte},
2946      * where the bytes are extracted from the lanes
2947      * according to little-endian order.
2948      * It is a convenience method for the expression
2949      * {@code reinterpretShape(species().withLanes(byte.class), 0)}.
2950      * It may be considered an inverse to the various
2951      * methods which consolidate bytes into larger lanes
2952      * within the same vector, such as
2953      * {@link Vector#reinterpretAsInts()}.
2954      *
2955      * @return a {@code ByteVector} with the same shape and information content
2956      * @see Vector#reinterpretShape(VectorSpecies,int)
2957      * @see IntVector#intoByteArray(byte[], int, ByteOrder)
2958      * @see FloatVector#intoByteArray(byte[], int, ByteOrder)
2959      * @see VectorSpecies#withLanes(Class)
2960      */
2961     public abstract ByteVector reinterpretAsBytes();
2962 
2963     /**
2964      * Reinterprets this vector as a vector of the same shape
2965      * and contents but a lane type of {@code short},
2966      * where the lanes are assembled from successive bytes
2967      * according to little-endian order.
2968      * It is a convenience method for the expression
2969      * {@code reinterpretShape(species().withLanes(short.class), 0)}.
2970      * It may be considered an inverse to {@link Vector#reinterpretAsBytes()}.
2971      *
2972      * @return a {@code ShortVector} with the same shape and information content
2973      */
2974     public abstract ShortVector reinterpretAsShorts();
2975 
2976     /**
2977      * Reinterprets this vector as a vector of the same shape
2978      * and contents but a lane type of {@code int},
2979      * where the lanes are assembled from successive bytes
2980      * according to little-endian order.
2981      * It is a convenience method for the expression
2982      * {@code reinterpretShape(species().withLanes(int.class), 0)}.
2983      * It may be considered an inverse to {@link Vector#reinterpretAsBytes()}.
2984      *
2985      * @return an {@code IntVector} with the same shape and information content
2986      */
2987     public abstract IntVector reinterpretAsInts();
2988 
2989     /**
2990      * Reinterprets this vector as a vector of the same shape
2991      * and contents but a lane type of {@code long},
2992      * where the lanes are assembled from successive bytes
2993      * according to little-endian order.
2994      * It is a convenience method for the expression
2995      * {@code reinterpretShape(species().withLanes(long.class), 0)}.
2996      * It may be considered an inverse to {@link Vector#reinterpretAsBytes()}.
2997      *
2998      * @return a {@code LongVector} with the same shape and information content
2999      */
3000     public abstract LongVector reinterpretAsLongs();
3001 
3002     /**
3003      * Reinterprets this vector as a vector of the same shape
3004      * and contents but a lane type of {@code float},
3005      * where the lanes are assembled from successive bytes
3006      * according to little-endian order.
3007      * It is a convenience method for the expression
3008      * {@code reinterpretShape(species().withLanes(float.class), 0)}.
3009      * It may be considered an inverse to {@link Vector#reinterpretAsBytes()}.
3010      *
3011      * @return a {@code FloatVector} with the same shape and information content
3012      */
3013     public abstract FloatVector reinterpretAsFloats();
3014 
3015     /**
3016      * Reinterprets this vector as a vector of the same shape
3017      * and contents but a lane type of {@code double},
3018      * where the lanes are assembled from successive bytes
3019      * according to little-endian order.
3020      * It is a convenience method for the expression
3021      * {@code reinterpretShape(species().withLanes(double.class), 0)}.
3022      * It may be considered an inverse to {@link Vector#reinterpretAsBytes()}.
3023      *
3024      * @return a {@code DoubleVector} with the same shape and information content
3025      */
3026     public abstract DoubleVector reinterpretAsDoubles();
3027 
3028     /**
3029      * Views this vector as a vector of the same shape, length, and
3030      * contents, but a lane type that is not a floating-point type.
3031      *
3032      * This is a lane-wise reinterpretation cast on the lane values.
3033      * As such, this method does not change {@code VSHAPE} or
3034      * {@code VLENGTH}, and there is no change to the bitwise contents
3035      * of the vector.  If the vector's {@code ETYPE} is already an
3036      * integral type, the same vector is returned unchanged.
3037      *
3038      * This method returns the value of this expression:
3039      * {@code convert(conv,0)}, where {@code conv} is
3040      * {@code VectorOperators.Conversion.ofReinterpret(E.class,F.class)},
3041      * and {@code F} is the non-floating-point type of the
3042      * same size as {@code E}.
3043      *
3044      * @apiNote
3045      * Subtypes improve on this method by sharpening
3046      * the return type.
3047      *
3048      * @return the original vector, reinterpreted as non-floating-point
3049      * @see VectorOperators.Conversion#ofReinterpret(Class,Class)
3050      * @see Vector#convert(VectorOperators.Conversion,int)
3051      */
3052     public abstract Vector<?> viewAsIntegralLanes();
3053 
3054     /**
3055      * Views this vector as a vector of the same shape, length, and
3056      * contents, but a lane type that is a floating-point type.
3057      *
3058      * This is a lane-wise reinterpretation cast on the lane values.
3059      * As such, this method does not change {@code VSHAPE} or
3060      * {@code VLENGTH}, and there is no change to the bitwise contents
3061      * of the vector.  If the vector's {@code ETYPE} is already a
3062      * floating-point type, the same vector is returned unchanged.
3063      *
3064      * If the vector's element size does not match any floating-point
3065      * type size, an {@code UnsupportedOperationException} is thrown.
3066      *
3067      * This method returns the value of this expression:
3068      * {@code convert(conv,0)}, where {@code conv} is
3069      * {@code VectorOperators.Conversion.ofReinterpret(E.class,F.class)},
3070      * and {@code F} is the floating-point type of the
3071      * same size as {@code E}, if any.
3072      *
3073      * @apiNote
3074      * Subtypes improve on this method by sharpening
3075      * the return type.
3076      *
3077      * @return the original vector, reinterpreted as floating-point
3078      * @throws UnsupportedOperationException if there is no floating point
3079      *         type the same size as the lanes of this vector
3080      * @see VectorOperators.Conversion#ofReinterpret(Class,Class)
3081      * @see Vector#convert(VectorOperators.Conversion,int)
3082      */
3083     public abstract Vector<?> viewAsFloatingLanes();
3084 
3085     /**
3086      * Converts this vector to a vector of the same shape and a new
3087      * element type, converting lane values from the current {@code ETYPE}
3088      * to a new lane type (called {@code FTYPE} here) according to the
3089      * indicated {@linkplain VectorOperators.Conversion conversion}.
3090      *
3091      * This is a lane-wise shape-invariant operation which copies
3092      * {@code ETYPE} values from the input vector to corresponding
3093      * {@code FTYPE} values in the result.  Depending on the selected
3094      * conversion, this operation may either
3095      * <a href="Vector.html#expansion">expand or contract</a> its
3096      * logical result, in which case a non-zero {@code part} number
3097      * can further control the selection and steering of the logical
3098      * result into the physical output vector.
3099      *
3100      * <p> Each specific conversion is described by a conversion
3101      * constant in the class {@link VectorOperators}.  Each conversion
3102      * operator has a specified {@linkplain
3103      * VectorOperators.Conversion#domainType() domain type} and
3104      * {@linkplain VectorOperators.Conversion#rangeType() range type}.
3105      * The domain type must exactly match the lane type of the input
3106      * vector, while the range type determines the lane type of the
3107      * output vectors.
3108      *
3109      * <p> A conversion operator may be classified as (respectively)
3110      * in-place, expanding, or contracting, depending on whether the
3111      * bit-size of its domain type is (respectively) equal, less than,
3112      * or greater than the bit-size of its range type.
3113      *
3114      * <p> Independently, conversion operations can also be classified
3115      * as reinterpreting or value-transforming, depending on whether
3116      * the conversion copies representation bits unchanged, or changes
3117      * the representation bits in order to retain (part or all of)
3118      * the logical value of the input value.
3119      *
3120      * <p> If a reinterpreting conversion contracts, it will truncate the
3121      * upper bits of the input.  If it expands, it will pad upper bits
3122      * of the output with zero bits, when there are no corresponding
3123      * input bits.
3124      *
3125      * <p> An expanding conversion such as {@code S2I} ({@code short}
3126      * value to {@code int}) takes a scalar value and represents it
3127      * in a larger format (always with some information redundancy).
3128      *
3129      * A contracting conversion such as {@code D2F} ({@code double}
3130      * value to {@code float}) takes a scalar value and represents it
3131      * in a smaller format (always with some information loss).
3132      *
3133      * Some in-place conversions may also include information loss,
3134      * such as {@code L2D} ({@code long} value to {@code double})
3135      * or {@code F2I}  ({@code float} value to {@code int}).
3136      *
3137      * Reinterpreting in-place conversions are not lossy, unless the
3138      * bitwise value is somehow not legal in the output type.
3139      * Converting the bit-pattern of a {@code NaN} may discard bits
3140      * from the {@code NaN}'s significand.
3141      *
3142      * <p> This classification is important, because, unless otherwise
3143      * documented, conversion operations <em>never change vector
3144      * shape</em>, regardless of how they may change <em>lane sizes</em>.
3145      *
3146      * Therefore an <em>expanding</em> conversion cannot store all of its
3147      * results in its output vector, because the output vector has fewer
3148      * lanes of larger size, in order to have the same overall bit-size as
3149      * its input.
3150      *
3151      * Likewise, a contracting conversion must store its relatively small
3152      * results into a subset of the lanes of the output vector, defaulting
3153      * the unused lanes to zero.
3154      *
3155      * <p> As an example, a conversion from {@code byte} to {@code long}
3156      * ({@code M=8}) will discard 87.5% of the input values in order to
3157      * convert the remaining 12.5% into the roomy {@code long} lanes of
3158      * the output vector. The inverse conversion will convert back all of
3159      * the large results, but will waste 87.5% of the lanes in the output
3160      * vector.
3161      *
3162      * <em>In-place</em> conversions ({@code M=1}) deliver all of
3163      * their results in one output vector, without wasting lanes.
3164      *
3165      * <p> To manage the details of these
3166      * <a href="Vector.html#expansion">expansions and contractions</a>,
3167      * a non-zero {@code part} parameter selects partial results from
3168      * expansions, or steers the results of contractions into
3169      * corresponding locations, as follows:
3170      *
3171      * <ul>
3172      * <li> expanding by {@code M}: {@code part} must be in the range
3173      * {@code [0..M-1]}, and selects the block of {@code VLENGTH/M} input
3174      * lanes starting at the <em>origin lane</em> at {@code part*VLENGTH/M}.
3175      *
3176      * <p> The {@code VLENGTH/M} output lanes represent a partial
3177      * slice of the whole logical result of the conversion, filling
3178      * the entire physical output vector.
3179      *
3180      * <li> contracting by {@code M}: {@code part} must be in the range
3181      * {@code [-M+1..0]}, and steers all {@code VLENGTH} input lanes into
3182      * the output located at the <em>origin lane</em> {@code -part*VLENGTH}.
3183      * There is a total of {@code VLENGTH*M} output lanes, and those not
3184      * holding converted input values are filled with zeroes.
3185      *
3186      * <p> A group of such output vectors, with logical result parts
3187      * steered to disjoint blocks, can be reassembled using the
3188      * {@linkplain VectorOperators#OR bitwise or} or (for floating
3189      * point) the {@link VectorOperators#FIRST_NONZERO FIRST_NONZERO}
3190      * operator.
3191      *
3192      * <li> in-place ({@code M=1}): {@code part} must be zero.
3193      * Both vectors have the same {@code VLENGTH}.  The result is
3194      * always positioned at the <em>origin lane</em> of zero.
3195      *
3196      * </ul>
3197      *
3198      * <p> This method is a restricted version of the more general
3199      * but less frequently used <em>shape-changing</em> method
3200      * {@link #convertShape(VectorOperators.Conversion,VectorSpecies,int)
3201      * convertShape()}.
3202      * The result of this method is the same as the expression
3203      * {@code this.convertShape(conv, rsp, part)},
3204      * where the output species is
3205      * {@code rsp=this.species().withLanes(FTYPE.class)}.
3206      *
3207      * @param conv the desired scalar conversion to apply lane-wise
3208      * @param part the <a href="Vector.html#expansion">part number</a>
3209      *        of the result, or zero if neither expanding nor contracting
3210      * @param <F> the boxed element type of the species
3211      * @return a vector converted by shape and element type from this vector
3212      * @throws ArrayIndexOutOfBoundsException unless {@code part} is zero,
3213      *         or else the expansion ratio is {@code M} and
3214      *         {@code part} is positive and less than {@code M},
3215      *         or else the contraction ratio is {@code M} and
3216      *         {@code part} is negative and greater than {@code -M}
3217      *
3218      * @see VectorOperators#I2L
3219      * @see VectorOperators.Conversion#ofCast(Class,Class)
3220      * @see VectorSpecies#partLimit(VectorSpecies,boolean)
3221      * @see #viewAsFloatingLanes()
3222      * @see #viewAsIntegralLanes()
3223      * @see #convertShape(VectorOperators.Conversion,VectorSpecies,int)
3224      * @see #reinterpretShape(VectorSpecies,int)
3225      */
3226     public abstract <F> Vector<F> convert(VectorOperators.Conversion<E,F> conv, int part);
3227 
3228     /**
3229      * Converts this vector to a vector of the given species, shape and
3230      * element type, converting lane values from the current {@code ETYPE}
3231      * to a new lane type (called {@code FTYPE} here) according to the
3232      * indicated {@linkplain VectorOperators.Conversion conversion}.
3233      *
3234      * This is a lane-wise operation which copies {@code ETYPE} values
3235      * from the input vector to corresponding {@code FTYPE} values in
3236      * the result.
3237      *
3238      * <p> If the old and new species have the same shape, the behavior
3239      * is exactly the same as the simpler, shape-invariant method
3240      * {@link #convert(VectorOperators.Conversion,int) convert()}.
3241      * In such cases, the simpler method {@code convert()} should be
3242      * used, to make code easier to reason about.
3243      * Otherwise, this is a <em>shape-changing</em> operation, and may
3244      * have special implementation costs.
3245      *
3246      * <p> As a combined effect of shape changes and lane size changes,
3247      * the input and output species may have different lane counts, causing
3248      * <a href="Vector.html#expansion">expansion or contraction</a>.
3249      * In this case a non-zero {@code part} parameter selects
3250      * partial results from an expanded logical result, or steers
3251      * the results of a contracted logical result into a physical
3252      * output vector of the required output species.
3253      *
3254      * <p> The following pseudocode illustrates the behavior of this
3255      * method for in-place, expanding, and contracting conversions.
3256      * (This pseudocode also applies to the shape-invariant method,
3257      * but with shape restrictions on the output species.)
3258      * Note that only one of the three code paths is relevant to any
3259      * particular combination of conversion operator and shapes.
3260      *
3261      * <pre>{@code
3262      * FTYPE scalar_conversion_op(ETYPE s);
3263      * EVector a = ...;
3264      * VectorSpecies<F> rsp = ...;
3265      * int part = ...;
3266      * VectorSpecies<E> dsp = a.species();
3267      * int domlen = dsp.length();
3268      * int ranlen = rsp.length();
3269      * FTYPE[] logical = new FTYPE[domlen];
3270      * for (int i = 0; i < domlen; i++) {
3271      *   logical[i] = scalar_conversion_op(a.lane(i));
3272      * }
3273      * FTYPE[] physical;
3274      * if (domlen == ranlen) { // in-place
3275      *     assert part == 0; //else AIOOBE
3276      *     physical = logical;
3277      * } else if (domlen > ranlen) { // expanding
3278      *     int M = domlen / ranlen;
3279      *     assert 0 <= part && part < M; //else AIOOBE
3280      *     int origin = part * ranlen;
3281      *     physical = Arrays.copyOfRange(logical, origin, origin + ranlen);
3282      * } else { // (domlen < ranlen) // contracting
3283      *     int M = ranlen / domlen;
3284      *     assert 0 >= part && part > -M; //else AIOOBE
3285      *     physical = new FTYPE[ranlen];
3286      *     System.arraycopy(logical, 0, physical, -part * domlen, domlen);
3287      * }
3288      * return FVector.fromArray(rsp, physical, 0);
3289      * }</pre>
3290      *
3291      * @param conv the desired scalar conversion to apply lane-wise
3292      * @param rsp the desired output species
3293      * @param part the <a href="Vector.html#expansion">part number</a>
3294      *        of the result, or zero if neither expanding nor contracting
3295      * @param <F> the boxed element type of the output species
3296      * @return a vector converted by shape and element type from this vector
3297      * @see #convert(VectorOperators.Conversion,int)
3298      * @see #castShape(VectorSpecies,int)
3299      * @see #reinterpretShape(VectorSpecies,int)
3300      */
3301     public abstract <F> Vector<F> convertShape(VectorOperators.Conversion<E,F> conv, VectorSpecies<F> rsp, int part);
3302 
3303     /**
3304      * Convenience method for converting a vector from one lane type
3305      * to another, reshaping as needed when lane sizes change.
3306      *
3307      * This method returns the value of this expression:
3308      * {@code convertShape(conv,rsp,part)}, where {@code conv} is
3309      * {@code VectorOperators.Conversion.ofCast(E.class,F.class)}.
3310      *
3311      * <p> If the old and new species have different shapes, this is a
3312      * <em>shape-changing</em> operation, and may have special
3313      * implementation costs.
3314      *
3315      * @param rsp the desired output species
3316      * @param part the <a href="Vector.html#expansion">part number</a>
3317      *        of the result, or zero if neither expanding nor contracting
3318      * @param <F> the boxed element type of the output species
3319      * @return a vector converted by shape and element type from this vector
3320      * @see VectorOperators.Conversion#ofCast(Class,Class)
3321      * @see Vector#convertShape(VectorOperators.Conversion,VectorSpecies,int)
3322      */
3323     // Does this carry its weight?
3324     public abstract <F> Vector<F> castShape(VectorSpecies<F> rsp, int part);
3325 
3326     /**
3327      * Checks that this vector has the given element type,
3328      * and returns this vector unchanged.
3329      * The effect is similar to this pseudocode:
3330      * {@code elementType == species().elementType()
3331      *        ? this
3332      *        : throw new ClassCastException()}.
3333      *
3334      * @param elementType the required lane type
3335      * @param <F> the boxed element type of the required lane type
3336      * @return this vector, unchanged
3337      * @throws ClassCastException if this vector's element type is not {@code elementType}
3338      * @see VectorSpecies#check(Class)
3339      * @see VectorMask#check(Class)
3340      * @see Vector#check(VectorSpecies)
3341      * @see VectorShuffle#check(VectorSpecies)
3342      */
3343     public abstract <F> Vector<F> check(Class<F> elementType);
3344 
3345     /**
3346      * Checks that this vector has the given species,
3347      * and returns this vector unchanged.
3348      * The effect is similar to this pseudocode:
3349      * {@code species == species()
3350      *        ? this
3351      *        : throw new ClassCastException()}.
3352      *
3353      * @param species the required species
3354      * @param <F> the boxed element type of the required species
3355      * @return this vector, unchanged
3356      * @throws ClassCastException if this vector's species is not {@code species}
3357      * @see Vector#check(Class)
3358      * @see VectorMask#check(VectorSpecies)
3359      * @see VectorShuffle#check(VectorSpecies)
3360      */
3361     public abstract <F> Vector<F> check(VectorSpecies<F> species);
3362 
3363     //Array stores
3364 
3365     /**
3366      * Stores this vector into a byte array starting at an offset
3367      * using explicit byte order.
3368      * <p>
3369      * Bytes are extracted from primitive lane elements according
3370      * to the specified byte ordering.
3371      * The lanes are stored according to their
3372      * <a href="Vector.html#lane-order">memory ordering</a>.
3373      * <p>
3374      * This method behaves as if it calls
3375      * {@link #intoByteBuffer(ByteBuffer,int,ByteOrder,VectorMask)
3376      * intoByteBuffer()} as follows:
3377      * <pre>{@code
3378      * var bb = ByteBuffer.wrap(a);
3379      * var m = maskAll(true);
3380      * intoByteBuffer(bb, offset, bo, m);
3381      * }</pre>
3382      *
3383      * @param a the byte array
3384      * @param offset the byte offset into the array at which storing starts
3385      * @param bo the intended byte order
3386      * @throws IndexOutOfBoundsException
3387      *         if {@code offset+N*ESIZE < 0}
3388      *         or {@code offset+(N+1)*ESIZE > a.length}
3389      *         for any lane {@code N} in the vector
3390      */
3391     public abstract void intoByteArray(byte[] a, int offset,
3392                                        ByteOrder bo);
3393 
3394     /**
3395      * Stores this vector into a byte array starting at an offset
3396      * using explicit byte order and a mask.
3397      * <p>
3398      * Bytes are extracted from primitive lane elements according
3399      * to the specified byte ordering.
3400      * The lanes are stored according to their
3401      * <a href="Vector.html#lane-order">memory ordering</a>.
3402      * <p>
3403      * This method behaves as if it calls
3404      * {@link #intoByteBuffer(ByteBuffer,int,ByteOrder,VectorMask)
3405      * intoByteBuffer()} as follows:
3406      * <pre>{@code
3407      * var bb = ByteBuffer.wrap(a);
3408      * intoByteBuffer(bb, offset, bo, m);
3409      * }</pre>
3410      *
3411      * @param a the byte array
3412      * @param offset the byte offset into the array at which storing starts
3413      * @param bo the intended byte order
3414      * @param m the mask controlling lane selection
3415      * @throws IndexOutOfBoundsException
3416      *         if {@code offset+N*ESIZE < 0}
3417      *         or {@code offset+(N+1)*ESIZE > a.length}
3418      *         for any lane {@code N} in the vector
3419      *         where the mask is set
3420      */
3421     public abstract void intoByteArray(byte[] a, int offset,
3422                                        ByteOrder bo,
3423                                        VectorMask<E> m);
3424 
3425     /**
3426      * Stores this vector into a byte buffer starting at an offset
3427      * using explicit byte order.
3428      * <p>
3429      * Bytes are extracted from primitive lane elements according
3430      * to the specified byte ordering.
3431      * The lanes are stored according to their
3432      * <a href="Vector.html#lane-order">memory ordering</a>.
3433      * <p>
3434      * This method behaves as if it calls
3435      * {@link #intoByteBuffer(ByteBuffer,int,ByteOrder,VectorMask)
3436      * intoByteBuffer()} as follows:
3437      * <pre>{@code
3438      * var m = maskAll(true);
3439      * intoByteBuffer(bb, offset, bo, m);
3440      * }</pre>
3441      *
3442      * @param bb the byte buffer
3443      * @param offset the byte offset into the byte buffer
3444      * @param bo the intended byte order
3445      * @throws IndexOutOfBoundsException
3446      *         if {@code offset+N*ESIZE < 0}
3447      *         or {@code offset+(N+1)*ESIZE > bb.limit()}
3448      *         for any lane {@code N} in the vector
3449      * @throws java.nio.ReadOnlyBufferException
3450      *         if the byte buffer is read-only
3451      */
3452     public abstract void intoByteBuffer(ByteBuffer bb, int offset, ByteOrder bo);
3453 
3454     /**
3455      * Stores this vector into a byte buffer starting at an offset
3456      * using explicit byte order and a mask.
3457      * <p>
3458      * Bytes are extracted from primitive lane elements according
3459      * to the specified byte ordering.
3460      * The lanes are stored according to their
3461      * <a href="Vector.html#lane-order">memory ordering</a>.
3462      * <p>
3463      * The following pseudocode illustrates the behavior, where
3464      * the primitive element type is not {@code byte},
3465      * {@code EBuffer} is the primitive buffer type, {@code ETYPE} is the
3466      * primitive element type, and {@code EVector} is the primitive
3467      * vector type for this vector:
3468      * <pre>{@code
3469      * EBuffer eb = bb.duplicate()
3470      *     .position(offset)
3471      *     .order(bo).asEBuffer();
3472      * ETYPE[] a = this.toArray();
3473      * for (int n = 0; n < a.length; n++) {
3474      *     if (m.laneIsSet(n)) {
3475      *         eb.put(n, a[n]);
3476      *     }
3477      * }
3478      * }</pre>
3479      * When the primitive element type is {@code byte} the primitive
3480      * byte buffer is obtained as follows, where operation on the buffer
3481      * remains the same as in the prior pseudocode:
3482      * <pre>{@code
3483      * ByteBuffer eb = bb.duplicate()
3484      *     .position(offset);
3485      * }</pre>
3486      *
3487      * @implNote
3488      * This operation is likely to be more efficient if
3489      * the specified byte order is the same as
3490      * {@linkplain ByteOrder#nativeOrder()
3491      * the platform native order},
3492      * since this method will not need to reorder
3493      * the bytes of lane values.
3494      * In the special case where {@code ETYPE} is
3495      * {@code byte}, the byte order argument is
3496      * ignored.
3497      *
3498      * @param bb the byte buffer
3499      * @param offset the byte offset into the byte buffer
3500      * @param bo the intended byte order
3501      * @param m the mask controlling lane selection
3502      * @throws IndexOutOfBoundsException
3503      *         if {@code offset+N*ESIZE < 0}
3504      *         or {@code offset+(N+1)*ESIZE > bb.limit()}
3505      *         for any lane {@code N} in the vector
3506      *         where the mask is set
3507      * @throws java.nio.ReadOnlyBufferException
3508      *         if the byte buffer is read-only
3509      */
3510     public abstract void intoByteBuffer(ByteBuffer bb, int offset,
3511                                         ByteOrder bo, VectorMask<E> m);
3512 
3513     /**
3514      * Returns a packed array containing all the lane values.
3515      * The array length is the same as the vector length.
3516      * The element type of the array is the same as the element
3517      * type of the vector.
3518      * The array elements are stored in lane order.
3519      * Overrides of this method on subtypes of {@code Vector}
3520      * which specify the element type have an accurately typed
3521      * array result.
3522      *
3523      * @apiNote
3524      * Usually {@linkplain FloatVector#toArray() strongly typed access}
3525      * is preferable, if you are working with a vector
3526      * subtype that has a known element type.
3527      *
3528      * @return an accurately typed array (statically an {@code Object})
3529      *         containing the lane values of this vector
3530      * @see ByteVector#toArray()
3531      * @see IntVector#toArray()
3532      * @see DoubleVector#toArray()
3533      */
3534     public abstract Object toArray();
3535 
3536     /**
3537      * Returns an {@code int[]} array containing all
3538      * the lane values, converted to the type {@code int}.
3539      * The array length is the same as the vector length.
3540      * The array elements are converted as if by casting
3541      * and stored in lane order.
3542      *
3543      * <p> This operation may fail if the vector element type is {@code
3544      * float} or {@code double}, when lanes contain fractional or
3545      * out-of-range values.  If any vector lane value is not
3546      * representable as an {@code int}, an exception is thrown.
3547      *
3548      * @apiNote
3549      * Usually {@linkplain FloatVector#toArray() strongly typed access}
3550      * is preferable, if you are working with a vector
3551      * subtype that has a known element type.
3552      *
3553      * @return an {@code int[]} array containing
3554      *         the lane values of this vector
3555      * @throws UnsupportedOperationException
3556      *         if any lane value cannot be represented as an
3557      *         {@code int} array element
3558      * @see #toArray()
3559      * @see #toLongArray()
3560      * @see #toDoubleArray()
3561      * @see IntVector#toArray()
3562      */
3563     public abstract int[] toIntArray();
3564 
3565     /**
3566      * Returns a {@code long[]} array containing all
3567      * the lane values, converted to the type {@code long}.
3568      * The array length is the same as the vector length.
3569      * The array elements are converted as if by casting
3570      * and stored in lane order.
3571      *
3572      * <p> This operation may fail if the vector element type is {@code
3573      * float} or {@code double}, when lanes contain fractional or
3574      * out-of-range values.  If any vector lane value is not
3575      * representable as a {@code long}, an exception is thrown.
3576      *
3577      * @apiNote
3578      * Usually {@linkplain FloatVector#toArray() strongly typed access}
3579      * is preferable, if you are working with a vector
3580      * subtype that has a known element type.
3581      *
3582      * @return a {@code long[]} array containing
3583      *         the lane values of this vector
3584      * @throws UnsupportedOperationException
3585      *         if any lane value cannot be represented as a
3586      *         {@code long} array element
3587      * @see #toArray()
3588      * @see #toIntArray()
3589      * @see #toDoubleArray()
3590      * @see LongVector#toArray()
3591      */
3592     public abstract long[] toLongArray();
3593 
3594     /**
3595      * Returns a {@code double[]} array containing all
3596      * the lane values, converted to the type {@code double}.
3597      * The array length is the same as the vector length.
3598      * The array elements are converted as if by casting
3599      * and stored in lane order.
3600      * This operation can lose precision
3601      * if the vector element type is {@code long}.
3602      *
3603      * @apiNote
3604      * Usually {@linkplain FloatVector#toArray() strongly typed access}
3605      * is preferable, if you are working with a vector
3606      * subtype that has a known element type.
3607      *
3608      * @return a {@code double[]} array containing
3609      *         the lane values of this vector,
3610      *         possibly rounded to representable
3611      *         {@code double} values
3612      * @see #toArray()
3613      * @see #toIntArray()
3614      * @see #toLongArray()
3615      * @see DoubleVector#toArray()
3616      */
3617     public abstract double[] toDoubleArray();
3618 
3619     /**
3620      * Returns a string representation of this vector, of the form
3621      * {@code "[0, 1, 2...]"}, reporting the lane values of this
3622      * vector, in lane order.
3623      *
3624      * The string is produced as if by a call to
3625      * {@link Arrays#toString(int[]) Arrays.toString()},
3626      * as appropriate to the array returned by
3627      * {@link #toArray() this.toArray()}.
3628      *
3629      * @return a string of the form {@code "[0, 1, 2...]"}
3630      * reporting the lane values of this vector
3631      */
3632     @Override
3633     public abstract String toString();
3634 
3635     /**
3636      * Indicates whether this vector is identical to some other object.
3637      * Two vectors are identical only if they have the same species
3638      * and same lane values, in the same order.
3639      * <p>The comparison of lane values is produced as if by a call to
3640      * {@link Arrays#equals(int[],int[]) Arrays.equals()}, as appropriate
3641      * to the arrays returned by {@link #toArray toArray()} on both vectors.
3642      *
3643      * @param obj the object to compare with this vector
3644      * @return whether this vector is identical to some other object
3645      * @see #eq
3646      */
3647     @Override
3648     public abstract boolean equals(Object obj);
3649 
3650     /**
3651      * Returns a hash code value for the vector,
3652      * based on the lane values and the vector species.
3653      *
3654      * @return a hash code value for this vector
3655      */
3656     @Override
3657     public abstract int hashCode();
3658 
3659     // ==== JROSE NAME CHANGES ====
3660 
3661     // RAISED FROM SUBCLASSES (with generalized type)
3662     // * toArray() -> ETYPE[] <: Object (erased return type for interop)
3663     // * toString(), equals(Object), hashCode() (documented)
3664     // ADDED
3665     // * compare(OP,v) to replace most of the comparison methods
3666     // * maskAll(boolean) to replace maskAllTrue/False
3667     // * toLongArray(), toDoubleArray() (generic unboxed access)
3668     // * check(Class), check(VectorSpecies) (static type-safety checks)
3669     // * enum Comparison (enum of EQ, NE, GT, LT, GE, LE)
3670     // * zero(VS), broadcast(long) (basic factories)
3671     // * reinterpretAsEs(), viewAsXLanes (bytewise reinterpreting views)
3672     // * addIndex(int) (iota function)
3673 
3674 }