1 /* 2 * Copyright (c) 2017, 2022, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 package jdk.incubator.vector; 26 27 import java.nio.ByteBuffer; 28 import java.nio.ByteOrder; 29 import java.nio.ReadOnlyBufferException; 30 import java.util.Arrays; 31 import java.util.Objects; 32 import java.util.function.Function; 33 import java.util.function.UnaryOperator; 34 35 import jdk.internal.misc.ScopedMemoryAccess; 36 import jdk.internal.misc.Unsafe; 37 import jdk.internal.vm.annotation.ForceInline; 38 import jdk.internal.vm.vector.VectorSupport; 39 40 import static jdk.internal.vm.vector.VectorSupport.*; 41 import static jdk.incubator.vector.VectorIntrinsics.*; 42 43 import static jdk.incubator.vector.VectorOperators.*; 44 45 // -- This file was mechanically generated: Do not edit! 
-- // 46 47 /** 48 * A specialized {@link Vector} representing an ordered immutable sequence of 49 * {@code float} values. 50 */ 51 @SuppressWarnings("cast") // warning: redundant cast 52 public abstract class FloatVector extends AbstractVector<Float> { 53 54 FloatVector(float[] vec) { 55 super(vec); 56 } 57 58 static final int FORBID_OPCODE_KIND = VO_NOFP; 59 60 @ForceInline 61 static int opCode(Operator op) { 62 return VectorOperators.opCode(op, VO_OPCODE_VALID, FORBID_OPCODE_KIND); 63 } 64 @ForceInline 65 static int opCode(Operator op, int requireKind) { 66 requireKind |= VO_OPCODE_VALID; 67 return VectorOperators.opCode(op, requireKind, FORBID_OPCODE_KIND); 68 } 69 @ForceInline 70 static boolean opKind(Operator op, int bit) { 71 return VectorOperators.opKind(op, bit); 72 } 73 74 // Virtualized factories and operators, 75 // coded with portable definitions. 76 // These are all @ForceInline in case 77 // they need to be used performantly. 78 // The various shape-specific subclasses 79 // also specialize them by wrapping 80 // them in a call like this: 81 // return (Byte128Vector) 82 // super.bOp((Byte128Vector) o); 83 // The purpose of that is to forcibly inline 84 // the generic definition from this file 85 // into a sharply type- and size-specific 86 // wrapper in the subclass file, so that 87 // the JIT can specialize the code. 88 // The code is only inlined and expanded 89 // if it gets hot. Think of it as a cheap 90 // and lazy version of C++ templates. 91 92 // Virtualized getter 93 94 /*package-private*/ 95 abstract float[] vec(); 96 97 // Virtualized constructors 98 99 /** 100 * Build a vector directly using my own constructor. 101 * It is an error if the array is aliased elsewhere. 102 */ 103 /*package-private*/ 104 abstract FloatVector vectorFactory(float[] vec); 105 106 /** 107 * Build a mask directly using my species. 108 * It is an error if the array is aliased elsewhere. 
109 */ 110 /*package-private*/ 111 @ForceInline 112 final 113 AbstractMask<Float> maskFactory(boolean[] bits) { 114 return vspecies().maskFactory(bits); 115 } 116 117 // Constant loader (takes dummy as vector arg) 118 interface FVOp { 119 float apply(int i); 120 } 121 122 /*package-private*/ 123 @ForceInline 124 final 125 FloatVector vOp(FVOp f) { 126 float[] res = new float[length()]; 127 for (int i = 0; i < res.length; i++) { 128 res[i] = f.apply(i); 129 } 130 return vectorFactory(res); 131 } 132 133 @ForceInline 134 final 135 FloatVector vOp(VectorMask<Float> m, FVOp f) { 136 float[] res = new float[length()]; 137 boolean[] mbits = ((AbstractMask<Float>)m).getBits(); 138 for (int i = 0; i < res.length; i++) { 139 if (mbits[i]) { 140 res[i] = f.apply(i); 141 } 142 } 143 return vectorFactory(res); 144 } 145 146 // Unary operator 147 148 /*package-private*/ 149 interface FUnOp { 150 float apply(int i, float a); 151 } 152 153 /*package-private*/ 154 abstract 155 FloatVector uOp(FUnOp f); 156 @ForceInline 157 final 158 FloatVector uOpTemplate(FUnOp f) { 159 float[] vec = vec(); 160 float[] res = new float[length()]; 161 for (int i = 0; i < res.length; i++) { 162 res[i] = f.apply(i, vec[i]); 163 } 164 return vectorFactory(res); 165 } 166 167 /*package-private*/ 168 abstract 169 FloatVector uOp(VectorMask<Float> m, 170 FUnOp f); 171 @ForceInline 172 final 173 FloatVector uOpTemplate(VectorMask<Float> m, 174 FUnOp f) { 175 if (m == null) { 176 return uOpTemplate(f); 177 } 178 float[] vec = vec(); 179 float[] res = new float[length()]; 180 boolean[] mbits = ((AbstractMask<Float>)m).getBits(); 181 for (int i = 0; i < res.length; i++) { 182 res[i] = mbits[i] ? 
f.apply(i, vec[i]) : vec[i]; 183 } 184 return vectorFactory(res); 185 } 186 187 // Binary operator 188 189 /*package-private*/ 190 interface FBinOp { 191 float apply(int i, float a, float b); 192 } 193 194 /*package-private*/ 195 abstract 196 FloatVector bOp(Vector<Float> o, 197 FBinOp f); 198 @ForceInline 199 final 200 FloatVector bOpTemplate(Vector<Float> o, 201 FBinOp f) { 202 float[] res = new float[length()]; 203 float[] vec1 = this.vec(); 204 float[] vec2 = ((FloatVector)o).vec(); 205 for (int i = 0; i < res.length; i++) { 206 res[i] = f.apply(i, vec1[i], vec2[i]); 207 } 208 return vectorFactory(res); 209 } 210 211 /*package-private*/ 212 abstract 213 FloatVector bOp(Vector<Float> o, 214 VectorMask<Float> m, 215 FBinOp f); 216 @ForceInline 217 final 218 FloatVector bOpTemplate(Vector<Float> o, 219 VectorMask<Float> m, 220 FBinOp f) { 221 if (m == null) { 222 return bOpTemplate(o, f); 223 } 224 float[] res = new float[length()]; 225 float[] vec1 = this.vec(); 226 float[] vec2 = ((FloatVector)o).vec(); 227 boolean[] mbits = ((AbstractMask<Float>)m).getBits(); 228 for (int i = 0; i < res.length; i++) { 229 res[i] = mbits[i] ? 
f.apply(i, vec1[i], vec2[i]) : vec1[i]; 230 } 231 return vectorFactory(res); 232 } 233 234 // Ternary operator 235 236 /*package-private*/ 237 interface FTriOp { 238 float apply(int i, float a, float b, float c); 239 } 240 241 /*package-private*/ 242 abstract 243 FloatVector tOp(Vector<Float> o1, 244 Vector<Float> o2, 245 FTriOp f); 246 @ForceInline 247 final 248 FloatVector tOpTemplate(Vector<Float> o1, 249 Vector<Float> o2, 250 FTriOp f) { 251 float[] res = new float[length()]; 252 float[] vec1 = this.vec(); 253 float[] vec2 = ((FloatVector)o1).vec(); 254 float[] vec3 = ((FloatVector)o2).vec(); 255 for (int i = 0; i < res.length; i++) { 256 res[i] = f.apply(i, vec1[i], vec2[i], vec3[i]); 257 } 258 return vectorFactory(res); 259 } 260 261 /*package-private*/ 262 abstract 263 FloatVector tOp(Vector<Float> o1, 264 Vector<Float> o2, 265 VectorMask<Float> m, 266 FTriOp f); 267 @ForceInline 268 final 269 FloatVector tOpTemplate(Vector<Float> o1, 270 Vector<Float> o2, 271 VectorMask<Float> m, 272 FTriOp f) { 273 if (m == null) { 274 return tOpTemplate(o1, o2, f); 275 } 276 float[] res = new float[length()]; 277 float[] vec1 = this.vec(); 278 float[] vec2 = ((FloatVector)o1).vec(); 279 float[] vec3 = ((FloatVector)o2).vec(); 280 boolean[] mbits = ((AbstractMask<Float>)m).getBits(); 281 for (int i = 0; i < res.length; i++) { 282 res[i] = mbits[i] ? f.apply(i, vec1[i], vec2[i], vec3[i]) : vec1[i]; 283 } 284 return vectorFactory(res); 285 } 286 287 // Reduction operator 288 289 /*package-private*/ 290 abstract 291 float rOp(float v, VectorMask<Float> m, FBinOp f); 292 293 @ForceInline 294 final 295 float rOpTemplate(float v, VectorMask<Float> m, FBinOp f) { 296 if (m == null) { 297 return rOpTemplate(v, f); 298 } 299 float[] vec = vec(); 300 boolean[] mbits = ((AbstractMask<Float>)m).getBits(); 301 for (int i = 0; i < vec.length; i++) { 302 v = mbits[i] ? 
f.apply(i, v, vec[i]) : v; 303 } 304 return v; 305 } 306 307 @ForceInline 308 final 309 float rOpTemplate(float v, FBinOp f) { 310 float[] vec = vec(); 311 for (int i = 0; i < vec.length; i++) { 312 v = f.apply(i, v, vec[i]); 313 } 314 return v; 315 } 316 317 // Memory reference 318 319 /*package-private*/ 320 interface FLdOp<M> { 321 float apply(M memory, int offset, int i); 322 } 323 324 /*package-private*/ 325 @ForceInline 326 final 327 <M> FloatVector ldOp(M memory, int offset, 328 FLdOp<M> f) { 329 //dummy; no vec = vec(); 330 float[] res = new float[length()]; 331 for (int i = 0; i < res.length; i++) { 332 res[i] = f.apply(memory, offset, i); 333 } 334 return vectorFactory(res); 335 } 336 337 /*package-private*/ 338 @ForceInline 339 final 340 <M> FloatVector ldOp(M memory, int offset, 341 VectorMask<Float> m, 342 FLdOp<M> f) { 343 //float[] vec = vec(); 344 float[] res = new float[length()]; 345 boolean[] mbits = ((AbstractMask<Float>)m).getBits(); 346 for (int i = 0; i < res.length; i++) { 347 if (mbits[i]) { 348 res[i] = f.apply(memory, offset, i); 349 } 350 } 351 return vectorFactory(res); 352 } 353 354 interface FStOp<M> { 355 void apply(M memory, int offset, int i, float a); 356 } 357 358 /*package-private*/ 359 @ForceInline 360 final 361 <M> void stOp(M memory, int offset, 362 FStOp<M> f) { 363 float[] vec = vec(); 364 for (int i = 0; i < vec.length; i++) { 365 f.apply(memory, offset, i, vec[i]); 366 } 367 } 368 369 /*package-private*/ 370 @ForceInline 371 final 372 <M> void stOp(M memory, int offset, 373 VectorMask<Float> m, 374 FStOp<M> f) { 375 float[] vec = vec(); 376 boolean[] mbits = ((AbstractMask<Float>)m).getBits(); 377 for (int i = 0; i < vec.length; i++) { 378 if (mbits[i]) { 379 f.apply(memory, offset, i, vec[i]); 380 } 381 } 382 } 383 384 // Binary test 385 386 /*package-private*/ 387 interface FBinTest { 388 boolean apply(int cond, int i, float a, float b); 389 } 390 391 /*package-private*/ 392 @ForceInline 393 final 394 
AbstractMask<Float> bTest(int cond, 395 Vector<Float> o, 396 FBinTest f) { 397 float[] vec1 = vec(); 398 float[] vec2 = ((FloatVector)o).vec(); 399 boolean[] bits = new boolean[length()]; 400 for (int i = 0; i < length(); i++){ 401 bits[i] = f.apply(cond, i, vec1[i], vec2[i]); 402 } 403 return maskFactory(bits); 404 } 405 406 407 /*package-private*/ 408 @Override 409 abstract FloatSpecies vspecies(); 410 411 /*package-private*/ 412 @ForceInline 413 static long toBits(float e) { 414 return Float.floatToRawIntBits(e); 415 } 416 417 /*package-private*/ 418 @ForceInline 419 static float fromBits(long bits) { 420 return Float.intBitsToFloat((int)bits); 421 } 422 423 // Static factories (other than memory operations) 424 425 // Note: A surprising behavior in javadoc 426 // sometimes makes a lone /** {@inheritDoc} */ 427 // comment drop the method altogether, 428 // apparently if the method mentions an 429 // parameter or return type of Vector<Float> 430 // instead of Vector<E> as originally specified. 431 // Adding an empty HTML fragment appears to 432 // nudge javadoc into providing the desired 433 // inherited documentation. We use the HTML 434 // comment <!--workaround--> for this. 435 436 /** 437 * Returns a vector of the given species 438 * where all lane elements are set to 439 * zero, the default primitive value. 440 * 441 * @param species species of the desired zero vector 442 * @return a zero vector 443 */ 444 @ForceInline 445 public static FloatVector zero(VectorSpecies<Float> species) { 446 FloatSpecies vsp = (FloatSpecies) species; 447 return VectorSupport.fromBitsCoerced(vsp.vectorType(), float.class, species.length(), 448 toBits(0.0f), MODE_BROADCAST, vsp, 449 ((bits_, s_) -> s_.rvOp(i -> bits_))); 450 } 451 452 /** 453 * Returns a vector of the same species as this one 454 * where all lane elements are set to 455 * the primitive value {@code e}. 
456 * 457 * The contents of the current vector are discarded; 458 * only the species is relevant to this operation. 459 * 460 * <p> This method returns the value of this expression: 461 * {@code FloatVector.broadcast(this.species(), e)}. 462 * 463 * @apiNote 464 * Unlike the similar method named {@code broadcast()} 465 * in the supertype {@code Vector}, this method does not 466 * need to validate its argument, and cannot throw 467 * {@code IllegalArgumentException}. This method is 468 * therefore preferable to the supertype method. 469 * 470 * @param e the value to broadcast 471 * @return a vector where all lane elements are set to 472 * the primitive value {@code e} 473 * @see #broadcast(VectorSpecies,long) 474 * @see Vector#broadcast(long) 475 * @see VectorSpecies#broadcast(long) 476 */ 477 public abstract FloatVector broadcast(float e); 478 479 /** 480 * Returns a vector of the given species 481 * where all lane elements are set to 482 * the primitive value {@code e}. 483 * 484 * @param species species of the desired vector 485 * @param e the value to broadcast 486 * @return a vector where all lane elements are set to 487 * the primitive value {@code e} 488 * @see #broadcast(long) 489 * @see Vector#broadcast(long) 490 * @see VectorSpecies#broadcast(long) 491 */ 492 @ForceInline 493 public static FloatVector broadcast(VectorSpecies<Float> species, float e) { 494 FloatSpecies vsp = (FloatSpecies) species; 495 return vsp.broadcast(e); 496 } 497 498 /*package-private*/ 499 @ForceInline 500 final FloatVector broadcastTemplate(float e) { 501 FloatSpecies vsp = vspecies(); 502 return vsp.broadcast(e); 503 } 504 505 /** 506 * {@inheritDoc} <!--workaround--> 507 * @apiNote 508 * When working with vector subtypes like {@code FloatVector}, 509 * {@linkplain #broadcast(float) the more strongly typed method} 510 * is typically selected. It can be explicitly selected 511 * using a cast: {@code v.broadcast((float)e)}. 
* The two expressions will produce numerically identical results.
     */
    @Override
    public abstract FloatVector broadcast(long e);

    /**
     * Returns a vector of the given species
     * where all lane elements are set to
     * the primitive value {@code e}.
     *
     * The {@code long} value must be accurately representable
     * by the {@code ETYPE} of the vector species, so that
     * {@code e==(long)(ETYPE)e}.
     *
     * @param species species of the desired vector
     * @param e the value to broadcast
     * @return a vector where all lane elements are set to
     *         the primitive value {@code e}
     * @throws IllegalArgumentException
     *         if the given {@code long} value cannot
     *         be represented by the vector's {@code ETYPE}
     * @see #broadcast(VectorSpecies,float)
     * @see VectorSpecies#checkValue(long)
     */
    @ForceInline
    public static FloatVector broadcast(VectorSpecies<Float> species, long e) {
        FloatSpecies vsp = (FloatSpecies) species;
        return vsp.broadcast(e);
    }

    // Shared body for broadcast(long): delegates to this vector's species.
    /*package-private*/
    @ForceInline
    final FloatVector broadcastTemplate(long e) {
        return vspecies().broadcast(e);
    }

    // Unary lanewise support

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    public abstract
    FloatVector lanewise(VectorOperators.Unary op);

    // Shared body for lanewise(Unary).  ZOMO is handled here with
    // blend/compare; every other operator is resolved to an opcode and
    // dispatched through VectorSupport.unaryOp, with the scalar
    // fallback looked up in UN_IMPL.
    @ForceInline
    final
    FloatVector lanewiseTemplate(VectorOperators.Unary op) {
        if (opKind(op, VO_SPECIAL)) {
            if (op == ZOMO) {
                return blend(broadcast(-1), compare(NE, 0));
            }
        }
        int opc = opCode(op);
        return VectorSupport.unaryOp(
            opc, getClass(), null, float.class, length(),
            this, null,
            UN_IMPL.find(op, opc, FloatVector::unaryOperations));
    }

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    public abstract
    FloatVector lanewise(VectorOperators.Unary op,
                         VectorMask<Float> m);

    // Shared body for masked lanewise(Unary).  The mask is checked
    // against maskClass and this vector before any work is done; the
    // ZOMO case passes the mask to compare().
    @ForceInline
    final
    FloatVector lanewiseTemplate(VectorOperators.Unary op,
                                 Class<? extends VectorMask<Float>> maskClass,
                                 VectorMask<Float> m) {
        m.check(maskClass, this);
        if (opKind(op, VO_SPECIAL)) {
            if (op == ZOMO) {
                return blend(broadcast(-1), compare(NE, 0, m));
            }
        }
        int opc = opCode(op);
        return VectorSupport.unaryOp(
            opc, getClass(), maskClass, float.class, length(),
            this, m,
            UN_IMPL.find(op, opc, FloatVector::unaryOperations));
    }

    private static final
    ImplCache<Unary, UnaryOperation<FloatVector, VectorMask<Float>>>
        UN_IMPL = new ImplCache<>(Unary.class, FloatVector.class);

    // Maps a unary opcode to its scalar per-lane fallback (built on uOp);
    // returns null for opcodes not listed here.
    private static UnaryOperation<FloatVector, VectorMask<Float>> unaryOperations(int opc_) {
        switch (opc_) {
            case VECTOR_OP_NEG: return (v0, m) ->
                    v0.uOp(m, (i, a) -> (float) -a);
            case VECTOR_OP_ABS: return (v0, m) ->
                    v0.uOp(m, (i, a) -> (float) Math.abs(a));
            case VECTOR_OP_SIN: return (v0, m) ->
                    v0.uOp(m, (i, a) -> (float) Math.sin(a));
            case VECTOR_OP_COS: return (v0, m) ->
                    v0.uOp(m, (i, a) -> (float) Math.cos(a));
            case VECTOR_OP_TAN: return (v0, m) ->
                    v0.uOp(m, (i, a) -> (float) Math.tan(a));
            case VECTOR_OP_ASIN: return (v0, m) ->
                    v0.uOp(m, (i, a) -> (float) Math.asin(a));
            case VECTOR_OP_ACOS: return (v0, m) ->
                    v0.uOp(m, (i, a) -> (float) Math.acos(a));
            case VECTOR_OP_ATAN: return (v0, m) ->
                    v0.uOp(m, (i, a) -> (float) Math.atan(a));
            case VECTOR_OP_EXP: return (v0, m) ->
                    v0.uOp(m, (i, a) -> (float) Math.exp(a));
            case VECTOR_OP_LOG: return (v0, m) ->
                    v0.uOp(m, (i, a) -> (float) Math.log(a));
            case VECTOR_OP_LOG10: return (v0, m) ->
                    v0.uOp(m, (i, a) -> (float) Math.log10(a));
            case VECTOR_OP_SQRT: return (v0, m) ->
                    v0.uOp(m, (i, a) -> (float) Math.sqrt(a));
            case VECTOR_OP_CBRT: return (v0, m) ->
                    v0.uOp(m, (i, a) -> (float) Math.cbrt(a));
            case VECTOR_OP_SINH: return (v0, m) ->
                    v0.uOp(m, (i, a) -> (float) Math.sinh(a));
            case VECTOR_OP_COSH: return (v0, m) ->
                    v0.uOp(m, (i, a) -> (float) Math.cosh(a));
            case VECTOR_OP_TANH: return (v0, m) ->
                    v0.uOp(m, (i, a) -> (float) Math.tanh(a));
            case VECTOR_OP_EXPM1: return (v0, m) ->
                    v0.uOp(m, (i, a) -> (float) Math.expm1(a));
            case VECTOR_OP_LOG1P: return (v0, m) ->
                    v0.uOp(m, (i, a) -> (float) Math.log1p(a));
            default: return null;
        }
    }

    // Binary lanewise support

    /**
     * {@inheritDoc} <!--workaround-->
     * @see #lanewise(VectorOperators.Binary,float)
     * @see #lanewise(VectorOperators.Binary,float,VectorMask)
     */
    @Override
    public abstract
    FloatVector lanewise(VectorOperators.Binary op,
                         Vector<Float> v);

    // Shared body for lanewise(Binary, Vector).  FIRST_NONZERO is
    // rewritten here as an OR_UNCHECKED over the integral views of the
    // two vectors; every other operator is resolved to an opcode and
    // dispatched through VectorSupport.binaryOp, with the scalar
    // fallback looked up in BIN_IMPL.
    @ForceInline
    final
    FloatVector lanewiseTemplate(VectorOperators.Binary op,
                                 Vector<Float> v) {
        FloatVector that = (FloatVector) v;
        that.check(this);

        if (opKind(op, VO_SPECIAL)) {
            if (op == FIRST_NONZERO) {
                // FIXME: Support this in the JIT.
                VectorMask<Integer> thisNZ
                    = this.viewAsIntegralLanes().compare(NE, (int) 0);
                that = that.blend((float) 0, thisNZ.cast(vspecies()));
                op = OR_UNCHECKED;
                // FIXME: Support OR_UNCHECKED on float/double also!
                return this.viewAsIntegralLanes()
                    .lanewise(op, that.viewAsIntegralLanes())
                    .viewAsFloatingLanes();
            }
        }

        int opc = opCode(op);
        return VectorSupport.binaryOp(
            opc, getClass(), null, float.class, length(),
            this, that, null,
            BIN_IMPL.find(op, opc, FloatVector::binaryOperations));
    }

    /**
     * {@inheritDoc} <!--workaround-->
     * @see #lanewise(VectorOperators.Binary,float,VectorMask)
     */
    @Override
    public abstract
    FloatVector lanewise(VectorOperators.Binary op,
                         Vector<Float> v,
                         VectorMask<Float> m);

    // Shared body for masked lanewise(Binary, Vector).  The operand and
    // the mask are checked first.  FIRST_NONZERO is computed unmasked
    // and then blended into this vector under the mask.
    @ForceInline
    final
    FloatVector lanewiseTemplate(VectorOperators.Binary op,
                                 Class<? extends VectorMask<Float>> maskClass,
                                 Vector<Float> v, VectorMask<Float> m) {
        FloatVector that = (FloatVector) v;
        that.check(this);
        m.check(maskClass, this);

        if (opKind(op, VO_SPECIAL)) {
            if (op == FIRST_NONZERO) {
                return blend(lanewise(op, v), m);
            }
        }

        int opc = opCode(op);
        return VectorSupport.binaryOp(
            opc, getClass(), maskClass, float.class, length(),
            this, that, m,
            BIN_IMPL.find(op, opc, FloatVector::binaryOperations));
    }

    private static final
    ImplCache<Binary, BinaryOperation<FloatVector, VectorMask<Float>>>
        BIN_IMPL = new ImplCache<>(Binary.class, FloatVector.class);

    // Maps a binary opcode to its scalar per-lane fallback (built on bOp);
    // returns null for opcodes not listed here.
    private static BinaryOperation<FloatVector, VectorMask<Float>> binaryOperations(int opc_) {
        switch (opc_) {
            case VECTOR_OP_ADD: return (v0, v1, vm) ->
                    v0.bOp(v1, vm, (i, a, b) -> (float)(a + b));
            case VECTOR_OP_SUB: return (v0, v1, vm) ->
                    v0.bOp(v1, vm, (i, a, b) -> (float)(a - b));
            case VECTOR_OP_MUL: return (v0, v1, vm) ->
                    v0.bOp(v1, vm, (i, a, b) -> (float)(a * b));
            case VECTOR_OP_DIV: return (v0, v1, vm) ->
                    v0.bOp(v1, vm, (i, a, b) -> (float)(a / b));
            case VECTOR_OP_MAX: return (v0, v1, vm) ->
                    v0.bOp(v1, vm, (i, a, b) -> (float)Math.max(a, b));
            case VECTOR_OP_MIN: return (v0, v1, vm) ->
                    v0.bOp(v1, vm, (i, a, b) -> (float)Math.min(a, b));
            case VECTOR_OP_OR: return (v0, v1, vm) ->
                    v0.bOp(v1, vm, (i, a, b) -> fromBits(toBits(a) | toBits(b)));
            case VECTOR_OP_ATAN2: return (v0, v1, vm) ->
                    v0.bOp(v1, vm, (i, a, b) -> (float) Math.atan2(a, b));
            case VECTOR_OP_POW: return (v0, v1, vm) ->
                    v0.bOp(v1, vm, (i, a, b) -> (float) Math.pow(a, b));
            case VECTOR_OP_HYPOT: return (v0, v1, vm) ->
                    v0.bOp(v1, vm, (i, a, b) -> (float) Math.hypot(a, b));
            default: return null;
        }
    }

    // FIXME: Maybe all of the public final methods in this file (the
    // simple ones that just call lanewise) should be pushed down to
    // the
X-VectorBits template. They can't optimize properly at 745 // this level, and must rely on inlining. Does it work? 746 // (If it works, of course keep the code here.) 747 748 /** 749 * Combines the lane values of this vector 750 * with the value of a broadcast scalar. 751 * 752 * This is a lane-wise binary operation which applies 753 * the selected operation to each lane. 754 * The return value will be equal to this expression: 755 * {@code this.lanewise(op, this.broadcast(e))}. 756 * 757 * @param op the operation used to process lane values 758 * @param e the input scalar 759 * @return the result of applying the operation lane-wise 760 * to the two input vectors 761 * @throws UnsupportedOperationException if this vector does 762 * not support the requested operation 763 * @see #lanewise(VectorOperators.Binary,Vector) 764 * @see #lanewise(VectorOperators.Binary,float,VectorMask) 765 */ 766 @ForceInline 767 public final 768 FloatVector lanewise(VectorOperators.Binary op, 769 float e) { 770 return lanewise(op, broadcast(e)); 771 } 772 773 /** 774 * Combines the lane values of this vector 775 * with the value of a broadcast scalar, 776 * with selection of lane elements controlled by a mask. 777 * 778 * This is a masked lane-wise binary operation which applies 779 * the selected operation to each lane. 780 * The return value will be equal to this expression: 781 * {@code this.lanewise(op, this.broadcast(e), m)}. 
782 * 783 * @param op the operation used to process lane values 784 * @param e the input scalar 785 * @param m the mask controlling lane selection 786 * @return the result of applying the operation lane-wise 787 * to the input vector and the scalar 788 * @throws UnsupportedOperationException if this vector does 789 * not support the requested operation 790 * @see #lanewise(VectorOperators.Binary,Vector,VectorMask) 791 * @see #lanewise(VectorOperators.Binary,float) 792 */ 793 @ForceInline 794 public final 795 FloatVector lanewise(VectorOperators.Binary op, 796 float e, 797 VectorMask<Float> m) { 798 return lanewise(op, broadcast(e), m); 799 } 800 801 /** 802 * {@inheritDoc} <!--workaround--> 803 * @apiNote 804 * When working with vector subtypes like {@code FloatVector}, 805 * {@linkplain #lanewise(VectorOperators.Binary,float) 806 * the more strongly typed method} 807 * is typically selected. It can be explicitly selected 808 * using a cast: {@code v.lanewise(op,(float)e)}. 809 * The two expressions will produce numerically identical results. 810 */ 811 @ForceInline 812 public final 813 FloatVector lanewise(VectorOperators.Binary op, 814 long e) { 815 float e1 = (float) e; 816 if ((long)e1 != e) { 817 vspecies().checkValue(e); // for exception 818 } 819 return lanewise(op, e1); 820 } 821 822 /** 823 * {@inheritDoc} <!--workaround--> 824 * @apiNote 825 * When working with vector subtypes like {@code FloatVector}, 826 * {@linkplain #lanewise(VectorOperators.Binary,float,VectorMask) 827 * the more strongly typed method} 828 * is typically selected. It can be explicitly selected 829 * using a cast: {@code v.lanewise(op,(float)e,m)}. 830 * The two expressions will produce numerically identical results. 
831 */ 832 @ForceInline 833 public final 834 FloatVector lanewise(VectorOperators.Binary op, 835 long e, VectorMask<Float> m) { 836 float e1 = (float) e; 837 if ((long)e1 != e) { 838 vspecies().checkValue(e); // for exception 839 } 840 return lanewise(op, e1, m); 841 } 842 843 844 // Ternary lanewise support 845 846 // Ternary operators come in eight variations: 847 // lanewise(op, [broadcast(e1)|v1], [broadcast(e2)|v2]) 848 // lanewise(op, [broadcast(e1)|v1], [broadcast(e2)|v2], mask) 849 850 // It is annoying to support all of these variations of masking 851 // and broadcast, but it would be more surprising not to continue 852 // the obvious pattern started by unary and binary. 853 854 /** 855 * {@inheritDoc} <!--workaround--> 856 * @see #lanewise(VectorOperators.Ternary,float,float,VectorMask) 857 * @see #lanewise(VectorOperators.Ternary,Vector,float,VectorMask) 858 * @see #lanewise(VectorOperators.Ternary,float,Vector,VectorMask) 859 * @see #lanewise(VectorOperators.Ternary,float,float) 860 * @see #lanewise(VectorOperators.Ternary,Vector,float) 861 * @see #lanewise(VectorOperators.Ternary,float,Vector) 862 */ 863 @Override 864 public abstract 865 FloatVector lanewise(VectorOperators.Ternary op, 866 Vector<Float> v1, 867 Vector<Float> v2); 868 @ForceInline 869 final 870 FloatVector lanewiseTemplate(VectorOperators.Ternary op, 871 Vector<Float> v1, 872 Vector<Float> v2) { 873 FloatVector that = (FloatVector) v1; 874 FloatVector tother = (FloatVector) v2; 875 // It's a word: https://www.dictionary.com/browse/tother 876 // See also Chapter 11 of Dickens, Our Mutual Friend: 877 // "Totherest Governor," replied Mr Riderhood... 
878 that.check(this); 879 tother.check(this); 880 int opc = opCode(op); 881 return VectorSupport.ternaryOp( 882 opc, getClass(), null, float.class, length(), 883 this, that, tother, null, 884 TERN_IMPL.find(op, opc, FloatVector::ternaryOperations)); 885 } 886 887 /** 888 * {@inheritDoc} <!--workaround--> 889 * @see #lanewise(VectorOperators.Ternary,float,float,VectorMask) 890 * @see #lanewise(VectorOperators.Ternary,Vector,float,VectorMask) 891 * @see #lanewise(VectorOperators.Ternary,float,Vector,VectorMask) 892 */ 893 @Override 894 public abstract 895 FloatVector lanewise(VectorOperators.Ternary op, 896 Vector<Float> v1, 897 Vector<Float> v2, 898 VectorMask<Float> m); 899 @ForceInline 900 final 901 FloatVector lanewiseTemplate(VectorOperators.Ternary op, 902 Class<? extends VectorMask<Float>> maskClass, 903 Vector<Float> v1, 904 Vector<Float> v2, 905 VectorMask<Float> m) { 906 FloatVector that = (FloatVector) v1; 907 FloatVector tother = (FloatVector) v2; 908 // It's a word: https://www.dictionary.com/browse/tother 909 // See also Chapter 11 of Dickens, Our Mutual Friend: 910 // "Totherest Governor," replied Mr Riderhood... 911 that.check(this); 912 tother.check(this); 913 m.check(maskClass, this); 914 915 int opc = opCode(op); 916 return VectorSupport.ternaryOp( 917 opc, getClass(), maskClass, float.class, length(), 918 this, that, tother, m, 919 TERN_IMPL.find(op, opc, FloatVector::ternaryOperations)); 920 } 921 922 private static final 923 ImplCache<Ternary, TernaryOperation<FloatVector, VectorMask<Float>>> 924 TERN_IMPL = new ImplCache<>(Ternary.class, FloatVector.class); 925 926 private static TernaryOperation<FloatVector, VectorMask<Float>> ternaryOperations(int opc_) { 927 switch (opc_) { 928 case VECTOR_OP_FMA: return (v0, v1_, v2_, m) -> 929 v0.tOp(v1_, v2_, m, (i, a, b, c) -> Math.fma(a, b, c)); 930 default: return null; 931 } 932 } 933 934 /** 935 * Combines the lane values of this vector 936 * with the values of two broadcast scalars. 
937 * 938 * This is a lane-wise ternary operation which applies 939 * the selected operation to each lane. 940 * The return value will be equal to this expression: 941 * {@code this.lanewise(op, this.broadcast(e1), this.broadcast(e2))}. 942 * 943 * @param op the operation used to combine lane values 944 * @param e1 the first input scalar 945 * @param e2 the second input scalar 946 * @return the result of applying the operation lane-wise 947 * to the input vector and the scalars 948 * @throws UnsupportedOperationException if this vector does 949 * not support the requested operation 950 * @see #lanewise(VectorOperators.Ternary,Vector,Vector) 951 * @see #lanewise(VectorOperators.Ternary,float,float,VectorMask) 952 */ 953 @ForceInline 954 public final 955 FloatVector lanewise(VectorOperators.Ternary op, //(op,e1,e2) 956 float e1, 957 float e2) { 958 return lanewise(op, broadcast(e1), broadcast(e2)); 959 } 960 961 /** 962 * Combines the lane values of this vector 963 * with the values of two broadcast scalars, 964 * with selection of lane elements controlled by a mask. 965 * 966 * This is a masked lane-wise ternary operation which applies 967 * the selected operation to each lane. 968 * The return value will be equal to this expression: 969 * {@code this.lanewise(op, this.broadcast(e1), this.broadcast(e2), m)}. 
970 * 971 * @param op the operation used to combine lane values 972 * @param e1 the first input scalar 973 * @param e2 the second input scalar 974 * @param m the mask controlling lane selection 975 * @return the result of applying the operation lane-wise 976 * to the input vector and the scalars 977 * @throws UnsupportedOperationException if this vector does 978 * not support the requested operation 979 * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask) 980 * @see #lanewise(VectorOperators.Ternary,float,float) 981 */ 982 @ForceInline 983 public final 984 FloatVector lanewise(VectorOperators.Ternary op, //(op,e1,e2,m) 985 float e1, 986 float e2, 987 VectorMask<Float> m) { 988 return lanewise(op, broadcast(e1), broadcast(e2), m); 989 } 990 991 /** 992 * Combines the lane values of this vector 993 * with the values of another vector and a broadcast scalar. 994 * 995 * This is a lane-wise ternary operation which applies 996 * the selected operation to each lane. 997 * The return value will be equal to this expression: 998 * {@code this.lanewise(op, v1, this.broadcast(e2))}. 999 * 1000 * @param op the operation used to combine lane values 1001 * @param v1 the other input vector 1002 * @param e2 the input scalar 1003 * @return the result of applying the operation lane-wise 1004 * to the input vectors and the scalar 1005 * @throws UnsupportedOperationException if this vector does 1006 * not support the requested operation 1007 * @see #lanewise(VectorOperators.Ternary,float,float) 1008 * @see #lanewise(VectorOperators.Ternary,Vector,float,VectorMask) 1009 */ 1010 @ForceInline 1011 public final 1012 FloatVector lanewise(VectorOperators.Ternary op, //(op,v1,e2) 1013 Vector<Float> v1, 1014 float e2) { 1015 return lanewise(op, v1, broadcast(e2)); 1016 } 1017 1018 /** 1019 * Combines the lane values of this vector 1020 * with the values of another vector and a broadcast scalar, 1021 * with selection of lane elements controlled by a mask. 
*
     * This is a masked lane-wise ternary operation which applies
     * the selected operation to each lane.
     * The return value will be equal to this expression:
     * {@code this.lanewise(op, v1, this.broadcast(e2), m)}.
     *
     * @param op the operation used to combine lane values
     * @param v1 the other input vector
     * @param e2 the input scalar
     * @param m the mask controlling lane selection
     * @return the result of applying the operation lane-wise
     *         to the input vectors and the scalar
     * @throws UnsupportedOperationException if this vector does
     *         not support the requested operation
     * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
     * @see #lanewise(VectorOperators.Ternary,float,float,VectorMask)
     * @see #lanewise(VectorOperators.Ternary,Vector,float)
     */
    @ForceInline
    public final
    FloatVector lanewise(VectorOperators.Ternary op, //(op,v1,e2,m)
                         Vector<Float> v1,
                         float e2,
                         VectorMask<Float> m) {
        // Only the second operand is a scalar; broadcast it and delegate
        // to the masked overload that takes two vector operands.
        return lanewise(op, v1, broadcast(e2), m);
    }

    /**
     * Combines the lane values of this vector
     * with the values of another vector and a broadcast scalar.
     *
     * This is a lane-wise ternary operation which applies
     * the selected operation to each lane.
     * The return value will be equal to this expression:
     * {@code this.lanewise(op, this.broadcast(e1), v2)}.
*
     * @param op the operation used to combine lane values
     * @param e1 the input scalar
     * @param v2 the other input vector
     * @return the result of applying the operation lane-wise
     *         to the input vectors and the scalar
     * @throws UnsupportedOperationException if this vector does
     *         not support the requested operation
     * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
     * @see #lanewise(VectorOperators.Ternary,float,Vector,VectorMask)
     */
    @ForceInline
    public final
    FloatVector lanewise(VectorOperators.Ternary op, //(op,e1,v2)
                         float e1,
                         Vector<Float> v2) {
        // Only the first operand is a scalar; broadcast it and delegate.
        return lanewise(op, broadcast(e1), v2);
    }

    /**
     * Combines the lane values of this vector
     * with the values of another vector and a broadcast scalar,
     * with selection of lane elements controlled by a mask.
     *
     * This is a masked lane-wise ternary operation which applies
     * the selected operation to each lane.
     * The return value will be equal to this expression:
     * {@code this.lanewise(op, this.broadcast(e1), v2, m)}.
     *
     * @param op the operation used to combine lane values
     * @param e1 the input scalar
     * @param v2 the other input vector
     * @param m the mask controlling lane selection
     * @return the result of applying the operation lane-wise
     *         to the input vectors and the scalar
     * @throws UnsupportedOperationException if this vector does
     *         not support the requested operation
     * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
     * @see #lanewise(VectorOperators.Ternary,float,Vector)
     */
    @ForceInline
    public final
    FloatVector lanewise(VectorOperators.Ternary op, //(op,e1,v2,m)
                         float e1,
                         Vector<Float> v2,
                         VectorMask<Float> m) {
        // Only the first operand is a scalar; broadcast it and delegate
        // to the masked overload that takes two vector operands.
        return lanewise(op, broadcast(e1), v2, m);
    }

    // (Thus endeth the Great and Mighty Ternary Ogdoad.)
// https://en.wikipedia.org/wiki/Ogdoad

    /// FULL-SERVICE BINARY METHODS: ADD, SUB, MUL, DIV
    //
    // These include masked and non-masked versions.
    // This subclass adds broadcast (masked or not).
    // Every method in this group is a one-line delegation to the
    // matching lanewise overload with a fixed operator.

    /**
     * {@inheritDoc} <!--workaround-->
     * @see #add(float)
     */
    @Override
    @ForceInline
    public final FloatVector add(Vector<Float> v) {
        return lanewise(ADD, v);
    }

    /**
     * Adds this vector to the broadcast of an input scalar.
     *
     * This is a lane-wise binary operation which applies
     * the primitive addition operation ({@code +}) to each lane.
     *
     * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Binary,float)
     *    lanewise}{@code (}{@link VectorOperators#ADD
     *    ADD}{@code , e)}.
     *
     * @param e the input scalar
     * @return the result of adding each lane of this vector to the scalar
     * @see #add(Vector)
     * @see #broadcast(float)
     * @see #add(float,VectorMask)
     * @see VectorOperators#ADD
     * @see #lanewise(VectorOperators.Binary,Vector)
     * @see #lanewise(VectorOperators.Binary,float)
     */
    @ForceInline
    public final
    FloatVector add(float e) {
        return lanewise(ADD, e);
    }

    /**
     * {@inheritDoc} <!--workaround-->
     * @see #add(float,VectorMask)
     */
    @Override
    @ForceInline
    public final FloatVector add(Vector<Float> v,
                                 VectorMask<Float> m) {
        return lanewise(ADD, v, m);
    }

    /**
     * Adds this vector to the broadcast of an input scalar,
     * selecting lane elements controlled by a mask.
     *
     * This is a masked lane-wise binary operation which applies
     * the primitive addition operation ({@code +}) to each lane.
*
     * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Binary,float,VectorMask)
     *    lanewise}{@code (}{@link VectorOperators#ADD
     *    ADD}{@code , e, m)}.
     *
     * @param e the input scalar
     * @param m the mask controlling lane selection
     * @return the result of adding each lane of this vector to the scalar
     * @see #add(Vector,VectorMask)
     * @see #broadcast(float)
     * @see #add(float)
     * @see VectorOperators#ADD
     * @see #lanewise(VectorOperators.Binary,Vector)
     * @see #lanewise(VectorOperators.Binary,float)
     */
    @ForceInline
    public final FloatVector add(float e,
                                 VectorMask<Float> m) {
        return lanewise(ADD, e, m);
    }

    /**
     * {@inheritDoc} <!--workaround-->
     * @see #sub(float)
     */
    @Override
    @ForceInline
    public final FloatVector sub(Vector<Float> v) {
        return lanewise(SUB, v);
    }

    /**
     * Subtracts an input scalar from this vector.
     *
     * This is a lane-wise binary operation which applies
     * the primitive subtraction operation ({@code -}) to each lane.
     *
     * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Binary,float)
     *    lanewise}{@code (}{@link VectorOperators#SUB
     *    SUB}{@code , e)}.
*
     * @param e the input scalar
     * @return the result of subtracting the scalar from each lane of this vector
     * @see #sub(Vector)
     * @see #broadcast(float)
     * @see #sub(float,VectorMask)
     * @see VectorOperators#SUB
     * @see #lanewise(VectorOperators.Binary,Vector)
     * @see #lanewise(VectorOperators.Binary,float)
     */
    @ForceInline
    public final FloatVector sub(float e) {
        return lanewise(SUB, e);
    }

    /**
     * {@inheritDoc} <!--workaround-->
     * @see #sub(float,VectorMask)
     */
    @Override
    @ForceInline
    public final FloatVector sub(Vector<Float> v,
                                 VectorMask<Float> m) {
        return lanewise(SUB, v, m);
    }

    /**
     * Subtracts an input scalar from this vector
     * under the control of a mask.
     *
     * This is a masked lane-wise binary operation which applies
     * the primitive subtraction operation ({@code -}) to each lane.
     *
     * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Binary,float,VectorMask)
     *    lanewise}{@code (}{@link VectorOperators#SUB
     *    SUB}{@code , e, m)}.
     *
     * @param e the input scalar
     * @param m the mask controlling lane selection
     * @return the result of subtracting the scalar from each lane of this vector
     * @see #sub(Vector,VectorMask)
     * @see #broadcast(float)
     * @see #sub(float)
     * @see VectorOperators#SUB
     * @see #lanewise(VectorOperators.Binary,Vector)
     * @see #lanewise(VectorOperators.Binary,float)
     */
    @ForceInline
    public final FloatVector sub(float e,
                                 VectorMask<Float> m) {
        return lanewise(SUB, e, m);
    }

    /**
     * {@inheritDoc} <!--workaround-->
     * @see #mul(float)
     */
    @Override
    @ForceInline
    public final FloatVector mul(Vector<Float> v) {
        return lanewise(MUL, v);
    }

    /**
     * Multiplies this vector by the broadcast of an input scalar.
*
     * This is a lane-wise binary operation which applies
     * the primitive multiplication operation ({@code *}) to each lane.
     *
     * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Binary,float)
     *    lanewise}{@code (}{@link VectorOperators#MUL
     *    MUL}{@code , e)}.
     *
     * @param e the input scalar
     * @return the result of multiplying this vector by the given scalar
     * @see #mul(Vector)
     * @see #broadcast(float)
     * @see #mul(float,VectorMask)
     * @see VectorOperators#MUL
     * @see #lanewise(VectorOperators.Binary,Vector)
     * @see #lanewise(VectorOperators.Binary,float)
     */
    @ForceInline
    public final FloatVector mul(float e) {
        return lanewise(MUL, e);
    }

    /**
     * {@inheritDoc} <!--workaround-->
     * @see #mul(float,VectorMask)
     */
    @Override
    @ForceInline
    public final FloatVector mul(Vector<Float> v,
                                 VectorMask<Float> m) {
        return lanewise(MUL, v, m);
    }

    /**
     * Multiplies this vector by the broadcast of an input scalar,
     * selecting lane elements controlled by a mask.
     *
     * This is a masked lane-wise binary operation which applies
     * the primitive multiplication operation ({@code *}) to each lane.
     *
     * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Binary,float,VectorMask)
     *    lanewise}{@code (}{@link VectorOperators#MUL
     *    MUL}{@code , e, m)}.
*
     * @param e the input scalar
     * @param m the mask controlling lane selection
     * @return the result of multiplying each lane of this vector by the scalar
     * @see #mul(Vector,VectorMask)
     * @see #broadcast(float)
     * @see #mul(float)
     * @see VectorOperators#MUL
     * @see #lanewise(VectorOperators.Binary,Vector)
     * @see #lanewise(VectorOperators.Binary,float)
     */
    @ForceInline
    public final FloatVector mul(float e,
                                 VectorMask<Float> m) {
        return lanewise(MUL, e, m);
    }

    /**
     * {@inheritDoc} <!--workaround-->
     * @apiNote Because the underlying scalar operator is an IEEE
     * floating point number, division by zero in fact will
     * not throw an exception, but will yield a signed
     * infinity or NaN.
     */
    @Override
    @ForceInline
    public final FloatVector div(Vector<Float> v) {
        return lanewise(DIV, v);
    }

    /**
     * Divides this vector by the broadcast of an input scalar.
     *
     * This is a lane-wise binary operation which applies
     * the primitive division operation ({@code /}) to each lane.
     *
     * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Binary,float)
     *    lanewise}{@code (}{@link VectorOperators#DIV
     *    DIV}{@code , e)}.
     *
     * @apiNote Because the underlying scalar operator is an IEEE
     * floating point number, division by zero in fact will
     * not throw an exception, but will yield a signed
     * infinity or NaN.
*
     * @param e the input scalar
     * @return the result of dividing each lane of this vector by the scalar
     * @see #div(Vector)
     * @see #broadcast(float)
     * @see #div(float,VectorMask)
     * @see VectorOperators#DIV
     * @see #lanewise(VectorOperators.Binary,Vector)
     * @see #lanewise(VectorOperators.Binary,float)
     */
    @ForceInline
    public final FloatVector div(float e) {
        return lanewise(DIV, e);
    }

    /**
     * {@inheritDoc} <!--workaround-->
     * @see #div(float,VectorMask)
     * @apiNote Because the underlying scalar operator is an IEEE
     * floating point number, division by zero in fact will
     * not throw an exception, but will yield a signed
     * infinity or NaN.
     */
    @Override
    @ForceInline
    public final FloatVector div(Vector<Float> v,
                                 VectorMask<Float> m) {
        return lanewise(DIV, v, m);
    }

    /**
     * Divides this vector by the broadcast of an input scalar,
     * selecting lane elements controlled by a mask.
     *
     * This is a masked lane-wise binary operation which applies
     * the primitive division operation ({@code /}) to each lane.
     *
     * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Binary,float,VectorMask)
     *    lanewise}{@code (}{@link VectorOperators#DIV
     *    DIV}{@code , e, m)}.
     *
     * @apiNote Because the underlying scalar operator is an IEEE
     * floating point number, division by zero in fact will
     * not throw an exception, but will yield a signed
     * infinity or NaN.
*
     * @param e the input scalar
     * @param m the mask controlling lane selection
     * @return the result of dividing each lane of this vector by the scalar
     * @see #div(Vector,VectorMask)
     * @see #broadcast(float)
     * @see #div(float)
     * @see VectorOperators#DIV
     * @see #lanewise(VectorOperators.Binary,Vector)
     * @see #lanewise(VectorOperators.Binary,float)
     */
    @ForceInline
    public final FloatVector div(float e,
                                 VectorMask<Float> m) {
        return lanewise(DIV, e, m);
    }

    /// END OF FULL-SERVICE BINARY METHODS

    /// SECOND-TIER BINARY METHODS
    //
    // There are no masked versions.

    /**
     * {@inheritDoc} <!--workaround-->
     * @apiNote
     * For this method, floating point negative
     * zero {@code -0.0} is treated as a value distinct from, and less
     * than the default value (positive zero).
     */
    @Override
    @ForceInline
    public final FloatVector min(Vector<Float> v) {
        return lanewise(MIN, v);
    }

    // FIXME:  "broadcast of an input scalar" is really wordy.  Reduce?
    /**
     * Computes the smaller of this vector and the broadcast of an input scalar.
     *
     * This is a lane-wise binary operation which applies the
     * operation {@code Math.min()} to each pair of
     * corresponding lane values.
     *
     * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Binary,float)
     *    lanewise}{@code (}{@link VectorOperators#MIN
     *    MIN}{@code , e)}.
     *
     * @param e the input scalar
     * @return the lane-wise minimum of this vector and the given scalar
     * @see #min(Vector)
     * @see #broadcast(float)
     * @see VectorOperators#MIN
     * @see #lanewise(VectorOperators.Binary,float,VectorMask)
     * @apiNote
     * For this method, floating point negative
     * zero {@code -0.0} is treated as a value distinct from, and less
     * than the default value (positive zero).
*/
    @ForceInline
    public final FloatVector min(float e) {
        return lanewise(MIN, e);
    }

    /**
     * {@inheritDoc} <!--workaround-->
     * @apiNote
     * For this method, floating point negative
     * zero {@code -0.0} is treated as a value distinct from, and less
     * than the default value (positive zero).
     */
    @Override
    @ForceInline
    public final FloatVector max(Vector<Float> v) {
        return lanewise(MAX, v);
    }

    /**
     * Computes the larger of this vector and the broadcast of an input scalar.
     *
     * This is a lane-wise binary operation which applies the
     * operation {@code Math.max()} to each pair of
     * corresponding lane values.
     *
     * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Binary,float)
     *    lanewise}{@code (}{@link VectorOperators#MAX
     *    MAX}{@code , e)}.
     *
     * @param e the input scalar
     * @return the lane-wise maximum of this vector and the given scalar
     * @see #max(Vector)
     * @see #broadcast(float)
     * @see VectorOperators#MAX
     * @see #lanewise(VectorOperators.Binary,float,VectorMask)
     * @apiNote
     * For this method, floating point negative
     * zero {@code -0.0} is treated as a value distinct from, and less
     * than the default value (positive zero).
     */
    @ForceInline
    public final FloatVector max(float e) {
        return lanewise(MAX, e);
    }


    // common FP operator: pow
    /**
     * Raises this vector to the power of a second input vector.
     *
     * This is a lane-wise binary operation which applies an operation
     * conforming to the specification of
     * {@link Math#pow Math.pow(a,b)}
     * to each pair of corresponding lane values.
* The operation is adapted to cast the operands and the result,
     * specifically widening {@code float} operands to {@code double}
     * operands and narrowing the {@code double} result to a {@code float}
     * result.
     *
     * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Binary,Vector)
     *    lanewise}{@code (}{@link VectorOperators#POW
     *    POW}{@code , b)}.
     *
     * <p>
     * This is not a full-service named operation like
     * {@link #add(Vector) add}.  A masked version of
     * this operation is not directly available
     * but may be obtained via the masked version of
     * {@code lanewise}.
     *
     * @param b a vector exponent by which to raise this vector
     * @return the {@code b}-th power of this vector
     * @see #pow(float)
     * @see VectorOperators#POW
     * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
     */
    @ForceInline
    public final FloatVector pow(Vector<Float> b) {
        return lanewise(POW, b);
    }

    /**
     * Raises this vector to a scalar power.
     *
     * This is a lane-wise binary operation which applies an operation
     * conforming to the specification of
     * {@link Math#pow Math.pow(a,b)}
     * to each pair of corresponding lane values.
     * The operation is adapted to cast the operands and the result,
     * specifically widening {@code float} operands to {@code double}
     * operands and narrowing the {@code double} result to a {@code float}
     * result.
     *
     * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Binary,float)
     *    lanewise}{@code (}{@link VectorOperators#POW
     *    POW}{@code , b)}.
*
     * @param b a scalar exponent by which to raise this vector
     * @return the {@code b}-th power of this vector
     * @see #pow(Vector)
     * @see VectorOperators#POW
     * @see #lanewise(VectorOperators.Binary,float,VectorMask)
     */
    @ForceInline
    public final FloatVector pow(float b) {
        return lanewise(POW, b);
    }

    /// UNARY METHODS
    //
    // Each method below is a one-line delegation to the unary
    // lanewise entry point with a fixed operator.

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    @ForceInline
    public final
    FloatVector neg() {
        return lanewise(NEG);
    }

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    @ForceInline
    public final
    FloatVector abs() {
        return lanewise(ABS);
    }


    // sqrt
    /**
     * Computes the square root of this vector.
     *
     * This is a lane-wise unary operation which applies an operation
     * conforming to the specification of
     * {@link Math#sqrt Math.sqrt(a)}
     * to each lane value.
     * The operation is adapted to cast the operand and the result,
     * specifically widening the {@code float} operand to a {@code double}
     * operand and narrowing the {@code double} result to a {@code float}
     * result.
     *
     * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Unary)
     *    lanewise}{@code (}{@link VectorOperators#SQRT
     *    SQRT}{@code )}.
     *
     * @return the square root of this vector
     * @see VectorOperators#SQRT
     * @see #lanewise(VectorOperators.Unary,VectorMask)
     */
    @ForceInline
    public final FloatVector sqrt() {
        return lanewise(SQRT);
    }

    /// COMPARISONS

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    @ForceInline
    public final
    VectorMask<Float> eq(Vector<Float> v) {
        return compare(EQ, v);
    }

    /**
     * Tests if this vector is equal to an input scalar.
*
     * This is a lane-wise binary test operation which applies
     * the primitive equals operation ({@code ==}) to each lane.
     * The result is the same as {@code compare(VectorOperators.EQ, e)}.
     *
     * @param e the input scalar
     * @return the result mask of testing if this vector
     *         is equal to {@code e}
     * @see #compare(VectorOperators.Comparison,float)
     */
    @ForceInline
    public final
    VectorMask<Float> eq(float e) {
        return compare(EQ, e);
    }

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    @ForceInline
    public final
    VectorMask<Float> lt(Vector<Float> v) {
        return compare(LT, v);
    }

    /**
     * Tests if this vector is less than an input scalar.
     *
     * This is a lane-wise binary test operation which applies
     * the primitive less than operation ({@code <}) to each lane.
     * The result is the same as {@code compare(VectorOperators.LT, e)}.
     *
     * @param e the input scalar
     * @return the mask result of testing if this vector
     *         is less than the input scalar
     * @see #compare(VectorOperators.Comparison,float)
     */
    @ForceInline
    public final
    VectorMask<Float> lt(float e) {
        return compare(LT, e);
    }

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    public abstract
    VectorMask<Float> test(VectorOperators.Test op);

    // Shared implementation of the unmasked test(op): the VO_SPECIAL
    // tests are evaluated as integer comparisons on the int view of
    // the lanes, and the result is cast back to a mask of this species.
    /*package-private*/
    @ForceInline
    final
    <M extends VectorMask<Float>>
    M testTemplate(Class<M> maskType, Test op) {
        FloatSpecies vsp = vspecies();
        if (opKind(op, VO_SPECIAL)) {
            IntVector bits = this.viewAsIntegralLanes();
            VectorMask<Integer> m;
            if (op == IS_DEFAULT) {
                // the int view of the lane equals zero
                m = bits.compare(EQ, (int) 0);
            } else if (op == IS_NEGATIVE) {
                // the int view is negative, i.e. its top (sign) bit is set
                m = bits.compare(LT, (int) 0);
            }
            else if (op == IS_FINITE ||
                     op == IS_NAN ||
                     op == IS_INFINITE) {
                // first kill the sign:
                bits = bits.and(Integer.MAX_VALUE);
                // next find the bit pattern for infinity:
                int infbits = (int) toBits(Float.POSITIVE_INFINITY);
                // now compare:
                // below the infinity pattern => finite,
                // above it => NaN, equal to it => infinite.
                if (op == IS_FINITE) {
                    m = bits.compare(LT, infbits);
                } else if (op == IS_NAN) {
                    m = bits.compare(GT, infbits);
                } else {
                    m = bits.compare(EQ, infbits);
                }
            }
            else {
                throw new AssertionError(op);
            }
            return maskType.cast(m.cast(vsp));
        }
        // Not a VO_SPECIAL test: opCode(op) is still evaluated (its
        // result is unused) and the method then always throws.
        // NOTE(review): presumably opCode is called for its validation
        // of the operator -- confirm.
        int opc = opCode(op);
        throw new AssertionError(op);
    }

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    public abstract
    VectorMask<Float> test(VectorOperators.Test op,
                                  VectorMask<Float> m);

    // Shared implementation of the masked test(op, m): same bit tests as
    // the unmasked template, but every comparison is given the input
    // mask (cast to an int-lane mask) as an extra argument.
    /*package-private*/
    @ForceInline
    final
    <M extends VectorMask<Float>>
    M testTemplate(Class<M> maskType, Test op, M mask) {
        FloatSpecies vsp = vspecies();
        mask.check(maskType, this);
        if (opKind(op, VO_SPECIAL)) {
            IntVector bits = this.viewAsIntegralLanes();
            VectorMask<Integer> m = mask.cast(IntVector.species(shape()));
            if (op == IS_DEFAULT) {
                // the int view of the lane equals zero
                m = bits.compare(EQ, (int) 0, m);
            } else if (op == IS_NEGATIVE) {
                // the int view is negative, i.e. its top (sign) bit is set
                m = bits.compare(LT, (int) 0, m);
            }
            else if (op == IS_FINITE ||
                     op == IS_NAN ||
                     op == IS_INFINITE) {
                // first kill the sign:
                bits = bits.and(Integer.MAX_VALUE);
                // next find the bit pattern for infinity:
                int infbits = (int) toBits(Float.POSITIVE_INFINITY);
                // now compare:
                // below the infinity pattern => finite,
                // above it => NaN, equal to it => infinite.
                if (op == IS_FINITE) {
                    m = bits.compare(LT, infbits, m);
                } else if (op == IS_NAN) {
                    m = bits.compare(GT, infbits, m);
                } else {
                    m = bits.compare(EQ, infbits, m);
                }
            }
            else {
                throw new AssertionError(op);
            }
            return maskType.cast(m.cast(vsp));
        }
        // Not a VO_SPECIAL test: opCode(op) is still evaluated (its
        // result is unused) and the method then always throws.
        int opc = opCode(op);
        throw new AssertionError(op);
    }

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    public abstract
    VectorMask<Float> compare(VectorOperators.Comparison op, Vector<Float> v);

    // Shared implementation of the unmasked vector/vector compare.
    // The trailing lambda evaluates the comparison lane by lane through
    // bTest and compareWithOp.
    // NOTE(review): presumably VectorSupport.compare uses the lambda only
    // as the Java fallback when no intrinsic applies -- confirm.
    /*package-private*/
    @ForceInline
    final
    <M extends VectorMask<Float>>
    M compareTemplate(Class<M> maskType, Comparison op, Vector<Float> v) {
        FloatVector that = (FloatVector) v;
        that.check(this);
        int opc = opCode(op);
        return VectorSupport.compare(
            opc, getClass(), maskType, float.class, length(),
            this, that, null,
            (cond, v0, v1, m1) -> {
                AbstractMask<Float> m
                    = v0.bTest(cond, v1, (cond_, i, a, b)
                               -> compareWithOp(cond, a, b));
                @SuppressWarnings("unchecked")
                M m2 = (M) m;
                return m2;
            });
    }

    // Shared implementation of the masked vector/vector compare.
    // Same as the unmasked template, except that the mask is passed to
    // VectorSupport.compare and the lambda ANDs the lane-wise result
    // with the mask it receives.
    /*package-private*/
    @ForceInline
    final
    <M extends VectorMask<Float>>
    M compareTemplate(Class<M> maskType, Comparison op, Vector<Float> v, M m) {
        FloatVector that = (FloatVector) v;
        that.check(this);
        m.check(maskType, this);
        int opc = opCode(op);
        return VectorSupport.compare(
            opc, getClass(), maskType, float.class, length(),
            this, that, m,
            (cond, v0, v1, m1) -> {
                AbstractMask<Float> cmpM
                    = v0.bTest(cond, v1, (cond_, i, a, b)
                               -> compareWithOp(cond, a, b));
                @SuppressWarnings("unchecked")
                M m2 = (M) cmpM.and(m1);
                return m2;
            });
    }

    // Scalar comparison of two float lane values, selected by one of the
    // BT_* condition codes; any other code is an AssertionError.
    @ForceInline
    private static boolean compareWithOp(int cond, float a, float b) {
        return switch (cond) {
            case BT_eq -> a == b;
            case BT_ne -> a != b;
            case BT_lt -> a < b;
            case BT_le -> a <= b;
            case BT_gt -> a > b;
            case BT_ge -> a >= b;
            default -> throw new AssertionError();
        };
    }

    /**
     * Tests this vector by comparing it with an input scalar,
     * according to the given comparison operation.
     *
     * This is a lane-wise binary test operation which applies
     * the comparison operation to each lane.
     * <p>
     * The result is the same as
     * {@code compare(op, broadcast(species(), e))}.
* That is, the scalar may be regarded as broadcast to
     * a vector of the same species, and then compared
     * against the original vector, using the selected
     * comparison operation.
     *
     * @param op the operation used to compare lane values
     * @param e the input scalar
     * @return the mask result of testing lane-wise if this vector
     *         compares to the input, according to the selected
     *         comparison operator
     * @see FloatVector#compare(VectorOperators.Comparison,Vector)
     * @see #eq(float)
     * @see #lt(float)
     */
    public abstract
    VectorMask<Float> compare(Comparison op, float e);

    // Shared implementation of compare(op, float): broadcast the scalar
    // and reuse the vector/vector template.
    /*package-private*/
    @ForceInline
    final
    <M extends VectorMask<Float>>
    M compareTemplate(Class<M> maskType, Comparison op, float e) {
        return compareTemplate(maskType, op, broadcast(e));
    }

    /**
     * Tests this vector by comparing it with an input scalar,
     * according to the given comparison operation,
     * in lanes selected by a mask.
     *
     * This is a masked lane-wise binary test operation which applies
     * the comparison operation to each pair of corresponding lane values.
     *
     * The returned result is equal to the expression
     * {@code compare(op,e).and(m)}.
*
     * @param op the operation used to compare lane values
     * @param e the input scalar
     * @param m the mask controlling lane selection
     * @return the mask result of testing lane-wise if this vector
     *         compares to the input, according to the selected
     *         comparison operator,
     *         and only in the lanes selected by the mask
     * @see FloatVector#compare(VectorOperators.Comparison,Vector,VectorMask)
     */
    @ForceInline
    public final VectorMask<Float> compare(VectorOperators.Comparison op,
                                           float e,
                                           VectorMask<Float> m) {
        // Broadcast the scalar and delegate to the masked
        // vector/vector compare.
        return compare(op, broadcast(e), m);
    }

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    public abstract
    VectorMask<Float> compare(Comparison op, long e);

    // Shared implementation of compare(op, long): broadcast the long
    // scalar and reuse the vector/vector template.
    /*package-private*/
    @ForceInline
    final
    <M extends VectorMask<Float>>
    M compareTemplate(Class<M> maskType, Comparison op, long e) {
        return compareTemplate(maskType, op, broadcast(e));
    }

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    @ForceInline
    public final
    VectorMask<Float> compare(Comparison op, long e, VectorMask<Float> m) {
        return compare(op, broadcast(e), m);
    }



    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override public abstract
    FloatVector blend(Vector<Float> v, VectorMask<Float> m);

    // Shared implementation of blend(v, m).
    // The per-lane function in the lambda returns b, the lane value taken
    // from v1 (the replacement vector).
    // NOTE(review): presumably bOp applies it only in lanes set in m_ and
    // keeps v0's lane elsewhere -- confirm against bOp.
    /*package-private*/
    @ForceInline
    final
    <M extends VectorMask<Float>>
    FloatVector
    blendTemplate(Class<M> maskType, FloatVector v, M m) {
        v.check(this);
        return VectorSupport.blend(
            getClass(), maskType, float.class, length(),
            this, v, m,
            (v0, v1, m_) -> v0.bOp(v1, m_, (i, a, b) -> b));
    }

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override public abstract FloatVector addIndex(int scale);

    // Shared implementation of addIndex(scale): adds the species' iota
    // vector, multiplied by scale unless scale is 1, to this vector.
    /*package-private*/
    @ForceInline
    final FloatVector addIndexTemplate(int scale) {
        FloatSpecies vsp = vspecies();
        // make sure VLENGTH*scale doesn't overflow:
        vsp.checkScale(scale);
        return VectorSupport.indexVector(
            getClass(), float.class, length(),
            this, scale, vsp,
            (v, scale_, s)
            -> {
                // If the platform doesn't support an INDEX
                // instruction directly, load IOTA from memory
                // and multiply.
                FloatVector iota = s.iota();
                float sc = (float) scale_;
                // skip the multiply when the scale is exactly 1
                return v.add(sc == 1 ? iota : iota.mul(sc));
            });
    }

    /**
     * Replaces selected lanes of this vector with
     * a scalar value
     * under the control of a mask.
     *
     * This is a masked lane-wise binary operation which
     * selects each lane value from one or the other input.
     *
     * The returned result is equal to the expression
     * {@code blend(broadcast(e),m)}.
     *
     * @param e the input scalar, containing the replacement lane value
     * @param m the mask controlling lane selection of the scalar
     * @return the result of blending the lane elements of this vector with
     *         the scalar value
     */
    @ForceInline
    public final FloatVector blend(float e,
                                   VectorMask<Float> m) {
        return blend(broadcast(e), m);
    }

    /**
     * Replaces selected lanes of this vector with
     * a scalar value
     * under the control of a mask.
     *
     * This is a masked lane-wise binary operation which
     * selects each lane value from one or the other input.
     *
     * The returned result is equal to the expression
     * {@code blend(broadcast(e),m)}.
2011 * 2012 * @param e the input scalar, containing the replacement lane value 2013 * @param m the mask controlling lane selection of the scalar 2014 * @return the result of blending the lane elements of this vector with 2015 * the scalar value 2016 */ 2017 @ForceInline 2018 public final FloatVector blend(long e, 2019 VectorMask<Float> m) { 2020 return blend(broadcast(e), m); 2021 } 2022 2023 /** 2024 * {@inheritDoc} <!--workaround--> 2025 */ 2026 @Override 2027 public abstract 2028 FloatVector slice(int origin, Vector<Float> v1); 2029 2030 /*package-private*/ 2031 final 2032 @ForceInline 2033 FloatVector sliceTemplate(int origin, Vector<Float> v1) { 2034 FloatVector that = (FloatVector) v1; 2035 that.check(this); 2036 Objects.checkIndex(origin, length() + 1); 2037 VectorShuffle<Float> iota = iotaShuffle(); 2038 VectorMask<Float> blendMask = iota.toVector().compare(VectorOperators.LT, (broadcast((float)(length() - origin)))); 2039 iota = iotaShuffle(origin, 1, true); 2040 return that.rearrange(iota).blend(this.rearrange(iota), blendMask); 2041 } 2042 2043 /** 2044 * {@inheritDoc} <!--workaround--> 2045 */ 2046 @Override 2047 @ForceInline 2048 public final 2049 FloatVector slice(int origin, 2050 Vector<Float> w, 2051 VectorMask<Float> m) { 2052 return broadcast(0).blend(slice(origin, w), m); 2053 } 2054 2055 /** 2056 * {@inheritDoc} <!--workaround--> 2057 */ 2058 @Override 2059 public abstract 2060 FloatVector slice(int origin); 2061 2062 /*package-private*/ 2063 final 2064 @ForceInline 2065 FloatVector sliceTemplate(int origin) { 2066 Objects.checkIndex(origin, length() + 1); 2067 VectorShuffle<Float> iota = iotaShuffle(); 2068 VectorMask<Float> blendMask = iota.toVector().compare(VectorOperators.LT, (broadcast((float)(length() - origin)))); 2069 iota = iotaShuffle(origin, 1, true); 2070 return vspecies().zero().blend(this.rearrange(iota), blendMask); 2071 } 2072 2073 /** 2074 * {@inheritDoc} <!--workaround--> 2075 */ 2076 @Override 2077 public abstract 2078 
FloatVector unslice(int origin, Vector<Float> w, int part); 2079 2080 /*package-private*/ 2081 final 2082 @ForceInline 2083 FloatVector 2084 unsliceTemplate(int origin, Vector<Float> w, int part) { 2085 FloatVector that = (FloatVector) w; 2086 that.check(this); 2087 Objects.checkIndex(origin, length() + 1); 2088 VectorShuffle<Float> iota = iotaShuffle(); 2089 VectorMask<Float> blendMask = iota.toVector().compare((part == 0) ? VectorOperators.GE : VectorOperators.LT, 2090 (broadcast((float)(origin)))); 2091 iota = iotaShuffle(-origin, 1, true); 2092 return that.blend(this.rearrange(iota), blendMask); 2093 } 2094 2095 /*package-private*/ 2096 final 2097 @ForceInline 2098 <M extends VectorMask<Float>> 2099 FloatVector 2100 unsliceTemplate(Class<M> maskType, int origin, Vector<Float> w, int part, M m) { 2101 FloatVector that = (FloatVector) w; 2102 that.check(this); 2103 FloatVector slice = that.sliceTemplate(origin, that); 2104 slice = slice.blendTemplate(maskType, this, m); 2105 return slice.unsliceTemplate(origin, w, part); 2106 } 2107 2108 /** 2109 * {@inheritDoc} <!--workaround--> 2110 */ 2111 @Override 2112 public abstract 2113 FloatVector unslice(int origin, Vector<Float> w, int part, VectorMask<Float> m); 2114 2115 /** 2116 * {@inheritDoc} <!--workaround--> 2117 */ 2118 @Override 2119 public abstract 2120 FloatVector unslice(int origin); 2121 2122 /*package-private*/ 2123 final 2124 @ForceInline 2125 FloatVector 2126 unsliceTemplate(int origin) { 2127 Objects.checkIndex(origin, length() + 1); 2128 VectorShuffle<Float> iota = iotaShuffle(); 2129 VectorMask<Float> blendMask = iota.toVector().compare(VectorOperators.GE, 2130 (broadcast((float)(origin)))); 2131 iota = iotaShuffle(-origin, 1, true); 2132 return vspecies().zero().blend(this.rearrange(iota), blendMask); 2133 } 2134 2135 private ArrayIndexOutOfBoundsException 2136 wrongPartForSlice(int part) { 2137 String msg = String.format("bad part number %d for slice operation", 2138 part); 2139 return new 
ArrayIndexOutOfBoundsException(msg); 2140 } 2141 2142 /** 2143 * {@inheritDoc} <!--workaround--> 2144 */ 2145 @Override 2146 public abstract 2147 FloatVector rearrange(VectorShuffle<Float> m); 2148 2149 /*package-private*/ 2150 @ForceInline 2151 final 2152 <S extends VectorShuffle<Float>> 2153 FloatVector rearrangeTemplate(Class<S> shuffletype, S shuffle) { 2154 shuffle.checkIndexes(); 2155 return VectorSupport.rearrangeOp( 2156 getClass(), shuffletype, null, float.class, length(), 2157 this, shuffle, null, 2158 (v1, s_, m_) -> v1.uOp((i, a) -> { 2159 int ei = s_.laneSource(i); 2160 return v1.lane(ei); 2161 })); 2162 } 2163 2164 /** 2165 * {@inheritDoc} <!--workaround--> 2166 */ 2167 @Override 2168 public abstract 2169 FloatVector rearrange(VectorShuffle<Float> s, 2170 VectorMask<Float> m); 2171 2172 /*package-private*/ 2173 @ForceInline 2174 final 2175 <S extends VectorShuffle<Float>, M extends VectorMask<Float>> 2176 FloatVector rearrangeTemplate(Class<S> shuffletype, 2177 Class<M> masktype, 2178 S shuffle, 2179 M m) { 2180 2181 m.check(masktype, this); 2182 VectorMask<Float> valid = shuffle.laneIsValid(); 2183 if (m.andNot(valid).anyTrue()) { 2184 shuffle.checkIndexes(); 2185 throw new AssertionError(); 2186 } 2187 return VectorSupport.rearrangeOp( 2188 getClass(), shuffletype, masktype, float.class, length(), 2189 this, shuffle, m, 2190 (v1, s_, m_) -> v1.uOp((i, a) -> { 2191 int ei = s_.laneSource(i); 2192 return ei < 0 || !m_.laneIsSet(i) ? 
0 : v1.lane(ei); 2193 })); 2194 } 2195 2196 /** 2197 * {@inheritDoc} <!--workaround--> 2198 */ 2199 @Override 2200 public abstract 2201 FloatVector rearrange(VectorShuffle<Float> s, 2202 Vector<Float> v); 2203 2204 /*package-private*/ 2205 @ForceInline 2206 final 2207 <S extends VectorShuffle<Float>> 2208 FloatVector rearrangeTemplate(Class<S> shuffletype, 2209 S shuffle, 2210 FloatVector v) { 2211 VectorMask<Float> valid = shuffle.laneIsValid(); 2212 @SuppressWarnings("unchecked") 2213 S ws = (S) shuffle.wrapIndexes(); 2214 FloatVector r0 = 2215 VectorSupport.rearrangeOp( 2216 getClass(), shuffletype, null, float.class, length(), 2217 this, ws, null, 2218 (v0, s_, m_) -> v0.uOp((i, a) -> { 2219 int ei = s_.laneSource(i); 2220 return v0.lane(ei); 2221 })); 2222 FloatVector r1 = 2223 VectorSupport.rearrangeOp( 2224 getClass(), shuffletype, null, float.class, length(), 2225 v, ws, null, 2226 (v1, s_, m_) -> v1.uOp((i, a) -> { 2227 int ei = s_.laneSource(i); 2228 return v1.lane(ei); 2229 })); 2230 return r1.blend(r0, valid); 2231 } 2232 2233 @ForceInline 2234 private final 2235 VectorShuffle<Float> toShuffle0(FloatSpecies dsp) { 2236 float[] a = toArray(); 2237 int[] sa = new int[a.length]; 2238 for (int i = 0; i < a.length; i++) { 2239 sa[i] = (int) a[i]; 2240 } 2241 return VectorShuffle.fromArray(dsp, sa, 0); 2242 } 2243 2244 /*package-private*/ 2245 @ForceInline 2246 final 2247 VectorShuffle<Float> toShuffleTemplate(Class<?> shuffleType) { 2248 FloatSpecies vsp = vspecies(); 2249 return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST, 2250 getClass(), float.class, length(), 2251 shuffleType, byte.class, length(), 2252 this, vsp, 2253 FloatVector::toShuffle0); 2254 } 2255 2256 /** 2257 * {@inheritDoc} <!--workaround--> 2258 */ 2259 @Override 2260 public abstract 2261 FloatVector selectFrom(Vector<Float> v); 2262 2263 /*package-private*/ 2264 @ForceInline 2265 final FloatVector selectFromTemplate(FloatVector v) { 2266 return v.rearrange(this.toShuffle()); 2267 } 
2268 2269 /** 2270 * {@inheritDoc} <!--workaround--> 2271 */ 2272 @Override 2273 public abstract 2274 FloatVector selectFrom(Vector<Float> s, VectorMask<Float> m); 2275 2276 /*package-private*/ 2277 @ForceInline 2278 final FloatVector selectFromTemplate(FloatVector v, 2279 AbstractMask<Float> m) { 2280 return v.rearrange(this.toShuffle(), m); 2281 } 2282 2283 /// Ternary operations 2284 2285 2286 /** 2287 * Multiplies this vector by a second input vector, and sums 2288 * the result with a third. 2289 * 2290 * Extended precision is used for the intermediate result, 2291 * avoiding possible loss of precision from rounding once 2292 * for each of the two operations. 2293 * The result is numerically close to {@code this.mul(b).add(c)}, 2294 * and is typically closer to the true mathematical result. 2295 * 2296 * This is a lane-wise ternary operation which applies an operation 2297 * conforming to the specification of 2298 * {@link Math#fma(float,float,float) Math.fma(a,b,c)} 2299 * to each lane. 2300 * The operation is adapted to cast the operands and the result, 2301 * specifically widening {@code float} operands to {@code double} 2302 * operands and narrowing the {@code double} result to a {@code float} 2303 * result. 2304 * 2305 * This method is also equivalent to the expression 2306 * {@link #lanewise(VectorOperators.Ternary,Vector,Vector) 2307 * lanewise}{@code (}{@link VectorOperators#FMA 2308 * FMA}{@code , b, c)}. 
2309 * 2310 * @param b the second input vector, supplying multiplier values 2311 * @param c the third input vector, supplying addend values 2312 * @return the product of this vector and the second input vector 2313 * summed with the third input vector, using extended precision 2314 * for the intermediate result 2315 * @see #fma(float,float) 2316 * @see VectorOperators#FMA 2317 * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask) 2318 */ 2319 @ForceInline 2320 public final 2321 FloatVector fma(Vector<Float> b, Vector<Float> c) { 2322 return lanewise(FMA, b, c); 2323 } 2324 2325 /** 2326 * Multiplies this vector by a scalar multiplier, and sums 2327 * the result with a scalar addend. 2328 * 2329 * Extended precision is used for the intermediate result, 2330 * avoiding possible loss of precision from rounding once 2331 * for each of the two operations. 2332 * The result is numerically close to {@code this.mul(b).add(c)}, 2333 * and is typically closer to the true mathematical result. 2334 * 2335 * This is a lane-wise ternary operation which applies an operation 2336 * conforming to the specification of 2337 * {@link Math#fma(float,float,float) Math.fma(a,b,c)} 2338 * to each lane. 2339 * The operation is adapted to cast the operands and the result, 2340 * specifically widening {@code float} operands to {@code double} 2341 * operands and narrowing the {@code double} result to a {@code float} 2342 * result. 2343 * 2344 * This method is also equivalent to the expression 2345 * {@link #lanewise(VectorOperators.Ternary,Vector,Vector) 2346 * lanewise}{@code (}{@link VectorOperators#FMA 2347 * FMA}{@code , b, c)}. 
2348 * 2349 * @param b the scalar multiplier 2350 * @param c the scalar addend 2351 * @return the product of this vector and the scalar multiplier 2352 * summed with scalar addend, using extended precision 2353 * for the intermediate result 2354 * @see #fma(Vector,Vector) 2355 * @see VectorOperators#FMA 2356 * @see #lanewise(VectorOperators.Ternary,float,float,VectorMask) 2357 */ 2358 @ForceInline 2359 public final 2360 FloatVector fma(float b, float c) { 2361 return lanewise(FMA, b, c); 2362 } 2363 2364 // Don't bother with (Vector,float) and (float,Vector) overloadings. 2365 2366 // Type specific horizontal reductions 2367 2368 /** 2369 * Returns a value accumulated from all the lanes of this vector. 2370 * 2371 * This is an associative cross-lane reduction operation which 2372 * applies the specified operation to all the lane elements. 2373 * <p> 2374 * A few reduction operations do not support arbitrary reordering 2375 * of their operands, yet are included here because of their 2376 * usefulness. 2377 * <ul> 2378 * <li> 2379 * In the case of {@code FIRST_NONZERO}, the reduction returns 2380 * the value from the lowest-numbered non-zero lane. 2381 * (As with {@code MAX} and {@code MIN}, floating point negative 2382 * zero {@code -0.0} is treated as a value distinct from 2383 * the default value, positive zero. So a first-nonzero lane reduction 2384 * might return {@code -0.0} even in the presence of non-zero 2385 * lane values.) 2386 * <li> 2387 * In the case of {@code ADD} and {@code MUL}, the 2388 * precise result will reflect the choice of an arbitrary order 2389 * of operations, which may even vary over time. 2390 * For further details see the section 2391 * <a href="VectorOperators.html#fp_assoc">Operations on floating point vectors</a>. 2392 * <li> 2393 * All other reduction operations are fully commutative and 2394 * associative. The implementation can choose any order of 2395 * processing, yet it will always produce the same result. 
     * </ul>
     *
     * @param op the operation used to combine lane values
     * @return the accumulated result
     * @throws UnsupportedOperationException if this vector does
     *         not support the requested operation
     * @see #reduceLanes(VectorOperators.Associative,VectorMask)
     * @see #add(Vector)
     * @see #mul(Vector)
     * @see #min(Vector)
     * @see #max(Vector)
     * @see VectorOperators#FIRST_NONZERO
     */
    // Declared abstract here: this class only fixes the float return type.
    public abstract float reduceLanes(VectorOperators.Associative op);

    /**
     * Returns a value accumulated from selected lanes of this vector,
     * controlled by a mask.
     *
     * This is an associative cross-lane reduction operation which
     * applies the specified operation to the selected lane elements.
     * <p>
     * If no elements are selected, an operation-specific identity
     * value is returned.
     * <ul>
     * <li>
     * If the operation is
     *  {@code ADD}
     * or {@code FIRST_NONZERO},
     * then the identity value is positive zero, the default {@code float} value.
     * <li>
     * If the operation is {@code MUL},
     * then the identity value is one.
     * <li>
     * If the operation is {@code MAX},
     * then the identity value is {@code Float.NEGATIVE_INFINITY}.
     * <li>
     * If the operation is {@code MIN},
     * then the identity value is {@code Float.POSITIVE_INFINITY}.
     * </ul>
     * <p>
     * A few reduction operations do not support arbitrary reordering
     * of their operands, yet are included here because of their
     * usefulness.
     * <ul>
     * <li>
     * In the case of {@code FIRST_NONZERO}, the reduction returns
     * the value from the lowest-numbered non-zero lane.
     * (As with {@code MAX} and {@code MIN}, floating point negative
     * zero {@code -0.0} is treated as a value distinct from
     * the default value, positive zero.
     * So a first-nonzero lane reduction
     * might return {@code -0.0} even in the presence of non-zero
     * lane values.)
     * <li>
     * In the case of {@code ADD} and {@code MUL}, the
     * precise result will reflect the choice of an arbitrary order
     * of operations, which may even vary over time.
     * For further details see the section
     * <a href="VectorOperators.html#fp_assoc">Operations on floating point vectors</a>.
     * <li>
     * All other reduction operations are fully commutative and
     * associative.  The implementation can choose any order of
     * processing, yet it will always produce the same result.
     * </ul>
     *
     * @param op the operation used to combine lane values
     * @param m the mask controlling lane selection
     * @return the reduced result accumulated from the selected lane values
     * @throws UnsupportedOperationException if this vector does
     *         not support the requested operation
     * @see #reduceLanes(VectorOperators.Associative)
     */
    public abstract float reduceLanes(VectorOperators.Associative op,
                                       VectorMask<Float> m);

    /**
     * Portable definition of the masked lane reduction.
     *
     * @param op the operation used to combine lane values
     * @param maskClass the concrete mask class that {@code m} is checked against
     * @param m the mask controlling lane selection
     * @return the reduced result accumulated from the selected lane values
     */
    /*package-private*/
    @ForceInline
    final
    float reduceLanesTemplate(VectorOperators.Associative op,
                               Class<? extends VectorMask<Float>> maskClass,
                               VectorMask<Float> m) {
        m.check(maskClass, this);
        if (op == FIRST_NONZERO) {
            // FIXME:  The JIT should handle this.
            // Unselected lanes are replaced by zero so they can never be
            // picked, then the unmasked reduction does the search.
            FloatVector v = broadcast((float) 0).blend(this, m);
            return v.reduceLanesTemplate(op);
        }
        int opc = opCode(op);
        // The reduction result is carried as bits and converted back here.
        return fromBits(VectorSupport.reductionCoerced(
            opc, getClass(), maskClass, float.class, length(),
            this, m,
            REDUCE_IMPL.find(op, opc, FloatVector::reductionOperations)));
    }

    /**
     * Portable definition of the unmasked lane reduction.
     *
     * @param op the operation used to combine lane values
     * @return the accumulated result
     */
    /*package-private*/
    @ForceInline
    final
    float reduceLanesTemplate(VectorOperators.Associative op) {
        if (op == FIRST_NONZERO) {
            // FIXME:  The JIT should handle this.
            // The test is done on the integral view of the lanes, i.e.
            // against integer zero rather than against 0.0f.
            VectorMask<Integer> thisNZ
                = this.viewAsIntegralLanes().compare(NE, (int) 0);
            int ft = thisNZ.firstTrue();
            // An index of length() or more is treated as "no lane found".
            return ft < length() ? this.lane(ft) : (float) 0;
        }
        int opc = opCode(op);
        return fromBits(VectorSupport.reductionCoerced(
            opc, getClass(), null, float.class, length(),
            this, null,
            REDUCE_IMPL.find(op, opc, FloatVector::reductionOperations)));
    }

    // Cache of fallback reduction implementations, keyed by operator.
    private static final
    ImplCache<Associative, ReductionOperation<FloatVector, VectorMask<Float>>>
        REDUCE_IMPL = new ImplCache<>(Associative.class, FloatVector.class);

    /**
     * Supplies the scalar fallback for a reduction opcode.
     * Each fallback folds the lanes with {@code rOp}, starting from the
     * identity value of the operation, and returns the result as bits.
     *
     * @param opc_ the reduction opcode
     * @return the fallback implementation, or {@code null} if the opcode
     *         is not one of ADD, MUL, MIN or MAX
     */
    private static ReductionOperation<FloatVector, VectorMask<Float>> reductionOperations(int opc_) {
        switch (opc_) {
            case VECTOR_OP_ADD: return (v, m) ->
                    toBits(v.rOp((float)0, m, (i, a, b) -> (float)(a + b)));
            case VECTOR_OP_MUL: return (v, m) ->
                    toBits(v.rOp((float)1, m, (i, a, b) -> (float)(a * b)));
            case VECTOR_OP_MIN: return (v, m) ->
                    toBits(v.rOp(MAX_OR_INF, m, (i, a, b) -> (float) Math.min(a, b)));
            case VECTOR_OP_MAX: return (v, m) ->
                    toBits(v.rOp(MIN_OR_INF, m, (i, a, b) -> (float) Math.max(a, b)));
            default: return null;
        }
    }

    // Identity values for the MAX and MIN reductions respectively.
    private static final float MIN_OR_INF = Float.NEGATIVE_INFINITY;
    private static final float MAX_OR_INF = Float.POSITIVE_INFINITY;

    public @Override abstract long reduceLanesToLong(VectorOperators.Associative op);
    public @Override abstract long reduceLanesToLong(VectorOperators.Associative op,
                                                     VectorMask<Float> m);

    // Type specific accessors

    /**
     * Gets the lane element at lane index {@code i}
     *
     * @param i the lane index
     * @return the lane element at lane index {@code i}
     * @throws IllegalArgumentException if the index is out of range
     * ({@code < 0 || >= length()})
     */
    public abstract float lane(int i);

    /**
     * Replaces the lane element of this vector at lane index {@code i} with
value {@code e}. 2548 * 2549 * This is a cross-lane operation and behaves as if it returns the result 2550 * of blending this vector with an input vector that is the result of 2551 * broadcasting {@code e} and a mask that has only one lane set at lane 2552 * index {@code i}. 2553 * 2554 * @param i the lane index of the lane element to be replaced 2555 * @param e the value to be placed 2556 * @return the result of replacing the lane element of this vector at lane 2557 * index {@code i} with value {@code e}. 2558 * @throws IllegalArgumentException if the index is is out of range 2559 * ({@code < 0 || >= length()}) 2560 */ 2561 public abstract FloatVector withLane(int i, float e); 2562 2563 // Memory load operations 2564 2565 /** 2566 * Returns an array of type {@code float[]} 2567 * containing all the lane values. 2568 * The array length is the same as the vector length. 2569 * The array elements are stored in lane order. 2570 * <p> 2571 * This method behaves as if it stores 2572 * this vector into an allocated array 2573 * (using {@link #intoArray(float[], int) intoArray}) 2574 * and returns the array as follows: 2575 * <pre>{@code 2576 * float[] a = new float[this.length()]; 2577 * this.intoArray(a, 0); 2578 * return a; 2579 * }</pre> 2580 * 2581 * @return an array containing the lane values of this vector 2582 */ 2583 @ForceInline 2584 @Override 2585 public final float[] toArray() { 2586 float[] a = new float[vspecies().laneCount()]; 2587 intoArray(a, 0); 2588 return a; 2589 } 2590 2591 /** {@inheritDoc} <!--workaround--> 2592 */ 2593 @ForceInline 2594 @Override 2595 public final int[] toIntArray() { 2596 float[] a = toArray(); 2597 int[] res = new int[a.length]; 2598 for (int i = 0; i < a.length; i++) { 2599 float e = a[i]; 2600 res[i] = (int) FloatSpecies.toIntegralChecked(e, true); 2601 } 2602 return res; 2603 } 2604 2605 /** {@inheritDoc} <!--workaround--> 2606 */ 2607 @ForceInline 2608 @Override 2609 public final long[] toLongArray() { 2610 float[] a = 
toArray(); 2611 long[] res = new long[a.length]; 2612 for (int i = 0; i < a.length; i++) { 2613 float e = a[i]; 2614 res[i] = FloatSpecies.toIntegralChecked(e, false); 2615 } 2616 return res; 2617 } 2618 2619 /** {@inheritDoc} <!--workaround--> 2620 * @implNote 2621 * When this method is used on used on vectors 2622 * of type {@code FloatVector}, 2623 * there will be no loss of precision. 2624 */ 2625 @ForceInline 2626 @Override 2627 public final double[] toDoubleArray() { 2628 float[] a = toArray(); 2629 double[] res = new double[a.length]; 2630 for (int i = 0; i < a.length; i++) { 2631 res[i] = (double) a[i]; 2632 } 2633 return res; 2634 } 2635 2636 /** 2637 * Loads a vector from a byte array starting at an offset. 2638 * Bytes are composed into primitive lane elements according 2639 * to the specified byte order. 2640 * The vector is arranged into lanes according to 2641 * <a href="Vector.html#lane-order">memory ordering</a>. 2642 * <p> 2643 * This method behaves as if it returns the result of calling 2644 * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask) 2645 * fromByteBuffer()} as follows: 2646 * <pre>{@code 2647 * var bb = ByteBuffer.wrap(a); 2648 * var m = species.maskAll(true); 2649 * return fromByteBuffer(species, bb, offset, bo, m); 2650 * }</pre> 2651 * 2652 * @param species species of desired vector 2653 * @param a the byte array 2654 * @param offset the offset into the array 2655 * @param bo the intended byte order 2656 * @return a vector loaded from a byte array 2657 * @throws IndexOutOfBoundsException 2658 * if {@code offset+N*ESIZE < 0} 2659 * or {@code offset+(N+1)*ESIZE > a.length} 2660 * for any lane {@code N} in the vector 2661 */ 2662 @ForceInline 2663 public static 2664 FloatVector fromByteArray(VectorSpecies<Float> species, 2665 byte[] a, int offset, 2666 ByteOrder bo) { 2667 offset = checkFromIndexSize(offset, species.vectorByteSize(), a.length); 2668 FloatSpecies vsp = (FloatSpecies) species; 2669 return 
vsp.dummyVector().fromByteArray0(a, offset).maybeSwap(bo); 2670 } 2671 2672 /** 2673 * Loads a vector from a byte array starting at an offset 2674 * and using a mask. 2675 * Lanes where the mask is unset are filled with the default 2676 * value of {@code float} (positive zero). 2677 * Bytes are composed into primitive lane elements according 2678 * to the specified byte order. 2679 * The vector is arranged into lanes according to 2680 * <a href="Vector.html#lane-order">memory ordering</a>. 2681 * <p> 2682 * This method behaves as if it returns the result of calling 2683 * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask) 2684 * fromByteBuffer()} as follows: 2685 * <pre>{@code 2686 * var bb = ByteBuffer.wrap(a); 2687 * return fromByteBuffer(species, bb, offset, bo, m); 2688 * }</pre> 2689 * 2690 * @param species species of desired vector 2691 * @param a the byte array 2692 * @param offset the offset into the array 2693 * @param bo the intended byte order 2694 * @param m the mask controlling lane selection 2695 * @return a vector loaded from a byte array 2696 * @throws IndexOutOfBoundsException 2697 * if {@code offset+N*ESIZE < 0} 2698 * or {@code offset+(N+1)*ESIZE > a.length} 2699 * for any lane {@code N} in the vector 2700 * where the mask is set 2701 */ 2702 @ForceInline 2703 public static 2704 FloatVector fromByteArray(VectorSpecies<Float> species, 2705 byte[] a, int offset, 2706 ByteOrder bo, 2707 VectorMask<Float> m) { 2708 FloatSpecies vsp = (FloatSpecies) species; 2709 if (offset >= 0 && offset <= (a.length - species.vectorByteSize())) { 2710 return vsp.dummyVector().fromByteArray0(a, offset, m).maybeSwap(bo); 2711 } 2712 2713 // FIXME: optimize 2714 checkMaskFromIndexSize(offset, vsp, m, 4, a.length); 2715 ByteBuffer wb = wrapper(a, bo); 2716 return vsp.ldOp(wb, offset, (AbstractMask<Float>)m, 2717 (wb_, o, i) -> wb_.getFloat(o + i * 4)); 2718 } 2719 2720 /** 2721 * Loads a vector from an array of type {@code float[]} 2722 * starting 
at an offset. 2723 * For each vector lane, where {@code N} is the vector lane index, the 2724 * array element at index {@code offset + N} is placed into the 2725 * resulting vector at lane index {@code N}. 2726 * 2727 * @param species species of desired vector 2728 * @param a the array 2729 * @param offset the offset into the array 2730 * @return the vector loaded from an array 2731 * @throws IndexOutOfBoundsException 2732 * if {@code offset+N < 0} or {@code offset+N >= a.length} 2733 * for any lane {@code N} in the vector 2734 */ 2735 @ForceInline 2736 public static 2737 FloatVector fromArray(VectorSpecies<Float> species, 2738 float[] a, int offset) { 2739 offset = checkFromIndexSize(offset, species.length(), a.length); 2740 FloatSpecies vsp = (FloatSpecies) species; 2741 return vsp.dummyVector().fromArray0(a, offset); 2742 } 2743 2744 /** 2745 * Loads a vector from an array of type {@code float[]} 2746 * starting at an offset and using a mask. 2747 * Lanes where the mask is unset are filled with the default 2748 * value of {@code float} (positive zero). 2749 * For each vector lane, where {@code N} is the vector lane index, 2750 * if the mask lane at index {@code N} is set then the array element at 2751 * index {@code offset + N} is placed into the resulting vector at lane index 2752 * {@code N}, otherwise the default element value is placed into the 2753 * resulting vector at lane index {@code N}. 
2754 * 2755 * @param species species of desired vector 2756 * @param a the array 2757 * @param offset the offset into the array 2758 * @param m the mask controlling lane selection 2759 * @return the vector loaded from an array 2760 * @throws IndexOutOfBoundsException 2761 * if {@code offset+N < 0} or {@code offset+N >= a.length} 2762 * for any lane {@code N} in the vector 2763 * where the mask is set 2764 */ 2765 @ForceInline 2766 public static 2767 FloatVector fromArray(VectorSpecies<Float> species, 2768 float[] a, int offset, 2769 VectorMask<Float> m) { 2770 FloatSpecies vsp = (FloatSpecies) species; 2771 if (offset >= 0 && offset <= (a.length - species.length())) { 2772 return vsp.dummyVector().fromArray0(a, offset, m); 2773 } 2774 2775 // FIXME: optimize 2776 checkMaskFromIndexSize(offset, vsp, m, 1, a.length); 2777 return vsp.vOp(m, i -> a[offset + i]); 2778 } 2779 2780 /** 2781 * Gathers a new vector composed of elements from an array of type 2782 * {@code float[]}, 2783 * using indexes obtained by adding a fixed {@code offset} to a 2784 * series of secondary offsets from an <em>index map</em>. 2785 * The index map is a contiguous sequence of {@code VLENGTH} 2786 * elements in a second array of {@code int}s, starting at a given 2787 * {@code mapOffset}. 2788 * <p> 2789 * For each vector lane, where {@code N} is the vector lane index, 2790 * the lane is loaded from the array 2791 * element {@code a[f(N)]}, where {@code f(N)} is the 2792 * index mapping expression 2793 * {@code offset + indexMap[mapOffset + N]]}. 
     *
     * @param species species of desired vector
     * @param a the array
     * @param offset the offset into the array, may be negative if relative
     * indexes in the index map compensate to produce a value within the
     * array bounds
     * @param indexMap the index map
     * @param mapOffset the offset into the index map
     * @return the vector loaded from the indexed elements of the array
     * @throws IndexOutOfBoundsException
     *         if {@code mapOffset+N < 0}
     *         or if {@code mapOffset+N >= indexMap.length},
     *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
     *         is an invalid index into {@code a},
     *         for any lane {@code N} in the vector
     * @see FloatVector#toIntArray()
     */
    @ForceInline
    public static
    FloatVector fromArray(VectorSpecies<Float> species,
                                   float[] a, int offset,
                                   int[] indexMap, int mapOffset) {
        FloatSpecies vsp = (FloatSpecies) species;
        // The indexes are held in an int vector of the species' index shape.
        IntVector.IntSpecies isp = IntVector.species(vsp.indexShape());
        Objects.requireNonNull(a);
        Objects.requireNonNull(indexMap);
        Class<? extends FloatVector> vectorType = vsp.vectorType();

        // Index vector: vix[0:n] = k -> offset + indexMap[mapOffset + k]
        IntVector vix = IntVector
            .fromArray(isp, indexMap, mapOffset)
            .add(offset);

        // Range-check every computed index against a.length before gathering.
        vix = VectorIntrinsics.checkIndex(vix, a.length);

        // The trailing lambda is the scalar fallback: one array read per lane.
        return VectorSupport.loadWithMap(
            vectorType, null, float.class, vsp.laneCount(),
            isp.vectorType(),
            a, ARRAY_BASE, vix, null,
            a, offset, indexMap, mapOffset, vsp,
            (c, idx, iMap, idy, s, vm) ->
            s.vOp(n -> c[idx + iMap[idy+n]]));
    }

    /**
     * Gathers a new vector composed of elements from an array of type
     * {@code float[]},
     * under the control of a mask, and
     * using indexes obtained by adding a fixed {@code offset} to a
     * series of secondary offsets from an <em>index map</em>.
2844 * The index map is a contiguous sequence of {@code VLENGTH} 2845 * elements in a second array of {@code int}s, starting at a given 2846 * {@code mapOffset}. 2847 * <p> 2848 * For each vector lane, where {@code N} is the vector lane index, 2849 * if the lane is set in the mask, 2850 * the lane is loaded from the array 2851 * element {@code a[f(N)]}, where {@code f(N)} is the 2852 * index mapping expression 2853 * {@code offset + indexMap[mapOffset + N]]}. 2854 * Unset lanes in the resulting vector are set to zero. 2855 * 2856 * @param species species of desired vector 2857 * @param a the array 2858 * @param offset the offset into the array, may be negative if relative 2859 * indexes in the index map compensate to produce a value within the 2860 * array bounds 2861 * @param indexMap the index map 2862 * @param mapOffset the offset into the index map 2863 * @param m the mask controlling lane selection 2864 * @return the vector loaded from the indexed elements of the array 2865 * @throws IndexOutOfBoundsException 2866 * if {@code mapOffset+N < 0} 2867 * or if {@code mapOffset+N >= indexMap.length}, 2868 * or if {@code f(N)=offset+indexMap[mapOffset+N]} 2869 * is an invalid index into {@code a}, 2870 * for any lane {@code N} in the vector 2871 * where the mask is set 2872 * @see FloatVector#toIntArray() 2873 */ 2874 @ForceInline 2875 public static 2876 FloatVector fromArray(VectorSpecies<Float> species, 2877 float[] a, int offset, 2878 int[] indexMap, int mapOffset, 2879 VectorMask<Float> m) { 2880 if (m.allTrue()) { 2881 return fromArray(species, a, offset, indexMap, mapOffset); 2882 } 2883 else { 2884 FloatSpecies vsp = (FloatSpecies) species; 2885 return vsp.dummyVector().fromArray0(a, offset, indexMap, mapOffset, m); 2886 } 2887 } 2888 2889 2890 2891 /** 2892 * Loads a vector from a {@linkplain ByteBuffer byte buffer} 2893 * starting at an offset into the byte buffer. 
* Bytes are composed into primitive lane elements according
 * to the specified byte order.
 * The vector is arranged into lanes according to
 * <a href="Vector.html#lane-order">memory ordering</a>.
 * <p>
 * This method behaves as if it returns the result of calling
 * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
 * fromByteBuffer()} as follows:
 * <pre>{@code
 * var m = species.maskAll(true);
 * return fromByteBuffer(species, bb, offset, bo, m);
 * }</pre>
 *
 * @param species species of desired vector
 * @param bb the byte buffer
 * @param offset the offset into the byte buffer
 * @param bo the intended byte order
 * @return a vector loaded from a byte buffer
 * @throws IndexOutOfBoundsException
 *         if {@code offset+N*4 < 0}
 *         or {@code offset+N*4 >= bb.limit()}
 *         for any lane {@code N} in the vector
 */
@ForceInline
public static
FloatVector fromByteBuffer(VectorSpecies<Float> species,
                           ByteBuffer bb, int offset,
                           ByteOrder bo) {
    // Range-check the whole vector footprint (vectorByteSize() bytes from
    // offset) against bb.limit(); the helper's result replaces offset.
    offset = checkFromIndexSize(offset, species.vectorByteSize(), bb.limit());
    FloatSpecies vsp = (FloatSpecies) species;
    // Load through the unchecked primitive, then let maybeSwap(bo) reorder
    // the lane bytes when bo is not NATIVE_ENDIAN.
    return vsp.dummyVector().fromByteBuffer0(bb, offset).maybeSwap(bo);
}

/**
 * Loads a vector from a {@linkplain ByteBuffer byte buffer}
 * starting at an offset into the byte buffer
 * and using a mask.
 * Lanes where the mask is unset are filled with the default
 * value of {@code float} (positive zero).
 * Bytes are composed into primitive lane elements according
 * to the specified byte order.
 * The vector is arranged into lanes according to
 * <a href="Vector.html#lane-order">memory ordering</a>.
 * <p>
 * The following pseudocode illustrates the behavior:
 * <pre>{@code
 * FloatBuffer eb = bb.duplicate()
 *     .position(offset)
 *     .order(bo).asFloatBuffer();
 * float[] ar = new float[species.length()];
 * for (int n = 0; n < ar.length; n++) {
 *     if (m.laneIsSet(n)) {
 *         ar[n] = eb.get(n);
 *     }
 * }
 * FloatVector r = FloatVector.fromArray(species, ar, 0);
 * }</pre>
 * @implNote
 * This operation is likely to be more efficient if
 * the specified byte order is the same as
 * {@linkplain ByteOrder#nativeOrder()
 * the platform native order},
 * since this method will not need to reorder
 * the bytes of lane values.
 *
 * @param species species of desired vector
 * @param bb the byte buffer
 * @param offset the offset into the byte buffer
 * @param bo the intended byte order
 * @param m the mask controlling lane selection
 * @return a vector loaded from a byte buffer
 * @throws IndexOutOfBoundsException
 *         if {@code offset+N*4 < 0}
 *         or {@code offset+N*4 >= bb.limit()}
 *         for any lane {@code N} in the vector
 *         where the mask is set
 */
@ForceInline
public static
FloatVector fromByteBuffer(VectorSpecies<Float> species,
                           ByteBuffer bb, int offset,
                           ByteOrder bo,
                           VectorMask<Float> m) {
    FloatSpecies vsp = (FloatSpecies) species;
    // Fast path: the whole vector fits in [0, bb.limit()], so the masked
    // primitive can be used without a per-lane range check.
    if (offset >= 0 && offset <= (bb.limit() - species.vectorByteSize())) {
        return vsp.dummyVector().fromByteBuffer0(bb, offset, m).maybeSwap(bo);
    }

    // FIXME: optimize
    // Slow path: range-check under the mask (4 is the byte stride per lane),
    // then read lane by lane with absolute getFloat calls.
    checkMaskFromIndexSize(offset, vsp, m, 4, bb.limit());
    // NOTE(review): wrapper(bb, bo) is defined elsewhere; presumably it yields
    // a view of bb that already uses byte order bo, which would explain why
    // this path does not call maybeSwap -- confirm.
    ByteBuffer wb = wrapper(bb, bo);
    return vsp.ldOp(wb, offset, (AbstractMask<Float>)m,
               (wb_, o, i)  -> wb_.getFloat(o + i * 4));
}

// Memory store operations

/**
 * Stores this vector into an array of type {@code float[]}
 * starting at an offset.
 * <p>
 * For each vector lane, where {@code N} is the vector lane index,
 * the lane element at index {@code N} is stored into the array
 * element {@code a[offset+N]}.
 *
 * @param a the array, of type {@code float[]}
 * @param offset the offset into the array
 * @throws IndexOutOfBoundsException
 *         if {@code offset+N < 0} or {@code offset+N >= a.length}
 *         for any lane {@code N} in the vector
 */
@ForceInline
public final
void intoArray(float[] a, int offset) {
    // Range-check length() consecutive elements starting at offset.
    offset = checkFromIndexSize(offset, length(), a.length);
    FloatSpecies vsp = vspecies();
    // The lambda states the same store one lane at a time:
    // arr_[off_ + i] = e for lane i holding value e.
    VectorSupport.store(
        vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
        a, arrayAddress(a, offset),
        this,
        a, offset,
        (arr, off, v)
        -> v.stOp(arr, off,
                  (arr_, off_, i, e) -> arr_[off_ + i] = e));
}

/**
 * Stores this vector into an array of type {@code float[]}
 * starting at offset and using a mask.
 * <p>
 * For each vector lane, where {@code N} is the vector lane index,
 * the lane element at index {@code N} is stored into the array
 * element {@code a[offset+N]}.
 * If the mask lane at {@code N} is unset then the corresponding
 * array element {@code a[offset+N]} is left unchanged.
 * <p>
 * Array range checking is done for lanes where the mask is set.
 * Lanes where the mask is unset are not stored and do not need
 * to correspond to legitimate elements of {@code a}.
 * That is, unset lanes may correspond to array indexes less than
 * zero or beyond the end of the array.
 *
 * @param a the array, of type {@code float[]}
 * @param offset the offset into the array
 * @param m the mask controlling lane storage
 * @throws IndexOutOfBoundsException
 *         if {@code offset+N < 0} or {@code offset+N >= a.length}
 *         for any lane {@code N} in the vector
 *         where the mask is set
 */
@ForceInline
public final
void intoArray(float[] a, int offset,
               VectorMask<Float> m) {
    if (m.allTrue()) {
        // Every lane is selected: reuse the unmasked store and its check.
        intoArray(a, offset);
    } else {
        FloatSpecies vsp = vspecies();
        // Range-check under the mask (scale 1: one array element per lane),
        // then store through the unchecked masked primitive.
        checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
        intoArray0(a, offset, m);
    }
}

/**
 * Scatters this vector into an array of type {@code float[]}
 * using indexes obtained by adding a fixed {@code offset} to a
 * series of secondary offsets from an <em>index map</em>.
 * The index map is a contiguous sequence of {@code VLENGTH}
 * elements in a second array of {@code int}s, starting at a given
 * {@code mapOffset}.
 * <p>
 * For each vector lane, where {@code N} is the vector lane index,
 * the lane element at index {@code N} is stored into the array
 * element {@code a[f(N)]}, where {@code f(N)} is the
 * index mapping expression
 * {@code offset + indexMap[mapOffset + N]}.
 *
 * @param a the array
 * @param offset an offset to combine with the index map offsets
 * @param indexMap the index map
 * @param mapOffset the offset into the index map
 * @throws IndexOutOfBoundsException
 *         if {@code mapOffset+N < 0}
 *         or if {@code mapOffset+N >= indexMap.length},
 *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
 *         is an invalid index into {@code a},
 *         for any lane {@code N} in the vector
 * @see FloatVector#toIntArray()
 */
@ForceInline
public final
void intoArray(float[] a, int offset,
               int[] indexMap, int mapOffset) {
    FloatSpecies vsp = vspecies();
    IntVector.IntSpecies isp = IntVector.species(vsp.indexShape());
    // Index vector: vix[0:n] = i -> offset + indexMap[mo + i]
    IntVector vix = IntVector
        .fromArray(isp, indexMap, mapOffset)
        .add(offset);

    // Check every computed index against a.length before storing.
    vix = VectorIntrinsics.checkIndex(vix, a.length);

    // Unmasked variant: both the mask-class and the mask argument are null.
    // The inner lambda writes through the outer arr/off rather than its own
    // arr_/off_ parameters.
    VectorSupport.storeWithMap(
        vsp.vectorType(), null, vsp.elementType(), vsp.laneCount(),
        isp.vectorType(),
        a, arrayAddress(a, 0), vix,
        this, null,
        a, offset, indexMap, mapOffset,
        (arr, off, v, map, mo, vm)
        -> v.stOp(arr, off,
                  (arr_, off_, i, e) -> {
                      int j = map[mo + i];
                      arr[off + j] = e;
                  }));
}

/**
 * Scatters this vector into an array of type {@code float[]},
 * under the control of a mask, and
 * using indexes obtained by adding a fixed {@code offset} to a
 * series of secondary offsets from an <em>index map</em>.
 * The index map is a contiguous sequence of {@code VLENGTH}
 * elements in a second array of {@code int}s, starting at a given
 * {@code mapOffset}.
 * <p>
 * For each vector lane, where {@code N} is the vector lane index,
 * if the mask lane at index {@code N} is set then
 * the lane element at index {@code N} is stored into the array
 * element {@code a[f(N)]}, where {@code f(N)} is the
 * index mapping expression
 * {@code offset + indexMap[mapOffset + N]}.
 *
 * @param a the array
 * @param offset an offset to combine with the index map offsets
 * @param indexMap the index map
 * @param mapOffset the offset into the index map
 * @param m the mask
 * @throws IndexOutOfBoundsException
 *         if {@code mapOffset+N < 0}
 *         or if {@code mapOffset+N >= indexMap.length},
 *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
 *         is an invalid index into {@code a},
 *         for any lane {@code N} in the vector
 *         where the mask is set
 * @see FloatVector#toIntArray()
 */
@ForceInline
public final
void intoArray(float[] a, int offset,
               int[] indexMap, int mapOffset,
               VectorMask<Float> m) {
    if (m.allTrue()) {
        // Every lane is selected: the unmasked scatter is equivalent.
        intoArray(a, offset, indexMap, mapOffset);
    }
    else {
        // Index checking for this path happens inside intoArray0.
        intoArray0(a, offset, indexMap, mapOffset, m);
    }
}



/**
 * {@inheritDoc} <!--workaround-->
 */
@Override
@ForceInline
public final
void intoByteArray(byte[] a, int offset,
                   ByteOrder bo) {
    // Range-check byteSize() bytes from offset, swap lane bytes if bo is
    // not NATIVE_ENDIAN, then store through the unchecked primitive.
    offset = checkFromIndexSize(offset, byteSize(), a.length);
    maybeSwap(bo).intoByteArray0(a, offset);
}

/**
 * {@inheritDoc} <!--workaround-->
 */
@Override
@ForceInline
public final
void intoByteArray(byte[] a, int offset,
                   ByteOrder bo,
                   VectorMask<Float> m) {
    if (m.allTrue()) {
        intoByteArray(a, offset, bo);
    } else {
        FloatSpecies vsp = vspecies();
        // Range-check under the mask; 4 is the byte stride per float lane.
        checkMaskFromIndexSize(offset, vsp, m, 4, a.length);
        maybeSwap(bo).intoByteArray0(a, offset, m);
    }
}

/**
 * {@inheritDoc} <!--workaround-->
 */
@Override
@ForceInline
public final
void intoByteBuffer(ByteBuffer bb, int offset,
                    ByteOrder bo) {
    // Reject read-only buffers before any index check or store.
    if (ScopedMemoryAccess.isReadOnly(bb)) {
        throw new ReadOnlyBufferException();
    }
    offset = checkFromIndexSize(offset, byteSize(), bb.limit());
    maybeSwap(bo).intoByteBuffer0(bb, offset);
}

/**
 * {@inheritDoc} <!--workaround-->
 */
@Override
@ForceInline
public final
void intoByteBuffer(ByteBuffer bb, int offset,
                    ByteOrder bo,
                    VectorMask<Float> m) {
    if (m.allTrue()) {
        intoByteBuffer(bb, offset, bo);
    } else {
        // NOTE(review): this path tests bb.isReadOnly() directly, while the
        // unmasked overload goes through ScopedMemoryAccess.isReadOnly(bb);
        // confirm the two are meant to be equivalent.
        if (bb.isReadOnly()) {
            throw new ReadOnlyBufferException();
        }
        FloatSpecies vsp = vspecies();
        // Range-check under the mask; 4 is the byte stride per float lane.
        checkMaskFromIndexSize(offset, vsp, m, 4, bb.limit());
        maybeSwap(bo).intoByteBuffer0(bb, offset, m);
    }
}

// ================================================

// Low-level memory operations.
//
// Note that all of these operations *must* inline into a context
// where the exact species of the involved vector is a
// compile-time constant.  Otherwise, the intrinsic generation
// will fail and performance will suffer.
//
// In many cases this is achieved by re-deriving a version of the
// method in each concrete subclass (per species).  The re-derived
// method simply calls one of these generic methods, with exact
// parameters for the controlling metadata, which is either a
// typed vector or constant species instance.

// Unchecked loading operations in native byte order.
// Caller is responsible for applying index checks, masking, and
// byte swapping.
// Unchecked load of one vector from a[offset...]; per-species subclasses
// supply the body.
/*package-private*/
abstract
FloatVector fromArray0(float[] a, int offset);
// Generic body for fromArray0(float[], int).  The lambda states the load
// one lane at a time: lane i reads arr_[off_ + i].
@ForceInline
final
FloatVector fromArray0Template(float[] a, int offset) {
    FloatSpecies vsp = vspecies();
    return VectorSupport.load(
        vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
        a, arrayAddress(a, offset),
        a, offset, vsp,
        (arr, off, s) -> s.ldOp(arr, off,
                                (arr_, off_, i) -> arr_[off_ + i]));
}

// Unchecked masked load of one vector from a[offset...].
/*package-private*/
abstract
FloatVector fromArray0(float[] a, int offset, VectorMask<Float> m);
// Generic body for the masked array load.  maskClass is passed through to
// VectorSupport.loadMasked; m is first checked against this vector's species.
@ForceInline
final
<M extends VectorMask<Float>>
FloatVector fromArray0Template(Class<M> maskClass, float[] a, int offset, M m) {
    m.check(species());
    FloatSpecies vsp = vspecies();
    return VectorSupport.loadMasked(
        vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
        a, arrayAddress(a, offset), m,
        a, offset, vsp,
        (arr, off, s, vm) -> s.ldOp(arr, off, vm,
                                    (arr_, off_, i) -> arr_[off_ + i]));
}

// Masked gather: lane N reads a[offset + indexMap[mapOffset + N]].
/*package-private*/
abstract
FloatVector fromArray0(float[] a, int offset,
                       int[] indexMap, int mapOffset,
                       VectorMask<Float> m);
// Generic body for the masked gather.  Unlike the contiguous loads above,
// this one null-checks its arrays and range-checks the computed indexes itself.
@ForceInline
final
<M extends VectorMask<Float>>
FloatVector fromArray0Template(Class<M> maskClass, float[] a, int offset,
                               int[] indexMap, int mapOffset, M m) {
    FloatSpecies vsp = vspecies();
    IntVector.IntSpecies isp = IntVector.species(vsp.indexShape());
    Objects.requireNonNull(a);
    Objects.requireNonNull(indexMap);
    m.check(vsp);
    Class<? extends FloatVector> vectorType = vsp.vectorType();

    // Index vector: vix[0:n] = k -> offset + indexMap[mapOffset + k]
    IntVector vix = IntVector
        .fromArray(isp, indexMap, mapOffset)
        .add(offset);

    // FIXME: Check index under mask controlling.
    // As written, every lane's index is checked against a.length; the mask
    // is not an argument of this check.
    vix = VectorIntrinsics.checkIndex(vix, a.length);

    // The base address is ARRAY_BASE (element 0); per-lane positions come
    // from vix.  The lambda reads c[idx + iMap[idy+n]] for lanes set in vm.
    return VectorSupport.loadWithMap(
        vectorType, maskClass, float.class, vsp.laneCount(),
        isp.vectorType(),
        a, ARRAY_BASE, vix, m,
        a, offset, indexMap, mapOffset, vsp,
        (c, idx, iMap, idy, s, vm) ->
        s.vOp(vm, n -> c[idx + iMap[idy+n]]));
}



// Unchecked load of one vector from the bytes of a[offset...].
@Override
abstract
FloatVector fromByteArray0(byte[] a, int offset);
// Generic body for the byte-array load.  The lambda wraps the array with
// NATIVE_ENDIAN and reads lane i with getFloat at byte index o + i * 4.
@ForceInline
final
FloatVector fromByteArray0Template(byte[] a, int offset) {
    FloatSpecies vsp = vspecies();
    return VectorSupport.load(
        vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
        a, byteArrayAddress(a, offset),
        a, offset, vsp,
        (arr, off, s) -> {
            ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
            return s.ldOp(wb, off,
                    (wb_, o, i) -> wb_.getFloat(o + i * 4));
        });
}

// Unchecked masked load from the bytes of a[offset...].
abstract
FloatVector fromByteArray0(byte[] a, int offset, VectorMask<Float> m);
// Generic body for the masked byte-array load; m is checked against the
// species before use.
@ForceInline
final
<M extends VectorMask<Float>>
FloatVector fromByteArray0Template(Class<M> maskClass, byte[] a, int offset, M m) {
    FloatSpecies vsp = vspecies();
    m.check(vsp);
    return VectorSupport.loadMasked(
        vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
        a, byteArrayAddress(a, offset), m,
        a, offset, vsp,
        (arr, off, s, vm) -> {
            ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
            return s.ldOp(wb, off, vm,
                    (wb_, o, i) -> wb_.getFloat(o + i * 4));
        });
}

// Unchecked load of one vector from bb at byte position offset.
abstract
FloatVector fromByteBuffer0(ByteBuffer bb, int offset);
// Generic body for the byte-buffer load.  It goes through ScopedMemoryAccess
// rather than VectorSupport; the lambda has the same lane-by-lane shape as
// the byte-array case.
@ForceInline
final
FloatVector fromByteBuffer0Template(ByteBuffer bb, int offset) {
    FloatSpecies vsp = vspecies();
    return ScopedMemoryAccess.loadFromByteBuffer(
            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
            bb, offset, vsp,
            (buf, off, s) -> {
                ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
                return s.ldOp(wb, off,
                        (wb_, o, i) -> wb_.getFloat(o + i * 4));
            });
}

// Unchecked masked load from bb at byte position offset.
abstract
FloatVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Float> m);
// Generic body for the masked byte-buffer load; m is checked against the
// species before use.
@ForceInline
final
<M extends VectorMask<Float>>
FloatVector fromByteBuffer0Template(Class<M> maskClass, ByteBuffer bb, int offset, M m) {
    FloatSpecies vsp = vspecies();
    m.check(vsp);
    return ScopedMemoryAccess.loadFromByteBufferMasked(
            vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
            bb, offset, m, vsp,
            (buf, off, s, vm) -> {
                ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
                return s.ldOp(wb, off, vm,
                        (wb_, o, i) -> wb_.getFloat(o + i * 4));
            });
}

// Unchecked storing operations in native byte order.
// Caller is responsible for applying index checks, masking, and
// byte swapping.

// Unchecked store of this vector into a[offset...].
abstract
void intoArray0(float[] a, int offset);
// Generic body for the array store.  The lambda writes lane i, holding
// value e, to arr_[off_+i].
@ForceInline
final
void intoArray0Template(float[] a, int offset) {
    FloatSpecies vsp = vspecies();
    VectorSupport.store(
        vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
        a, arrayAddress(a, offset),
        this, a, offset,
        (arr, off, v)
        -> v.stOp(arr, off,
                  (arr_, off_, i, e) -> arr_[off_+i] = e));
}

// Unchecked masked store of this vector into a[offset...].
abstract
void intoArray0(float[] a, int offset, VectorMask<Float> m);
// Generic body for the masked array store; m is checked against this
// vector's species before use.
@ForceInline
final
<M extends VectorMask<Float>>
void intoArray0Template(Class<M> maskClass, float[] a, int offset, M m) {
    m.check(species());
    FloatSpecies vsp = vspecies();
    VectorSupport.storeMasked(
        vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
        a, arrayAddress(a, offset),
        this, m, a, offset,
        (arr, off, v, vm)
        -> v.stOp(arr, off, vm,
                  (arr_, off_, i, e) -> arr_[off_ + i] = e));
}

// Masked scatter: lane N writes a[offset + indexMap[mapOffset + N]].
abstract
void intoArray0(float[] a, int offset,
                int[] indexMap, int mapOffset,
                VectorMask<Float> m);
// Generic body for the masked scatter.  It builds the index vector and
// range-checks it here, because the public masked scatter does no checking
// of its own before delegating.
@ForceInline
final
<M extends VectorMask<Float>>
void intoArray0Template(Class<M> maskClass, float[] a, int offset,
                        int[] indexMap, int mapOffset, M m) {
    m.check(species());
    FloatSpecies vsp = vspecies();
    IntVector.IntSpecies isp = IntVector.species(vsp.indexShape());
    // Index vector: vix[0:n] = i -> offset + indexMap[mo + i]
    IntVector vix = IntVector
        .fromArray(isp, indexMap, mapOffset)
        .add(offset);

    // FIXME: Check index under mask controlling.
    // As written, every lane's index is checked against a.length; the mask
    // is not an argument of this check.
    vix = VectorIntrinsics.checkIndex(vix, a.length);

    // The inner lambda writes through the outer arr/off rather than its own
    // arr_/off_ parameters.
    VectorSupport.storeWithMap(
        vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
        isp.vectorType(),
        a, arrayAddress(a, 0), vix,
        this, m,
        a, offset, indexMap, mapOffset,
        (arr, off, v, map, mo, vm)
        -> v.stOp(arr, off, vm,
                  (arr_, off_, i, e) -> {
                      int j = map[mo + i];
                      arr[off + j] = e;
                  }));
}


// Unchecked store of this vector into the bytes of a[offset...].
abstract
void intoByteArray0(byte[] a, int offset);
// Generic body for the byte-array store.  The lambda wraps the array with
// NATIVE_ENDIAN and writes lane i with putFloat at byte index o + i * 4.
@ForceInline
final
void intoByteArray0Template(byte[] a, int offset) {
    FloatSpecies vsp = vspecies();
    VectorSupport.store(
        vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
        a, byteArrayAddress(a, offset),
        this, a, offset,
        (arr, off, v) -> {
            ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
            v.stOp(wb, off,
                    (tb_, o, i, e) -> tb_.putFloat(o + i * 4, e));
        });
}

// Unchecked masked store into the bytes of a[offset...].
abstract
void intoByteArray0(byte[] a, int offset, VectorMask<Float> m);
// Generic body for the masked byte-array store; m is checked against the
// species before use.
@ForceInline
final
<M extends VectorMask<Float>>
void intoByteArray0Template(Class<M> maskClass, byte[] a, int offset, M m) {
    FloatSpecies vsp = vspecies();
    m.check(vsp);
    VectorSupport.storeMasked(
        vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
        a, byteArrayAddress(a, offset),
        this, m, a, offset,
        (arr, off, v, vm) -> {
            ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
            v.stOp(wb, off, vm,
                    (tb_, o, i, e) -> tb_.putFloat(o + i * 4, e));
        });
}

// Unchecked store of this vector into bb at byte position offset.
// Unlike its siblings, this one is a concrete final method with no
// abstract declaration and no *Template twin in this section.
@ForceInline
final
void intoByteBuffer0(ByteBuffer bb, int offset) {
    FloatSpecies vsp = vspecies();
    ScopedMemoryAccess.storeIntoByteBuffer(
            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
            this, bb, offset,
            (buf, off, v) -> {
                ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
                v.stOp(wb, off,
                        (wb_, o, i, e) -> wb_.putFloat(o + i * 4, e));
            });
}

// Unchecked masked store into bb at byte position offset.
abstract
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Float> m);
// Generic body for the masked byte-buffer store; m is checked against the
// species before use.
@ForceInline
final
<M extends VectorMask<Float>>
void intoByteBuffer0Template(Class<M> maskClass, ByteBuffer bb, int offset, M m) {
    FloatSpecies vsp = vspecies();
    m.check(vsp);
    ScopedMemoryAccess.storeIntoByteBufferMasked(
            vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
            this, m, bb, offset,
            (buf, off, v, vm) -> {
                ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
                v.stOp(wb, off, vm,
                        (wb_, o, i, e) -> wb_.putFloat(o + i * 4, e));
            });
}


// End of low-level memory operations.
// Range check for masked memory access.  Delegates to
// AbstractMask.checkIndexByLane with the starting offset, the limit, the
// species' iota vector and the per-lane scale (callers pass 1 for float[]
// elements and 4 for byte-addressed storage).
// NOTE(review): checkIndexByLane is defined elsewhere; the callers' Javadoc
// implies it throws IndexOutOfBoundsException only for set lanes -- confirm.
private static
void checkMaskFromIndexSize(int offset,
                            FloatSpecies vsp,
                            VectorMask<Float> m,
                            int scale,
                            int limit) {
    ((AbstractMask<Float>)m)
        .checkIndexByLane(offset, limit, vsp.iota(), scale);
}

// Throws AssertionError when the full-vector range starting at offset
// (laneCount() * scale units long) is not contained in [0, limit].
// The mask m is used only in the message.  No caller is visible in this
// part of the file.
@ForceInline
private void conditionalStoreNYI(int offset,
                                 FloatSpecies vsp,
                                 VectorMask<Float> m,
                                 int scale,
                                 int limit) {
    if (offset < 0 || offset + vsp.laneCount() * scale > limit) {
        String msg =
            String.format("unimplemented: store @%d in [0..%d), %s in %s",
                          offset, limit, m, vsp);
        throw new AssertionError(msg);
    }
}

// Returns this vector unchanged when bo is NATIVE_ENDIAN; otherwise views
// it as bytes, rearranges them with swapBytesShuffle(), and views the
// result as floats again.
/*package-private*/
@Override
@ForceInline
final
FloatVector maybeSwap(ByteOrder bo) {
    if (bo != NATIVE_ENDIAN) {
        return this.reinterpretAsBytes()
            .rearrange(swapBytesShuffle())
            .reinterpretAsFloats();
    }
    return this;
}

// 31 - numberOfLeadingZeros(x) is the position of the highest set bit of x,
// i.e. log2 of the float[] index scale when that scale is a power of two.
static final int ARRAY_SHIFT =
    31 - Integer.numberOfLeadingZeros(Unsafe.ARRAY_FLOAT_INDEX_SCALE);
// Unsafe base offset of element 0 in a float[].
static final long ARRAY_BASE =
    Unsafe.ARRAY_FLOAT_BASE_OFFSET;

// Unsafe-style offset of a[index]: ARRAY_BASE plus index scaled by the
// element size.  index is widened to long before the shift.  The array
// argument itself is not used in the computation.
@ForceInline
static long arrayAddress(float[] a, int index) {
    return ARRAY_BASE + (((long)index) << ARRAY_SHIFT);
}



// Unsafe-style offset of a[index] in a byte[]: the byte-array base offset
// plus index.  The array argument itself is not used in the computation.
@ForceInline
static long byteArrayAddress(byte[] a, int index) {
    return Unsafe.ARRAY_BYTE_BASE_OFFSET + index;
}

// ================================================

/// Reinterpreting view methods:
//   lanewise reinterpret: viewAsXVector()
//   keep shape, redraw lanes: reinterpretAsEs()

/**
 * {@inheritDoc} <!--workaround-->
 */
@ForceInline
@Override
public final ByteVector reinterpretAsBytes() {
     // Going to ByteVector, pay close attention to byte order.
     // The raw view is returned as-is; the assert documents that this
     // relies on REGISTER_ENDIAN being little-endian.
     assert(REGISTER_ENDIAN == ByteOrder.LITTLE_ENDIAN);
     return asByteVectorRaw();
     //return asByteVectorRaw().rearrange(swapBytesShuffle());
}

/**
 * {@inheritDoc} <!--workaround-->
 */
@ForceInline
@Override
public final IntVector viewAsIntegralLanes() {
    // The integral counterpart of the float lane type; the result is cast
    // to IntVector.
    LaneType ilt = LaneType.FLOAT.asIntegral();
    return (IntVector) asVectorRaw(ilt);
}

/**
 * {@inheritDoc} <!--workaround-->
 */
@ForceInline
@Override
public final
FloatVector
viewAsFloatingLanes() {
    // Already a floating-point vector: nothing to reinterpret.
    return this;
}

// ================================================

/// Object methods: toString, equals, hashCode
//
// Object methods are defined as if via Arrays.toString, etc.,
// is applied to the array of elements.  Two equal vectors
// are required to have equal species and equal lane values.

/**
 * Returns a string representation of this vector, of the form
 * {@code "[0,1,2...]"}, reporting the lane values of this vector,
 * in lane order.
 *
 * The string is produced as if by a call to {@link
 * java.util.Arrays#toString(float[]) Arrays.toString()},
 * as appropriate to the {@code float} array returned by
 * {@link #toArray this.toArray()}.
 *
 * @return a string of the form {@code "[0,1,2...]"}
 * reporting the lane values of this vector
 */
@Override
@ForceInline
public final
String toString() {
    // now that toArray is strongly typed, we can define this
    return Arrays.toString(toArray());
}

/**
 * {@inheritDoc} <!--workaround-->
 */
@Override
@ForceInline
public final
boolean equals(Object obj) {
    // Equal only to another Vector of an equal species whose lanes all
    // compare equal under eq().
    // NOTE(review): hashCode() below hashes the lane array with
    // Arrays.hashCode while this compares lanes with eq(); if eq() follows
    // float == semantics, lanes holding NaN or +0.0/-0.0 would make the two
    // disagree -- confirm.
    if (obj instanceof Vector) {
        Vector<?> that = (Vector<?>) obj;
        if (this.species().equals(that.species())) {
            return this.eq(that.check(this.species())).allTrue();
        }
    }
    return false;
}

/**
 * {@inheritDoc} <!--workaround-->
 */
@Override
@ForceInline
public final
int hashCode() {
    // now that toArray is strongly typed, we can define this
    return Objects.hash(species(), Arrays.hashCode(toArray()));
}

// ================================================

// Species

/**
 * Class representing {@link FloatVector}s of the same {@link VectorShape VectorShape}.
 */
/*package-private*/
static final class FloatSpecies extends AbstractSpecies<Float> {
    // Private: instances are created only for the SPECIES_* constants of
    // the enclosing class.
    private FloatSpecies(VectorShape shape,
            Class<? extends FloatVector> vectorType,
            Class<? extends AbstractMask<Float>> maskType,
            Function<Object, FloatVector> vectorFactory) {
        super(shape, LaneType.of(float.class),
              vectorType, maskType,
              vectorFactory);
        assert(this.elementSize() == Float.SIZE);
    }

    // Specializing overrides:

    @Override
    @ForceInline
    public final Class<Float> elementType() {
        return float.class;
    }

    @Override
    @ForceInline
    final Class<Float> genericElementType() {
        return Float.class;
    }

    @SuppressWarnings("unchecked")
    @Override
    @ForceInline
    public final Class<? extends FloatVector> vectorType() {
        return (Class<? extends FloatVector>) vectorType;
    }

    // Returns e unchanged; longToElementBits is called only so that it can
    // throw when e fails its round-trip check.
    @Override
    @ForceInline
    public final long checkValue(long e) {
        longToElementBits(e);  // only for exception
        return e;
    }

    // Builds a vector whose lanes all carry the given bits.  The lambda
    // states the same thing lane by lane: rvOp(i -> bits_).
    /*package-private*/
    @Override
    @ForceInline
    final FloatVector broadcastBits(long bits) {
        return (FloatVector)
            VectorSupport.fromBitsCoerced(
                vectorType, float.class, laneCount,
                bits, MODE_BROADCAST, this,
                (bits_, s_) -> s_.rvOp(i -> bits_));
    }

    // Broadcasts a float value via its bit pattern.
    /*package-private*/
    @ForceInline
    final FloatVector broadcast(float e) {
        return broadcastBits(toBits(e));
    }

    // Broadcasts a long value; throws (via longToElementBits) when the
    // value fails the float round-trip check.
    @Override
    @ForceInline
    public final FloatVector broadcast(long e) {
        return broadcastBits(longToElementBits(e));
    }

    // Converts value to float and returns that float's bits, throwing
    // badElementBits when casting the float back to long does not give
    // value again.
    // NOTE(review): float-to-long casts saturate, so Long.MAX_VALUE
    // (which rounds up to 2^63 as a float) casts back to Long.MAX_VALUE and
    // passes this check although the conversion is not exact -- confirm
    // whether that is intended.
    /*package-private*/
    final @Override
    @ForceInline
    long longToElementBits(long value) {
        // Do the conversion, and then test it for failure.
        float e = (float) value;
        if ((long) e != value) {
            throw badElementBits(value, e);
        }
        return toBits(e);
    }

    // Converts e to int (when convertToInt is true) or to long, throwing
    // badArrayBits when converting the result back to float does not
    // reproduce e.
    /*package-private*/
    @ForceInline
    static long toIntegralChecked(float e, boolean convertToInt) {
        long value = convertToInt ? (int) e : (long) e;
        if ((float) value != e) {
            throw badArrayBits(e, convertToInt, value);
        }
        return value;
    }

    // Builds a vector from int lane values.  values.length is checked
    // against laneCount first; each value must survive an int -> float -> int
    // round trip or badElementBits is thrown.
    // NOTE(review): the same saturating-cast caveat as longToElementBits
    // applies to Integer.MAX_VALUE here -- confirm.
    /* this non-public one is for internal conversions */
    @Override
    @ForceInline
    final FloatVector fromIntValues(int[] values) {
        VectorIntrinsics.requireLength(values.length, laneCount);
        float[] va = new float[laneCount()];
        for (int i = 0; i < va.length; i++) {
            int lv = values[i];
            float v = (float) lv;
            va[i] = v;
            if ((int)v != lv) {
                throw badElementBits(lv, v);
            }
        }
        return dummyVector().fromArray0(va, 0);
    }

    // Virtual constructors

    // Object-typed entry point: a is cast to float[] (ClassCastException
    // otherwise) and forwarded to FloatVector.fromArray.
    @ForceInline
    @Override final
    public FloatVector fromArray(Object a, int offset) {
        // User entry point:  Be careful with inputs.
        return FloatVector
            .fromArray(this, (float[]) a, offset);
    }

    // Narrows the inherited dummyVector() result to FloatVector.
    @ForceInline
    @Override final
    FloatVector dummyVector() {
        return (FloatVector) super.dummyVector();
    }

    // Builds a vector lane by lane from raw bits: f.apply(i) is truncated
    // to int and converted with fromBits.
    /*package-private*/
    final @Override
    @ForceInline
    FloatVector rvOp(RVOp f) {
        float[] res = new float[laneCount()];
        for (int i = 0; i < res.length; i++) {
            int bits = (int) f.apply(i);
            res[i] = fromBits(bits);
        }
        return dummyVector().vectorFactory(res);
    }

    // Builds a vector lane by lane: lane i is f.apply(i).
    FloatVector vOp(FVOp f) {
        float[] res = new float[laneCount()];
        for (int i = 0; i < res.length; i++) {
            res[i] = f.apply(i);
        }
        return dummyVector().vectorFactory(res);
    }

    // Masked variant: f is applied only for lanes set in m; the other
    // lanes keep the array default (0.0f).
    FloatVector vOp(VectorMask<Float> m, FVOp f) {
        float[] res = new float[laneCount()];
        boolean[] mbits = ((AbstractMask<Float>)m).getBits();
        for (int i = 0; i < res.length; i++) {
            if (mbits[i]) {
                res[i] = f.apply(i);
            }
        }
        return dummyVector().vectorFactory(res);
    }

    // The four helpers below forward to the same-named operation on
    // dummyVector().

    /*package-private*/
    @ForceInline
    <M> FloatVector ldOp(M memory, int offset,
                                  FLdOp<M> f) {
        return dummyVector().ldOp(memory, offset, f);
    }

    /*package-private*/
    @ForceInline
    <M> FloatVector ldOp(M memory, int offset,
                                  VectorMask<Float> m,
                                  FLdOp<M> f) {
        return dummyVector().ldOp(memory, offset, m, f);
    }

    /*package-private*/
    @ForceInline
    <M> void stOp(M memory, int offset, FStOp<M> f) {
        dummyVector().stOp(memory, offset, f);
    }

    /*package-private*/
    @ForceInline
    <M> void stOp(M memory, int offset,
                  AbstractMask<Float> m,
                  FStOp<M> f) {
        dummyVector().stOp(memory, offset, m, f);
    }

    // N.B. Make sure these constant vectors and
    // masks load up correctly into registers.
    //
    // Also, see if we can avoid all that switching.
    // Could we cache both vectors and both masks in
    // this species object?

    // Zero and iota vector access
    // In each accessor the FloatMaxVector case is tested by class first;
    // the remaining shapes are selected by bit size.
    @Override
    @ForceInline
    public final FloatVector zero() {
        if ((Class<?>) vectorType() == FloatMaxVector.class)
            return FloatMaxVector.ZERO;
        switch (vectorBitSize()) {
            case 64: return Float64Vector.ZERO;
            case 128: return Float128Vector.ZERO;
            case 256: return Float256Vector.ZERO;
            case 512: return Float512Vector.ZERO;
        }
        throw new AssertionError();
    }

    @Override
    @ForceInline
    public final FloatVector iota() {
        if ((Class<?>) vectorType() == FloatMaxVector.class)
            return FloatMaxVector.IOTA;
        switch (vectorBitSize()) {
            case 64: return Float64Vector.IOTA;
            case 128: return Float128Vector.IOTA;
            case 256: return Float256Vector.IOTA;
            case 512: return Float512Vector.IOTA;
        }
        throw new AssertionError();
    }

    // Mask access
    @Override
    @ForceInline
    public final VectorMask<Float> maskAll(boolean bit) {
        if ((Class<?>) vectorType() == FloatMaxVector.class)
            return FloatMaxVector.FloatMaxMask.maskAll(bit);
        switch (vectorBitSize()) {
            case 64: return Float64Vector.Float64Mask.maskAll(bit);
            case 128: return Float128Vector.Float128Mask.maskAll(bit);
            case 256: return Float256Vector.Float256Mask.maskAll(bit);
            case 512: return Float512Vector.Float512Mask.maskAll(bit);
        }
        throw new AssertionError();
    }
}

/**
 * Finds a species for an element type of {@code float} and shape.
 *
 * @param s the shape
 * @return a species for an element type of {@code float} and shape
 * @throws IllegalArgumentException if no such species exists for the shape
 */
static FloatSpecies species(VectorShape s) {
    Objects.requireNonNull(s);
    switch (s.switchKey) {
        case VectorShape.SK_64_BIT: return (FloatSpecies) SPECIES_64;
        case VectorShape.SK_128_BIT: return (FloatSpecies) SPECIES_128;
        case VectorShape.SK_256_BIT: return (FloatSpecies) SPECIES_256;
        case VectorShape.SK_512_BIT: return (FloatSpecies) SPECIES_512;
        case VectorShape.SK_Max_BIT: return (FloatSpecies) SPECIES_MAX;
        default: throw new IllegalArgumentException("Bad shape: " + s);
    }
}

/** Species representing {@link FloatVector}s of {@link VectorShape#S_64_BIT VectorShape.S_64_BIT}. */
public static final VectorSpecies<Float> SPECIES_64
    = new FloatSpecies(VectorShape.S_64_BIT,
                        Float64Vector.class,
                        Float64Vector.Float64Mask.class,
                        Float64Vector::new);

/** Species representing {@link FloatVector}s of {@link VectorShape#S_128_BIT VectorShape.S_128_BIT}. */
public static final VectorSpecies<Float> SPECIES_128
    = new FloatSpecies(VectorShape.S_128_BIT,
                        Float128Vector.class,
                        Float128Vector.Float128Mask.class,
                        Float128Vector::new);

/** Species representing {@link FloatVector}s of {@link VectorShape#S_256_BIT VectorShape.S_256_BIT}. */
public static final VectorSpecies<Float> SPECIES_256
    = new FloatSpecies(VectorShape.S_256_BIT,
                        Float256Vector.class,
                        Float256Vector.Float256Mask.class,
                        Float256Vector::new);

/** Species representing {@link FloatVector}s of {@link VectorShape#S_512_BIT VectorShape.S_512_BIT}. */
public static final VectorSpecies<Float> SPECIES_512
    = new FloatSpecies(VectorShape.S_512_BIT,
                        Float512Vector.class,
                        Float512Vector.Float512Mask.class,
                        Float512Vector::new);

/** Species representing {@link FloatVector}s of {@link VectorShape#S_Max_BIT VectorShape.S_Max_BIT}. */
public static final VectorSpecies<Float> SPECIES_MAX
    = new FloatSpecies(VectorShape.S_Max_BIT,
                        FloatMaxVector.class,
                        FloatMaxVector.FloatMaxMask.class,
                        FloatMaxVector::new);

/**
 * Preferred species for {@link FloatVector}s.
 * A preferred species is a species of maximal bit-size for the platform.
 */
public static final VectorSpecies<Float> SPECIES_PREFERRED
    = (FloatSpecies) VectorSpecies.ofPreferred(float.class);
} // closes the enclosing class (FloatVector, per the file header)