1 /*
   2  * Copyright (c) 2018, 2021, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 package org.openjdk.bench.jdk.incubator.vector.operation;
  25 
  26 // -- This file was mechanically generated: Do not edit! -- //
  27 
  28 import jdk.incubator.vector.Vector;
  29 import jdk.incubator.vector.VectorMask;
  30 import jdk.incubator.vector.VectorOperators;
  31 import jdk.incubator.vector.VectorShape;
  32 import jdk.incubator.vector.VectorSpecies;
  33 import jdk.incubator.vector.VectorShuffle;
  34 import jdk.incubator.vector.ByteVector;
  35 
  36 import java.util.concurrent.TimeUnit;
  37 import java.util.function.BiFunction;
  38 import java.util.function.IntFunction;
  39 
  40 import org.openjdk.jmh.annotations.*;
  41 import org.openjdk.jmh.infra.Blackhole;
  42 
  43 @BenchmarkMode(Mode.Throughput)
  44 @OutputTimeUnit(TimeUnit.MILLISECONDS)
  45 @State(Scope.Benchmark)
  46 @Warmup(iterations = 3, time = 1)
  47 @Measurement(iterations = 5, time = 1)
  48 @Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
  49 public class Byte128Vector extends AbstractVectorBenchmark {
  50     static final VectorSpecies<Byte> SPECIES = ByteVector.SPECIES_128;
  51 
  52     static final int INVOC_COUNT = 1; // get rid of outer loop
  53 
  54     static void replaceZero(byte[] a, byte v) {
  55         for (int i = 0; i < a.length; i++) {
  56             if (a[i] == 0) {
  57                 a[i] = v;
  58             }
  59         }
  60     }
  61 
  62     static void replaceZero(byte[] a, boolean[] mask, byte v) {
  63         for (int i = 0; i < a.length; i++) {
  64             if (mask[i % mask.length] && a[i] == 0) {
  65                 a[i] = v;
  66             }
  67         }
  68     }
  69 
  70     @Param("1024")
  71     int size;
  72 
  73     byte[] fill(IntFunction<Byte> f) {
  74         byte[] array = new byte[size];
  75         for (int i = 0; i < array.length; i++) {
  76             array[i] = f.apply(i);
  77         }
  78         return array;
  79     }
  80 
  81     byte[] a, b, c, r;
  82     boolean[] m, mt, rm;
  83     int[] s;
  84 
  85     @Setup
  86     public void init() {
  87         size += size % SPECIES.length(); // FIXME: add post-loops
  88 
  89         a = fill(i -> (byte)(2*i));
  90         b = fill(i -> (byte)(i+1));
  91         c = fill(i -> (byte)(i+5));
  92         r = fill(i -> (byte)0);
  93 
  94         m = fillMask(size, i -> (i % 2) == 0);
  95         mt = fillMask(size, i -> true);
  96         rm = fillMask(size, i -> false);
  97 
  98         s = fillInt(size, i -> RANDOM.nextInt(SPECIES.length()));
  99     }
 100 
 101     final IntFunction<byte[]> fa = vl -> a;
 102     final IntFunction<byte[]> fb = vl -> b;
 103     final IntFunction<byte[]> fc = vl -> c;
 104     final IntFunction<byte[]> fr = vl -> r;
 105     final IntFunction<boolean[]> fm = vl -> m;
 106     final IntFunction<boolean[]> fmt = vl -> mt;
 107     final IntFunction<boolean[]> fmr = vl -> rm;
 108     final BiFunction<Integer,Integer,int[]> fs = (i,j) -> s;
 109 
 110 
 111     @Benchmark
 112     public void ADD(Blackhole bh) {
 113         byte[] a = fa.apply(SPECIES.length());
 114         byte[] b = fb.apply(SPECIES.length());
 115         byte[] r = fr.apply(SPECIES.length());
 116 
 117         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 118             for (int i = 0; i < a.length; i += SPECIES.length()) {
 119                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 120                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 121                 av.lanewise(VectorOperators.ADD, bv).intoArray(r, i);
 122             }
 123         }
 124 
 125         bh.consume(r);
 126     }
 127 
 128     @Benchmark
 129     public void ADDMasked(Blackhole bh) {
 130         byte[] a = fa.apply(SPECIES.length());
 131         byte[] b = fb.apply(SPECIES.length());
 132         byte[] r = fr.apply(SPECIES.length());
 133         boolean[] mask = fm.apply(SPECIES.length());
 134         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 135 
 136         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 137             for (int i = 0; i < a.length; i += SPECIES.length()) {
 138                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 139                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 140                 av.lanewise(VectorOperators.ADD, bv, vmask).intoArray(r, i);
 141             }
 142         }
 143 
 144         bh.consume(r);
 145     }
 146 
 147     @Benchmark
 148     public void SUB(Blackhole bh) {
 149         byte[] a = fa.apply(SPECIES.length());
 150         byte[] b = fb.apply(SPECIES.length());
 151         byte[] r = fr.apply(SPECIES.length());
 152 
 153         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 154             for (int i = 0; i < a.length; i += SPECIES.length()) {
 155                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 156                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 157                 av.lanewise(VectorOperators.SUB, bv).intoArray(r, i);
 158             }
 159         }
 160 
 161         bh.consume(r);
 162     }
 163 
 164     @Benchmark
 165     public void SUBMasked(Blackhole bh) {
 166         byte[] a = fa.apply(SPECIES.length());
 167         byte[] b = fb.apply(SPECIES.length());
 168         byte[] r = fr.apply(SPECIES.length());
 169         boolean[] mask = fm.apply(SPECIES.length());
 170         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 171 
 172         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 173             for (int i = 0; i < a.length; i += SPECIES.length()) {
 174                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 175                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 176                 av.lanewise(VectorOperators.SUB, bv, vmask).intoArray(r, i);
 177             }
 178         }
 179 
 180         bh.consume(r);
 181     }
 182 
 183     @Benchmark
 184     public void MUL(Blackhole bh) {
 185         byte[] a = fa.apply(SPECIES.length());
 186         byte[] b = fb.apply(SPECIES.length());
 187         byte[] r = fr.apply(SPECIES.length());
 188 
 189         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 190             for (int i = 0; i < a.length; i += SPECIES.length()) {
 191                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 192                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 193                 av.lanewise(VectorOperators.MUL, bv).intoArray(r, i);
 194             }
 195         }
 196 
 197         bh.consume(r);
 198     }
 199 
 200     @Benchmark
 201     public void MULMasked(Blackhole bh) {
 202         byte[] a = fa.apply(SPECIES.length());
 203         byte[] b = fb.apply(SPECIES.length());
 204         byte[] r = fr.apply(SPECIES.length());
 205         boolean[] mask = fm.apply(SPECIES.length());
 206         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 207 
 208         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 209             for (int i = 0; i < a.length; i += SPECIES.length()) {
 210                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 211                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 212                 av.lanewise(VectorOperators.MUL, bv, vmask).intoArray(r, i);
 213             }
 214         }
 215 
 216         bh.consume(r);
 217     }
 218 
 219 
 220 
 221 
 222     @Benchmark
 223     public void DIV(Blackhole bh) {
 224         byte[] a = fa.apply(SPECIES.length());
 225         byte[] b = fb.apply(SPECIES.length());
 226         byte[] r = fr.apply(SPECIES.length());
 227 
 228         replaceZero(b, (byte) 1);
 229 
 230         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 231             for (int i = 0; i < a.length; i += SPECIES.length()) {
 232                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 233                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 234                 av.lanewise(VectorOperators.DIV, bv).intoArray(r, i);
 235             }
 236         }
 237 
 238         bh.consume(r);
 239     }
 240 
 241 
 242 
 243     @Benchmark
 244     public void DIVMasked(Blackhole bh) {
 245         byte[] a = fa.apply(SPECIES.length());
 246         byte[] b = fb.apply(SPECIES.length());
 247         byte[] r = fr.apply(SPECIES.length());
 248         boolean[] mask = fm.apply(SPECIES.length());
 249         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 250 
 251         replaceZero(b, mask, (byte) 1);
 252 
 253         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 254             for (int i = 0; i < a.length; i += SPECIES.length()) {
 255                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 256                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 257                 av.lanewise(VectorOperators.DIV, bv, vmask).intoArray(r, i);
 258             }
 259         }
 260 
 261         bh.consume(r);
 262     }
 263 
 264 
 265     @Benchmark
 266     public void FIRST_NONZERO(Blackhole bh) {
 267         byte[] a = fa.apply(SPECIES.length());
 268         byte[] b = fb.apply(SPECIES.length());
 269         byte[] r = fr.apply(SPECIES.length());
 270 
 271         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 272             for (int i = 0; i < a.length; i += SPECIES.length()) {
 273                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 274                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 275                 av.lanewise(VectorOperators.FIRST_NONZERO, bv).intoArray(r, i);
 276             }
 277         }
 278 
 279         bh.consume(r);
 280     }
 281 
 282     @Benchmark
 283     public void FIRST_NONZEROMasked(Blackhole bh) {
 284         byte[] a = fa.apply(SPECIES.length());
 285         byte[] b = fb.apply(SPECIES.length());
 286         byte[] r = fr.apply(SPECIES.length());
 287         boolean[] mask = fm.apply(SPECIES.length());
 288         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 289 
 290         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 291             for (int i = 0; i < a.length; i += SPECIES.length()) {
 292                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 293                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 294                 av.lanewise(VectorOperators.FIRST_NONZERO, bv, vmask).intoArray(r, i);
 295             }
 296         }
 297 
 298         bh.consume(r);
 299     }
 300 
 301 
 302     @Benchmark
 303     public void AND(Blackhole bh) {
 304         byte[] a = fa.apply(SPECIES.length());
 305         byte[] b = fb.apply(SPECIES.length());
 306         byte[] r = fr.apply(SPECIES.length());
 307 
 308         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 309             for (int i = 0; i < a.length; i += SPECIES.length()) {
 310                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 311                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 312                 av.lanewise(VectorOperators.AND, bv).intoArray(r, i);
 313             }
 314         }
 315 
 316         bh.consume(r);
 317     }
 318 
 319 
 320 
 321     @Benchmark
 322     public void ANDMasked(Blackhole bh) {
 323         byte[] a = fa.apply(SPECIES.length());
 324         byte[] b = fb.apply(SPECIES.length());
 325         byte[] r = fr.apply(SPECIES.length());
 326         boolean[] mask = fm.apply(SPECIES.length());
 327         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 328 
 329         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 330             for (int i = 0; i < a.length; i += SPECIES.length()) {
 331                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 332                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 333                 av.lanewise(VectorOperators.AND, bv, vmask).intoArray(r, i);
 334             }
 335         }
 336 
 337         bh.consume(r);
 338     }
 339 
 340 
 341 
 342     @Benchmark
 343     public void AND_NOT(Blackhole bh) {
 344         byte[] a = fa.apply(SPECIES.length());
 345         byte[] b = fb.apply(SPECIES.length());
 346         byte[] r = fr.apply(SPECIES.length());
 347 
 348         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 349             for (int i = 0; i < a.length; i += SPECIES.length()) {
 350                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 351                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 352                 av.lanewise(VectorOperators.AND_NOT, bv).intoArray(r, i);
 353             }
 354         }
 355 
 356         bh.consume(r);
 357     }
 358 
 359 
 360 
 361     @Benchmark
 362     public void AND_NOTMasked(Blackhole bh) {
 363         byte[] a = fa.apply(SPECIES.length());
 364         byte[] b = fb.apply(SPECIES.length());
 365         byte[] r = fr.apply(SPECIES.length());
 366         boolean[] mask = fm.apply(SPECIES.length());
 367         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 368 
 369         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 370             for (int i = 0; i < a.length; i += SPECIES.length()) {
 371                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 372                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 373                 av.lanewise(VectorOperators.AND_NOT, bv, vmask).intoArray(r, i);
 374             }
 375         }
 376 
 377         bh.consume(r);
 378     }
 379 
 380 
 381 
 382     @Benchmark
 383     public void OR(Blackhole bh) {
 384         byte[] a = fa.apply(SPECIES.length());
 385         byte[] b = fb.apply(SPECIES.length());
 386         byte[] r = fr.apply(SPECIES.length());
 387 
 388         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 389             for (int i = 0; i < a.length; i += SPECIES.length()) {
 390                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 391                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 392                 av.lanewise(VectorOperators.OR, bv).intoArray(r, i);
 393             }
 394         }
 395 
 396         bh.consume(r);
 397     }
 398 
 399 
 400 
 401     @Benchmark
 402     public void ORMasked(Blackhole bh) {
 403         byte[] a = fa.apply(SPECIES.length());
 404         byte[] b = fb.apply(SPECIES.length());
 405         byte[] r = fr.apply(SPECIES.length());
 406         boolean[] mask = fm.apply(SPECIES.length());
 407         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 408 
 409         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 410             for (int i = 0; i < a.length; i += SPECIES.length()) {
 411                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 412                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 413                 av.lanewise(VectorOperators.OR, bv, vmask).intoArray(r, i);
 414             }
 415         }
 416 
 417         bh.consume(r);
 418     }
 419 
 420 
 421 
 422     @Benchmark
 423     public void XOR(Blackhole bh) {
 424         byte[] a = fa.apply(SPECIES.length());
 425         byte[] b = fb.apply(SPECIES.length());
 426         byte[] r = fr.apply(SPECIES.length());
 427 
 428         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 429             for (int i = 0; i < a.length; i += SPECIES.length()) {
 430                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 431                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 432                 av.lanewise(VectorOperators.XOR, bv).intoArray(r, i);
 433             }
 434         }
 435 
 436         bh.consume(r);
 437     }
 438 
 439 
 440 
 441     @Benchmark
 442     public void XORMasked(Blackhole bh) {
 443         byte[] a = fa.apply(SPECIES.length());
 444         byte[] b = fb.apply(SPECIES.length());
 445         byte[] r = fr.apply(SPECIES.length());
 446         boolean[] mask = fm.apply(SPECIES.length());
 447         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 448 
 449         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 450             for (int i = 0; i < a.length; i += SPECIES.length()) {
 451                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 452                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 453                 av.lanewise(VectorOperators.XOR, bv, vmask).intoArray(r, i);
 454             }
 455         }
 456 
 457         bh.consume(r);
 458     }
 459 
 460 
 461 
 462 
 463 
 464     @Benchmark
 465     public void LSHL(Blackhole bh) {
 466         byte[] a = fa.apply(SPECIES.length());
 467         byte[] b = fb.apply(SPECIES.length());
 468         byte[] r = fr.apply(SPECIES.length());
 469 
 470         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 471             for (int i = 0; i < a.length; i += SPECIES.length()) {
 472                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 473                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 474                 av.lanewise(VectorOperators.LSHL, bv).intoArray(r, i);
 475             }
 476         }
 477 
 478         bh.consume(r);
 479     }
 480 
 481 
 482 
 483     @Benchmark
 484     public void LSHLMasked(Blackhole bh) {
 485         byte[] a = fa.apply(SPECIES.length());
 486         byte[] b = fb.apply(SPECIES.length());
 487         byte[] r = fr.apply(SPECIES.length());
 488         boolean[] mask = fm.apply(SPECIES.length());
 489         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 490 
 491         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 492             for (int i = 0; i < a.length; i += SPECIES.length()) {
 493                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 494                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 495                 av.lanewise(VectorOperators.LSHL, bv, vmask).intoArray(r, i);
 496             }
 497         }
 498 
 499         bh.consume(r);
 500     }
 501 
 502 
 503 
 504 
 505 
 506 
 507 
 508     @Benchmark
 509     public void ASHR(Blackhole bh) {
 510         byte[] a = fa.apply(SPECIES.length());
 511         byte[] b = fb.apply(SPECIES.length());
 512         byte[] r = fr.apply(SPECIES.length());
 513 
 514         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 515             for (int i = 0; i < a.length; i += SPECIES.length()) {
 516                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 517                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 518                 av.lanewise(VectorOperators.ASHR, bv).intoArray(r, i);
 519             }
 520         }
 521 
 522         bh.consume(r);
 523     }
 524 
 525 
 526 
 527     @Benchmark
 528     public void ASHRMasked(Blackhole bh) {
 529         byte[] a = fa.apply(SPECIES.length());
 530         byte[] b = fb.apply(SPECIES.length());
 531         byte[] r = fr.apply(SPECIES.length());
 532         boolean[] mask = fm.apply(SPECIES.length());
 533         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 534 
 535         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 536             for (int i = 0; i < a.length; i += SPECIES.length()) {
 537                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 538                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 539                 av.lanewise(VectorOperators.ASHR, bv, vmask).intoArray(r, i);
 540             }
 541         }
 542 
 543         bh.consume(r);
 544     }
 545 
 546 
 547 
 548 
 549 
 550 
 551 
 552     @Benchmark
 553     public void LSHR(Blackhole bh) {
 554         byte[] a = fa.apply(SPECIES.length());
 555         byte[] b = fb.apply(SPECIES.length());
 556         byte[] r = fr.apply(SPECIES.length());
 557 
 558         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 559             for (int i = 0; i < a.length; i += SPECIES.length()) {
 560                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 561                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 562                 av.lanewise(VectorOperators.LSHR, bv).intoArray(r, i);
 563             }
 564         }
 565 
 566         bh.consume(r);
 567     }
 568 
 569 
 570 
 571     @Benchmark
 572     public void LSHRMasked(Blackhole bh) {
 573         byte[] a = fa.apply(SPECIES.length());
 574         byte[] b = fb.apply(SPECIES.length());
 575         byte[] r = fr.apply(SPECIES.length());
 576         boolean[] mask = fm.apply(SPECIES.length());
 577         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 578 
 579         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 580             for (int i = 0; i < a.length; i += SPECIES.length()) {
 581                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 582                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 583                 av.lanewise(VectorOperators.LSHR, bv, vmask).intoArray(r, i);
 584             }
 585         }
 586 
 587         bh.consume(r);
 588     }
 589 
 590 
 591 
 592 
 593 
 594 
 595 
 596     @Benchmark
 597     public void LSHLShift(Blackhole bh) {
 598         byte[] a = fa.apply(SPECIES.length());
 599         byte[] b = fb.apply(SPECIES.length());
 600         byte[] r = fr.apply(SPECIES.length());
 601 
 602         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 603             for (int i = 0; i < a.length; i += SPECIES.length()) {
 604                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 605                 av.lanewise(VectorOperators.LSHL, (int)b[i]).intoArray(r, i);
 606             }
 607         }
 608 
 609         bh.consume(r);
 610     }
 611 
 612 
 613 
 614     @Benchmark
 615     public void LSHLMaskedShift(Blackhole bh) {
 616         byte[] a = fa.apply(SPECIES.length());
 617         byte[] b = fb.apply(SPECIES.length());
 618         byte[] r = fr.apply(SPECIES.length());
 619         boolean[] mask = fm.apply(SPECIES.length());
 620         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 621 
 622         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 623             for (int i = 0; i < a.length; i += SPECIES.length()) {
 624                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 625                 av.lanewise(VectorOperators.LSHL, (int)b[i], vmask).intoArray(r, i);
 626             }
 627         }
 628 
 629         bh.consume(r);
 630     }
 631 
 632 
 633 
 634 
 635 
 636 
 637 
 638     @Benchmark
 639     public void LSHRShift(Blackhole bh) {
 640         byte[] a = fa.apply(SPECIES.length());
 641         byte[] b = fb.apply(SPECIES.length());
 642         byte[] r = fr.apply(SPECIES.length());
 643 
 644         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 645             for (int i = 0; i < a.length; i += SPECIES.length()) {
 646                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 647                 av.lanewise(VectorOperators.LSHR, (int)b[i]).intoArray(r, i);
 648             }
 649         }
 650 
 651         bh.consume(r);
 652     }
 653 
 654 
 655 
 656     @Benchmark
 657     public void LSHRMaskedShift(Blackhole bh) {
 658         byte[] a = fa.apply(SPECIES.length());
 659         byte[] b = fb.apply(SPECIES.length());
 660         byte[] r = fr.apply(SPECIES.length());
 661         boolean[] mask = fm.apply(SPECIES.length());
 662         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 663 
 664         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 665             for (int i = 0; i < a.length; i += SPECIES.length()) {
 666                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 667                 av.lanewise(VectorOperators.LSHR, (int)b[i], vmask).intoArray(r, i);
 668             }
 669         }
 670 
 671         bh.consume(r);
 672     }
 673 
 674 
 675 
 676 
 677 
 678 
 679 
 680     @Benchmark
 681     public void ASHRShift(Blackhole bh) {
 682         byte[] a = fa.apply(SPECIES.length());
 683         byte[] b = fb.apply(SPECIES.length());
 684         byte[] r = fr.apply(SPECIES.length());
 685 
 686         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 687             for (int i = 0; i < a.length; i += SPECIES.length()) {
 688                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 689                 av.lanewise(VectorOperators.ASHR, (int)b[i]).intoArray(r, i);
 690             }
 691         }
 692 
 693         bh.consume(r);
 694     }
 695 
 696 
 697 
 698     @Benchmark
 699     public void ASHRMaskedShift(Blackhole bh) {
 700         byte[] a = fa.apply(SPECIES.length());
 701         byte[] b = fb.apply(SPECIES.length());
 702         byte[] r = fr.apply(SPECIES.length());
 703         boolean[] mask = fm.apply(SPECIES.length());
 704         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 705 
 706         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 707             for (int i = 0; i < a.length; i += SPECIES.length()) {
 708                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 709                 av.lanewise(VectorOperators.ASHR, (int)b[i], vmask).intoArray(r, i);
 710             }
 711         }
 712 
 713         bh.consume(r);
 714     }
 715 
 716 
 717 
 718 
 719     @Benchmark
 720     public void MIN(Blackhole bh) {
 721         byte[] a = fa.apply(SPECIES.length());
 722         byte[] b = fb.apply(SPECIES.length());
 723         byte[] r = fr.apply(SPECIES.length());
 724 
 725         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 726             for (int i = 0; i < a.length; i += SPECIES.length()) {
 727                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 728                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 729                 av.lanewise(VectorOperators.MIN, bv).intoArray(r, i);
 730             }
 731         }
 732 
 733         bh.consume(r);
 734     }
 735 
 736     @Benchmark
 737     public void MAX(Blackhole bh) {
 738         byte[] a = fa.apply(SPECIES.length());
 739         byte[] b = fb.apply(SPECIES.length());
 740         byte[] r = fr.apply(SPECIES.length());
 741 
 742         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 743             for (int i = 0; i < a.length; i += SPECIES.length()) {
 744                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 745                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 746                 av.lanewise(VectorOperators.MAX, bv).intoArray(r, i);
 747             }
 748         }
 749 
 750         bh.consume(r);
 751     }
 752 
 753 
 754     @Benchmark
 755     public void ANDLanes(Blackhole bh) {
 756         byte[] a = fa.apply(SPECIES.length());
 757         byte ra = -1;
 758 
 759         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 760             ra = -1;
 761             for (int i = 0; i < a.length; i += SPECIES.length()) {
 762                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 763                 ra &= av.reduceLanes(VectorOperators.AND);
 764             }
 765         }
 766         bh.consume(ra);
 767     }
 768 
 769 
 770 
 771     @Benchmark
 772     public void ANDMaskedLanes(Blackhole bh) {
 773         byte[] a = fa.apply(SPECIES.length());
 774         boolean[] mask = fm.apply(SPECIES.length());
 775         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 776         byte ra = -1;
 777 
 778         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 779             ra = -1;
 780             for (int i = 0; i < a.length; i += SPECIES.length()) {
 781                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 782                 ra &= av.reduceLanes(VectorOperators.AND, vmask);
 783             }
 784         }
 785         bh.consume(ra);
 786     }
 787 
 788 
 789 
 790     @Benchmark
 791     public void ORLanes(Blackhole bh) {
 792         byte[] a = fa.apply(SPECIES.length());
 793         byte ra = 0;
 794 
 795         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 796             ra = 0;
 797             for (int i = 0; i < a.length; i += SPECIES.length()) {
 798                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 799                 ra |= av.reduceLanes(VectorOperators.OR);
 800             }
 801         }
 802         bh.consume(ra);
 803     }
 804 
 805 
 806 
 807     @Benchmark
 808     public void ORMaskedLanes(Blackhole bh) {
 809         byte[] a = fa.apply(SPECIES.length());
 810         boolean[] mask = fm.apply(SPECIES.length());
 811         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 812         byte ra = 0;
 813 
 814         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 815             ra = 0;
 816             for (int i = 0; i < a.length; i += SPECIES.length()) {
 817                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 818                 ra |= av.reduceLanes(VectorOperators.OR, vmask);
 819             }
 820         }
 821         bh.consume(ra);
 822     }
 823 
 824 
 825 
 826     @Benchmark
 827     public void XORLanes(Blackhole bh) {
 828         byte[] a = fa.apply(SPECIES.length());
 829         byte ra = 0;
 830 
 831         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 832             ra = 0;
 833             for (int i = 0; i < a.length; i += SPECIES.length()) {
 834                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 835                 ra ^= av.reduceLanes(VectorOperators.XOR);
 836             }
 837         }
 838         bh.consume(ra);
 839     }
 840 
 841 
 842 
 843     @Benchmark
 844     public void XORMaskedLanes(Blackhole bh) {
 845         byte[] a = fa.apply(SPECIES.length());
 846         boolean[] mask = fm.apply(SPECIES.length());
 847         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 848         byte ra = 0;
 849 
 850         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 851             ra = 0;
 852             for (int i = 0; i < a.length; i += SPECIES.length()) {
 853                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 854                 ra ^= av.reduceLanes(VectorOperators.XOR, vmask);
 855             }
 856         }
 857         bh.consume(ra);
 858     }
 859 
 860 
 861     @Benchmark
 862     public void ADDLanes(Blackhole bh) {
 863         byte[] a = fa.apply(SPECIES.length());
 864         byte ra = 0;
 865 
 866         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 867             ra = 0;
 868             for (int i = 0; i < a.length; i += SPECIES.length()) {
 869                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 870                 ra += av.reduceLanes(VectorOperators.ADD);
 871             }
 872         }
 873         bh.consume(ra);
 874     }
 875 
 876     @Benchmark
 877     public void ADDMaskedLanes(Blackhole bh) {
 878         byte[] a = fa.apply(SPECIES.length());
 879         boolean[] mask = fm.apply(SPECIES.length());
 880         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 881         byte ra = 0;
 882 
 883         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 884             ra = 0;
 885             for (int i = 0; i < a.length; i += SPECIES.length()) {
 886                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 887                 ra += av.reduceLanes(VectorOperators.ADD, vmask);
 888             }
 889         }
 890         bh.consume(ra);
 891     }
 892 
 893     @Benchmark
 894     public void MULLanes(Blackhole bh) {
 895         byte[] a = fa.apply(SPECIES.length());
 896         byte ra = 1;
 897 
 898         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 899             ra = 1;
 900             for (int i = 0; i < a.length; i += SPECIES.length()) {
 901                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 902                 ra *= av.reduceLanes(VectorOperators.MUL);
 903             }
 904         }
 905         bh.consume(ra);
 906     }
 907 
 908     @Benchmark
 909     public void MULMaskedLanes(Blackhole bh) {
 910         byte[] a = fa.apply(SPECIES.length());
 911         boolean[] mask = fm.apply(SPECIES.length());
 912         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 913         byte ra = 1;
 914 
 915         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 916             ra = 1;
 917             for (int i = 0; i < a.length; i += SPECIES.length()) {
 918                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 919                 ra *= av.reduceLanes(VectorOperators.MUL, vmask);
 920             }
 921         }
 922         bh.consume(ra);
 923     }
 924 
 925     @Benchmark
 926     public void MINLanes(Blackhole bh) {
 927         byte[] a = fa.apply(SPECIES.length());
 928         byte ra = Byte.MAX_VALUE;
 929 
 930         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 931             ra = Byte.MAX_VALUE;
 932             for (int i = 0; i < a.length; i += SPECIES.length()) {
 933                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 934                 ra = (byte)Math.min(ra, av.reduceLanes(VectorOperators.MIN));
 935             }
 936         }
 937         bh.consume(ra);
 938     }
 939 
 940     @Benchmark
 941     public void MINMaskedLanes(Blackhole bh) {
 942         byte[] a = fa.apply(SPECIES.length());
 943         boolean[] mask = fm.apply(SPECIES.length());
 944         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 945         byte ra = Byte.MAX_VALUE;
 946 
 947         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 948             ra = Byte.MAX_VALUE;
 949             for (int i = 0; i < a.length; i += SPECIES.length()) {
 950                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 951                 ra = (byte)Math.min(ra, av.reduceLanes(VectorOperators.MIN, vmask));
 952             }
 953         }
 954         bh.consume(ra);
 955     }
 956 
 957     @Benchmark
 958     public void MAXLanes(Blackhole bh) {
 959         byte[] a = fa.apply(SPECIES.length());
 960         byte ra = Byte.MIN_VALUE;
 961 
 962         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 963             ra = Byte.MIN_VALUE;
 964             for (int i = 0; i < a.length; i += SPECIES.length()) {
 965                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 966                 ra = (byte)Math.max(ra, av.reduceLanes(VectorOperators.MAX));
 967             }
 968         }
 969         bh.consume(ra);
 970     }
 971 
 972     @Benchmark
 973     public void MAXMaskedLanes(Blackhole bh) {
 974         byte[] a = fa.apply(SPECIES.length());
 975         boolean[] mask = fm.apply(SPECIES.length());
 976         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 977         byte ra = Byte.MIN_VALUE;
 978 
 979         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 980             ra = Byte.MIN_VALUE;
 981             for (int i = 0; i < a.length; i += SPECIES.length()) {
 982                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 983                 ra = (byte)Math.max(ra, av.reduceLanes(VectorOperators.MAX, vmask));
 984             }
 985         }
 986         bh.consume(ra);
 987     }
 988 
 989 
 990     @Benchmark
 991     public void anyTrue(Blackhole bh) {
 992         boolean[] mask = fm.apply(SPECIES.length());
 993         boolean[] r = fmr.apply(SPECIES.length());
 994 
 995         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 996             for (int i = 0; i < mask.length; i += SPECIES.length()) {
 997                 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, i);
 998                 r[i] = vmask.anyTrue();
 999             }
1000         }
1001 
1002         bh.consume(r);
1003     }
1004 
1005 
1006 
1007     @Benchmark
1008     public void allTrue(Blackhole bh) {
1009         boolean[] mask = fm.apply(SPECIES.length());
1010         boolean[] r = fmr.apply(SPECIES.length());
1011 
1012         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1013             for (int i = 0; i < mask.length; i += SPECIES.length()) {
1014                 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, i);
1015                 r[i] = vmask.allTrue();
1016             }
1017         }
1018 
1019         bh.consume(r);
1020     }
1021 
1022 
1023     @Benchmark
1024     public void withLane(Blackhole bh) {
1025         byte[] a = fa.apply(SPECIES.length());
1026         byte[] r = fr.apply(SPECIES.length());
1027 
1028         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1029             for (int i = 0; i < a.length; i += SPECIES.length()) {
1030                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1031                 av.withLane(0, (byte)4).intoArray(r, i);
1032             }
1033         }
1034 
1035         bh.consume(r);
1036     }
1037 
1038     @Benchmark
1039     public Object IS_DEFAULT() {
1040         byte[] a = fa.apply(size);
1041         boolean[] ms = fmt.apply(size);
1042         VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, 0);
1043 
1044         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1045             for (int i = 0; i < a.length; i += SPECIES.length()) {
1046                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1047 
1048                 // accumulate results, so JIT can't eliminate relevant computations
1049                 m = m.and(av.test(VectorOperators.IS_DEFAULT));
1050             }
1051         }
1052 
1053         return m;
1054     }
1055 
1056     @Benchmark
1057     public Object IS_NEGATIVE() {
1058         byte[] a = fa.apply(size);
1059         boolean[] ms = fmt.apply(size);
1060         VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, 0);
1061 
1062         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1063             for (int i = 0; i < a.length; i += SPECIES.length()) {
1064                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1065 
1066                 // accumulate results, so JIT can't eliminate relevant computations
1067                 m = m.and(av.test(VectorOperators.IS_NEGATIVE));
1068             }
1069         }
1070 
1071         return m;
1072     }
1073 
1074 
1075 
1076 
1077     @Benchmark
1078     public Object LT() {
1079         byte[] a = fa.apply(size);
1080         byte[] b = fb.apply(size);
1081         boolean[] ms = fmt.apply(size);
1082         VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, 0);
1083 
1084         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1085             for (int i = 0; i < a.length; i += SPECIES.length()) {
1086                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1087                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1088 
1089                 // accumulate results, so JIT can't eliminate relevant computations
1090                 m = m.and(av.compare(VectorOperators.LT, bv));
1091             }
1092         }
1093 
1094         return m;
1095     }
1096 
1097     @Benchmark
1098     public Object GT() {
1099         byte[] a = fa.apply(size);
1100         byte[] b = fb.apply(size);
1101         boolean[] ms = fmt.apply(size);
1102         VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, 0);
1103 
1104         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1105             for (int i = 0; i < a.length; i += SPECIES.length()) {
1106                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1107                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1108 
1109                 // accumulate results, so JIT can't eliminate relevant computations
1110                 m = m.and(av.compare(VectorOperators.GT, bv));
1111             }
1112         }
1113 
1114         return m;
1115     }
1116 
1117     @Benchmark
1118     public Object EQ() {
1119         byte[] a = fa.apply(size);
1120         byte[] b = fb.apply(size);
1121         boolean[] ms = fmt.apply(size);
1122         VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, 0);
1123 
1124         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1125             for (int i = 0; i < a.length; i += SPECIES.length()) {
1126                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1127                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1128 
1129                 // accumulate results, so JIT can't eliminate relevant computations
1130                 m = m.and(av.compare(VectorOperators.EQ, bv));
1131             }
1132         }
1133 
1134         return m;
1135     }
1136 
1137     @Benchmark
1138     public Object NE() {
1139         byte[] a = fa.apply(size);
1140         byte[] b = fb.apply(size);
1141         boolean[] ms = fmt.apply(size);
1142         VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, 0);
1143 
1144         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1145             for (int i = 0; i < a.length; i += SPECIES.length()) {
1146                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1147                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1148 
1149                 // accumulate results, so JIT can't eliminate relevant computations
1150                 m = m.and(av.compare(VectorOperators.NE, bv));
1151             }
1152         }
1153 
1154         return m;
1155     }
1156 
1157     @Benchmark
1158     public Object LE() {
1159         byte[] a = fa.apply(size);
1160         byte[] b = fb.apply(size);
1161         boolean[] ms = fmt.apply(size);
1162         VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, 0);
1163 
1164         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1165             for (int i = 0; i < a.length; i += SPECIES.length()) {
1166                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1167                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1168 
1169                 // accumulate results, so JIT can't eliminate relevant computations
1170                 m = m.and(av.compare(VectorOperators.LE, bv));
1171             }
1172         }
1173 
1174         return m;
1175     }
1176 
1177     @Benchmark
1178     public Object GE() {
1179         byte[] a = fa.apply(size);
1180         byte[] b = fb.apply(size);
1181         boolean[] ms = fmt.apply(size);
1182         VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, 0);
1183 
1184         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1185             for (int i = 0; i < a.length; i += SPECIES.length()) {
1186                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1187                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1188 
1189                 // accumulate results, so JIT can't eliminate relevant computations
1190                 m = m.and(av.compare(VectorOperators.GE, bv));
1191             }
1192         }
1193 
1194         return m;
1195     }
1196 
1197 
1198     @Benchmark
1199     public Object UNSIGNED_LT() {
1200         byte[] a = fa.apply(size);
1201         byte[] b = fb.apply(size);
1202         boolean[] ms = fmt.apply(size);
1203         VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, 0);
1204 
1205         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1206             for (int i = 0; i < a.length; i += SPECIES.length()) {
1207                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1208                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1209 
1210                 // accumulate results, so JIT can't eliminate relevant computations
1211                 m = m.and(av.compare(VectorOperators.UNSIGNED_LT, bv));
1212             }
1213         }
1214 
1215         return m;
1216     }
1217 
1218 
1219 
1220     @Benchmark
1221     public Object UNSIGNED_GT() {
1222         byte[] a = fa.apply(size);
1223         byte[] b = fb.apply(size);
1224         boolean[] ms = fmt.apply(size);
1225         VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, 0);
1226 
1227         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1228             for (int i = 0; i < a.length; i += SPECIES.length()) {
1229                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1230                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1231 
1232                 // accumulate results, so JIT can't eliminate relevant computations
1233                 m = m.and(av.compare(VectorOperators.UNSIGNED_GT, bv));
1234             }
1235         }
1236 
1237         return m;
1238     }
1239 
1240 
1241 
1242     @Benchmark
1243     public Object UNSIGNED_LE() {
1244         byte[] a = fa.apply(size);
1245         byte[] b = fb.apply(size);
1246         boolean[] ms = fmt.apply(size);
1247         VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, 0);
1248 
1249         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1250             for (int i = 0; i < a.length; i += SPECIES.length()) {
1251                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1252                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1253 
1254                 // accumulate results, so JIT can't eliminate relevant computations
1255                 m = m.and(av.compare(VectorOperators.UNSIGNED_LE, bv));
1256             }
1257         }
1258 
1259         return m;
1260     }
1261 
1262 
1263 
1264     @Benchmark
1265     public Object UNSIGNED_GE() {
1266         byte[] a = fa.apply(size);
1267         byte[] b = fb.apply(size);
1268         boolean[] ms = fmt.apply(size);
1269         VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, 0);
1270 
1271         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1272             for (int i = 0; i < a.length; i += SPECIES.length()) {
1273                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1274                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1275 
1276                 // accumulate results, so JIT can't eliminate relevant computations
1277                 m = m.and(av.compare(VectorOperators.UNSIGNED_GE, bv));
1278             }
1279         }
1280 
1281         return m;
1282     }
1283 
1284 
1285     @Benchmark
1286     public void blend(Blackhole bh) {
1287         byte[] a = fa.apply(SPECIES.length());
1288         byte[] b = fb.apply(SPECIES.length());
1289         byte[] r = fr.apply(SPECIES.length());
1290         boolean[] mask = fm.apply(SPECIES.length());
1291         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1292 
1293         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1294             for (int i = 0; i < a.length; i += SPECIES.length()) {
1295                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1296                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1297                 av.blend(bv, vmask).intoArray(r, i);
1298             }
1299         }
1300 
1301         bh.consume(r);
1302     }
1303 
1304     @Benchmark
1305     public void rearrange(Blackhole bh) {
1306         byte[] a = fa.apply(SPECIES.length());
1307         int[] order = fs.apply(a.length, SPECIES.length());
1308         byte[] r = fr.apply(SPECIES.length());
1309 
1310         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1311             for (int i = 0; i < a.length; i += SPECIES.length()) {
1312                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1313                 av.rearrange(VectorShuffle.fromArray(SPECIES, order, i)).intoArray(r, i);
1314             }
1315         }
1316 
1317         bh.consume(r);
1318     }
1319 
1320     @Benchmark
1321     public void laneextract(Blackhole bh) {
1322         byte[] a = fa.apply(SPECIES.length());
1323         byte[] r = fr.apply(SPECIES.length());
1324 
1325         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1326             for (int i = 0; i < a.length; i += SPECIES.length()) {
1327                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1328                 int num_lanes = SPECIES.length();
1329                 // Manually unroll because full unroll happens after intrinsification.
1330                 // Unroll is needed because get intrinsic requires for index to be a known constant.
1331                 if (num_lanes == 1) {
1332                     r[i]=av.lane(0);
1333                 } else if (num_lanes == 2) {
1334                     r[i]=av.lane(0);
1335                     r[i+1]=av.lane(1);
1336                 } else if (num_lanes == 4) {
1337                     r[i]=av.lane(0);
1338                     r[i+1]=av.lane(1);
1339                     r[i+2]=av.lane(2);
1340                     r[i+3]=av.lane(3);
1341                 } else if (num_lanes == 8) {
1342                     r[i]=av.lane(0);
1343                     r[i+1]=av.lane(1);
1344                     r[i+2]=av.lane(2);
1345                     r[i+3]=av.lane(3);
1346                     r[i+4]=av.lane(4);
1347                     r[i+5]=av.lane(5);
1348                     r[i+6]=av.lane(6);
1349                     r[i+7]=av.lane(7);
1350                 } else if (num_lanes == 16) {
1351                     r[i]=av.lane(0);
1352                     r[i+1]=av.lane(1);
1353                     r[i+2]=av.lane(2);
1354                     r[i+3]=av.lane(3);
1355                     r[i+4]=av.lane(4);
1356                     r[i+5]=av.lane(5);
1357                     r[i+6]=av.lane(6);
1358                     r[i+7]=av.lane(7);
1359                     r[i+8]=av.lane(8);
1360                     r[i+9]=av.lane(9);
1361                     r[i+10]=av.lane(10);
1362                     r[i+11]=av.lane(11);
1363                     r[i+12]=av.lane(12);
1364                     r[i+13]=av.lane(13);
1365                     r[i+14]=av.lane(14);
1366                     r[i+15]=av.lane(15);
1367                 } else if (num_lanes == 32) {
1368                     r[i]=av.lane(0);
1369                     r[i+1]=av.lane(1);
1370                     r[i+2]=av.lane(2);
1371                     r[i+3]=av.lane(3);
1372                     r[i+4]=av.lane(4);
1373                     r[i+5]=av.lane(5);
1374                     r[i+6]=av.lane(6);
1375                     r[i+7]=av.lane(7);
1376                     r[i+8]=av.lane(8);
1377                     r[i+9]=av.lane(9);
1378                     r[i+10]=av.lane(10);
1379                     r[i+11]=av.lane(11);
1380                     r[i+12]=av.lane(12);
1381                     r[i+13]=av.lane(13);
1382                     r[i+14]=av.lane(14);
1383                     r[i+15]=av.lane(15);
1384                     r[i+16]=av.lane(16);
1385                     r[i+17]=av.lane(17);
1386                     r[i+18]=av.lane(18);
1387                     r[i+19]=av.lane(19);
1388                     r[i+20]=av.lane(20);
1389                     r[i+21]=av.lane(21);
1390                     r[i+22]=av.lane(22);
1391                     r[i+23]=av.lane(23);
1392                     r[i+24]=av.lane(24);
1393                     r[i+25]=av.lane(25);
1394                     r[i+26]=av.lane(26);
1395                     r[i+27]=av.lane(27);
1396                     r[i+28]=av.lane(28);
1397                     r[i+29]=av.lane(29);
1398                     r[i+30]=av.lane(30);
1399                     r[i+31]=av.lane(31);
1400                 } else if (num_lanes == 64) {
1401                     r[i]=av.lane(0);
1402                     r[i+1]=av.lane(1);
1403                     r[i+2]=av.lane(2);
1404                     r[i+3]=av.lane(3);
1405                     r[i+4]=av.lane(4);
1406                     r[i+5]=av.lane(5);
1407                     r[i+6]=av.lane(6);
1408                     r[i+7]=av.lane(7);
1409                     r[i+8]=av.lane(8);
1410                     r[i+9]=av.lane(9);
1411                     r[i+10]=av.lane(10);
1412                     r[i+11]=av.lane(11);
1413                     r[i+12]=av.lane(12);
1414                     r[i+13]=av.lane(13);
1415                     r[i+14]=av.lane(14);
1416                     r[i+15]=av.lane(15);
1417                     r[i+16]=av.lane(16);
1418                     r[i+17]=av.lane(17);
1419                     r[i+18]=av.lane(18);
1420                     r[i+19]=av.lane(19);
1421                     r[i+20]=av.lane(20);
1422                     r[i+21]=av.lane(21);
1423                     r[i+22]=av.lane(22);
1424                     r[i+23]=av.lane(23);
1425                     r[i+24]=av.lane(24);
1426                     r[i+25]=av.lane(25);
1427                     r[i+26]=av.lane(26);
1428                     r[i+27]=av.lane(27);
1429                     r[i+28]=av.lane(28);
1430                     r[i+29]=av.lane(29);
1431                     r[i+30]=av.lane(30);
1432                     r[i+31]=av.lane(31);
1433                     r[i+32]=av.lane(32);
1434                     r[i+33]=av.lane(33);
1435                     r[i+34]=av.lane(34);
1436                     r[i+35]=av.lane(35);
1437                     r[i+36]=av.lane(36);
1438                     r[i+37]=av.lane(37);
1439                     r[i+38]=av.lane(38);
1440                     r[i+39]=av.lane(39);
1441                     r[i+40]=av.lane(40);
1442                     r[i+41]=av.lane(41);
1443                     r[i+42]=av.lane(42);
1444                     r[i+43]=av.lane(43);
1445                     r[i+44]=av.lane(44);
1446                     r[i+45]=av.lane(45);
1447                     r[i+46]=av.lane(46);
1448                     r[i+47]=av.lane(47);
1449                     r[i+48]=av.lane(48);
1450                     r[i+49]=av.lane(49);
1451                     r[i+50]=av.lane(50);
1452                     r[i+51]=av.lane(51);
1453                     r[i+52]=av.lane(52);
1454                     r[i+53]=av.lane(53);
1455                     r[i+54]=av.lane(54);
1456                     r[i+55]=av.lane(55);
1457                     r[i+56]=av.lane(56);
1458                     r[i+57]=av.lane(57);
1459                     r[i+58]=av.lane(58);
1460                     r[i+59]=av.lane(59);
1461                     r[i+60]=av.lane(60);
1462                     r[i+61]=av.lane(61);
1463                     r[i+62]=av.lane(62);
1464                     r[i+63]=av.lane(63);
1465                 } else {
1466                     for (int j = 0; j < SPECIES.length(); j++) {
1467                         r[i+j]=av.lane(j);
1468                     }
1469                 }
1470             }
1471         }
1472 
1473         bh.consume(r);
1474     }
1475 
1476     @Benchmark
1477     public void broadcast(Blackhole bh) {
1478         byte[] a = fa.apply(SPECIES.length());
1479         byte[] r = new byte[a.length];
1480 
1481         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1482             for (int i = 0; i < a.length; i += SPECIES.length()) {
1483                 ByteVector.broadcast(SPECIES, a[i]).intoArray(r, i);
1484             }
1485         }
1486 
1487         bh.consume(r);
1488     }
1489 
1490     @Benchmark
1491     public void zero(Blackhole bh) {
1492         byte[] a = fa.apply(SPECIES.length());
1493         byte[] r = new byte[a.length];
1494 
1495         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1496             for (int i = 0; i < a.length; i += SPECIES.length()) {
1497                 ByteVector.zero(SPECIES).intoArray(a, i);
1498             }
1499         }
1500 
1501         bh.consume(r);
1502     }
1503 
1504     @Benchmark
1505     public void sliceUnary(Blackhole bh) {
1506         byte[] a = fa.apply(SPECIES.length());
1507         byte[] r = new byte[a.length];
1508         int origin = (new java.util.Random()).nextInt(SPECIES.length());
1509         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1510             for (int i = 0; i < a.length; i += SPECIES.length()) {
1511                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1512                 av.slice(origin).intoArray(r, i);
1513             }
1514         }
1515 
1516         bh.consume(r);
1517     }
1518 
1519     @Benchmark
1520     public void sliceBinary(Blackhole bh) {
1521         byte[] a = fa.apply(SPECIES.length());
1522         byte[] b = fb.apply(SPECIES.length());
1523         byte[] r = new byte[a.length];
1524         int origin = (new java.util.Random()).nextInt(SPECIES.length());
1525         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1526             for (int i = 0; i < a.length; i += SPECIES.length()) {
1527                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1528                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1529                 av.slice(origin, bv).intoArray(r, i);
1530             }
1531         }
1532 
1533         bh.consume(r);
1534     }
1535 
1536     @Benchmark
1537     public void sliceMasked(Blackhole bh) {
1538         byte[] a = fa.apply(SPECIES.length());
1539         byte[] b = fb.apply(SPECIES.length());
1540         boolean[] mask = fm.apply(SPECIES.length());
1541         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1542 
1543         byte[] r = new byte[a.length];
1544         int origin = (new java.util.Random()).nextInt(SPECIES.length());
1545         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1546             for (int i = 0; i < a.length; i += SPECIES.length()) {
1547                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1548                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1549                 av.slice(origin, bv, vmask).intoArray(r, i);
1550             }
1551         }
1552 
1553         bh.consume(r);
1554     }
1555 
1556     @Benchmark
1557     public void unsliceUnary(Blackhole bh) {
1558         byte[] a = fa.apply(SPECIES.length());
1559         byte[] r = new byte[a.length];
1560         int origin = (new java.util.Random()).nextInt(SPECIES.length());
1561         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1562             for (int i = 0; i < a.length; i += SPECIES.length()) {
1563                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1564                 av.unslice(origin).intoArray(r, i);
1565             }
1566         }
1567 
1568         bh.consume(r);
1569     }
1570 
1571     @Benchmark
1572     public void unsliceBinary(Blackhole bh) {
1573         byte[] a = fa.apply(SPECIES.length());
1574         byte[] b = fb.apply(SPECIES.length());
1575         byte[] r = new byte[a.length];
1576         int origin = (new java.util.Random()).nextInt(SPECIES.length());
1577         int part = (new java.util.Random()).nextInt(2);
1578         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1579             for (int i = 0; i < a.length; i += SPECIES.length()) {
1580                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1581                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1582                 av.unslice(origin, bv, part).intoArray(r, i);
1583             }
1584         }
1585 
1586         bh.consume(r);
1587     }
1588 
1589     @Benchmark
1590     public void unsliceMasked(Blackhole bh) {
1591         byte[] a = fa.apply(SPECIES.length());
1592         byte[] b = fb.apply(SPECIES.length());
1593         boolean[] mask = fm.apply(SPECIES.length());
1594         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1595         byte[] r = new byte[a.length];
1596         int origin = (new java.util.Random()).nextInt(SPECIES.length());
1597         int part = (new java.util.Random()).nextInt(2);
1598         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1599             for (int i = 0; i < a.length; i += SPECIES.length()) {
1600                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1601                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1602                 av.unslice(origin, bv, part, vmask).intoArray(r, i);
1603             }
1604         }
1605 
1606         bh.consume(r);
1607     }
1608 
1609 
1610 
1611 
1612 
1613 
1614 
1615 
1616 
1617 
1618 
1619 
1620 
1621 
1622 
1623 
1624 
1625 
1626 
1627 
1628 
1629 
1630     @Benchmark
1631     public void BITWISE_BLEND(Blackhole bh) {
1632         byte[] a = fa.apply(SPECIES.length());
1633         byte[] b = fb.apply(SPECIES.length());
1634         byte[] c = fc.apply(SPECIES.length());
1635         byte[] r = fr.apply(SPECIES.length());
1636 
1637         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1638             for (int i = 0; i < a.length; i += SPECIES.length()) {
1639                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1640                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1641                 ByteVector cv = ByteVector.fromArray(SPECIES, c, i);
1642                 av.lanewise(VectorOperators.BITWISE_BLEND, bv, cv).intoArray(r, i);
1643             }
1644         }
1645 
1646         bh.consume(r);
1647     }
1648 
1649 
1650 
1651     @Benchmark
1652     public void BITWISE_BLENDMasked(Blackhole bh) {
1653         byte[] a = fa.apply(SPECIES.length());
1654         byte[] b = fb.apply(SPECIES.length());
1655         byte[] c = fc.apply(SPECIES.length());
1656         byte[] r = fr.apply(SPECIES.length());
1657         boolean[] mask = fm.apply(SPECIES.length());
1658         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1659 
1660         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1661             for (int i = 0; i < a.length; i += SPECIES.length()) {
1662                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1663                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1664                 ByteVector cv = ByteVector.fromArray(SPECIES, c, i);
1665                 av.lanewise(VectorOperators.BITWISE_BLEND, bv, cv, vmask).intoArray(r, i);
1666             }
1667         }
1668 
1669         bh.consume(r);
1670     }
1671 
1672 
1673     @Benchmark
1674     public void NEG(Blackhole bh) {
1675         byte[] a = fa.apply(SPECIES.length());
1676         byte[] r = fr.apply(SPECIES.length());
1677 
1678         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1679             for (int i = 0; i < a.length; i += SPECIES.length()) {
1680                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1681                 av.lanewise(VectorOperators.NEG).intoArray(r, i);
1682             }
1683         }
1684 
1685         bh.consume(r);
1686     }
1687 
1688     @Benchmark
1689     public void NEGMasked(Blackhole bh) {
1690         byte[] a = fa.apply(SPECIES.length());
1691         byte[] r = fr.apply(SPECIES.length());
1692         boolean[] mask = fm.apply(SPECIES.length());
1693         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1694 
1695         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1696             for (int i = 0; i < a.length; i += SPECIES.length()) {
1697                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1698                 av.lanewise(VectorOperators.NEG, vmask).intoArray(r, i);
1699             }
1700         }
1701 
1702         bh.consume(r);
1703     }
1704 
1705     @Benchmark
1706     public void ABS(Blackhole bh) {
1707         byte[] a = fa.apply(SPECIES.length());
1708         byte[] r = fr.apply(SPECIES.length());
1709 
1710         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1711             for (int i = 0; i < a.length; i += SPECIES.length()) {
1712                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1713                 av.lanewise(VectorOperators.ABS).intoArray(r, i);
1714             }
1715         }
1716 
1717         bh.consume(r);
1718     }
1719 
1720     @Benchmark
1721     public void ABSMasked(Blackhole bh) {
1722         byte[] a = fa.apply(SPECIES.length());
1723         byte[] r = fr.apply(SPECIES.length());
1724         boolean[] mask = fm.apply(SPECIES.length());
1725         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1726 
1727         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1728             for (int i = 0; i < a.length; i += SPECIES.length()) {
1729                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1730                 av.lanewise(VectorOperators.ABS, vmask).intoArray(r, i);
1731             }
1732         }
1733 
1734         bh.consume(r);
1735     }
1736 
1737 
1738     @Benchmark
1739     public void NOT(Blackhole bh) {
1740         byte[] a = fa.apply(SPECIES.length());
1741         byte[] r = fr.apply(SPECIES.length());
1742 
1743         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1744             for (int i = 0; i < a.length; i += SPECIES.length()) {
1745                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1746                 av.lanewise(VectorOperators.NOT).intoArray(r, i);
1747             }
1748         }
1749 
1750         bh.consume(r);
1751     }
1752 
1753 
1754 
1755     @Benchmark
1756     public void NOTMasked(Blackhole bh) {
1757         byte[] a = fa.apply(SPECIES.length());
1758         byte[] r = fr.apply(SPECIES.length());
1759         boolean[] mask = fm.apply(SPECIES.length());
1760         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1761 
1762         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1763             for (int i = 0; i < a.length; i += SPECIES.length()) {
1764                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1765                 av.lanewise(VectorOperators.NOT, vmask).intoArray(r, i);
1766             }
1767         }
1768 
1769         bh.consume(r);
1770     }
1771 
1772 
1773 
1774     @Benchmark
1775     public void ZOMO(Blackhole bh) {
1776         byte[] a = fa.apply(SPECIES.length());
1777         byte[] r = fr.apply(SPECIES.length());
1778 
1779         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1780             for (int i = 0; i < a.length; i += SPECIES.length()) {
1781                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1782                 av.lanewise(VectorOperators.ZOMO).intoArray(r, i);
1783             }
1784         }
1785 
1786         bh.consume(r);
1787     }
1788 
1789 
1790 
1791     @Benchmark
1792     public void ZOMOMasked(Blackhole bh) {
1793         byte[] a = fa.apply(SPECIES.length());
1794         byte[] r = fr.apply(SPECIES.length());
1795         boolean[] mask = fm.apply(SPECIES.length());
1796         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1797 
1798         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1799             for (int i = 0; i < a.length; i += SPECIES.length()) {
1800                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1801                 av.lanewise(VectorOperators.ZOMO, vmask).intoArray(r, i);
1802             }
1803         }
1804 
1805         bh.consume(r);
1806     }
1807 
1808 
1809 
1810 
1811     @Benchmark
1812     public void gather(Blackhole bh) {
1813         byte[] a = fa.apply(SPECIES.length());
1814         int[] b    = fs.apply(a.length, SPECIES.length());
1815         byte[] r = new byte[a.length];
1816 
1817         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1818             for (int i = 0; i < a.length; i += SPECIES.length()) {
1819                 ByteVector av = ByteVector.fromArray(SPECIES, a, i, b, i);
1820                 av.intoArray(r, i);
1821             }
1822         }
1823 
1824         bh.consume(r);
1825     }
1826 
1827     @Benchmark
1828     public void gatherMasked(Blackhole bh) {
1829         byte[] a = fa.apply(SPECIES.length());
1830         int[] b    = fs.apply(a.length, SPECIES.length());
1831         byte[] r = new byte[a.length];
1832         boolean[] mask = fm.apply(SPECIES.length());
1833         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1834 
1835         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1836             for (int i = 0; i < a.length; i += SPECIES.length()) {
1837                 ByteVector av = ByteVector.fromArray(SPECIES, a, i, b, i, vmask);
1838                 av.intoArray(r, i);
1839             }
1840         }
1841 
1842         bh.consume(r);
1843     }
1844 
1845     @Benchmark
1846     public void scatter(Blackhole bh) {
1847         byte[] a = fa.apply(SPECIES.length());
1848         int[] b = fs.apply(a.length, SPECIES.length());
1849         byte[] r = new byte[a.length];
1850 
1851         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1852             for (int i = 0; i < a.length; i += SPECIES.length()) {
1853                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1854                 av.intoArray(r, i, b, i);
1855             }
1856         }
1857 
1858         bh.consume(r);
1859     }
1860 
1861     @Benchmark
1862     public void scatterMasked(Blackhole bh) {
1863         byte[] a = fa.apply(SPECIES.length());
1864         int[] b = fs.apply(a.length, SPECIES.length());
1865         byte[] r = fb.apply(SPECIES.length());
1866         boolean[] mask = fm.apply(SPECIES.length());
1867         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1868 
1869         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1870             for (int i = 0; i < a.length; i += SPECIES.length()) {
1871                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1872                 av.intoArray(r, i, b, i, vmask);
1873             }
1874         }
1875 
1876         bh.consume(r);
1877     }
1878 }
1879