1 /*
   2  * Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 package org.openjdk.bench.jdk.incubator.vector.operation;
  25 
  26 // -- This file was mechanically generated: Do not edit! -- //
  27 
  28 import jdk.incubator.vector.Vector;
  29 import jdk.incubator.vector.VectorMask;
  30 import jdk.incubator.vector.VectorMath;
  31 import jdk.incubator.vector.VectorOperators;
  32 import jdk.incubator.vector.VectorShape;
  33 import jdk.incubator.vector.VectorSpecies;
  34 import jdk.incubator.vector.VectorShuffle;
  35 import jdk.incubator.vector.ByteVector;
  36 
  37 import java.util.concurrent.TimeUnit;
  38 import java.util.function.BiFunction;
  39 import java.util.function.IntFunction;
  40 
  41 import org.openjdk.jmh.annotations.*;
  42 import org.openjdk.jmh.infra.Blackhole;
  43 
  44 @BenchmarkMode(Mode.Throughput)
  45 @OutputTimeUnit(TimeUnit.MILLISECONDS)
  46 @State(Scope.Benchmark)
  47 @Warmup(iterations = 3, time = 1)
  48 @Measurement(iterations = 5, time = 1)
  49 @Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
  50 public class Byte128Vector extends AbstractVectorBenchmark {
  51     static final VectorSpecies<Byte> SPECIES = ByteVector.SPECIES_128;
  52 
  53     static final int INVOC_COUNT = 1; // get rid of outer loop
  54 
  55     static ByteVector bcast_vec = ByteVector.broadcast(SPECIES, (byte)10);
  56 
  57     static void replaceZero(byte[] a, byte v) {
  58         for (int i = 0; i < a.length; i++) {
  59             if (a[i] == 0) {
  60                 a[i] = v;
  61             }
  62         }
  63     }
  64 
  65     static void replaceZero(byte[] a, boolean[] mask, byte v) {
  66         for (int i = 0; i < a.length; i++) {
  67             if (mask[i % mask.length] && a[i] == 0) {
  68                 a[i] = v;
  69             }
  70         }
  71     }
  72 
  73     static byte firstNonZero(byte a, byte b) {
  74         return Byte.compare(a, (byte) 0) != 0 ? a : b;
  75     }
  76 
  77     private static final byte CONST_SHIFT = Byte.SIZE / 2;
  78 
  79     @Param("1024")
  80     int size;
  81 
  82     byte[] fill(IntFunction<Byte> f) {
  83         byte[] array = new byte[size];
  84         for (int i = 0; i < array.length; i++) {
  85             array[i] = f.apply(i);
  86         }
  87         return array;
  88     }
  89 
  90     byte[] a, b, c, r;
  91     boolean[] m, mt, rm;
  92     int[] s;
  93 
  94     @Setup
  95     public void init() {
  96         size += size % SPECIES.length(); // FIXME: add post-loops
  97 
  98         a = fill(i -> (byte)(2*i));
  99         b = fill(i -> (byte)(i+1));
 100         c = fill(i -> (byte)(i+5));
 101         r = fill(i -> (byte)0);
 102 
 103         m = fillMask(size, i -> (i % 2) == 0);
 104         mt = fillMask(size, i -> true);
 105         rm = fillMask(size, i -> false);
 106 
 107         s = fillInt(size, i -> RAND.nextInt(SPECIES.length()));
 108     }
 109 
 110     final IntFunction<byte[]> fa = vl -> a;
 111     final IntFunction<byte[]> fb = vl -> b;
 112     final IntFunction<byte[]> fc = vl -> c;
 113     final IntFunction<byte[]> fr = vl -> r;
 114     final IntFunction<boolean[]> fm = vl -> m;
 115     final IntFunction<boolean[]> fmt = vl -> mt;
 116     final IntFunction<boolean[]> fmr = vl -> rm;
 117     final BiFunction<Integer,Integer,int[]> fs = (i,j) -> s;
 118 
 119 
 120     @Benchmark
 121     public void ADD(Blackhole bh) {
 122         byte[] a = fa.apply(SPECIES.length());
 123         byte[] b = fb.apply(SPECIES.length());
 124         byte[] r = fr.apply(SPECIES.length());
 125 
 126         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 127             for (int i = 0; i < a.length; i += SPECIES.length()) {
 128                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 129                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 130                 av.lanewise(VectorOperators.ADD, bv).intoArray(r, i);
 131             }
 132         }
 133 
 134         bh.consume(r);
 135     }
 136 
 137     @Benchmark
 138     public void ADDMasked(Blackhole bh) {
 139         byte[] a = fa.apply(SPECIES.length());
 140         byte[] b = fb.apply(SPECIES.length());
 141         byte[] r = fr.apply(SPECIES.length());
 142         boolean[] mask = fm.apply(SPECIES.length());
 143         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 144 
 145         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 146             for (int i = 0; i < a.length; i += SPECIES.length()) {
 147                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 148                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 149                 av.lanewise(VectorOperators.ADD, bv, vmask).intoArray(r, i);
 150             }
 151         }
 152 
 153         bh.consume(r);
 154     }
 155 
 156     @Benchmark
 157     public void SUB(Blackhole bh) {
 158         byte[] a = fa.apply(SPECIES.length());
 159         byte[] b = fb.apply(SPECIES.length());
 160         byte[] r = fr.apply(SPECIES.length());
 161 
 162         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 163             for (int i = 0; i < a.length; i += SPECIES.length()) {
 164                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 165                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 166                 av.lanewise(VectorOperators.SUB, bv).intoArray(r, i);
 167             }
 168         }
 169 
 170         bh.consume(r);
 171     }
 172 
 173     @Benchmark
 174     public void SUBMasked(Blackhole bh) {
 175         byte[] a = fa.apply(SPECIES.length());
 176         byte[] b = fb.apply(SPECIES.length());
 177         byte[] r = fr.apply(SPECIES.length());
 178         boolean[] mask = fm.apply(SPECIES.length());
 179         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 180 
 181         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 182             for (int i = 0; i < a.length; i += SPECIES.length()) {
 183                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 184                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 185                 av.lanewise(VectorOperators.SUB, bv, vmask).intoArray(r, i);
 186             }
 187         }
 188 
 189         bh.consume(r);
 190     }
 191 
 192     @Benchmark
 193     public void MUL(Blackhole bh) {
 194         byte[] a = fa.apply(SPECIES.length());
 195         byte[] b = fb.apply(SPECIES.length());
 196         byte[] r = fr.apply(SPECIES.length());
 197 
 198         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 199             for (int i = 0; i < a.length; i += SPECIES.length()) {
 200                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 201                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 202                 av.lanewise(VectorOperators.MUL, bv).intoArray(r, i);
 203             }
 204         }
 205 
 206         bh.consume(r);
 207     }
 208 
 209     @Benchmark
 210     public void MULMasked(Blackhole bh) {
 211         byte[] a = fa.apply(SPECIES.length());
 212         byte[] b = fb.apply(SPECIES.length());
 213         byte[] r = fr.apply(SPECIES.length());
 214         boolean[] mask = fm.apply(SPECIES.length());
 215         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 216 
 217         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 218             for (int i = 0; i < a.length; i += SPECIES.length()) {
 219                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 220                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 221                 av.lanewise(VectorOperators.MUL, bv, vmask).intoArray(r, i);
 222             }
 223         }
 224 
 225         bh.consume(r);
 226     }
 227 
 228     @Benchmark
 229     public void DIV(Blackhole bh) {
 230         byte[] a = fa.apply(SPECIES.length());
 231         byte[] b = fb.apply(SPECIES.length());
 232         byte[] r = fr.apply(SPECIES.length());
 233 
 234         replaceZero(b, (byte) 1);
 235 
 236         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 237             for (int i = 0; i < a.length; i += SPECIES.length()) {
 238                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 239                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 240                 av.lanewise(VectorOperators.DIV, bv).intoArray(r, i);
 241             }
 242         }
 243 
 244         bh.consume(r);
 245     }
 246 
 247     @Benchmark
 248     public void DIVMasked(Blackhole bh) {
 249         byte[] a = fa.apply(SPECIES.length());
 250         byte[] b = fb.apply(SPECIES.length());
 251         byte[] r = fr.apply(SPECIES.length());
 252         boolean[] mask = fm.apply(SPECIES.length());
 253         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 254 
 255         replaceZero(b, mask, (byte) 1);
 256 
 257         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 258             for (int i = 0; i < a.length; i += SPECIES.length()) {
 259                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 260                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 261                 av.lanewise(VectorOperators.DIV, bv, vmask).intoArray(r, i);
 262             }
 263         }
 264 
 265         bh.consume(r);
 266     }
 267 
 268     @Benchmark
 269     public void FIRST_NONZERO(Blackhole bh) {
 270         byte[] a = fa.apply(SPECIES.length());
 271         byte[] b = fb.apply(SPECIES.length());
 272         byte[] r = fr.apply(SPECIES.length());
 273 
 274         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 275             for (int i = 0; i < a.length; i += SPECIES.length()) {
 276                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 277                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 278                 av.lanewise(VectorOperators.FIRST_NONZERO, bv).intoArray(r, i);
 279             }
 280         }
 281 
 282         bh.consume(r);
 283     }
 284 
 285     @Benchmark
 286     public void FIRST_NONZEROMasked(Blackhole bh) {
 287         byte[] a = fa.apply(SPECIES.length());
 288         byte[] b = fb.apply(SPECIES.length());
 289         byte[] r = fr.apply(SPECIES.length());
 290         boolean[] mask = fm.apply(SPECIES.length());
 291         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 292 
 293         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 294             for (int i = 0; i < a.length; i += SPECIES.length()) {
 295                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 296                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 297                 av.lanewise(VectorOperators.FIRST_NONZERO, bv, vmask).intoArray(r, i);
 298             }
 299         }
 300 
 301         bh.consume(r);
 302     }
 303 
 304     @Benchmark
 305     public void AND(Blackhole bh) {
 306         byte[] a = fa.apply(SPECIES.length());
 307         byte[] b = fb.apply(SPECIES.length());
 308         byte[] r = fr.apply(SPECIES.length());
 309 
 310         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 311             for (int i = 0; i < a.length; i += SPECIES.length()) {
 312                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 313                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 314                 av.lanewise(VectorOperators.AND, bv).intoArray(r, i);
 315             }
 316         }
 317 
 318         bh.consume(r);
 319     }
 320 
 321     @Benchmark
 322     public void ANDMasked(Blackhole bh) {
 323         byte[] a = fa.apply(SPECIES.length());
 324         byte[] b = fb.apply(SPECIES.length());
 325         byte[] r = fr.apply(SPECIES.length());
 326         boolean[] mask = fm.apply(SPECIES.length());
 327         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 328 
 329         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 330             for (int i = 0; i < a.length; i += SPECIES.length()) {
 331                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 332                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 333                 av.lanewise(VectorOperators.AND, bv, vmask).intoArray(r, i);
 334             }
 335         }
 336 
 337         bh.consume(r);
 338     }
 339 
 340     @Benchmark
 341     public void AND_NOT(Blackhole bh) {
 342         byte[] a = fa.apply(SPECIES.length());
 343         byte[] b = fb.apply(SPECIES.length());
 344         byte[] r = fr.apply(SPECIES.length());
 345 
 346         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 347             for (int i = 0; i < a.length; i += SPECIES.length()) {
 348                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 349                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 350                 av.lanewise(VectorOperators.AND_NOT, bv).intoArray(r, i);
 351             }
 352         }
 353 
 354         bh.consume(r);
 355     }
 356 
 357     @Benchmark
 358     public void AND_NOTMasked(Blackhole bh) {
 359         byte[] a = fa.apply(SPECIES.length());
 360         byte[] b = fb.apply(SPECIES.length());
 361         byte[] r = fr.apply(SPECIES.length());
 362         boolean[] mask = fm.apply(SPECIES.length());
 363         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 364 
 365         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 366             for (int i = 0; i < a.length; i += SPECIES.length()) {
 367                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 368                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 369                 av.lanewise(VectorOperators.AND_NOT, bv, vmask).intoArray(r, i);
 370             }
 371         }
 372 
 373         bh.consume(r);
 374     }
 375 
 376     @Benchmark
 377     public void OR(Blackhole bh) {
 378         byte[] a = fa.apply(SPECIES.length());
 379         byte[] b = fb.apply(SPECIES.length());
 380         byte[] r = fr.apply(SPECIES.length());
 381 
 382         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 383             for (int i = 0; i < a.length; i += SPECIES.length()) {
 384                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 385                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 386                 av.lanewise(VectorOperators.OR, bv).intoArray(r, i);
 387             }
 388         }
 389 
 390         bh.consume(r);
 391     }
 392 
 393     @Benchmark
 394     public void ORMasked(Blackhole bh) {
 395         byte[] a = fa.apply(SPECIES.length());
 396         byte[] b = fb.apply(SPECIES.length());
 397         byte[] r = fr.apply(SPECIES.length());
 398         boolean[] mask = fm.apply(SPECIES.length());
 399         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 400 
 401         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 402             for (int i = 0; i < a.length; i += SPECIES.length()) {
 403                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 404                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 405                 av.lanewise(VectorOperators.OR, bv, vmask).intoArray(r, i);
 406             }
 407         }
 408 
 409         bh.consume(r);
 410     }
 411 
 412     @Benchmark
 413     public void XOR(Blackhole bh) {
 414         byte[] a = fa.apply(SPECIES.length());
 415         byte[] b = fb.apply(SPECIES.length());
 416         byte[] r = fr.apply(SPECIES.length());
 417 
 418         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 419             for (int i = 0; i < a.length; i += SPECIES.length()) {
 420                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 421                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 422                 av.lanewise(VectorOperators.XOR, bv).intoArray(r, i);
 423             }
 424         }
 425 
 426         bh.consume(r);
 427     }
 428 
 429     @Benchmark
 430     public void XORMasked(Blackhole bh) {
 431         byte[] a = fa.apply(SPECIES.length());
 432         byte[] b = fb.apply(SPECIES.length());
 433         byte[] r = fr.apply(SPECIES.length());
 434         boolean[] mask = fm.apply(SPECIES.length());
 435         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 436 
 437         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 438             for (int i = 0; i < a.length; i += SPECIES.length()) {
 439                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 440                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 441                 av.lanewise(VectorOperators.XOR, bv, vmask).intoArray(r, i);
 442             }
 443         }
 444 
 445         bh.consume(r);
 446     }
 447 
 448     @Benchmark
 449     public void LSHL(Blackhole bh) {
 450         byte[] a = fa.apply(SPECIES.length());
 451         byte[] b = fb.apply(SPECIES.length());
 452         byte[] r = fr.apply(SPECIES.length());
 453 
 454         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 455             for (int i = 0; i < a.length; i += SPECIES.length()) {
 456                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 457                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 458                 av.lanewise(VectorOperators.LSHL, bv).intoArray(r, i);
 459             }
 460         }
 461 
 462         bh.consume(r);
 463     }
 464 
 465     @Benchmark
 466     public void LSHLMasked(Blackhole bh) {
 467         byte[] a = fa.apply(SPECIES.length());
 468         byte[] b = fb.apply(SPECIES.length());
 469         byte[] r = fr.apply(SPECIES.length());
 470         boolean[] mask = fm.apply(SPECIES.length());
 471         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 472 
 473         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 474             for (int i = 0; i < a.length; i += SPECIES.length()) {
 475                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 476                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 477                 av.lanewise(VectorOperators.LSHL, bv, vmask).intoArray(r, i);
 478             }
 479         }
 480 
 481         bh.consume(r);
 482     }
 483 
 484     @Benchmark
 485     public void ASHR(Blackhole bh) {
 486         byte[] a = fa.apply(SPECIES.length());
 487         byte[] b = fb.apply(SPECIES.length());
 488         byte[] r = fr.apply(SPECIES.length());
 489 
 490         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 491             for (int i = 0; i < a.length; i += SPECIES.length()) {
 492                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 493                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 494                 av.lanewise(VectorOperators.ASHR, bv).intoArray(r, i);
 495             }
 496         }
 497 
 498         bh.consume(r);
 499     }
 500 
 501     @Benchmark
 502     public void ASHRMasked(Blackhole bh) {
 503         byte[] a = fa.apply(SPECIES.length());
 504         byte[] b = fb.apply(SPECIES.length());
 505         byte[] r = fr.apply(SPECIES.length());
 506         boolean[] mask = fm.apply(SPECIES.length());
 507         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 508 
 509         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 510             for (int i = 0; i < a.length; i += SPECIES.length()) {
 511                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 512                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 513                 av.lanewise(VectorOperators.ASHR, bv, vmask).intoArray(r, i);
 514             }
 515         }
 516 
 517         bh.consume(r);
 518     }
 519 
 520     @Benchmark
 521     public void LSHR(Blackhole bh) {
 522         byte[] a = fa.apply(SPECIES.length());
 523         byte[] b = fb.apply(SPECIES.length());
 524         byte[] r = fr.apply(SPECIES.length());
 525 
 526         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 527             for (int i = 0; i < a.length; i += SPECIES.length()) {
 528                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 529                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 530                 av.lanewise(VectorOperators.LSHR, bv).intoArray(r, i);
 531             }
 532         }
 533 
 534         bh.consume(r);
 535     }
 536 
 537     @Benchmark
 538     public void LSHRMasked(Blackhole bh) {
 539         byte[] a = fa.apply(SPECIES.length());
 540         byte[] b = fb.apply(SPECIES.length());
 541         byte[] r = fr.apply(SPECIES.length());
 542         boolean[] mask = fm.apply(SPECIES.length());
 543         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 544 
 545         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 546             for (int i = 0; i < a.length; i += SPECIES.length()) {
 547                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 548                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 549                 av.lanewise(VectorOperators.LSHR, bv, vmask).intoArray(r, i);
 550             }
 551         }
 552 
 553         bh.consume(r);
 554     }
 555 
 556     @Benchmark
 557     public void LSHLShift(Blackhole bh) {
 558         byte[] a = fa.apply(SPECIES.length());
 559         byte[] b = fb.apply(SPECIES.length());
 560         byte[] r = fr.apply(SPECIES.length());
 561 
 562         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 563             for (int i = 0; i < a.length; i += SPECIES.length()) {
 564                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 565                 av.lanewise(VectorOperators.LSHL, (int)b[i]).intoArray(r, i);
 566             }
 567         }
 568 
 569         bh.consume(r);
 570     }
 571 
 572     @Benchmark
 573     public void LSHLMaskedShift(Blackhole bh) {
 574         byte[] a = fa.apply(SPECIES.length());
 575         byte[] b = fb.apply(SPECIES.length());
 576         byte[] r = fr.apply(SPECIES.length());
 577         boolean[] mask = fm.apply(SPECIES.length());
 578         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 579 
 580         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 581             for (int i = 0; i < a.length; i += SPECIES.length()) {
 582                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 583                 av.lanewise(VectorOperators.LSHL, (int)b[i], vmask).intoArray(r, i);
 584             }
 585         }
 586 
 587         bh.consume(r);
 588     }
 589 
 590     @Benchmark
 591     public void LSHRShift(Blackhole bh) {
 592         byte[] a = fa.apply(SPECIES.length());
 593         byte[] b = fb.apply(SPECIES.length());
 594         byte[] r = fr.apply(SPECIES.length());
 595 
 596         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 597             for (int i = 0; i < a.length; i += SPECIES.length()) {
 598                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 599                 av.lanewise(VectorOperators.LSHR, (int)b[i]).intoArray(r, i);
 600             }
 601         }
 602 
 603         bh.consume(r);
 604     }
 605 
 606     @Benchmark
 607     public void LSHRMaskedShift(Blackhole bh) {
 608         byte[] a = fa.apply(SPECIES.length());
 609         byte[] b = fb.apply(SPECIES.length());
 610         byte[] r = fr.apply(SPECIES.length());
 611         boolean[] mask = fm.apply(SPECIES.length());
 612         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 613 
 614         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 615             for (int i = 0; i < a.length; i += SPECIES.length()) {
 616                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 617                 av.lanewise(VectorOperators.LSHR, (int)b[i], vmask).intoArray(r, i);
 618             }
 619         }
 620 
 621         bh.consume(r);
 622     }
 623 
 624     @Benchmark
 625     public void ASHRShift(Blackhole bh) {
 626         byte[] a = fa.apply(SPECIES.length());
 627         byte[] b = fb.apply(SPECIES.length());
 628         byte[] r = fr.apply(SPECIES.length());
 629 
 630         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 631             for (int i = 0; i < a.length; i += SPECIES.length()) {
 632                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 633                 av.lanewise(VectorOperators.ASHR, (int)b[i]).intoArray(r, i);
 634             }
 635         }
 636 
 637         bh.consume(r);
 638     }
 639 
 640     @Benchmark
 641     public void ASHRMaskedShift(Blackhole bh) {
 642         byte[] a = fa.apply(SPECIES.length());
 643         byte[] b = fb.apply(SPECIES.length());
 644         byte[] r = fr.apply(SPECIES.length());
 645         boolean[] mask = fm.apply(SPECIES.length());
 646         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 647 
 648         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 649             for (int i = 0; i < a.length; i += SPECIES.length()) {
 650                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 651                 av.lanewise(VectorOperators.ASHR, (int)b[i], vmask).intoArray(r, i);
 652             }
 653         }
 654 
 655         bh.consume(r);
 656     }
 657 
 658     @Benchmark
 659     public void ROR(Blackhole bh) {
 660         byte[] a = fa.apply(SPECIES.length());
 661         byte[] b = fb.apply(SPECIES.length());
 662         byte[] r = fr.apply(SPECIES.length());
 663 
 664         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 665             for (int i = 0; i < a.length; i += SPECIES.length()) {
 666                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 667                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 668                 av.lanewise(VectorOperators.ROR, bv).intoArray(r, i);
 669             }
 670         }
 671 
 672         bh.consume(r);
 673     }
 674 
 675     @Benchmark
 676     public void RORMasked(Blackhole bh) {
 677         byte[] a = fa.apply(SPECIES.length());
 678         byte[] b = fb.apply(SPECIES.length());
 679         byte[] r = fr.apply(SPECIES.length());
 680         boolean[] mask = fm.apply(SPECIES.length());
 681         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 682 
 683         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 684             for (int i = 0; i < a.length; i += SPECIES.length()) {
 685                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 686                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 687                 av.lanewise(VectorOperators.ROR, bv, vmask).intoArray(r, i);
 688             }
 689         }
 690 
 691         bh.consume(r);
 692     }
 693 
 694     @Benchmark
 695     public void ROL(Blackhole bh) {
 696         byte[] a = fa.apply(SPECIES.length());
 697         byte[] b = fb.apply(SPECIES.length());
 698         byte[] r = fr.apply(SPECIES.length());
 699 
 700         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 701             for (int i = 0; i < a.length; i += SPECIES.length()) {
 702                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 703                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 704                 av.lanewise(VectorOperators.ROL, bv).intoArray(r, i);
 705             }
 706         }
 707 
 708         bh.consume(r);
 709     }
 710 
 711     @Benchmark
 712     public void ROLMasked(Blackhole bh) {
 713         byte[] a = fa.apply(SPECIES.length());
 714         byte[] b = fb.apply(SPECIES.length());
 715         byte[] r = fr.apply(SPECIES.length());
 716         boolean[] mask = fm.apply(SPECIES.length());
 717         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 718 
 719         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 720             for (int i = 0; i < a.length; i += SPECIES.length()) {
 721                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 722                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 723                 av.lanewise(VectorOperators.ROL, bv, vmask).intoArray(r, i);
 724             }
 725         }
 726 
 727         bh.consume(r);
 728     }
 729 
 730     @Benchmark
 731     public void RORShift(Blackhole bh) {
 732         byte[] a = fa.apply(SPECIES.length());
 733         byte[] b = fb.apply(SPECIES.length());
 734         byte[] r = fr.apply(SPECIES.length());
 735 
 736         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 737             for (int i = 0; i < a.length; i += SPECIES.length()) {
 738                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 739                 av.lanewise(VectorOperators.ROR, (int)b[i]).intoArray(r, i);
 740             }
 741         }
 742 
 743         bh.consume(r);
 744     }
 745 
 746     @Benchmark
 747     public void RORMaskedShift(Blackhole bh) {
 748         byte[] a = fa.apply(SPECIES.length());
 749         byte[] b = fb.apply(SPECIES.length());
 750         byte[] r = fr.apply(SPECIES.length());
 751         boolean[] mask = fm.apply(SPECIES.length());
 752         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 753 
 754         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 755             for (int i = 0; i < a.length; i += SPECIES.length()) {
 756                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 757                 av.lanewise(VectorOperators.ROR, (int)b[i], vmask).intoArray(r, i);
 758             }
 759         }
 760 
 761         bh.consume(r);
 762     }
 763 
 764     @Benchmark
 765     public void ROLShift(Blackhole bh) {
 766         byte[] a = fa.apply(SPECIES.length());
 767         byte[] b = fb.apply(SPECIES.length());
 768         byte[] r = fr.apply(SPECIES.length());
 769 
 770         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 771             for (int i = 0; i < a.length; i += SPECIES.length()) {
 772                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 773                 av.lanewise(VectorOperators.ROL, (int)b[i]).intoArray(r, i);
 774             }
 775         }
 776 
 777         bh.consume(r);
 778     }
 779 
 780     @Benchmark
 781     public void ROLMaskedShift(Blackhole bh) {
 782         byte[] a = fa.apply(SPECIES.length());
 783         byte[] b = fb.apply(SPECIES.length());
 784         byte[] r = fr.apply(SPECIES.length());
 785         boolean[] mask = fm.apply(SPECIES.length());
 786         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 787 
 788         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 789             for (int i = 0; i < a.length; i += SPECIES.length()) {
 790                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 791                 av.lanewise(VectorOperators.ROL, (int)b[i], vmask).intoArray(r, i);
 792             }
 793         }
 794 
 795         bh.consume(r);
 796     }
 797 
 798     @Benchmark
 799     public void LSHRShiftConst(Blackhole bh) {
 800         byte[] a = fa.apply(SPECIES.length());
 801         byte[] r = fr.apply(SPECIES.length());
 802 
 803         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 804             for (int i = 0; i < a.length; i += SPECIES.length()) {
 805                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 806                 av.lanewise(VectorOperators.LSHR, CONST_SHIFT).intoArray(r, i);
 807             }
 808         }
 809 
 810         bh.consume(r);
 811     }
 812 
 813     @Benchmark
 814     public void LSHRMaskedShiftConst(Blackhole bh) {
 815         byte[] a = fa.apply(SPECIES.length());
 816         byte[] r = fr.apply(SPECIES.length());
 817         boolean[] mask = fm.apply(SPECIES.length());
 818         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 819 
 820         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 821             for (int i = 0; i < a.length; i += SPECIES.length()) {
 822                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 823                 av.lanewise(VectorOperators.LSHR, CONST_SHIFT, vmask).intoArray(r, i);
 824             }
 825         }
 826 
 827         bh.consume(r);
 828     }
 829 
 830     @Benchmark
 831     public void LSHLShiftConst(Blackhole bh) {
 832         byte[] a = fa.apply(SPECIES.length());
 833         byte[] r = fr.apply(SPECIES.length());
 834 
 835         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 836             for (int i = 0; i < a.length; i += SPECIES.length()) {
 837                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 838                 av.lanewise(VectorOperators.LSHL, CONST_SHIFT).intoArray(r, i);
 839             }
 840         }
 841 
 842         bh.consume(r);
 843     }
 844 
 845     @Benchmark
 846     public void LSHLMaskedShiftConst(Blackhole bh) {
 847         byte[] a = fa.apply(SPECIES.length());
 848         byte[] r = fr.apply(SPECIES.length());
 849         boolean[] mask = fm.apply(SPECIES.length());
 850         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 851 
 852         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 853             for (int i = 0; i < a.length; i += SPECIES.length()) {
 854                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 855                 av.lanewise(VectorOperators.LSHL, CONST_SHIFT, vmask).intoArray(r, i);
 856             }
 857         }
 858 
 859         bh.consume(r);
 860     }
 861 
 862     @Benchmark
 863     public void ASHRShiftConst(Blackhole bh) {
 864         byte[] a = fa.apply(SPECIES.length());
 865         byte[] r = fr.apply(SPECIES.length());
 866 
 867         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 868             for (int i = 0; i < a.length; i += SPECIES.length()) {
 869                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 870                 av.lanewise(VectorOperators.ASHR, CONST_SHIFT).intoArray(r, i);
 871             }
 872         }
 873 
 874         bh.consume(r);
 875     }
 876 
 877     @Benchmark
 878     public void ASHRMaskedShiftConst(Blackhole bh) {
 879         byte[] a = fa.apply(SPECIES.length());
 880         byte[] r = fr.apply(SPECIES.length());
 881         boolean[] mask = fm.apply(SPECIES.length());
 882         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 883 
 884         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 885             for (int i = 0; i < a.length; i += SPECIES.length()) {
 886                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 887                 av.lanewise(VectorOperators.ASHR, CONST_SHIFT, vmask).intoArray(r, i);
 888             }
 889         }
 890 
 891         bh.consume(r);
 892     }
 893 
 894     @Benchmark
 895     public void RORShiftConst(Blackhole bh) {
 896         byte[] a = fa.apply(SPECIES.length());
 897         byte[] r = fr.apply(SPECIES.length());
 898 
 899         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 900             for (int i = 0; i < a.length; i += SPECIES.length()) {
 901                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 902                 av.lanewise(VectorOperators.ROR, CONST_SHIFT).intoArray(r, i);
 903             }
 904         }
 905 
 906         bh.consume(r);
 907     }
 908 
 909     @Benchmark
 910     public void RORMaskedShiftConst(Blackhole bh) {
 911         byte[] a = fa.apply(SPECIES.length());
 912         byte[] r = fr.apply(SPECIES.length());
 913         boolean[] mask = fm.apply(SPECIES.length());
 914         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 915 
 916         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 917             for (int i = 0; i < a.length; i += SPECIES.length()) {
 918                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 919                 av.lanewise(VectorOperators.ROR, CONST_SHIFT, vmask).intoArray(r, i);
 920             }
 921         }
 922 
 923         bh.consume(r);
 924     }
 925 
 926     @Benchmark
 927     public void ROLShiftConst(Blackhole bh) {
 928         byte[] a = fa.apply(SPECIES.length());
 929         byte[] r = fr.apply(SPECIES.length());
 930 
 931         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 932             for (int i = 0; i < a.length; i += SPECIES.length()) {
 933                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 934                 av.lanewise(VectorOperators.ROL, CONST_SHIFT).intoArray(r, i);
 935             }
 936         }
 937 
 938         bh.consume(r);
 939     }
 940 
 941     @Benchmark
 942     public void ROLMaskedShiftConst(Blackhole bh) {
 943         byte[] a = fa.apply(SPECIES.length());
 944         byte[] r = fr.apply(SPECIES.length());
 945         boolean[] mask = fm.apply(SPECIES.length());
 946         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 947 
 948         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 949             for (int i = 0; i < a.length; i += SPECIES.length()) {
 950                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 951                 av.lanewise(VectorOperators.ROL, CONST_SHIFT, vmask).intoArray(r, i);
 952             }
 953         }
 954 
 955         bh.consume(r);
 956     }
 957 
 958     @Benchmark
 959     public void MIN_MEM(Blackhole bh) {
 960         byte[] a = fa.apply(SPECIES.length());
 961         byte[] r = fr.apply(SPECIES.length());
 962 
 963         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 964             for (int i = 0; i < a.length; i += SPECIES.length()) {
 965                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 966                 av.lanewise(VectorOperators.MIN, bcast_vec).intoArray(r, i);
 967             }
 968         }
 969 
 970         bh.consume(r);
 971     }
 972 
 973     @Benchmark
 974     public void MINMasked_MEM(Blackhole bh) {
 975         byte[] a = fa.apply(SPECIES.length());
 976         byte[] r = fr.apply(SPECIES.length());
 977         boolean[] mask = fm.apply(SPECIES.length());
 978         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 979 
 980         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 981             for (int i = 0; i < a.length; i += SPECIES.length()) {
 982                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 983                 av.lanewise(VectorOperators.MIN, bcast_vec, vmask).intoArray(r, i);
 984             }
 985         }
 986 
 987         bh.consume(r);
 988     }
 989 
 990     @Benchmark
 991     public void MAX_MEM(Blackhole bh) {
 992         byte[] a = fa.apply(SPECIES.length());
 993         byte[] r = fr.apply(SPECIES.length());
 994 
 995         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 996             for (int i = 0; i < a.length; i += SPECIES.length()) {
 997                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 998                 av.lanewise(VectorOperators.MAX, bcast_vec).intoArray(r, i);
 999             }
1000         }
1001 
1002         bh.consume(r);
1003     }
1004 
1005     @Benchmark
1006     public void MAXMasked_MEM(Blackhole bh) {
1007         byte[] a = fa.apply(SPECIES.length());
1008         byte[] r = fr.apply(SPECIES.length());
1009         boolean[] mask = fm.apply(SPECIES.length());
1010         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1011 
1012         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1013             for (int i = 0; i < a.length; i += SPECIES.length()) {
1014                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1015                 av.lanewise(VectorOperators.MAX, bcast_vec, vmask).intoArray(r, i);
1016             }
1017         }
1018 
1019         bh.consume(r);
1020     }
1021 
1022     @Benchmark
1023     public void MIN(Blackhole bh) {
1024         byte[] a = fa.apply(SPECIES.length());
1025         byte[] b = fb.apply(SPECIES.length());
1026         byte[] r = fr.apply(SPECIES.length());
1027 
1028         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1029             for (int i = 0; i < a.length; i += SPECIES.length()) {
1030                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1031                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1032                 av.lanewise(VectorOperators.MIN, bv).intoArray(r, i);
1033             }
1034         }
1035 
1036         bh.consume(r);
1037     }
1038 
1039     @Benchmark
1040     public void MAX(Blackhole bh) {
1041         byte[] a = fa.apply(SPECIES.length());
1042         byte[] b = fb.apply(SPECIES.length());
1043         byte[] r = fr.apply(SPECIES.length());
1044 
1045         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1046             for (int i = 0; i < a.length; i += SPECIES.length()) {
1047                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1048                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1049                 av.lanewise(VectorOperators.MAX, bv).intoArray(r, i);
1050             }
1051         }
1052 
1053         bh.consume(r);
1054     }
1055 
1056     @Benchmark
1057     public void UMIN(Blackhole bh) {
1058         byte[] a = fa.apply(SPECIES.length());
1059         byte[] b = fb.apply(SPECIES.length());
1060         byte[] r = fr.apply(SPECIES.length());
1061 
1062         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1063             for (int i = 0; i < a.length; i += SPECIES.length()) {
1064                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1065                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1066                 av.lanewise(VectorOperators.UMIN, bv).intoArray(r, i);
1067             }
1068         }
1069 
1070         bh.consume(r);
1071     }
1072 
1073     @Benchmark
1074     public void UMINMasked(Blackhole bh) {
1075         byte[] a = fa.apply(SPECIES.length());
1076         byte[] b = fb.apply(SPECIES.length());
1077         byte[] r = fr.apply(SPECIES.length());
1078         boolean[] mask = fm.apply(SPECIES.length());
1079         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1080 
1081         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1082             for (int i = 0; i < a.length; i += SPECIES.length()) {
1083                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1084                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1085                 av.lanewise(VectorOperators.UMIN, bv, vmask).intoArray(r, i);
1086             }
1087         }
1088 
1089         bh.consume(r);
1090     }
1091 
1092     @Benchmark
1093     public void UMAX(Blackhole bh) {
1094         byte[] a = fa.apply(SPECIES.length());
1095         byte[] b = fb.apply(SPECIES.length());
1096         byte[] r = fr.apply(SPECIES.length());
1097 
1098         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1099             for (int i = 0; i < a.length; i += SPECIES.length()) {
1100                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1101                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1102                 av.lanewise(VectorOperators.UMAX, bv).intoArray(r, i);
1103             }
1104         }
1105 
1106         bh.consume(r);
1107     }
1108 
1109     @Benchmark
1110     public void UMAXMasked(Blackhole bh) {
1111         byte[] a = fa.apply(SPECIES.length());
1112         byte[] b = fb.apply(SPECIES.length());
1113         byte[] r = fr.apply(SPECIES.length());
1114         boolean[] mask = fm.apply(SPECIES.length());
1115         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1116 
1117         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1118             for (int i = 0; i < a.length; i += SPECIES.length()) {
1119                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1120                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1121                 av.lanewise(VectorOperators.UMAX, bv, vmask).intoArray(r, i);
1122             }
1123         }
1124 
1125         bh.consume(r);
1126     }
1127 
1128     @Benchmark
1129     public void SADD(Blackhole bh) {
1130         byte[] a = fa.apply(SPECIES.length());
1131         byte[] b = fb.apply(SPECIES.length());
1132         byte[] r = fr.apply(SPECIES.length());
1133 
1134         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1135             for (int i = 0; i < a.length; i += SPECIES.length()) {
1136                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1137                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1138                 av.lanewise(VectorOperators.SADD, bv).intoArray(r, i);
1139             }
1140         }
1141 
1142         bh.consume(r);
1143     }
1144 
1145     @Benchmark
1146     public void SADDMasked(Blackhole bh) {
1147         byte[] a = fa.apply(SPECIES.length());
1148         byte[] b = fb.apply(SPECIES.length());
1149         byte[] r = fr.apply(SPECIES.length());
1150         boolean[] mask = fm.apply(SPECIES.length());
1151         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1152 
1153         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1154             for (int i = 0; i < a.length; i += SPECIES.length()) {
1155                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1156                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1157                 av.lanewise(VectorOperators.SADD, bv, vmask).intoArray(r, i);
1158             }
1159         }
1160 
1161         bh.consume(r);
1162     }
1163 
1164     @Benchmark
1165     public void SSUB(Blackhole bh) {
1166         byte[] a = fa.apply(SPECIES.length());
1167         byte[] b = fb.apply(SPECIES.length());
1168         byte[] r = fr.apply(SPECIES.length());
1169 
1170         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1171             for (int i = 0; i < a.length; i += SPECIES.length()) {
1172                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1173                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1174                 av.lanewise(VectorOperators.SSUB, bv).intoArray(r, i);
1175             }
1176         }
1177 
1178         bh.consume(r);
1179     }
1180 
1181     @Benchmark
1182     public void SSUBMasked(Blackhole bh) {
1183         byte[] a = fa.apply(SPECIES.length());
1184         byte[] b = fb.apply(SPECIES.length());
1185         byte[] r = fr.apply(SPECIES.length());
1186         boolean[] mask = fm.apply(SPECIES.length());
1187         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1188 
1189         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1190             for (int i = 0; i < a.length; i += SPECIES.length()) {
1191                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1192                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1193                 av.lanewise(VectorOperators.SSUB, bv, vmask).intoArray(r, i);
1194             }
1195         }
1196 
1197         bh.consume(r);
1198     }
1199 
1200     @Benchmark
1201     public void SUADD(Blackhole bh) {
1202         byte[] a = fa.apply(SPECIES.length());
1203         byte[] b = fb.apply(SPECIES.length());
1204         byte[] r = fr.apply(SPECIES.length());
1205 
1206         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1207             for (int i = 0; i < a.length; i += SPECIES.length()) {
1208                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1209                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1210                 av.lanewise(VectorOperators.SUADD, bv).intoArray(r, i);
1211             }
1212         }
1213 
1214         bh.consume(r);
1215     }
1216 
1217     @Benchmark
1218     public void SUADDMasked(Blackhole bh) {
1219         byte[] a = fa.apply(SPECIES.length());
1220         byte[] b = fb.apply(SPECIES.length());
1221         byte[] r = fr.apply(SPECIES.length());
1222         boolean[] mask = fm.apply(SPECIES.length());
1223         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1224 
1225         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1226             for (int i = 0; i < a.length; i += SPECIES.length()) {
1227                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1228                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1229                 av.lanewise(VectorOperators.SUADD, bv, vmask).intoArray(r, i);
1230             }
1231         }
1232 
1233         bh.consume(r);
1234     }
1235 
1236     @Benchmark
1237     public void SUSUB(Blackhole bh) {
1238         byte[] a = fa.apply(SPECIES.length());
1239         byte[] b = fb.apply(SPECIES.length());
1240         byte[] r = fr.apply(SPECIES.length());
1241 
1242         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1243             for (int i = 0; i < a.length; i += SPECIES.length()) {
1244                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1245                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1246                 av.lanewise(VectorOperators.SUSUB, bv).intoArray(r, i);
1247             }
1248         }
1249 
1250         bh.consume(r);
1251     }
1252 
1253     @Benchmark
1254     public void SUSUBMasked(Blackhole bh) {
1255         byte[] a = fa.apply(SPECIES.length());
1256         byte[] b = fb.apply(SPECIES.length());
1257         byte[] r = fr.apply(SPECIES.length());
1258         boolean[] mask = fm.apply(SPECIES.length());
1259         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1260 
1261         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1262             for (int i = 0; i < a.length; i += SPECIES.length()) {
1263                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1264                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1265                 av.lanewise(VectorOperators.SUSUB, bv, vmask).intoArray(r, i);
1266             }
1267         }
1268 
1269         bh.consume(r);
1270     }
1271 
1272     @Benchmark
1273     public void SUADD_ASSOC(Blackhole bh) {
1274         byte[] a = fa.apply(SPECIES.length());
1275         byte[] b = fb.apply(SPECIES.length());
1276         byte[] c = fc.apply(SPECIES.length());
1277         byte[] rl = fr.apply(SPECIES.length());
1278         byte[] rr = fr.apply(SPECIES.length());
1279 
1280         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1281             for (int i = 0; i < a.length; i += SPECIES.length()) {
1282                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1283                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1284                 ByteVector cv = ByteVector.fromArray(SPECIES, c, i);
1285                 av.lanewise(VectorOperators.SUADD, bv).lanewise(VectorOperators.SUADD, cv).intoArray(rl, i);
1286                 av.lanewise(VectorOperators.SUADD, bv.lanewise(VectorOperators.SUADD, cv)).intoArray(rr, i);
1287             }
1288         }
1289 
1290         bh.consume(r);
1291     }
1292 
1293     @Benchmark
1294     public void SUADDMasked_ASSOC(Blackhole bh) {
1295         byte[] a = fa.apply(SPECIES.length());
1296         byte[] b = fb.apply(SPECIES.length());
1297         byte[] c = fc.apply(SPECIES.length());
1298         boolean[] mask = fm.apply(SPECIES.length());
1299         byte[] rl = fr.apply(SPECIES.length());
1300         byte[] rr = fr.apply(SPECIES.length());
1301 
1302         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1303 
1304         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1305             for (int i = 0; i < a.length; i += SPECIES.length()) {
1306                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1307                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1308                 ByteVector cv = ByteVector.fromArray(SPECIES, c, i);
1309                 av.lanewise(VectorOperators.SUADD, bv, vmask).lanewise(VectorOperators.SUADD, cv, vmask).intoArray(rl, i);
1310                 av.lanewise(VectorOperators.SUADD, bv.lanewise(VectorOperators.SUADD, cv, vmask), vmask).intoArray(rr, i);
1311             }
1312         }
1313 
1314         bh.consume(r);
1315     }
1316 
1317     @Benchmark
1318     public void ANDLanes(Blackhole bh) {
1319         byte[] a = fa.apply(SPECIES.length());
1320         byte ra = -1;
1321 
1322         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1323             ra = -1;
1324             for (int i = 0; i < a.length; i += SPECIES.length()) {
1325                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1326                 ra &= av.reduceLanes(VectorOperators.AND);
1327             }
1328         }
1329         bh.consume(ra);
1330     }
1331 
1332     @Benchmark
1333     public void ANDMaskedLanes(Blackhole bh) {
1334         byte[] a = fa.apply(SPECIES.length());
1335         boolean[] mask = fm.apply(SPECIES.length());
1336         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1337         byte ra = -1;
1338 
1339         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1340             ra = -1;
1341             for (int i = 0; i < a.length; i += SPECIES.length()) {
1342                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1343                 ra &= av.reduceLanes(VectorOperators.AND, vmask);
1344             }
1345         }
1346         bh.consume(ra);
1347     }
1348 
1349     @Benchmark
1350     public void ORLanes(Blackhole bh) {
1351         byte[] a = fa.apply(SPECIES.length());
1352         byte ra = 0;
1353 
1354         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1355             ra = 0;
1356             for (int i = 0; i < a.length; i += SPECIES.length()) {
1357                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1358                 ra |= av.reduceLanes(VectorOperators.OR);
1359             }
1360         }
1361         bh.consume(ra);
1362     }
1363 
1364     @Benchmark
1365     public void ORMaskedLanes(Blackhole bh) {
1366         byte[] a = fa.apply(SPECIES.length());
1367         boolean[] mask = fm.apply(SPECIES.length());
1368         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1369         byte ra = 0;
1370 
1371         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1372             ra = 0;
1373             for (int i = 0; i < a.length; i += SPECIES.length()) {
1374                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1375                 ra |= av.reduceLanes(VectorOperators.OR, vmask);
1376             }
1377         }
1378         bh.consume(ra);
1379     }
1380 
1381     @Benchmark
1382     public void XORLanes(Blackhole bh) {
1383         byte[] a = fa.apply(SPECIES.length());
1384         byte ra = 0;
1385 
1386         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1387             ra = 0;
1388             for (int i = 0; i < a.length; i += SPECIES.length()) {
1389                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1390                 ra ^= av.reduceLanes(VectorOperators.XOR);
1391             }
1392         }
1393         bh.consume(ra);
1394     }
1395 
1396     @Benchmark
1397     public void XORMaskedLanes(Blackhole bh) {
1398         byte[] a = fa.apply(SPECIES.length());
1399         boolean[] mask = fm.apply(SPECIES.length());
1400         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1401         byte ra = 0;
1402 
1403         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1404             ra = 0;
1405             for (int i = 0; i < a.length; i += SPECIES.length()) {
1406                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1407                 ra ^= av.reduceLanes(VectorOperators.XOR, vmask);
1408             }
1409         }
1410         bh.consume(ra);
1411     }
1412 
1413     @Benchmark
1414     public void ADDLanes(Blackhole bh) {
1415         byte[] a = fa.apply(SPECIES.length());
1416         byte ra = 0;
1417 
1418         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1419             ra = 0;
1420             for (int i = 0; i < a.length; i += SPECIES.length()) {
1421                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1422                 ra += av.reduceLanes(VectorOperators.ADD);
1423             }
1424         }
1425         bh.consume(ra);
1426     }
1427 
1428     @Benchmark
1429     public void ADDMaskedLanes(Blackhole bh) {
1430         byte[] a = fa.apply(SPECIES.length());
1431         boolean[] mask = fm.apply(SPECIES.length());
1432         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1433         byte ra = 0;
1434 
1435         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1436             ra = 0;
1437             for (int i = 0; i < a.length; i += SPECIES.length()) {
1438                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1439                 ra += av.reduceLanes(VectorOperators.ADD, vmask);
1440             }
1441         }
1442         bh.consume(ra);
1443     }
1444 
1445     @Benchmark
1446     public void MULLanes(Blackhole bh) {
1447         byte[] a = fa.apply(SPECIES.length());
1448         byte ra = 1;
1449 
1450         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1451             ra = 1;
1452             for (int i = 0; i < a.length; i += SPECIES.length()) {
1453                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1454                 ra *= av.reduceLanes(VectorOperators.MUL);
1455             }
1456         }
1457         bh.consume(ra);
1458     }
1459 
1460     @Benchmark
1461     public void MULMaskedLanes(Blackhole bh) {
1462         byte[] a = fa.apply(SPECIES.length());
1463         boolean[] mask = fm.apply(SPECIES.length());
1464         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1465         byte ra = 1;
1466 
1467         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1468             ra = 1;
1469             for (int i = 0; i < a.length; i += SPECIES.length()) {
1470                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1471                 ra *= av.reduceLanes(VectorOperators.MUL, vmask);
1472             }
1473         }
1474         bh.consume(ra);
1475     }
1476 
1477     @Benchmark
1478     public void MINLanes(Blackhole bh) {
1479         byte[] a = fa.apply(SPECIES.length());
1480         byte ra = Byte.MAX_VALUE;
1481 
1482         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1483             ra = Byte.MAX_VALUE;
1484             for (int i = 0; i < a.length; i += SPECIES.length()) {
1485                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1486                 ra = (byte) Math.min(ra, av.reduceLanes(VectorOperators.MIN));
1487             }
1488         }
1489         bh.consume(ra);
1490     }
1491 
1492     @Benchmark
1493     public void MINMaskedLanes(Blackhole bh) {
1494         byte[] a = fa.apply(SPECIES.length());
1495         boolean[] mask = fm.apply(SPECIES.length());
1496         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1497         byte ra = Byte.MAX_VALUE;
1498 
1499         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1500             ra = Byte.MAX_VALUE;
1501             for (int i = 0; i < a.length; i += SPECIES.length()) {
1502                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1503                 ra = (byte) Math.min(ra, av.reduceLanes(VectorOperators.MIN, vmask));
1504             }
1505         }
1506         bh.consume(ra);
1507     }
1508 
1509     @Benchmark
1510     public void MAXLanes(Blackhole bh) {
1511         byte[] a = fa.apply(SPECIES.length());
1512         byte ra = Byte.MIN_VALUE;
1513 
1514         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1515             ra = Byte.MIN_VALUE;
1516             for (int i = 0; i < a.length; i += SPECIES.length()) {
1517                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1518                 ra = (byte) Math.max(ra, av.reduceLanes(VectorOperators.MAX));
1519             }
1520         }
1521         bh.consume(ra);
1522     }
1523 
1524     @Benchmark
1525     public void MAXMaskedLanes(Blackhole bh) {
1526         byte[] a = fa.apply(SPECIES.length());
1527         boolean[] mask = fm.apply(SPECIES.length());
1528         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1529         byte ra = Byte.MIN_VALUE;
1530 
1531         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1532             ra = Byte.MIN_VALUE;
1533             for (int i = 0; i < a.length; i += SPECIES.length()) {
1534                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1535                 ra = (byte) Math.max(ra, av.reduceLanes(VectorOperators.MAX, vmask));
1536             }
1537         }
1538         bh.consume(ra);
1539     }
1540 
1541     @Benchmark
1542     public void UMINLanes(Blackhole bh) {
1543         byte[] a = fa.apply(SPECIES.length());
1544         byte ra = Byte.MAX_VALUE;
1545 
1546         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1547             ra = Byte.MAX_VALUE;
1548             for (int i = 0; i < a.length; i += SPECIES.length()) {
1549                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1550                 ra = (byte) VectorMath.minUnsigned(ra, av.reduceLanes(VectorOperators.UMIN));
1551             }
1552         }
1553         bh.consume(ra);
1554     }
1555 
1556     @Benchmark
1557     public void UMINMaskedLanes(Blackhole bh) {
1558         byte[] a = fa.apply(SPECIES.length());
1559         boolean[] mask = fm.apply(SPECIES.length());
1560         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1561         byte ra = Byte.MAX_VALUE;
1562 
1563         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1564             ra = Byte.MAX_VALUE;
1565             for (int i = 0; i < a.length; i += SPECIES.length()) {
1566                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1567                 ra = (byte) VectorMath.minUnsigned(ra, av.reduceLanes(VectorOperators.UMIN, vmask));
1568             }
1569         }
1570         bh.consume(ra);
1571     }
1572 
1573     @Benchmark
1574     public void UMAXLanes(Blackhole bh) {
1575         byte[] a = fa.apply(SPECIES.length());
1576         byte ra = Byte.MIN_VALUE;
1577 
1578         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1579             ra = Byte.MIN_VALUE;
1580             for (int i = 0; i < a.length; i += SPECIES.length()) {
1581                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1582                 ra = (byte) VectorMath.maxUnsigned(ra, av.reduceLanes(VectorOperators.UMAX));
1583             }
1584         }
1585         bh.consume(ra);
1586     }
1587 
1588     @Benchmark
1589     public void UMAXMaskedLanes(Blackhole bh) {
1590         byte[] a = fa.apply(SPECIES.length());
1591         boolean[] mask = fm.apply(SPECIES.length());
1592         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1593         byte ra = Byte.MIN_VALUE;
1594 
1595         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1596             ra = Byte.MIN_VALUE;
1597             for (int i = 0; i < a.length; i += SPECIES.length()) {
1598                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1599                 ra = (byte) VectorMath.maxUnsigned(ra, av.reduceLanes(VectorOperators.UMAX, vmask));
1600             }
1601         }
1602         bh.consume(ra);
1603     }
1604 
1605     @Benchmark
1606     public void FIRST_NONZEROLanes(Blackhole bh) {
1607         byte[] a = fa.apply(SPECIES.length());
1608         byte ra = (byte) 0;
1609 
1610         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1611             ra = (byte) 0;
1612             for (int i = 0; i < a.length; i += SPECIES.length()) {
1613                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1614                 ra = firstNonZero(ra, av.reduceLanes(VectorOperators.FIRST_NONZERO));
1615             }
1616         }
1617         bh.consume(ra);
1618     }
1619 
1620     @Benchmark
1621     public void FIRST_NONZEROMaskedLanes(Blackhole bh) {
1622         byte[] a = fa.apply(SPECIES.length());
1623         boolean[] mask = fm.apply(SPECIES.length());
1624         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1625         byte ra = (byte) 0;
1626 
1627         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1628             ra = (byte) 0;
1629             for (int i = 0; i < a.length; i += SPECIES.length()) {
1630                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1631                 ra = firstNonZero(ra, av.reduceLanes(VectorOperators.FIRST_NONZERO, vmask));
1632             }
1633         }
1634         bh.consume(ra);
1635     }
1636 
1637     @Benchmark
1638     public void anyTrue(Blackhole bh) {
1639         boolean[] mask = fm.apply(SPECIES.length());
1640         boolean[] r = fmr.apply(SPECIES.length());
1641 
1642         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1643             for (int i = 0; i < mask.length; i += SPECIES.length()) {
1644                 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, i);
1645                 r[i] = vmask.anyTrue();
1646             }
1647         }
1648 
1649         bh.consume(r);
1650     }
1651 
1652     @Benchmark
1653     public void allTrue(Blackhole bh) {
1654         boolean[] mask = fm.apply(SPECIES.length());
1655         boolean[] r = fmr.apply(SPECIES.length());
1656 
1657         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1658             for (int i = 0; i < mask.length; i += SPECIES.length()) {
1659                 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, i);
1660                 r[i] = vmask.allTrue();
1661             }
1662         }
1663 
1664         bh.consume(r);
1665     }
1666 
1667     @Benchmark
1668     public void SUADD_REDUCTION(Blackhole bh) {
1669         byte[] a = fa.apply(SPECIES.length());
1670         byte[] r = fr.apply(SPECIES.length());
1671         byte ra = 0;
1672 
1673         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1674             for (int i = 0; i < a.length; i += SPECIES.length()) {
1675                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1676                 r[i] = av.reduceLanes(VectorOperators.SUADD);
1677             }
1678         }
1679 
1680         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1681             ra = 0;
1682             for (int i = 0; i < a.length; i += SPECIES.length()) {
1683                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1684                 ra = (byte) VectorMath.addSaturatingUnsigned(ra, av.reduceLanes(VectorOperators.SUADD));
1685             }
1686         }
1687 
1688         bh.consume(r);
1689     }
1690 
1691     @Benchmark
1692     public void SUADDMasked_REDUCTION(Blackhole bh) {
1693         byte[] a = fa.apply(SPECIES.length());
1694         byte[] r = fr.apply(SPECIES.length());
1695         boolean[] mask = fm.apply(SPECIES.length());
1696         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1697         byte ra = 0;
1698 
1699         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1700             for (int i = 0; i < a.length; i += SPECIES.length()) {
1701                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1702                 r[i] = av.reduceLanes(VectorOperators.SUADD, vmask);
1703             }
1704         }
1705 
1706         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1707             ra = 0;
1708             for (int i = 0; i < a.length; i += SPECIES.length()) {
1709                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1710                 ra = (byte) VectorMath.addSaturatingUnsigned(ra, av.reduceLanes(VectorOperators.SUADD, vmask));
1711             }
1712         }
1713 
1714         bh.consume(r);
1715     }
1716 
1717     @Benchmark
1718     public void withLane(Blackhole bh) {
1719         byte[] a = fa.apply(SPECIES.length());
1720         byte[] b = fb.apply(SPECIES.length());
1721         byte[] r = fr.apply(SPECIES.length());
1722 
1723         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1724             for (int i = 0, j = 0; i < a.length; i += SPECIES.length()) {
1725                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1726                 av.withLane(j, b[i + j]).intoArray(r, i);
1727                 a[i + j] = b[i + j];
1728                 j = (j + 1) & (SPECIES.length() - 1);
1729             }
1730         }
1731 
1732         bh.consume(r);
1733     }
1734 
1735     @Benchmark
1736     public Object IS_DEFAULT() {
1737         byte[] a = fa.apply(size);
1738         boolean[] ms = fmt.apply(size);
1739         VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, 0);
1740 
1741         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1742             for (int i = 0; i < a.length; i += SPECIES.length()) {
1743                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1744 
1745                 // accumulate results, so JIT can't eliminate relevant computations
1746                 m = m.and(av.test(VectorOperators.IS_DEFAULT));
1747             }
1748         }
1749 
1750         return m;
1751     }
1752 
1753     @Benchmark
1754     public Object IS_NEGATIVE() {
1755         byte[] a = fa.apply(size);
1756         boolean[] ms = fmt.apply(size);
1757         VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, 0);
1758 
1759         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1760             for (int i = 0; i < a.length; i += SPECIES.length()) {
1761                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1762 
1763                 // accumulate results, so JIT can't eliminate relevant computations
1764                 m = m.and(av.test(VectorOperators.IS_NEGATIVE));
1765             }
1766         }
1767 
1768         return m;
1769     }
1770     @Benchmark
1771     public Object LT() {
1772         byte[] a = fa.apply(size);
1773         byte[] b = fb.apply(size);
1774         boolean[] ms = fmt.apply(size);
1775         VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, 0);
1776 
1777         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1778             for (int i = 0; i < a.length; i += SPECIES.length()) {
1779                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1780                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1781 
1782                 // accumulate results, so JIT can't eliminate relevant computations
1783                 m = m.and(av.compare(VectorOperators.LT, bv));
1784             }
1785         }
1786 
1787         return m;
1788     }
1789     @Benchmark
1790     public Object GT() {
1791         byte[] a = fa.apply(size);
1792         byte[] b = fb.apply(size);
1793         boolean[] ms = fmt.apply(size);
1794         VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, 0);
1795 
1796         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1797             for (int i = 0; i < a.length; i += SPECIES.length()) {
1798                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1799                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1800 
1801                 // accumulate results, so JIT can't eliminate relevant computations
1802                 m = m.and(av.compare(VectorOperators.GT, bv));
1803             }
1804         }
1805 
1806         return m;
1807     }
1808     @Benchmark
1809     public Object EQ() {
1810         byte[] a = fa.apply(size);
1811         byte[] b = fb.apply(size);
1812         boolean[] ms = fmt.apply(size);
1813         VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, 0);
1814 
1815         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1816             for (int i = 0; i < a.length; i += SPECIES.length()) {
1817                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1818                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1819 
1820                 // accumulate results, so JIT can't eliminate relevant computations
1821                 m = m.and(av.compare(VectorOperators.EQ, bv));
1822             }
1823         }
1824 
1825         return m;
1826     }
1827     @Benchmark
1828     public Object NE() {
1829         byte[] a = fa.apply(size);
1830         byte[] b = fb.apply(size);
1831         boolean[] ms = fmt.apply(size);
1832         VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, 0);
1833 
1834         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1835             for (int i = 0; i < a.length; i += SPECIES.length()) {
1836                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1837                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1838 
1839                 // accumulate results, so JIT can't eliminate relevant computations
1840                 m = m.and(av.compare(VectorOperators.NE, bv));
1841             }
1842         }
1843 
1844         return m;
1845     }
1846     @Benchmark
1847     public Object LE() {
1848         byte[] a = fa.apply(size);
1849         byte[] b = fb.apply(size);
1850         boolean[] ms = fmt.apply(size);
1851         VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, 0);
1852 
1853         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1854             for (int i = 0; i < a.length; i += SPECIES.length()) {
1855                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1856                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1857 
1858                 // accumulate results, so JIT can't eliminate relevant computations
1859                 m = m.and(av.compare(VectorOperators.LE, bv));
1860             }
1861         }
1862 
1863         return m;
1864     }
1865     @Benchmark
1866     public Object GE() {
1867         byte[] a = fa.apply(size);
1868         byte[] b = fb.apply(size);
1869         boolean[] ms = fmt.apply(size);
1870         VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, 0);
1871 
1872         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1873             for (int i = 0; i < a.length; i += SPECIES.length()) {
1874                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1875                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1876 
1877                 // accumulate results, so JIT can't eliminate relevant computations
1878                 m = m.and(av.compare(VectorOperators.GE, bv));
1879             }
1880         }
1881 
1882         return m;
1883     }
1884     @Benchmark
1885     public Object ULT() {
1886         byte[] a = fa.apply(size);
1887         byte[] b = fb.apply(size);
1888         boolean[] ms = fmt.apply(size);
1889         VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, 0);
1890 
1891         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1892             for (int i = 0; i < a.length; i += SPECIES.length()) {
1893                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1894                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1895 
1896                 // accumulate results, so JIT can't eliminate relevant computations
1897                 m = m.and(av.compare(VectorOperators.ULT, bv));
1898             }
1899         }
1900 
1901         return m;
1902     }
1903     @Benchmark
1904     public Object UGT() {
1905         byte[] a = fa.apply(size);
1906         byte[] b = fb.apply(size);
1907         boolean[] ms = fmt.apply(size);
1908         VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, 0);
1909 
1910         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1911             for (int i = 0; i < a.length; i += SPECIES.length()) {
1912                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1913                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1914 
1915                 // accumulate results, so JIT can't eliminate relevant computations
1916                 m = m.and(av.compare(VectorOperators.UGT, bv));
1917             }
1918         }
1919 
1920         return m;
1921     }
1922     @Benchmark
1923     public Object ULE() {
1924         byte[] a = fa.apply(size);
1925         byte[] b = fb.apply(size);
1926         boolean[] ms = fmt.apply(size);
1927         VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, 0);
1928 
1929         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1930             for (int i = 0; i < a.length; i += SPECIES.length()) {
1931                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1932                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1933 
1934                 // accumulate results, so JIT can't eliminate relevant computations
1935                 m = m.and(av.compare(VectorOperators.ULE, bv));
1936             }
1937         }
1938 
1939         return m;
1940     }
1941     @Benchmark
1942     public Object UGE() {
1943         byte[] a = fa.apply(size);
1944         byte[] b = fb.apply(size);
1945         boolean[] ms = fmt.apply(size);
1946         VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, 0);
1947 
1948         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1949             for (int i = 0; i < a.length; i += SPECIES.length()) {
1950                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1951                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1952 
1953                 // accumulate results, so JIT can't eliminate relevant computations
1954                 m = m.and(av.compare(VectorOperators.UGE, bv));
1955             }
1956         }
1957 
1958         return m;
1959     }
1960 
1961     @Benchmark
1962     public void blend(Blackhole bh) {
1963         byte[] a = fa.apply(SPECIES.length());
1964         byte[] b = fb.apply(SPECIES.length());
1965         byte[] r = fr.apply(SPECIES.length());
1966         boolean[] mask = fm.apply(SPECIES.length());
1967         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1968 
1969         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1970             for (int i = 0; i < a.length; i += SPECIES.length()) {
1971                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1972                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1973                 av.blend(bv, vmask).intoArray(r, i);
1974             }
1975         }
1976 
1977         bh.consume(r);
1978     }
1979 
1980     @Benchmark
1981     public void rearrange(Blackhole bh) {
1982         byte[] a = fa.apply(SPECIES.length());
1983         int[] order = fs.apply(a.length, SPECIES.length());
1984         byte[] r = fr.apply(SPECIES.length());
1985 
1986         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1987             for (int i = 0; i < a.length; i += SPECIES.length()) {
1988                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1989                 av.rearrange(VectorShuffle.fromArray(SPECIES, order, i)).intoArray(r, i);
1990             }
1991         }
1992 
1993         bh.consume(r);
1994     }
1995     @Benchmark
1996     public Object compress() {
1997         byte[] a = fa.apply(size);
1998         byte[] r = fb.apply(size);
1999         boolean[] ms = fmt.apply(size);
2000         VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, 0);
2001 
2002         for (int ic = 0; ic < INVOC_COUNT; ic++) {
2003             for (int i = 0; i < a.length; i += SPECIES.length()) {
2004                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
2005                 av.compress(m).intoArray(r, i);
2006             }
2007         }
2008 
2009         return r;
2010     }
2011 
2012     @Benchmark
2013     public Object expand() {
2014         byte[] a = fa.apply(size);
2015         byte[] r = fb.apply(size);
2016         boolean[] ms = fmt.apply(size);
2017         VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, 0);
2018 
2019         for (int ic = 0; ic < INVOC_COUNT; ic++) {
2020             for (int i = 0; i < a.length; i += SPECIES.length()) {
2021                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
2022                 av.expand(m).intoArray(r, i);
2023             }
2024         }
2025 
2026         return r;
2027     }
2028 
2029     @Benchmark
2030     public Object maskCompress() {
2031         boolean[] ms = fmt.apply(size);
2032         boolean[] rs = fmt.apply(size);
2033 
2034         for (int ic = 0; ic < INVOC_COUNT; ic++) {
2035             for (int i = 0, j = 0; i < ms.length; i += SPECIES.length()) {
2036                 VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, i);
2037                 m.compress().intoArray(rs, j);
2038                 j += m.trueCount();
2039             }
2040         }
2041 
2042         return rs;
2043     }
2044 
    /**
     * Benchmarks {@code ByteVector.lane(int)}.
     *
     * Every lane of each vector loaded from the {@code fa} array is extracted
     * and written to the matching slot of the result array. For lane counts of
     * 1, 2, 4, 8, 16, 32 and 64 the extraction is written out with literal lane
     * indices; any other lane count falls back to an indexed loop.
     *
     * @param bh JMH sink that receives the result array
     */
    @Benchmark
    public void laneextract(Blackhole bh) {
        byte[] a = fa.apply(SPECIES.length());
        byte[] r = fr.apply(SPECIES.length());

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                ByteVector av = ByteVector.fromArray(SPECIES, a, i);
                int num_lanes = SPECIES.length();
                // Manually unroll because full unroll happens after intrinsification.
                // Unroll is needed because get intrinsic requires for index to be a known constant.
                if (num_lanes == 1) {
                    r[i]=av.lane(0);
                } else if (num_lanes == 2) {
                    r[i]=av.lane(0);
                    r[i+1]=av.lane(1);
                } else if (num_lanes == 4) {
                    r[i]=av.lane(0);
                    r[i+1]=av.lane(1);
                    r[i+2]=av.lane(2);
                    r[i+3]=av.lane(3);
                } else if (num_lanes == 8) {
                    r[i]=av.lane(0);
                    r[i+1]=av.lane(1);
                    r[i+2]=av.lane(2);
                    r[i+3]=av.lane(3);
                    r[i+4]=av.lane(4);
                    r[i+5]=av.lane(5);
                    r[i+6]=av.lane(6);
                    r[i+7]=av.lane(7);
                } else if (num_lanes == 16) {
                    r[i]=av.lane(0);
                    r[i+1]=av.lane(1);
                    r[i+2]=av.lane(2);
                    r[i+3]=av.lane(3);
                    r[i+4]=av.lane(4);
                    r[i+5]=av.lane(5);
                    r[i+6]=av.lane(6);
                    r[i+7]=av.lane(7);
                    r[i+8]=av.lane(8);
                    r[i+9]=av.lane(9);
                    r[i+10]=av.lane(10);
                    r[i+11]=av.lane(11);
                    r[i+12]=av.lane(12);
                    r[i+13]=av.lane(13);
                    r[i+14]=av.lane(14);
                    r[i+15]=av.lane(15);
                } else if (num_lanes == 32) {
                    r[i]=av.lane(0);
                    r[i+1]=av.lane(1);
                    r[i+2]=av.lane(2);
                    r[i+3]=av.lane(3);
                    r[i+4]=av.lane(4);
                    r[i+5]=av.lane(5);
                    r[i+6]=av.lane(6);
                    r[i+7]=av.lane(7);
                    r[i+8]=av.lane(8);
                    r[i+9]=av.lane(9);
                    r[i+10]=av.lane(10);
                    r[i+11]=av.lane(11);
                    r[i+12]=av.lane(12);
                    r[i+13]=av.lane(13);
                    r[i+14]=av.lane(14);
                    r[i+15]=av.lane(15);
                    r[i+16]=av.lane(16);
                    r[i+17]=av.lane(17);
                    r[i+18]=av.lane(18);
                    r[i+19]=av.lane(19);
                    r[i+20]=av.lane(20);
                    r[i+21]=av.lane(21);
                    r[i+22]=av.lane(22);
                    r[i+23]=av.lane(23);
                    r[i+24]=av.lane(24);
                    r[i+25]=av.lane(25);
                    r[i+26]=av.lane(26);
                    r[i+27]=av.lane(27);
                    r[i+28]=av.lane(28);
                    r[i+29]=av.lane(29);
                    r[i+30]=av.lane(30);
                    r[i+31]=av.lane(31);
                } else if (num_lanes == 64) {
                    r[i]=av.lane(0);
                    r[i+1]=av.lane(1);
                    r[i+2]=av.lane(2);
                    r[i+3]=av.lane(3);
                    r[i+4]=av.lane(4);
                    r[i+5]=av.lane(5);
                    r[i+6]=av.lane(6);
                    r[i+7]=av.lane(7);
                    r[i+8]=av.lane(8);
                    r[i+9]=av.lane(9);
                    r[i+10]=av.lane(10);
                    r[i+11]=av.lane(11);
                    r[i+12]=av.lane(12);
                    r[i+13]=av.lane(13);
                    r[i+14]=av.lane(14);
                    r[i+15]=av.lane(15);
                    r[i+16]=av.lane(16);
                    r[i+17]=av.lane(17);
                    r[i+18]=av.lane(18);
                    r[i+19]=av.lane(19);
                    r[i+20]=av.lane(20);
                    r[i+21]=av.lane(21);
                    r[i+22]=av.lane(22);
                    r[i+23]=av.lane(23);
                    r[i+24]=av.lane(24);
                    r[i+25]=av.lane(25);
                    r[i+26]=av.lane(26);
                    r[i+27]=av.lane(27);
                    r[i+28]=av.lane(28);
                    r[i+29]=av.lane(29);
                    r[i+30]=av.lane(30);
                    r[i+31]=av.lane(31);
                    r[i+32]=av.lane(32);
                    r[i+33]=av.lane(33);
                    r[i+34]=av.lane(34);
                    r[i+35]=av.lane(35);
                    r[i+36]=av.lane(36);
                    r[i+37]=av.lane(37);
                    r[i+38]=av.lane(38);
                    r[i+39]=av.lane(39);
                    r[i+40]=av.lane(40);
                    r[i+41]=av.lane(41);
                    r[i+42]=av.lane(42);
                    r[i+43]=av.lane(43);
                    r[i+44]=av.lane(44);
                    r[i+45]=av.lane(45);
                    r[i+46]=av.lane(46);
                    r[i+47]=av.lane(47);
                    r[i+48]=av.lane(48);
                    r[i+49]=av.lane(49);
                    r[i+50]=av.lane(50);
                    r[i+51]=av.lane(51);
                    r[i+52]=av.lane(52);
                    r[i+53]=av.lane(53);
                    r[i+54]=av.lane(54);
                    r[i+55]=av.lane(55);
                    r[i+56]=av.lane(56);
                    r[i+57]=av.lane(57);
                    r[i+58]=av.lane(58);
                    r[i+59]=av.lane(59);
                    r[i+60]=av.lane(60);
                    r[i+61]=av.lane(61);
                    r[i+62]=av.lane(62);
                    r[i+63]=av.lane(63);
                } else {
                    // Fallback for any other lane count: the lane index is a
                    // loop variable here rather than a literal.
                    for (int j = 0; j < SPECIES.length(); j++) {
                        r[i+j]=av.lane(j);
                    }
                }
            }
        }

        bh.consume(r);
    }
2200 
2201     @Benchmark
2202     public void broadcast(Blackhole bh) {
2203         byte[] a = fa.apply(SPECIES.length());
2204         byte[] r = new byte[a.length];
2205 
2206         for (int ic = 0; ic < INVOC_COUNT; ic++) {
2207             for (int i = 0; i < a.length; i += SPECIES.length()) {
2208                 ByteVector.broadcast(SPECIES, a[i]).intoArray(r, i);
2209             }
2210         }
2211 
2212         bh.consume(r);
2213     }
2214 
2215     @Benchmark
2216     public void zero(Blackhole bh) {
2217         byte[] a = fa.apply(SPECIES.length());
2218         byte[] r = new byte[a.length];
2219 
2220         for (int ic = 0; ic < INVOC_COUNT; ic++) {
2221             for (int i = 0; i < a.length; i += SPECIES.length()) {
2222                 ByteVector.zero(SPECIES).intoArray(a, i);
2223             }
2224         }
2225 
2226         bh.consume(r);
2227     }
2228 
2229     @Benchmark
2230     public void sliceUnary(Blackhole bh) {
2231         byte[] a = fa.apply(SPECIES.length());
2232         byte[] r = new byte[a.length];
2233         int origin = RAND.nextInt(SPECIES.length());
2234         for (int ic = 0; ic < INVOC_COUNT; ic++) {
2235             for (int i = 0; i < a.length; i += SPECIES.length()) {
2236                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
2237                 av.slice(origin).intoArray(r, i);
2238             }
2239         }
2240 
2241         bh.consume(r);
2242     }
2243 
2244     @Benchmark
2245     public void sliceBinary(Blackhole bh) {
2246         byte[] a = fa.apply(SPECIES.length());
2247         byte[] b = fb.apply(SPECIES.length());
2248         byte[] r = new byte[a.length];
2249         int origin = RAND.nextInt(SPECIES.length());
2250         for (int ic = 0; ic < INVOC_COUNT; ic++) {
2251             for (int i = 0; i < a.length; i += SPECIES.length()) {
2252                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
2253                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
2254                 av.slice(origin, bv).intoArray(r, i);
2255             }
2256         }
2257 
2258         bh.consume(r);
2259     }
2260 
2261     @Benchmark
2262     public void sliceMasked(Blackhole bh) {
2263         byte[] a = fa.apply(SPECIES.length());
2264         byte[] b = fb.apply(SPECIES.length());
2265         boolean[] mask = fm.apply(SPECIES.length());
2266         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
2267 
2268         byte[] r = new byte[a.length];
2269         int origin = RAND.nextInt(SPECIES.length());
2270         for (int ic = 0; ic < INVOC_COUNT; ic++) {
2271             for (int i = 0; i < a.length; i += SPECIES.length()) {
2272                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
2273                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
2274                 av.slice(origin, bv, vmask).intoArray(r, i);
2275             }
2276         }
2277 
2278         bh.consume(r);
2279     }
2280 
2281     @Benchmark
2282     public void unsliceUnary(Blackhole bh) {
2283         byte[] a = fa.apply(SPECIES.length());
2284         byte[] r = new byte[a.length];
2285         int origin = RAND.nextInt(SPECIES.length());
2286         for (int ic = 0; ic < INVOC_COUNT; ic++) {
2287             for (int i = 0; i < a.length; i += SPECIES.length()) {
2288                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
2289                 av.unslice(origin).intoArray(r, i);
2290             }
2291         }
2292 
2293         bh.consume(r);
2294     }
2295 
2296     @Benchmark
2297     public void unsliceBinary(Blackhole bh) {
2298         byte[] a = fa.apply(SPECIES.length());
2299         byte[] b = fb.apply(SPECIES.length());
2300         byte[] r = new byte[a.length];
2301         int origin = RAND.nextInt(SPECIES.length());
2302         int part = RAND.nextInt(2);
2303         for (int ic = 0; ic < INVOC_COUNT; ic++) {
2304             for (int i = 0; i < a.length; i += SPECIES.length()) {
2305                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
2306                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
2307                 av.unslice(origin, bv, part).intoArray(r, i);
2308             }
2309         }
2310 
2311         bh.consume(r);
2312     }
2313 
2314     @Benchmark
2315     public void unsliceMasked(Blackhole bh) {
2316         byte[] a = fa.apply(SPECIES.length());
2317         byte[] b = fb.apply(SPECIES.length());
2318         boolean[] mask = fm.apply(SPECIES.length());
2319         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
2320         byte[] r = new byte[a.length];
2321         int origin = RAND.nextInt(SPECIES.length());
2322         int part = RAND.nextInt(2);
2323         for (int ic = 0; ic < INVOC_COUNT; ic++) {
2324             for (int i = 0; i < a.length; i += SPECIES.length()) {
2325                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
2326                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
2327                 av.unslice(origin, bv, part, vmask).intoArray(r, i);
2328             }
2329         }
2330 
2331         bh.consume(r);
2332     }
2333 
2334     @Benchmark
2335     public void BITWISE_BLEND(Blackhole bh) {
2336         byte[] a = fa.apply(SPECIES.length());
2337         byte[] b = fb.apply(SPECIES.length());
2338         byte[] c = fc.apply(SPECIES.length());
2339         byte[] r = fr.apply(SPECIES.length());
2340 
2341         for (int ic = 0; ic < INVOC_COUNT; ic++) {
2342             for (int i = 0; i < a.length; i += SPECIES.length()) {
2343                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
2344                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
2345                 ByteVector cv = ByteVector.fromArray(SPECIES, c, i);
2346                 av.lanewise(VectorOperators.BITWISE_BLEND, bv, cv).intoArray(r, i);
2347             }
2348         }
2349 
2350         bh.consume(r);
2351     }
2352 
2353     @Benchmark
2354     public void BITWISE_BLENDMasked(Blackhole bh) {
2355         byte[] a = fa.apply(SPECIES.length());
2356         byte[] b = fb.apply(SPECIES.length());
2357         byte[] c = fc.apply(SPECIES.length());
2358         byte[] r = fr.apply(SPECIES.length());
2359         boolean[] mask = fm.apply(SPECIES.length());
2360         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
2361 
2362         for (int ic = 0; ic < INVOC_COUNT; ic++) {
2363             for (int i = 0; i < a.length; i += SPECIES.length()) {
2364                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
2365                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
2366                 ByteVector cv = ByteVector.fromArray(SPECIES, c, i);
2367                 av.lanewise(VectorOperators.BITWISE_BLEND, bv, cv, vmask).intoArray(r, i);
2368             }
2369         }
2370 
2371         bh.consume(r);
2372     }
2373 
2374     @Benchmark
2375     public void NEG(Blackhole bh) {
2376         byte[] a = fa.apply(SPECIES.length());
2377         byte[] r = fr.apply(SPECIES.length());
2378 
2379         for (int ic = 0; ic < INVOC_COUNT; ic++) {
2380             for (int i = 0; i < a.length; i += SPECIES.length()) {
2381                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
2382                 av.lanewise(VectorOperators.NEG).intoArray(r, i);
2383             }
2384         }
2385 
2386         bh.consume(r);
2387     }
2388 
2389     @Benchmark
2390     public void NEGMasked(Blackhole bh) {
2391         byte[] a = fa.apply(SPECIES.length());
2392         byte[] r = fr.apply(SPECIES.length());
2393         boolean[] mask = fm.apply(SPECIES.length());
2394         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
2395 
2396         for (int ic = 0; ic < INVOC_COUNT; ic++) {
2397             for (int i = 0; i < a.length; i += SPECIES.length()) {
2398                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
2399                 av.lanewise(VectorOperators.NEG, vmask).intoArray(r, i);
2400             }
2401         }
2402 
2403         bh.consume(r);
2404     }
2405 
2406     @Benchmark
2407     public void ABS(Blackhole bh) {
2408         byte[] a = fa.apply(SPECIES.length());
2409         byte[] r = fr.apply(SPECIES.length());
2410 
2411         for (int ic = 0; ic < INVOC_COUNT; ic++) {
2412             for (int i = 0; i < a.length; i += SPECIES.length()) {
2413                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
2414                 av.lanewise(VectorOperators.ABS).intoArray(r, i);
2415             }
2416         }
2417 
2418         bh.consume(r);
2419     }
2420 
2421     @Benchmark
2422     public void ABSMasked(Blackhole bh) {
2423         byte[] a = fa.apply(SPECIES.length());
2424         byte[] r = fr.apply(SPECIES.length());
2425         boolean[] mask = fm.apply(SPECIES.length());
2426         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
2427 
2428         for (int ic = 0; ic < INVOC_COUNT; ic++) {
2429             for (int i = 0; i < a.length; i += SPECIES.length()) {
2430                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
2431                 av.lanewise(VectorOperators.ABS, vmask).intoArray(r, i);
2432             }
2433         }
2434 
2435         bh.consume(r);
2436     }
2437 
2438     @Benchmark
2439     public void NOT(Blackhole bh) {
2440         byte[] a = fa.apply(SPECIES.length());
2441         byte[] r = fr.apply(SPECIES.length());
2442 
2443         for (int ic = 0; ic < INVOC_COUNT; ic++) {
2444             for (int i = 0; i < a.length; i += SPECIES.length()) {
2445                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
2446                 av.lanewise(VectorOperators.NOT).intoArray(r, i);
2447             }
2448         }
2449 
2450         bh.consume(r);
2451     }
2452 
2453     @Benchmark
2454     public void NOTMasked(Blackhole bh) {
2455         byte[] a = fa.apply(SPECIES.length());
2456         byte[] r = fr.apply(SPECIES.length());
2457         boolean[] mask = fm.apply(SPECIES.length());
2458         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
2459 
2460         for (int ic = 0; ic < INVOC_COUNT; ic++) {
2461             for (int i = 0; i < a.length; i += SPECIES.length()) {
2462                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
2463                 av.lanewise(VectorOperators.NOT, vmask).intoArray(r, i);
2464             }
2465         }
2466 
2467         bh.consume(r);
2468     }
2469 
2470     @Benchmark
2471     public void ZOMO(Blackhole bh) {
2472         byte[] a = fa.apply(SPECIES.length());
2473         byte[] r = fr.apply(SPECIES.length());
2474 
2475         for (int ic = 0; ic < INVOC_COUNT; ic++) {
2476             for (int i = 0; i < a.length; i += SPECIES.length()) {
2477                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
2478                 av.lanewise(VectorOperators.ZOMO).intoArray(r, i);
2479             }
2480         }
2481 
2482         bh.consume(r);
2483     }
2484 
2485     @Benchmark
2486     public void ZOMOMasked(Blackhole bh) {
2487         byte[] a = fa.apply(SPECIES.length());
2488         byte[] r = fr.apply(SPECIES.length());
2489         boolean[] mask = fm.apply(SPECIES.length());
2490         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
2491 
2492         for (int ic = 0; ic < INVOC_COUNT; ic++) {
2493             for (int i = 0; i < a.length; i += SPECIES.length()) {
2494                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
2495                 av.lanewise(VectorOperators.ZOMO, vmask).intoArray(r, i);
2496             }
2497         }
2498 
2499         bh.consume(r);
2500     }
2501 
2502     @Benchmark
2503     public void BIT_COUNT(Blackhole bh) {
2504         byte[] a = fa.apply(SPECIES.length());
2505         byte[] r = fr.apply(SPECIES.length());
2506 
2507         for (int ic = 0; ic < INVOC_COUNT; ic++) {
2508             for (int i = 0; i < a.length; i += SPECIES.length()) {
2509                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
2510                 av.lanewise(VectorOperators.BIT_COUNT).intoArray(r, i);
2511             }
2512         }
2513 
2514         bh.consume(r);
2515     }
2516 
2517     @Benchmark
2518     public void BIT_COUNTMasked(Blackhole bh) {
2519         byte[] a = fa.apply(SPECIES.length());
2520         byte[] r = fr.apply(SPECIES.length());
2521         boolean[] mask = fm.apply(SPECIES.length());
2522         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
2523 
2524         for (int ic = 0; ic < INVOC_COUNT; ic++) {
2525             for (int i = 0; i < a.length; i += SPECIES.length()) {
2526                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
2527                 av.lanewise(VectorOperators.BIT_COUNT, vmask).intoArray(r, i);
2528             }
2529         }
2530 
2531         bh.consume(r);
2532     }
2533 
2534     @Benchmark
2535     public void TRAILING_ZEROS_COUNT(Blackhole bh) {
2536         byte[] a = fa.apply(SPECIES.length());
2537         byte[] r = fr.apply(SPECIES.length());
2538 
2539         for (int ic = 0; ic < INVOC_COUNT; ic++) {
2540             for (int i = 0; i < a.length; i += SPECIES.length()) {
2541                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
2542                 av.lanewise(VectorOperators.TRAILING_ZEROS_COUNT).intoArray(r, i);
2543             }
2544         }
2545 
2546         bh.consume(r);
2547     }
2548 
2549     @Benchmark
2550     public void TRAILING_ZEROS_COUNTMasked(Blackhole bh) {
2551         byte[] a = fa.apply(SPECIES.length());
2552         byte[] r = fr.apply(SPECIES.length());
2553         boolean[] mask = fm.apply(SPECIES.length());
2554         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
2555 
2556         for (int ic = 0; ic < INVOC_COUNT; ic++) {
2557             for (int i = 0; i < a.length; i += SPECIES.length()) {
2558                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
2559                 av.lanewise(VectorOperators.TRAILING_ZEROS_COUNT, vmask).intoArray(r, i);
2560             }
2561         }
2562 
2563         bh.consume(r);
2564     }
2565 
2566     @Benchmark
2567     public void LEADING_ZEROS_COUNT(Blackhole bh) {
2568         byte[] a = fa.apply(SPECIES.length());
2569         byte[] r = fr.apply(SPECIES.length());
2570 
2571         for (int ic = 0; ic < INVOC_COUNT; ic++) {
2572             for (int i = 0; i < a.length; i += SPECIES.length()) {
2573                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
2574                 av.lanewise(VectorOperators.LEADING_ZEROS_COUNT).intoArray(r, i);
2575             }
2576         }
2577 
2578         bh.consume(r);
2579     }
2580 
2581     @Benchmark
2582     public void LEADING_ZEROS_COUNTMasked(Blackhole bh) {
2583         byte[] a = fa.apply(SPECIES.length());
2584         byte[] r = fr.apply(SPECIES.length());
2585         boolean[] mask = fm.apply(SPECIES.length());
2586         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
2587 
2588         for (int ic = 0; ic < INVOC_COUNT; ic++) {
2589             for (int i = 0; i < a.length; i += SPECIES.length()) {
2590                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
2591                 av.lanewise(VectorOperators.LEADING_ZEROS_COUNT, vmask).intoArray(r, i);
2592             }
2593         }
2594 
2595         bh.consume(r);
2596     }
2597 
2598     @Benchmark
2599     public void REVERSE(Blackhole bh) {
2600         byte[] a = fa.apply(SPECIES.length());
2601         byte[] r = fr.apply(SPECIES.length());
2602 
2603         for (int ic = 0; ic < INVOC_COUNT; ic++) {
2604             for (int i = 0; i < a.length; i += SPECIES.length()) {
2605                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
2606                 av.lanewise(VectorOperators.REVERSE).intoArray(r, i);
2607             }
2608         }
2609 
2610         bh.consume(r);
2611     }
2612 
2613     @Benchmark
2614     public void REVERSEMasked(Blackhole bh) {
2615         byte[] a = fa.apply(SPECIES.length());
2616         byte[] r = fr.apply(SPECIES.length());
2617         boolean[] mask = fm.apply(SPECIES.length());
2618         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
2619 
2620         for (int ic = 0; ic < INVOC_COUNT; ic++) {
2621             for (int i = 0; i < a.length; i += SPECIES.length()) {
2622                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
2623                 av.lanewise(VectorOperators.REVERSE, vmask).intoArray(r, i);
2624             }
2625         }
2626 
2627         bh.consume(r);
2628     }
2629 
2630     @Benchmark
2631     public void REVERSE_BYTES(Blackhole bh) {
2632         byte[] a = fa.apply(SPECIES.length());
2633         byte[] r = fr.apply(SPECIES.length());
2634 
2635         for (int ic = 0; ic < INVOC_COUNT; ic++) {
2636             for (int i = 0; i < a.length; i += SPECIES.length()) {
2637                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
2638                 av.lanewise(VectorOperators.REVERSE_BYTES).intoArray(r, i);
2639             }
2640         }
2641 
2642         bh.consume(r);
2643     }
2644 
2645     @Benchmark
2646     public void REVERSE_BYTESMasked(Blackhole bh) {
2647         byte[] a = fa.apply(SPECIES.length());
2648         byte[] r = fr.apply(SPECIES.length());
2649         boolean[] mask = fm.apply(SPECIES.length());
2650         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
2651 
2652         for (int ic = 0; ic < INVOC_COUNT; ic++) {
2653             for (int i = 0; i < a.length; i += SPECIES.length()) {
2654                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
2655                 av.lanewise(VectorOperators.REVERSE_BYTES, vmask).intoArray(r, i);
2656             }
2657         }
2658 
2659         bh.consume(r);
2660     }
2661 }