1 /*
   2  * Copyright (c) 2018, 2021, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 package org.openjdk.bench.jdk.incubator.vector.operation;
  25 
  26 // -- This file was mechanically generated: Do not edit! -- //
  27 
  28 import jdk.incubator.vector.Vector;
  29 import jdk.incubator.vector.VectorMask;
  30 import jdk.incubator.vector.VectorOperators;
  31 import jdk.incubator.vector.VectorShape;
  32 import jdk.incubator.vector.VectorSpecies;
  33 import jdk.incubator.vector.VectorShuffle;
  34 import jdk.incubator.vector.ByteVector;
  35 
  36 import java.util.concurrent.TimeUnit;
  37 import java.util.function.BiFunction;
  38 import java.util.function.IntFunction;
  39 
  40 import org.openjdk.jmh.annotations.*;
  41 import org.openjdk.jmh.infra.Blackhole;
  42 
  43 @BenchmarkMode(Mode.Throughput)
  44 @OutputTimeUnit(TimeUnit.MILLISECONDS)
  45 @State(Scope.Benchmark)
  46 @Warmup(iterations = 3, time = 1)
  47 @Measurement(iterations = 5, time = 1)
  48 @Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
  49 public class Byte128Vector extends AbstractVectorBenchmark {
    // Species shared by every benchmark in this class: the 128-bit byte species.
    static final VectorSpecies<Byte> SPECIES = ByteVector.SPECIES_128;

    // Trip count of the outer repetition loop in each benchmark; with 1 that loop runs once.
    static final int INVOC_COUNT = 1; // get rid of outer loop
  53 
  54     static void replaceZero(byte[] a, byte v) {
  55         for (int i = 0; i < a.length; i++) {
  56             if (a[i] == 0) {
  57                 a[i] = v;
  58             }
  59         }
  60     }
  61 
  62     static void replaceZero(byte[] a, boolean[] mask, byte v) {
  63         for (int i = 0; i < a.length; i++) {
  64             if (mask[i % mask.length] && a[i] == 0) {
  65                 a[i] = v;
  66             }
  67         }
  68     }
  69 
    // Requested element count of the benchmark arrays (JMH parameter, default "1024").
    // init() may adjust it before the arrays are allocated.
    @Param("1024")
    int size;
  72 
  73     byte[] fill(IntFunction<Byte> f) {
  74         byte[] array = new byte[size];
  75         for (int i = 0; i < array.length; i++) {
  76             array[i] = f.apply(i);
  77         }
  78         return array;
  79     }
  80 
    // Byte operand arrays (a, b, c) and the result array (r); assigned in init().
    byte[] a, b, c, r;
    // Mask arrays assigned in init() via fillMask: m uses (i % 2) == 0, mt always true, rm always false.
    boolean[] m, mt, rm;
    // Int array assigned in init() via fillInt from RANDOM.nextInt(SPECIES.length()).
    int[] s;
  84 
  85     @Setup
  86     public void init() {
  87         size += size % SPECIES.length(); // FIXME: add post-loops
  88 
  89         a = fill(i -> (byte)(2*i));
  90         b = fill(i -> (byte)(i+1));
  91         c = fill(i -> (byte)(i+5));
  92         r = fill(i -> (byte)0);
  93 
  94         m = fillMask(size, i -> (i % 2) == 0);
  95         mt = fillMask(size, i -> true);
  96         rm = fillMask(size, i -> false);
  97 
  98         s = fillInt(size, i -> RANDOM.nextInt(SPECIES.length()));
  99     }
 100 
    // Array providers used by the benchmarks. Each one ignores its argument(s)
    // and returns the current value of the corresponding array field
    // (a, b, c, r, m, mt, rm, s), which init() assigns.
    final IntFunction<byte[]> fa = vl -> a;
    final IntFunction<byte[]> fb = vl -> b;
    final IntFunction<byte[]> fc = vl -> c;
    final IntFunction<byte[]> fr = vl -> r;
    final IntFunction<boolean[]> fm = vl -> m;
    final IntFunction<boolean[]> fmt = vl -> mt;
    final IntFunction<boolean[]> fmr = vl -> rm;
    final BiFunction<Integer,Integer,int[]> fs = (i,j) -> s;
 109 
 110 
 111     @Benchmark
 112     public void ADD(Blackhole bh) {
 113         byte[] a = fa.apply(SPECIES.length());
 114         byte[] b = fb.apply(SPECIES.length());
 115         byte[] r = fr.apply(SPECIES.length());
 116 
 117         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 118             for (int i = 0; i < a.length; i += SPECIES.length()) {
 119                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 120                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 121                 av.lanewise(VectorOperators.ADD, bv).intoArray(r, i);
 122             }
 123         }
 124 
 125         bh.consume(r);
 126     }
 127 
 128     @Benchmark
 129     public void ADDMasked(Blackhole bh) {
 130         byte[] a = fa.apply(SPECIES.length());
 131         byte[] b = fb.apply(SPECIES.length());
 132         byte[] r = fr.apply(SPECIES.length());
 133         boolean[] mask = fm.apply(SPECIES.length());
 134         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 135 
 136         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 137             for (int i = 0; i < a.length; i += SPECIES.length()) {
 138                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 139                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 140                 av.lanewise(VectorOperators.ADD, bv, vmask).intoArray(r, i);
 141             }
 142         }
 143 
 144         bh.consume(r);
 145     }
 146 
 147     @Benchmark
 148     public void SUB(Blackhole bh) {
 149         byte[] a = fa.apply(SPECIES.length());
 150         byte[] b = fb.apply(SPECIES.length());
 151         byte[] r = fr.apply(SPECIES.length());
 152 
 153         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 154             for (int i = 0; i < a.length; i += SPECIES.length()) {
 155                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 156                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 157                 av.lanewise(VectorOperators.SUB, bv).intoArray(r, i);
 158             }
 159         }
 160 
 161         bh.consume(r);
 162     }
 163 
 164     @Benchmark
 165     public void SUBMasked(Blackhole bh) {
 166         byte[] a = fa.apply(SPECIES.length());
 167         byte[] b = fb.apply(SPECIES.length());
 168         byte[] r = fr.apply(SPECIES.length());
 169         boolean[] mask = fm.apply(SPECIES.length());
 170         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 171 
 172         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 173             for (int i = 0; i < a.length; i += SPECIES.length()) {
 174                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 175                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 176                 av.lanewise(VectorOperators.SUB, bv, vmask).intoArray(r, i);
 177             }
 178         }
 179 
 180         bh.consume(r);
 181     }
 182 
 183     @Benchmark
 184     public void MUL(Blackhole bh) {
 185         byte[] a = fa.apply(SPECIES.length());
 186         byte[] b = fb.apply(SPECIES.length());
 187         byte[] r = fr.apply(SPECIES.length());
 188 
 189         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 190             for (int i = 0; i < a.length; i += SPECIES.length()) {
 191                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 192                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 193                 av.lanewise(VectorOperators.MUL, bv).intoArray(r, i);
 194             }
 195         }
 196 
 197         bh.consume(r);
 198     }
 199 
 200     @Benchmark
 201     public void MULMasked(Blackhole bh) {
 202         byte[] a = fa.apply(SPECIES.length());
 203         byte[] b = fb.apply(SPECIES.length());
 204         byte[] r = fr.apply(SPECIES.length());
 205         boolean[] mask = fm.apply(SPECIES.length());
 206         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 207 
 208         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 209             for (int i = 0; i < a.length; i += SPECIES.length()) {
 210                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 211                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 212                 av.lanewise(VectorOperators.MUL, bv, vmask).intoArray(r, i);
 213             }
 214         }
 215 
 216         bh.consume(r);
 217     }
 218 
 219 
 220 
 221 
 222     @Benchmark
 223     public void DIV(Blackhole bh) {
 224         byte[] a = fa.apply(SPECIES.length());
 225         byte[] b = fb.apply(SPECIES.length());
 226         byte[] r = fr.apply(SPECIES.length());
 227 
 228         replaceZero(b, (byte) 1);
 229 
 230         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 231             for (int i = 0; i < a.length; i += SPECIES.length()) {
 232                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 233                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 234                 av.lanewise(VectorOperators.DIV, bv).intoArray(r, i);
 235             }
 236         }
 237 
 238         bh.consume(r);
 239     }
 240 
 241 
 242 
 243     @Benchmark
 244     public void DIVMasked(Blackhole bh) {
 245         byte[] a = fa.apply(SPECIES.length());
 246         byte[] b = fb.apply(SPECIES.length());
 247         byte[] r = fr.apply(SPECIES.length());
 248         boolean[] mask = fm.apply(SPECIES.length());
 249         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 250 
 251         replaceZero(b, mask, (byte) 1);
 252 
 253         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 254             for (int i = 0; i < a.length; i += SPECIES.length()) {
 255                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 256                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 257                 av.lanewise(VectorOperators.DIV, bv, vmask).intoArray(r, i);
 258             }
 259         }
 260 
 261         bh.consume(r);
 262     }
 263 
 264 
 265     @Benchmark
 266     public void FIRST_NONZERO(Blackhole bh) {
 267         byte[] a = fa.apply(SPECIES.length());
 268         byte[] b = fb.apply(SPECIES.length());
 269         byte[] r = fr.apply(SPECIES.length());
 270 
 271         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 272             for (int i = 0; i < a.length; i += SPECIES.length()) {
 273                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 274                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 275                 av.lanewise(VectorOperators.FIRST_NONZERO, bv).intoArray(r, i);
 276             }
 277         }
 278 
 279         bh.consume(r);
 280     }
 281 
 282     @Benchmark
 283     public void FIRST_NONZEROMasked(Blackhole bh) {
 284         byte[] a = fa.apply(SPECIES.length());
 285         byte[] b = fb.apply(SPECIES.length());
 286         byte[] r = fr.apply(SPECIES.length());
 287         boolean[] mask = fm.apply(SPECIES.length());
 288         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 289 
 290         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 291             for (int i = 0; i < a.length; i += SPECIES.length()) {
 292                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 293                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 294                 av.lanewise(VectorOperators.FIRST_NONZERO, bv, vmask).intoArray(r, i);
 295             }
 296         }
 297 
 298         bh.consume(r);
 299     }
 300 
 301 
 302     @Benchmark
 303     public void AND(Blackhole bh) {
 304         byte[] a = fa.apply(SPECIES.length());
 305         byte[] b = fb.apply(SPECIES.length());
 306         byte[] r = fr.apply(SPECIES.length());
 307 
 308         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 309             for (int i = 0; i < a.length; i += SPECIES.length()) {
 310                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 311                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 312                 av.lanewise(VectorOperators.AND, bv).intoArray(r, i);
 313             }
 314         }
 315 
 316         bh.consume(r);
 317     }
 318 
 319 
 320 
 321     @Benchmark
 322     public void ANDMasked(Blackhole bh) {
 323         byte[] a = fa.apply(SPECIES.length());
 324         byte[] b = fb.apply(SPECIES.length());
 325         byte[] r = fr.apply(SPECIES.length());
 326         boolean[] mask = fm.apply(SPECIES.length());
 327         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 328 
 329         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 330             for (int i = 0; i < a.length; i += SPECIES.length()) {
 331                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 332                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 333                 av.lanewise(VectorOperators.AND, bv, vmask).intoArray(r, i);
 334             }
 335         }
 336 
 337         bh.consume(r);
 338     }
 339 
 340 
 341 
 342     @Benchmark
 343     public void AND_NOT(Blackhole bh) {
 344         byte[] a = fa.apply(SPECIES.length());
 345         byte[] b = fb.apply(SPECIES.length());
 346         byte[] r = fr.apply(SPECIES.length());
 347 
 348         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 349             for (int i = 0; i < a.length; i += SPECIES.length()) {
 350                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 351                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 352                 av.lanewise(VectorOperators.AND_NOT, bv).intoArray(r, i);
 353             }
 354         }
 355 
 356         bh.consume(r);
 357     }
 358 
 359 
 360 
 361     @Benchmark
 362     public void AND_NOTMasked(Blackhole bh) {
 363         byte[] a = fa.apply(SPECIES.length());
 364         byte[] b = fb.apply(SPECIES.length());
 365         byte[] r = fr.apply(SPECIES.length());
 366         boolean[] mask = fm.apply(SPECIES.length());
 367         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 368 
 369         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 370             for (int i = 0; i < a.length; i += SPECIES.length()) {
 371                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 372                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 373                 av.lanewise(VectorOperators.AND_NOT, bv, vmask).intoArray(r, i);
 374             }
 375         }
 376 
 377         bh.consume(r);
 378     }
 379 
 380 
 381 
 382     @Benchmark
 383     public void OR(Blackhole bh) {
 384         byte[] a = fa.apply(SPECIES.length());
 385         byte[] b = fb.apply(SPECIES.length());
 386         byte[] r = fr.apply(SPECIES.length());
 387 
 388         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 389             for (int i = 0; i < a.length; i += SPECIES.length()) {
 390                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 391                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 392                 av.lanewise(VectorOperators.OR, bv).intoArray(r, i);
 393             }
 394         }
 395 
 396         bh.consume(r);
 397     }
 398 
 399 
 400 
 401     @Benchmark
 402     public void ORMasked(Blackhole bh) {
 403         byte[] a = fa.apply(SPECIES.length());
 404         byte[] b = fb.apply(SPECIES.length());
 405         byte[] r = fr.apply(SPECIES.length());
 406         boolean[] mask = fm.apply(SPECIES.length());
 407         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 408 
 409         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 410             for (int i = 0; i < a.length; i += SPECIES.length()) {
 411                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 412                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 413                 av.lanewise(VectorOperators.OR, bv, vmask).intoArray(r, i);
 414             }
 415         }
 416 
 417         bh.consume(r);
 418     }
 419 
 420 
 421 
 422     @Benchmark
 423     public void XOR(Blackhole bh) {
 424         byte[] a = fa.apply(SPECIES.length());
 425         byte[] b = fb.apply(SPECIES.length());
 426         byte[] r = fr.apply(SPECIES.length());
 427 
 428         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 429             for (int i = 0; i < a.length; i += SPECIES.length()) {
 430                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 431                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 432                 av.lanewise(VectorOperators.XOR, bv).intoArray(r, i);
 433             }
 434         }
 435 
 436         bh.consume(r);
 437     }
 438 
 439 
 440 
 441     @Benchmark
 442     public void XORMasked(Blackhole bh) {
 443         byte[] a = fa.apply(SPECIES.length());
 444         byte[] b = fb.apply(SPECIES.length());
 445         byte[] r = fr.apply(SPECIES.length());
 446         boolean[] mask = fm.apply(SPECIES.length());
 447         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 448 
 449         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 450             for (int i = 0; i < a.length; i += SPECIES.length()) {
 451                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 452                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 453                 av.lanewise(VectorOperators.XOR, bv, vmask).intoArray(r, i);
 454             }
 455         }
 456 
 457         bh.consume(r);
 458     }
 459 
 460 
 461 
 462 
 463 
 464     @Benchmark
 465     public void LSHL(Blackhole bh) {
 466         byte[] a = fa.apply(SPECIES.length());
 467         byte[] b = fb.apply(SPECIES.length());
 468         byte[] r = fr.apply(SPECIES.length());
 469 
 470         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 471             for (int i = 0; i < a.length; i += SPECIES.length()) {
 472                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 473                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 474                 av.lanewise(VectorOperators.LSHL, bv).intoArray(r, i);
 475             }
 476         }
 477 
 478         bh.consume(r);
 479     }
 480 
 481 
 482 
 483     @Benchmark
 484     public void LSHLMasked(Blackhole bh) {
 485         byte[] a = fa.apply(SPECIES.length());
 486         byte[] b = fb.apply(SPECIES.length());
 487         byte[] r = fr.apply(SPECIES.length());
 488         boolean[] mask = fm.apply(SPECIES.length());
 489         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 490 
 491         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 492             for (int i = 0; i < a.length; i += SPECIES.length()) {
 493                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 494                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 495                 av.lanewise(VectorOperators.LSHL, bv, vmask).intoArray(r, i);
 496             }
 497         }
 498 
 499         bh.consume(r);
 500     }
 501 
 502 
 503 
 504 
 505 
 506 
 507 
 508     @Benchmark
 509     public void ASHR(Blackhole bh) {
 510         byte[] a = fa.apply(SPECIES.length());
 511         byte[] b = fb.apply(SPECIES.length());
 512         byte[] r = fr.apply(SPECIES.length());
 513 
 514         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 515             for (int i = 0; i < a.length; i += SPECIES.length()) {
 516                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 517                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 518                 av.lanewise(VectorOperators.ASHR, bv).intoArray(r, i);
 519             }
 520         }
 521 
 522         bh.consume(r);
 523     }
 524 
 525 
 526 
 527     @Benchmark
 528     public void ASHRMasked(Blackhole bh) {
 529         byte[] a = fa.apply(SPECIES.length());
 530         byte[] b = fb.apply(SPECIES.length());
 531         byte[] r = fr.apply(SPECIES.length());
 532         boolean[] mask = fm.apply(SPECIES.length());
 533         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 534 
 535         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 536             for (int i = 0; i < a.length; i += SPECIES.length()) {
 537                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 538                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 539                 av.lanewise(VectorOperators.ASHR, bv, vmask).intoArray(r, i);
 540             }
 541         }
 542 
 543         bh.consume(r);
 544     }
 545 
 546 
 547 
 548 
 549 
 550 
 551 
 552     @Benchmark
 553     public void LSHR(Blackhole bh) {
 554         byte[] a = fa.apply(SPECIES.length());
 555         byte[] b = fb.apply(SPECIES.length());
 556         byte[] r = fr.apply(SPECIES.length());
 557 
 558         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 559             for (int i = 0; i < a.length; i += SPECIES.length()) {
 560                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 561                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 562                 av.lanewise(VectorOperators.LSHR, bv).intoArray(r, i);
 563             }
 564         }
 565 
 566         bh.consume(r);
 567     }
 568 
 569 
 570 
 571     @Benchmark
 572     public void LSHRMasked(Blackhole bh) {
 573         byte[] a = fa.apply(SPECIES.length());
 574         byte[] b = fb.apply(SPECIES.length());
 575         byte[] r = fr.apply(SPECIES.length());
 576         boolean[] mask = fm.apply(SPECIES.length());
 577         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 578 
 579         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 580             for (int i = 0; i < a.length; i += SPECIES.length()) {
 581                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 582                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 583                 av.lanewise(VectorOperators.LSHR, bv, vmask).intoArray(r, i);
 584             }
 585         }
 586 
 587         bh.consume(r);
 588     }
 589 
 590 
 591 
 592 
 593 
 594 
 595 
 596     @Benchmark
 597     public void LSHLShift(Blackhole bh) {
 598         byte[] a = fa.apply(SPECIES.length());
 599         byte[] b = fb.apply(SPECIES.length());
 600         byte[] r = fr.apply(SPECIES.length());
 601 
 602         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 603             for (int i = 0; i < a.length; i += SPECIES.length()) {
 604                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 605                 av.lanewise(VectorOperators.LSHL, (int)b[i]).intoArray(r, i);
 606             }
 607         }
 608 
 609         bh.consume(r);
 610     }
 611 
 612 
 613 
 614     @Benchmark
 615     public void LSHLMaskedShift(Blackhole bh) {
 616         byte[] a = fa.apply(SPECIES.length());
 617         byte[] b = fb.apply(SPECIES.length());
 618         byte[] r = fr.apply(SPECIES.length());
 619         boolean[] mask = fm.apply(SPECIES.length());
 620         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 621 
 622         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 623             for (int i = 0; i < a.length; i += SPECIES.length()) {
 624                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 625                 av.lanewise(VectorOperators.LSHL, (int)b[i], vmask).intoArray(r, i);
 626             }
 627         }
 628 
 629         bh.consume(r);
 630     }
 631 
 632 
 633 
 634 
 635 
 636 
 637 
 638     @Benchmark
 639     public void LSHRShift(Blackhole bh) {
 640         byte[] a = fa.apply(SPECIES.length());
 641         byte[] b = fb.apply(SPECIES.length());
 642         byte[] r = fr.apply(SPECIES.length());
 643 
 644         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 645             for (int i = 0; i < a.length; i += SPECIES.length()) {
 646                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 647                 av.lanewise(VectorOperators.LSHR, (int)b[i]).intoArray(r, i);
 648             }
 649         }
 650 
 651         bh.consume(r);
 652     }
 653 
 654 
 655 
 656     @Benchmark
 657     public void LSHRMaskedShift(Blackhole bh) {
 658         byte[] a = fa.apply(SPECIES.length());
 659         byte[] b = fb.apply(SPECIES.length());
 660         byte[] r = fr.apply(SPECIES.length());
 661         boolean[] mask = fm.apply(SPECIES.length());
 662         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 663 
 664         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 665             for (int i = 0; i < a.length; i += SPECIES.length()) {
 666                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 667                 av.lanewise(VectorOperators.LSHR, (int)b[i], vmask).intoArray(r, i);
 668             }
 669         }
 670 
 671         bh.consume(r);
 672     }
 673 
 674 
 675 
 676 
 677 
 678 
 679 
 680     @Benchmark
 681     public void ASHRShift(Blackhole bh) {
 682         byte[] a = fa.apply(SPECIES.length());
 683         byte[] b = fb.apply(SPECIES.length());
 684         byte[] r = fr.apply(SPECIES.length());
 685 
 686         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 687             for (int i = 0; i < a.length; i += SPECIES.length()) {
 688                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 689                 av.lanewise(VectorOperators.ASHR, (int)b[i]).intoArray(r, i);
 690             }
 691         }
 692 
 693         bh.consume(r);
 694     }
 695 
 696 
 697 
 698     @Benchmark
 699     public void ASHRMaskedShift(Blackhole bh) {
 700         byte[] a = fa.apply(SPECIES.length());
 701         byte[] b = fb.apply(SPECIES.length());
 702         byte[] r = fr.apply(SPECIES.length());
 703         boolean[] mask = fm.apply(SPECIES.length());
 704         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 705 
 706         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 707             for (int i = 0; i < a.length; i += SPECIES.length()) {
 708                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 709                 av.lanewise(VectorOperators.ASHR, (int)b[i], vmask).intoArray(r, i);
 710             }
 711         }
 712 
 713         bh.consume(r);
 714     }
 715 
 716 
 717 
 718 
 719 
 720     @Benchmark
 721     public void ROR(Blackhole bh) {
 722         byte[] a = fa.apply(SPECIES.length());
 723         byte[] b = fb.apply(SPECIES.length());
 724         byte[] r = fr.apply(SPECIES.length());
 725 
 726         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 727             for (int i = 0; i < a.length; i += SPECIES.length()) {
 728                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 729                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 730                 av.lanewise(VectorOperators.ROR, bv).intoArray(r, i);
 731             }
 732         }
 733 
 734         bh.consume(r);
 735     }
 736 
 737 
 738 
 739     @Benchmark
 740     public void RORMasked(Blackhole bh) {
 741         byte[] a = fa.apply(SPECIES.length());
 742         byte[] b = fb.apply(SPECIES.length());
 743         byte[] r = fr.apply(SPECIES.length());
 744         boolean[] mask = fm.apply(SPECIES.length());
 745         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 746 
 747         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 748             for (int i = 0; i < a.length; i += SPECIES.length()) {
 749                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 750                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 751                 av.lanewise(VectorOperators.ROR, bv, vmask).intoArray(r, i);
 752             }
 753         }
 754 
 755         bh.consume(r);
 756     }
 757 
 758 
 759 
 760     @Benchmark
 761     public void ROL(Blackhole bh) {
 762         byte[] a = fa.apply(SPECIES.length());
 763         byte[] b = fb.apply(SPECIES.length());
 764         byte[] r = fr.apply(SPECIES.length());
 765 
 766         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 767             for (int i = 0; i < a.length; i += SPECIES.length()) {
 768                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 769                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 770                 av.lanewise(VectorOperators.ROL, bv).intoArray(r, i);
 771             }
 772         }
 773 
 774         bh.consume(r);
 775     }
 776 
 777 
 778 
 779     @Benchmark
 780     public void ROLMasked(Blackhole bh) {
 781         byte[] a = fa.apply(SPECIES.length());
 782         byte[] b = fb.apply(SPECIES.length());
 783         byte[] r = fr.apply(SPECIES.length());
 784         boolean[] mask = fm.apply(SPECIES.length());
 785         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 786 
 787         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 788             for (int i = 0; i < a.length; i += SPECIES.length()) {
 789                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 790                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 791                 av.lanewise(VectorOperators.ROL, bv, vmask).intoArray(r, i);
 792             }
 793         }
 794 
 795         bh.consume(r);
 796     }
 797 
 798 
 799 
 800     @Benchmark
 801     public void RORShift(Blackhole bh) {
 802         byte[] a = fa.apply(SPECIES.length());
 803         byte[] b = fb.apply(SPECIES.length());
 804         byte[] r = fr.apply(SPECIES.length());
 805 
 806         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 807             for (int i = 0; i < a.length; i += SPECIES.length()) {
 808                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 809                 av.lanewise(VectorOperators.ROR, (int)b[i]).intoArray(r, i);
 810             }
 811         }
 812 
 813         bh.consume(r);
 814     }
 815 
 816 
 817 
 818     @Benchmark
 819     public void RORMaskedShift(Blackhole bh) {
 820         byte[] a = fa.apply(SPECIES.length());
 821         byte[] b = fb.apply(SPECIES.length());
 822         byte[] r = fr.apply(SPECIES.length());
 823         boolean[] mask = fm.apply(SPECIES.length());
 824         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 825 
 826         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 827             for (int i = 0; i < a.length; i += SPECIES.length()) {
 828                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 829                 av.lanewise(VectorOperators.ROR, (int)b[i], vmask).intoArray(r, i);
 830             }
 831         }
 832 
 833         bh.consume(r);
 834     }
 835 
 836 
 837 
 838     @Benchmark
 839     public void ROLShift(Blackhole bh) {
 840         byte[] a = fa.apply(SPECIES.length());
 841         byte[] b = fb.apply(SPECIES.length());
 842         byte[] r = fr.apply(SPECIES.length());
 843 
 844         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 845             for (int i = 0; i < a.length; i += SPECIES.length()) {
 846                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 847                 av.lanewise(VectorOperators.ROL, (int)b[i]).intoArray(r, i);
 848             }
 849         }
 850 
 851         bh.consume(r);
 852     }
 853 
 854 
 855 
 856     @Benchmark
 857     public void ROLMaskedShift(Blackhole bh) {
 858         byte[] a = fa.apply(SPECIES.length());
 859         byte[] b = fb.apply(SPECIES.length());
 860         byte[] r = fr.apply(SPECIES.length());
 861         boolean[] mask = fm.apply(SPECIES.length());
 862         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 863 
 864         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 865             for (int i = 0; i < a.length; i += SPECIES.length()) {
 866                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 867                 av.lanewise(VectorOperators.ROL, (int)b[i], vmask).intoArray(r, i);
 868             }
 869         }
 870 
 871         bh.consume(r);
 872     }
 873 
 874 
 875     @Benchmark
 876     public void MIN(Blackhole bh) {
 877         byte[] a = fa.apply(SPECIES.length());
 878         byte[] b = fb.apply(SPECIES.length());
 879         byte[] r = fr.apply(SPECIES.length());
 880 
 881         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 882             for (int i = 0; i < a.length; i += SPECIES.length()) {
 883                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 884                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 885                 av.lanewise(VectorOperators.MIN, bv).intoArray(r, i);
 886             }
 887         }
 888 
 889         bh.consume(r);
 890     }
 891 
 892     @Benchmark
 893     public void MAX(Blackhole bh) {
 894         byte[] a = fa.apply(SPECIES.length());
 895         byte[] b = fb.apply(SPECIES.length());
 896         byte[] r = fr.apply(SPECIES.length());
 897 
 898         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 899             for (int i = 0; i < a.length; i += SPECIES.length()) {
 900                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 901                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 902                 av.lanewise(VectorOperators.MAX, bv).intoArray(r, i);
 903             }
 904         }
 905 
 906         bh.consume(r);
 907     }
 908 
 909 
 910     @Benchmark
 911     public void ANDLanes(Blackhole bh) {
 912         byte[] a = fa.apply(SPECIES.length());
 913         byte ra = -1;
 914 
 915         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 916             ra = -1;
 917             for (int i = 0; i < a.length; i += SPECIES.length()) {
 918                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 919                 ra &= av.reduceLanes(VectorOperators.AND);
 920             }
 921         }
 922         bh.consume(ra);
 923     }
 924 
 925 
 926 
 927     @Benchmark
 928     public void ANDMaskedLanes(Blackhole bh) {
 929         byte[] a = fa.apply(SPECIES.length());
 930         boolean[] mask = fm.apply(SPECIES.length());
 931         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 932         byte ra = -1;
 933 
 934         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 935             ra = -1;
 936             for (int i = 0; i < a.length; i += SPECIES.length()) {
 937                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 938                 ra &= av.reduceLanes(VectorOperators.AND, vmask);
 939             }
 940         }
 941         bh.consume(ra);
 942     }
 943 
 944 
 945 
 946     @Benchmark
 947     public void ORLanes(Blackhole bh) {
 948         byte[] a = fa.apply(SPECIES.length());
 949         byte ra = 0;
 950 
 951         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 952             ra = 0;
 953             for (int i = 0; i < a.length; i += SPECIES.length()) {
 954                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 955                 ra |= av.reduceLanes(VectorOperators.OR);
 956             }
 957         }
 958         bh.consume(ra);
 959     }
 960 
 961 
 962 
 963     @Benchmark
 964     public void ORMaskedLanes(Blackhole bh) {
 965         byte[] a = fa.apply(SPECIES.length());
 966         boolean[] mask = fm.apply(SPECIES.length());
 967         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 968         byte ra = 0;
 969 
 970         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 971             ra = 0;
 972             for (int i = 0; i < a.length; i += SPECIES.length()) {
 973                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 974                 ra |= av.reduceLanes(VectorOperators.OR, vmask);
 975             }
 976         }
 977         bh.consume(ra);
 978     }
 979 
 980 
 981 
 982     @Benchmark
 983     public void XORLanes(Blackhole bh) {
 984         byte[] a = fa.apply(SPECIES.length());
 985         byte ra = 0;
 986 
 987         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 988             ra = 0;
 989             for (int i = 0; i < a.length; i += SPECIES.length()) {
 990                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 991                 ra ^= av.reduceLanes(VectorOperators.XOR);
 992             }
 993         }
 994         bh.consume(ra);
 995     }
 996 
 997 
 998 
 999     @Benchmark
1000     public void XORMaskedLanes(Blackhole bh) {
1001         byte[] a = fa.apply(SPECIES.length());
1002         boolean[] mask = fm.apply(SPECIES.length());
1003         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1004         byte ra = 0;
1005 
1006         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1007             ra = 0;
1008             for (int i = 0; i < a.length; i += SPECIES.length()) {
1009                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1010                 ra ^= av.reduceLanes(VectorOperators.XOR, vmask);
1011             }
1012         }
1013         bh.consume(ra);
1014     }
1015 
1016 
1017     @Benchmark
1018     public void ADDLanes(Blackhole bh) {
1019         byte[] a = fa.apply(SPECIES.length());
1020         byte ra = 0;
1021 
1022         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1023             ra = 0;
1024             for (int i = 0; i < a.length; i += SPECIES.length()) {
1025                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1026                 ra += av.reduceLanes(VectorOperators.ADD);
1027             }
1028         }
1029         bh.consume(ra);
1030     }
1031 
1032     @Benchmark
1033     public void ADDMaskedLanes(Blackhole bh) {
1034         byte[] a = fa.apply(SPECIES.length());
1035         boolean[] mask = fm.apply(SPECIES.length());
1036         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1037         byte ra = 0;
1038 
1039         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1040             ra = 0;
1041             for (int i = 0; i < a.length; i += SPECIES.length()) {
1042                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1043                 ra += av.reduceLanes(VectorOperators.ADD, vmask);
1044             }
1045         }
1046         bh.consume(ra);
1047     }
1048 
1049     @Benchmark
1050     public void MULLanes(Blackhole bh) {
1051         byte[] a = fa.apply(SPECIES.length());
1052         byte ra = 1;
1053 
1054         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1055             ra = 1;
1056             for (int i = 0; i < a.length; i += SPECIES.length()) {
1057                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1058                 ra *= av.reduceLanes(VectorOperators.MUL);
1059             }
1060         }
1061         bh.consume(ra);
1062     }
1063 
1064     @Benchmark
1065     public void MULMaskedLanes(Blackhole bh) {
1066         byte[] a = fa.apply(SPECIES.length());
1067         boolean[] mask = fm.apply(SPECIES.length());
1068         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1069         byte ra = 1;
1070 
1071         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1072             ra = 1;
1073             for (int i = 0; i < a.length; i += SPECIES.length()) {
1074                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1075                 ra *= av.reduceLanes(VectorOperators.MUL, vmask);
1076             }
1077         }
1078         bh.consume(ra);
1079     }
1080 
1081     @Benchmark
1082     public void MINLanes(Blackhole bh) {
1083         byte[] a = fa.apply(SPECIES.length());
1084         byte ra = Byte.MAX_VALUE;
1085 
1086         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1087             ra = Byte.MAX_VALUE;
1088             for (int i = 0; i < a.length; i += SPECIES.length()) {
1089                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1090                 ra = (byte)Math.min(ra, av.reduceLanes(VectorOperators.MIN));
1091             }
1092         }
1093         bh.consume(ra);
1094     }
1095 
1096     @Benchmark
1097     public void MINMaskedLanes(Blackhole bh) {
1098         byte[] a = fa.apply(SPECIES.length());
1099         boolean[] mask = fm.apply(SPECIES.length());
1100         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1101         byte ra = Byte.MAX_VALUE;
1102 
1103         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1104             ra = Byte.MAX_VALUE;
1105             for (int i = 0; i < a.length; i += SPECIES.length()) {
1106                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1107                 ra = (byte)Math.min(ra, av.reduceLanes(VectorOperators.MIN, vmask));
1108             }
1109         }
1110         bh.consume(ra);
1111     }
1112 
1113     @Benchmark
1114     public void MAXLanes(Blackhole bh) {
1115         byte[] a = fa.apply(SPECIES.length());
1116         byte ra = Byte.MIN_VALUE;
1117 
1118         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1119             ra = Byte.MIN_VALUE;
1120             for (int i = 0; i < a.length; i += SPECIES.length()) {
1121                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1122                 ra = (byte)Math.max(ra, av.reduceLanes(VectorOperators.MAX));
1123             }
1124         }
1125         bh.consume(ra);
1126     }
1127 
1128     @Benchmark
1129     public void MAXMaskedLanes(Blackhole bh) {
1130         byte[] a = fa.apply(SPECIES.length());
1131         boolean[] mask = fm.apply(SPECIES.length());
1132         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1133         byte ra = Byte.MIN_VALUE;
1134 
1135         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1136             ra = Byte.MIN_VALUE;
1137             for (int i = 0; i < a.length; i += SPECIES.length()) {
1138                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1139                 ra = (byte)Math.max(ra, av.reduceLanes(VectorOperators.MAX, vmask));
1140             }
1141         }
1142         bh.consume(ra);
1143     }
1144 
1145 
1146     @Benchmark
1147     public void anyTrue(Blackhole bh) {
1148         boolean[] mask = fm.apply(SPECIES.length());
1149         boolean[] r = fmr.apply(SPECIES.length());
1150 
1151         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1152             for (int i = 0; i < mask.length; i += SPECIES.length()) {
1153                 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, i);
1154                 r[i] = vmask.anyTrue();
1155             }
1156         }
1157 
1158         bh.consume(r);
1159     }
1160 
1161 
1162 
1163     @Benchmark
1164     public void allTrue(Blackhole bh) {
1165         boolean[] mask = fm.apply(SPECIES.length());
1166         boolean[] r = fmr.apply(SPECIES.length());
1167 
1168         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1169             for (int i = 0; i < mask.length; i += SPECIES.length()) {
1170                 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, i);
1171                 r[i] = vmask.allTrue();
1172             }
1173         }
1174 
1175         bh.consume(r);
1176     }
1177 
1178 
1179     @Benchmark
1180     public void withLane(Blackhole bh) {
1181         byte[] a = fa.apply(SPECIES.length());
1182         byte[] r = fr.apply(SPECIES.length());
1183 
1184         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1185             for (int i = 0; i < a.length; i += SPECIES.length()) {
1186                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1187                 av.withLane(0, (byte)4).intoArray(r, i);
1188             }
1189         }
1190 
1191         bh.consume(r);
1192     }
1193 
1194     @Benchmark
1195     public Object IS_DEFAULT() {
1196         byte[] a = fa.apply(size);
1197         boolean[] ms = fmt.apply(size);
1198         VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, 0);
1199 
1200         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1201             for (int i = 0; i < a.length; i += SPECIES.length()) {
1202                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1203 
1204                 // accumulate results, so JIT can't eliminate relevant computations
1205                 m = m.and(av.test(VectorOperators.IS_DEFAULT));
1206             }
1207         }
1208 
1209         return m;
1210     }
1211 
1212     @Benchmark
1213     public Object IS_NEGATIVE() {
1214         byte[] a = fa.apply(size);
1215         boolean[] ms = fmt.apply(size);
1216         VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, 0);
1217 
1218         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1219             for (int i = 0; i < a.length; i += SPECIES.length()) {
1220                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1221 
1222                 // accumulate results, so JIT can't eliminate relevant computations
1223                 m = m.and(av.test(VectorOperators.IS_NEGATIVE));
1224             }
1225         }
1226 
1227         return m;
1228     }
1229 
1230 
1231 
1232 
1233     @Benchmark
1234     public Object LT() {
1235         byte[] a = fa.apply(size);
1236         byte[] b = fb.apply(size);
1237         boolean[] ms = fmt.apply(size);
1238         VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, 0);
1239 
1240         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1241             for (int i = 0; i < a.length; i += SPECIES.length()) {
1242                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1243                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1244 
1245                 // accumulate results, so JIT can't eliminate relevant computations
1246                 m = m.and(av.compare(VectorOperators.LT, bv));
1247             }
1248         }
1249 
1250         return m;
1251     }
1252 
1253     @Benchmark
1254     public Object GT() {
1255         byte[] a = fa.apply(size);
1256         byte[] b = fb.apply(size);
1257         boolean[] ms = fmt.apply(size);
1258         VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, 0);
1259 
1260         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1261             for (int i = 0; i < a.length; i += SPECIES.length()) {
1262                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1263                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1264 
1265                 // accumulate results, so JIT can't eliminate relevant computations
1266                 m = m.and(av.compare(VectorOperators.GT, bv));
1267             }
1268         }
1269 
1270         return m;
1271     }
1272 
1273     @Benchmark
1274     public Object EQ() {
1275         byte[] a = fa.apply(size);
1276         byte[] b = fb.apply(size);
1277         boolean[] ms = fmt.apply(size);
1278         VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, 0);
1279 
1280         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1281             for (int i = 0; i < a.length; i += SPECIES.length()) {
1282                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1283                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1284 
1285                 // accumulate results, so JIT can't eliminate relevant computations
1286                 m = m.and(av.compare(VectorOperators.EQ, bv));
1287             }
1288         }
1289 
1290         return m;
1291     }
1292 
1293     @Benchmark
1294     public Object NE() {
1295         byte[] a = fa.apply(size);
1296         byte[] b = fb.apply(size);
1297         boolean[] ms = fmt.apply(size);
1298         VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, 0);
1299 
1300         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1301             for (int i = 0; i < a.length; i += SPECIES.length()) {
1302                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1303                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1304 
1305                 // accumulate results, so JIT can't eliminate relevant computations
1306                 m = m.and(av.compare(VectorOperators.NE, bv));
1307             }
1308         }
1309 
1310         return m;
1311     }
1312 
1313     @Benchmark
1314     public Object LE() {
1315         byte[] a = fa.apply(size);
1316         byte[] b = fb.apply(size);
1317         boolean[] ms = fmt.apply(size);
1318         VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, 0);
1319 
1320         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1321             for (int i = 0; i < a.length; i += SPECIES.length()) {
1322                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1323                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1324 
1325                 // accumulate results, so JIT can't eliminate relevant computations
1326                 m = m.and(av.compare(VectorOperators.LE, bv));
1327             }
1328         }
1329 
1330         return m;
1331     }
1332 
1333     @Benchmark
1334     public Object GE() {
1335         byte[] a = fa.apply(size);
1336         byte[] b = fb.apply(size);
1337         boolean[] ms = fmt.apply(size);
1338         VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, 0);
1339 
1340         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1341             for (int i = 0; i < a.length; i += SPECIES.length()) {
1342                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1343                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1344 
1345                 // accumulate results, so JIT can't eliminate relevant computations
1346                 m = m.and(av.compare(VectorOperators.GE, bv));
1347             }
1348         }
1349 
1350         return m;
1351     }
1352 
1353 
1354     @Benchmark
1355     public Object UNSIGNED_LT() {
1356         byte[] a = fa.apply(size);
1357         byte[] b = fb.apply(size);
1358         boolean[] ms = fmt.apply(size);
1359         VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, 0);
1360 
1361         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1362             for (int i = 0; i < a.length; i += SPECIES.length()) {
1363                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1364                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1365 
1366                 // accumulate results, so JIT can't eliminate relevant computations
1367                 m = m.and(av.compare(VectorOperators.UNSIGNED_LT, bv));
1368             }
1369         }
1370 
1371         return m;
1372     }
1373 
1374 
1375 
1376     @Benchmark
1377     public Object UNSIGNED_GT() {
1378         byte[] a = fa.apply(size);
1379         byte[] b = fb.apply(size);
1380         boolean[] ms = fmt.apply(size);
1381         VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, 0);
1382 
1383         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1384             for (int i = 0; i < a.length; i += SPECIES.length()) {
1385                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1386                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1387 
1388                 // accumulate results, so JIT can't eliminate relevant computations
1389                 m = m.and(av.compare(VectorOperators.UNSIGNED_GT, bv));
1390             }
1391         }
1392 
1393         return m;
1394     }
1395 
1396 
1397 
1398     @Benchmark
1399     public Object UNSIGNED_LE() {
1400         byte[] a = fa.apply(size);
1401         byte[] b = fb.apply(size);
1402         boolean[] ms = fmt.apply(size);
1403         VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, 0);
1404 
1405         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1406             for (int i = 0; i < a.length; i += SPECIES.length()) {
1407                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1408                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1409 
1410                 // accumulate results, so JIT can't eliminate relevant computations
1411                 m = m.and(av.compare(VectorOperators.UNSIGNED_LE, bv));
1412             }
1413         }
1414 
1415         return m;
1416     }
1417 
1418 
1419 
1420     @Benchmark
1421     public Object UNSIGNED_GE() {
1422         byte[] a = fa.apply(size);
1423         byte[] b = fb.apply(size);
1424         boolean[] ms = fmt.apply(size);
1425         VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, 0);
1426 
1427         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1428             for (int i = 0; i < a.length; i += SPECIES.length()) {
1429                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1430                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1431 
1432                 // accumulate results, so JIT can't eliminate relevant computations
1433                 m = m.and(av.compare(VectorOperators.UNSIGNED_GE, bv));
1434             }
1435         }
1436 
1437         return m;
1438     }
1439 
1440 
1441     @Benchmark
1442     public void blend(Blackhole bh) {
1443         byte[] a = fa.apply(SPECIES.length());
1444         byte[] b = fb.apply(SPECIES.length());
1445         byte[] r = fr.apply(SPECIES.length());
1446         boolean[] mask = fm.apply(SPECIES.length());
1447         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1448 
1449         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1450             for (int i = 0; i < a.length; i += SPECIES.length()) {
1451                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1452                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1453                 av.blend(bv, vmask).intoArray(r, i);
1454             }
1455         }
1456 
1457         bh.consume(r);
1458     }
1459 
1460     @Benchmark
1461     public void rearrange(Blackhole bh) {
1462         byte[] a = fa.apply(SPECIES.length());
1463         int[] order = fs.apply(a.length, SPECIES.length());
1464         byte[] r = fr.apply(SPECIES.length());
1465 
1466         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1467             for (int i = 0; i < a.length; i += SPECIES.length()) {
1468                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1469                 av.rearrange(VectorShuffle.fromArray(SPECIES, order, i)).intoArray(r, i);
1470             }
1471         }
1472 
1473         bh.consume(r);
1474     }
1475 
1476     @Benchmark
1477     public Object compress() {
1478         byte[] a = fa.apply(size);
1479         byte[] r = fb.apply(size);
1480         boolean[] ms = fmt.apply(size);
1481         VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, 0);
1482 
1483         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1484             for (int i = 0; i < a.length; i += SPECIES.length()) {
1485                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1486                 av.compress(m).intoArray(r, i);
1487             }
1488         }
1489 
1490         return r;
1491     }
1492 
1493     @Benchmark
1494     public Object expand() {
1495         byte[] a = fa.apply(size);
1496         byte[] r = fb.apply(size);
1497         boolean[] ms = fmt.apply(size);
1498         VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, 0);
1499 
1500         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1501             for (int i = 0; i < a.length; i += SPECIES.length()) {
1502                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1503                 av.expand(m).intoArray(r, i);
1504             }
1505         }
1506 
1507         return r;
1508     }
1509 
1510     @Benchmark
1511     public Object maskCompress() {
1512         boolean[] ms = fmt.apply(size);
1513         boolean[] rs = fmt.apply(size);
1514 
1515         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1516             for (int i = 0, j = 0; i < ms.length; i += SPECIES.length()) {
1517                 VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, i);
1518                 m.compress().intoArray(rs, j);
1519                 j += m.trueCount();
1520             }
1521         }
1522 
1523         return rs;
1524     }
1525 
1526 
    /**
     * Benchmarks {@code lane(int)} extraction: every lane of each vector loaded from the
     * {@code fa} array is read individually and written to the matching position of the
     * {@code fr} array, which is handed to the blackhole at the end.
     *
     * <p>The lane reads are spelled out with literal indices for lane counts of 1, 2, 4,
     * 8, 16, 32 and 64; any other lane count falls back to a plain loop over the lanes.
     */
    @Benchmark
    public void laneextract(Blackhole bh) {
        byte[] a = fa.apply(SPECIES.length());
        byte[] r = fr.apply(SPECIES.length());

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                ByteVector av = ByteVector.fromArray(SPECIES, a, i);
                int num_lanes = SPECIES.length();
                // Manually unroll because full unroll happens after intrinsification.
                // Unroll is needed because get intrinsic requires for index to be a known constant.
                if (num_lanes == 1) {
                    r[i]=av.lane(0);
                } else if (num_lanes == 2) {
                    r[i]=av.lane(0);
                    r[i+1]=av.lane(1);
                } else if (num_lanes == 4) {
                    r[i]=av.lane(0);
                    r[i+1]=av.lane(1);
                    r[i+2]=av.lane(2);
                    r[i+3]=av.lane(3);
                } else if (num_lanes == 8) {
                    r[i]=av.lane(0);
                    r[i+1]=av.lane(1);
                    r[i+2]=av.lane(2);
                    r[i+3]=av.lane(3);
                    r[i+4]=av.lane(4);
                    r[i+5]=av.lane(5);
                    r[i+6]=av.lane(6);
                    r[i+7]=av.lane(7);
                } else if (num_lanes == 16) {
                    r[i]=av.lane(0);
                    r[i+1]=av.lane(1);
                    r[i+2]=av.lane(2);
                    r[i+3]=av.lane(3);
                    r[i+4]=av.lane(4);
                    r[i+5]=av.lane(5);
                    r[i+6]=av.lane(6);
                    r[i+7]=av.lane(7);
                    r[i+8]=av.lane(8);
                    r[i+9]=av.lane(9);
                    r[i+10]=av.lane(10);
                    r[i+11]=av.lane(11);
                    r[i+12]=av.lane(12);
                    r[i+13]=av.lane(13);
                    r[i+14]=av.lane(14);
                    r[i+15]=av.lane(15);
                } else if (num_lanes == 32) {
                    r[i]=av.lane(0);
                    r[i+1]=av.lane(1);
                    r[i+2]=av.lane(2);
                    r[i+3]=av.lane(3);
                    r[i+4]=av.lane(4);
                    r[i+5]=av.lane(5);
                    r[i+6]=av.lane(6);
                    r[i+7]=av.lane(7);
                    r[i+8]=av.lane(8);
                    r[i+9]=av.lane(9);
                    r[i+10]=av.lane(10);
                    r[i+11]=av.lane(11);
                    r[i+12]=av.lane(12);
                    r[i+13]=av.lane(13);
                    r[i+14]=av.lane(14);
                    r[i+15]=av.lane(15);
                    r[i+16]=av.lane(16);
                    r[i+17]=av.lane(17);
                    r[i+18]=av.lane(18);
                    r[i+19]=av.lane(19);
                    r[i+20]=av.lane(20);
                    r[i+21]=av.lane(21);
                    r[i+22]=av.lane(22);
                    r[i+23]=av.lane(23);
                    r[i+24]=av.lane(24);
                    r[i+25]=av.lane(25);
                    r[i+26]=av.lane(26);
                    r[i+27]=av.lane(27);
                    r[i+28]=av.lane(28);
                    r[i+29]=av.lane(29);
                    r[i+30]=av.lane(30);
                    r[i+31]=av.lane(31);
                } else if (num_lanes == 64) {
                    r[i]=av.lane(0);
                    r[i+1]=av.lane(1);
                    r[i+2]=av.lane(2);
                    r[i+3]=av.lane(3);
                    r[i+4]=av.lane(4);
                    r[i+5]=av.lane(5);
                    r[i+6]=av.lane(6);
                    r[i+7]=av.lane(7);
                    r[i+8]=av.lane(8);
                    r[i+9]=av.lane(9);
                    r[i+10]=av.lane(10);
                    r[i+11]=av.lane(11);
                    r[i+12]=av.lane(12);
                    r[i+13]=av.lane(13);
                    r[i+14]=av.lane(14);
                    r[i+15]=av.lane(15);
                    r[i+16]=av.lane(16);
                    r[i+17]=av.lane(17);
                    r[i+18]=av.lane(18);
                    r[i+19]=av.lane(19);
                    r[i+20]=av.lane(20);
                    r[i+21]=av.lane(21);
                    r[i+22]=av.lane(22);
                    r[i+23]=av.lane(23);
                    r[i+24]=av.lane(24);
                    r[i+25]=av.lane(25);
                    r[i+26]=av.lane(26);
                    r[i+27]=av.lane(27);
                    r[i+28]=av.lane(28);
                    r[i+29]=av.lane(29);
                    r[i+30]=av.lane(30);
                    r[i+31]=av.lane(31);
                    r[i+32]=av.lane(32);
                    r[i+33]=av.lane(33);
                    r[i+34]=av.lane(34);
                    r[i+35]=av.lane(35);
                    r[i+36]=av.lane(36);
                    r[i+37]=av.lane(37);
                    r[i+38]=av.lane(38);
                    r[i+39]=av.lane(39);
                    r[i+40]=av.lane(40);
                    r[i+41]=av.lane(41);
                    r[i+42]=av.lane(42);
                    r[i+43]=av.lane(43);
                    r[i+44]=av.lane(44);
                    r[i+45]=av.lane(45);
                    r[i+46]=av.lane(46);
                    r[i+47]=av.lane(47);
                    r[i+48]=av.lane(48);
                    r[i+49]=av.lane(49);
                    r[i+50]=av.lane(50);
                    r[i+51]=av.lane(51);
                    r[i+52]=av.lane(52);
                    r[i+53]=av.lane(53);
                    r[i+54]=av.lane(54);
                    r[i+55]=av.lane(55);
                    r[i+56]=av.lane(56);
                    r[i+57]=av.lane(57);
                    r[i+58]=av.lane(58);
                    r[i+59]=av.lane(59);
                    r[i+60]=av.lane(60);
                    r[i+61]=av.lane(61);
                    r[i+62]=av.lane(62);
                    r[i+63]=av.lane(63);
                } else {
                    // Lane count not covered by the hand-unrolled cases: use a generic loop.
                    for (int j = 0; j < SPECIES.length(); j++) {
                        r[i+j]=av.lane(j);
                    }
                }
            }
        }

        bh.consume(r);
    }
1682 
1683     @Benchmark
1684     public void broadcast(Blackhole bh) {
1685         byte[] a = fa.apply(SPECIES.length());
1686         byte[] r = new byte[a.length];
1687 
1688         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1689             for (int i = 0; i < a.length; i += SPECIES.length()) {
1690                 ByteVector.broadcast(SPECIES, a[i]).intoArray(r, i);
1691             }
1692         }
1693 
1694         bh.consume(r);
1695     }
1696 
1697     @Benchmark
1698     public void zero(Blackhole bh) {
1699         byte[] a = fa.apply(SPECIES.length());
1700         byte[] r = new byte[a.length];
1701 
1702         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1703             for (int i = 0; i < a.length; i += SPECIES.length()) {
1704                 ByteVector.zero(SPECIES).intoArray(a, i);
1705             }
1706         }
1707 
1708         bh.consume(r);
1709     }
1710 
1711     @Benchmark
1712     public void sliceUnary(Blackhole bh) {
1713         byte[] a = fa.apply(SPECIES.length());
1714         byte[] r = new byte[a.length];
1715         int origin = (new java.util.Random()).nextInt(SPECIES.length());
1716         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1717             for (int i = 0; i < a.length; i += SPECIES.length()) {
1718                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1719                 av.slice(origin).intoArray(r, i);
1720             }
1721         }
1722 
1723         bh.consume(r);
1724     }
1725 
1726     @Benchmark
1727     public void sliceBinary(Blackhole bh) {
1728         byte[] a = fa.apply(SPECIES.length());
1729         byte[] b = fb.apply(SPECIES.length());
1730         byte[] r = new byte[a.length];
1731         int origin = (new java.util.Random()).nextInt(SPECIES.length());
1732         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1733             for (int i = 0; i < a.length; i += SPECIES.length()) {
1734                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1735                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1736                 av.slice(origin, bv).intoArray(r, i);
1737             }
1738         }
1739 
1740         bh.consume(r);
1741     }
1742 
1743     @Benchmark
1744     public void sliceMasked(Blackhole bh) {
1745         byte[] a = fa.apply(SPECIES.length());
1746         byte[] b = fb.apply(SPECIES.length());
1747         boolean[] mask = fm.apply(SPECIES.length());
1748         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1749 
1750         byte[] r = new byte[a.length];
1751         int origin = (new java.util.Random()).nextInt(SPECIES.length());
1752         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1753             for (int i = 0; i < a.length; i += SPECIES.length()) {
1754                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1755                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1756                 av.slice(origin, bv, vmask).intoArray(r, i);
1757             }
1758         }
1759 
1760         bh.consume(r);
1761     }
1762 
1763     @Benchmark
1764     public void unsliceUnary(Blackhole bh) {
1765         byte[] a = fa.apply(SPECIES.length());
1766         byte[] r = new byte[a.length];
1767         int origin = (new java.util.Random()).nextInt(SPECIES.length());
1768         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1769             for (int i = 0; i < a.length; i += SPECIES.length()) {
1770                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1771                 av.unslice(origin).intoArray(r, i);
1772             }
1773         }
1774 
1775         bh.consume(r);
1776     }
1777 
1778     @Benchmark
1779     public void unsliceBinary(Blackhole bh) {
1780         byte[] a = fa.apply(SPECIES.length());
1781         byte[] b = fb.apply(SPECIES.length());
1782         byte[] r = new byte[a.length];
1783         int origin = (new java.util.Random()).nextInt(SPECIES.length());
1784         int part = (new java.util.Random()).nextInt(2);
1785         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1786             for (int i = 0; i < a.length; i += SPECIES.length()) {
1787                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1788                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1789                 av.unslice(origin, bv, part).intoArray(r, i);
1790             }
1791         }
1792 
1793         bh.consume(r);
1794     }
1795 
1796     @Benchmark
1797     public void unsliceMasked(Blackhole bh) {
1798         byte[] a = fa.apply(SPECIES.length());
1799         byte[] b = fb.apply(SPECIES.length());
1800         boolean[] mask = fm.apply(SPECIES.length());
1801         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1802         byte[] r = new byte[a.length];
1803         int origin = (new java.util.Random()).nextInt(SPECIES.length());
1804         int part = (new java.util.Random()).nextInt(2);
1805         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1806             for (int i = 0; i < a.length; i += SPECIES.length()) {
1807                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1808                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1809                 av.unslice(origin, bv, part, vmask).intoArray(r, i);
1810             }
1811         }
1812 
1813         bh.consume(r);
1814     }
1815 
1816 
1817 
1818 
1819 
1820 
1821 
1822 
1823 
1824 
1825 
1826 
1827 
1828 
1829 
1830 
1831 
1832 
1833 
1834 
1835 
1836 
1837     @Benchmark
1838     public void BITWISE_BLEND(Blackhole bh) {
1839         byte[] a = fa.apply(SPECIES.length());
1840         byte[] b = fb.apply(SPECIES.length());
1841         byte[] c = fc.apply(SPECIES.length());
1842         byte[] r = fr.apply(SPECIES.length());
1843 
1844         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1845             for (int i = 0; i < a.length; i += SPECIES.length()) {
1846                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1847                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1848                 ByteVector cv = ByteVector.fromArray(SPECIES, c, i);
1849                 av.lanewise(VectorOperators.BITWISE_BLEND, bv, cv).intoArray(r, i);
1850             }
1851         }
1852 
1853         bh.consume(r);
1854     }
1855 
1856 
1857 
1858     @Benchmark
1859     public void BITWISE_BLENDMasked(Blackhole bh) {
1860         byte[] a = fa.apply(SPECIES.length());
1861         byte[] b = fb.apply(SPECIES.length());
1862         byte[] c = fc.apply(SPECIES.length());
1863         byte[] r = fr.apply(SPECIES.length());
1864         boolean[] mask = fm.apply(SPECIES.length());
1865         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1866 
1867         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1868             for (int i = 0; i < a.length; i += SPECIES.length()) {
1869                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1870                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1871                 ByteVector cv = ByteVector.fromArray(SPECIES, c, i);
1872                 av.lanewise(VectorOperators.BITWISE_BLEND, bv, cv, vmask).intoArray(r, i);
1873             }
1874         }
1875 
1876         bh.consume(r);
1877     }
1878 
1879 
1880     @Benchmark
1881     public void NEG(Blackhole bh) {
1882         byte[] a = fa.apply(SPECIES.length());
1883         byte[] r = fr.apply(SPECIES.length());
1884 
1885         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1886             for (int i = 0; i < a.length; i += SPECIES.length()) {
1887                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1888                 av.lanewise(VectorOperators.NEG).intoArray(r, i);
1889             }
1890         }
1891 
1892         bh.consume(r);
1893     }
1894 
1895     @Benchmark
1896     public void NEGMasked(Blackhole bh) {
1897         byte[] a = fa.apply(SPECIES.length());
1898         byte[] r = fr.apply(SPECIES.length());
1899         boolean[] mask = fm.apply(SPECIES.length());
1900         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1901 
1902         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1903             for (int i = 0; i < a.length; i += SPECIES.length()) {
1904                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1905                 av.lanewise(VectorOperators.NEG, vmask).intoArray(r, i);
1906             }
1907         }
1908 
1909         bh.consume(r);
1910     }
1911 
1912     @Benchmark
1913     public void ABS(Blackhole bh) {
1914         byte[] a = fa.apply(SPECIES.length());
1915         byte[] r = fr.apply(SPECIES.length());
1916 
1917         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1918             for (int i = 0; i < a.length; i += SPECIES.length()) {
1919                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1920                 av.lanewise(VectorOperators.ABS).intoArray(r, i);
1921             }
1922         }
1923 
1924         bh.consume(r);
1925     }
1926 
1927     @Benchmark
1928     public void ABSMasked(Blackhole bh) {
1929         byte[] a = fa.apply(SPECIES.length());
1930         byte[] r = fr.apply(SPECIES.length());
1931         boolean[] mask = fm.apply(SPECIES.length());
1932         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1933 
1934         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1935             for (int i = 0; i < a.length; i += SPECIES.length()) {
1936                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1937                 av.lanewise(VectorOperators.ABS, vmask).intoArray(r, i);
1938             }
1939         }
1940 
1941         bh.consume(r);
1942     }
1943 
1944 
1945     @Benchmark
1946     public void NOT(Blackhole bh) {
1947         byte[] a = fa.apply(SPECIES.length());
1948         byte[] r = fr.apply(SPECIES.length());
1949 
1950         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1951             for (int i = 0; i < a.length; i += SPECIES.length()) {
1952                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1953                 av.lanewise(VectorOperators.NOT).intoArray(r, i);
1954             }
1955         }
1956 
1957         bh.consume(r);
1958     }
1959 
1960 
1961 
1962     @Benchmark
1963     public void NOTMasked(Blackhole bh) {
1964         byte[] a = fa.apply(SPECIES.length());
1965         byte[] r = fr.apply(SPECIES.length());
1966         boolean[] mask = fm.apply(SPECIES.length());
1967         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1968 
1969         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1970             for (int i = 0; i < a.length; i += SPECIES.length()) {
1971                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1972                 av.lanewise(VectorOperators.NOT, vmask).intoArray(r, i);
1973             }
1974         }
1975 
1976         bh.consume(r);
1977     }
1978 
1979 
1980 
1981     @Benchmark
1982     public void ZOMO(Blackhole bh) {
1983         byte[] a = fa.apply(SPECIES.length());
1984         byte[] r = fr.apply(SPECIES.length());
1985 
1986         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1987             for (int i = 0; i < a.length; i += SPECIES.length()) {
1988                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1989                 av.lanewise(VectorOperators.ZOMO).intoArray(r, i);
1990             }
1991         }
1992 
1993         bh.consume(r);
1994     }
1995 
1996 
1997 
1998     @Benchmark
1999     public void ZOMOMasked(Blackhole bh) {
2000         byte[] a = fa.apply(SPECIES.length());
2001         byte[] r = fr.apply(SPECIES.length());
2002         boolean[] mask = fm.apply(SPECIES.length());
2003         VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
2004 
2005         for (int ic = 0; ic < INVOC_COUNT; ic++) {
2006             for (int i = 0; i < a.length; i += SPECIES.length()) {
2007                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
2008                 av.lanewise(VectorOperators.ZOMO, vmask).intoArray(r, i);
2009             }
2010         }
2011 
2012         bh.consume(r);
2013     }
2014 
2015 
2016 
2017 }
2018