1 /*
   2  * Copyright (c) 2018, 2021, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 package org.openjdk.bench.jdk.incubator.vector.operation;
  25 
  26 // -- This file was mechanically generated: Do not edit! -- //
  27 
  28 import jdk.incubator.vector.Vector;
  29 import jdk.incubator.vector.VectorMask;
  30 import jdk.incubator.vector.VectorOperators;
  31 import jdk.incubator.vector.VectorShape;
  32 import jdk.incubator.vector.VectorSpecies;
  33 import jdk.incubator.vector.VectorShuffle;
  34 import jdk.incubator.vector.ShortVector;
  35 
  36 import java.util.concurrent.TimeUnit;
  37 import java.util.function.BiFunction;
  38 import java.util.function.IntFunction;
  39 
  40 import org.openjdk.jmh.annotations.*;
  41 import org.openjdk.jmh.infra.Blackhole;
  42 
  43 @BenchmarkMode(Mode.Throughput)
  44 @OutputTimeUnit(TimeUnit.MILLISECONDS)
  45 @State(Scope.Benchmark)
  46 @Warmup(iterations = 3, time = 1)
  47 @Measurement(iterations = 5, time = 1)
  48 @Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
  49 public class Short64Vector extends AbstractVectorBenchmark {
    // Vector species used by every benchmark in this class: 64-bit vectors of short lanes.
    static final VectorSpecies<Short> SPECIES = ShortVector.SPECIES_64;

    // Trip count of the outer repetition loop in each benchmark; 1 makes that loop run once.
    static final int INVOC_COUNT = 1; // get rid of outer loop
  53 
  54     static void replaceZero(short[] a, short v) {
  55         for (int i = 0; i < a.length; i++) {
  56             if (a[i] == 0) {
  57                 a[i] = v;
  58             }
  59         }
  60     }
  61 
  62     static void replaceZero(short[] a, boolean[] mask, short v) {
  63         for (int i = 0; i < a.length; i++) {
  64             if (mask[i % mask.length] && a[i] == 0) {
  65                 a[i] = v;
  66             }
  67         }
  68     }
  69 
  70     static short firstNonZero(short a, short b) {
  71         return Short.compare(a, (short) 0) != 0 ? a : b;
  72     }
  73 
    // Fixed shift count (half the bit width of a short) used by the *ShiftConst benchmarks.
    private static final short CONST_SHIFT = Short.SIZE / 2;
  75 
    // Requested element count for the benchmark arrays (JMH parameter, default 1024);
    // init() may increase it before the arrays are allocated.
    @Param("1024")
    int size;
  78 
  79     short[] fill(IntFunction<Short> f) {
  80         short[] array = new short[size];
  81         for (int i = 0; i < array.length; i++) {
  82             array[i] = f.apply(i);
  83         }
  84         return array;
  85     }
  86 
    // Benchmark data, allocated in init(): a, b, c are inputs and r is the zero-filled result array.
    short[] a, b, c, r;
    // Mask arrays built in init(): m from the predicate (i % 2) == 0, mt from "always true",
    // rm from "always false".
    boolean[] m, mt, rm;
    // Int array filled in init() with RANDOM.nextInt(SPECIES.length()) values.
    int[] s;
  90 
  91     @Setup
  92     public void init() {
  93         size += size % SPECIES.length(); // FIXME: add post-loops
  94 
  95         a = fill(i -> (short)(2*i));
  96         b = fill(i -> (short)(i+1));
  97         c = fill(i -> (short)(i+5));
  98         r = fill(i -> (short)0);
  99 
 100         m = fillMask(size, i -> (i % 2) == 0);
 101         mt = fillMask(size, i -> true);
 102         rm = fillMask(size, i -> false);
 103 
 104         s = fillInt(size, i -> RANDOM.nextInt(SPECIES.length()));
 105     }
 106 
    // Array suppliers used by the benchmarks. Each lambda ignores its argument(s) and
    // returns the corresponding field as currently assigned.
    final IntFunction<short[]> fa = vl -> a;
    final IntFunction<short[]> fb = vl -> b;
    final IntFunction<short[]> fc = vl -> c;
    final IntFunction<short[]> fr = vl -> r;
    final IntFunction<boolean[]> fm = vl -> m;
    final IntFunction<boolean[]> fmt = vl -> mt;
    final IntFunction<boolean[]> fmr = vl -> rm;
    final BiFunction<Integer,Integer,int[]> fs = (i,j) -> s;
 115 
 116 
 117     @Benchmark
 118     public void ADD(Blackhole bh) {
 119         short[] a = fa.apply(SPECIES.length());
 120         short[] b = fb.apply(SPECIES.length());
 121         short[] r = fr.apply(SPECIES.length());
 122 
 123         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 124             for (int i = 0; i < a.length; i += SPECIES.length()) {
 125                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 126                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 127                 av.lanewise(VectorOperators.ADD, bv).intoArray(r, i);
 128             }
 129         }
 130 
 131         bh.consume(r);
 132     }
 133 
 134     @Benchmark
 135     public void ADDMasked(Blackhole bh) {
 136         short[] a = fa.apply(SPECIES.length());
 137         short[] b = fb.apply(SPECIES.length());
 138         short[] r = fr.apply(SPECIES.length());
 139         boolean[] mask = fm.apply(SPECIES.length());
 140         VectorMask<Short> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 141 
 142         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 143             for (int i = 0; i < a.length; i += SPECIES.length()) {
 144                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 145                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 146                 av.lanewise(VectorOperators.ADD, bv, vmask).intoArray(r, i);
 147             }
 148         }
 149 
 150         bh.consume(r);
 151     }
 152 
 153     @Benchmark
 154     public void SUB(Blackhole bh) {
 155         short[] a = fa.apply(SPECIES.length());
 156         short[] b = fb.apply(SPECIES.length());
 157         short[] r = fr.apply(SPECIES.length());
 158 
 159         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 160             for (int i = 0; i < a.length; i += SPECIES.length()) {
 161                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 162                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 163                 av.lanewise(VectorOperators.SUB, bv).intoArray(r, i);
 164             }
 165         }
 166 
 167         bh.consume(r);
 168     }
 169 
 170     @Benchmark
 171     public void SUBMasked(Blackhole bh) {
 172         short[] a = fa.apply(SPECIES.length());
 173         short[] b = fb.apply(SPECIES.length());
 174         short[] r = fr.apply(SPECIES.length());
 175         boolean[] mask = fm.apply(SPECIES.length());
 176         VectorMask<Short> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 177 
 178         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 179             for (int i = 0; i < a.length; i += SPECIES.length()) {
 180                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 181                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 182                 av.lanewise(VectorOperators.SUB, bv, vmask).intoArray(r, i);
 183             }
 184         }
 185 
 186         bh.consume(r);
 187     }
 188 
 189     @Benchmark
 190     public void MUL(Blackhole bh) {
 191         short[] a = fa.apply(SPECIES.length());
 192         short[] b = fb.apply(SPECIES.length());
 193         short[] r = fr.apply(SPECIES.length());
 194 
 195         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 196             for (int i = 0; i < a.length; i += SPECIES.length()) {
 197                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 198                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 199                 av.lanewise(VectorOperators.MUL, bv).intoArray(r, i);
 200             }
 201         }
 202 
 203         bh.consume(r);
 204     }
 205 
 206     @Benchmark
 207     public void MULMasked(Blackhole bh) {
 208         short[] a = fa.apply(SPECIES.length());
 209         short[] b = fb.apply(SPECIES.length());
 210         short[] r = fr.apply(SPECIES.length());
 211         boolean[] mask = fm.apply(SPECIES.length());
 212         VectorMask<Short> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 213 
 214         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 215             for (int i = 0; i < a.length; i += SPECIES.length()) {
 216                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 217                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 218                 av.lanewise(VectorOperators.MUL, bv, vmask).intoArray(r, i);
 219             }
 220         }
 221 
 222         bh.consume(r);
 223     }
 224 
 225     @Benchmark
 226     public void DIV(Blackhole bh) {
 227         short[] a = fa.apply(SPECIES.length());
 228         short[] b = fb.apply(SPECIES.length());
 229         short[] r = fr.apply(SPECIES.length());
 230 
 231         replaceZero(b, (short) 1);
 232 
 233         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 234             for (int i = 0; i < a.length; i += SPECIES.length()) {
 235                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 236                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 237                 av.lanewise(VectorOperators.DIV, bv).intoArray(r, i);
 238             }
 239         }
 240 
 241         bh.consume(r);
 242     }
 243 
 244     @Benchmark
 245     public void DIVMasked(Blackhole bh) {
 246         short[] a = fa.apply(SPECIES.length());
 247         short[] b = fb.apply(SPECIES.length());
 248         short[] r = fr.apply(SPECIES.length());
 249         boolean[] mask = fm.apply(SPECIES.length());
 250         VectorMask<Short> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 251 
 252         replaceZero(b, mask, (short) 1);
 253 
 254         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 255             for (int i = 0; i < a.length; i += SPECIES.length()) {
 256                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 257                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 258                 av.lanewise(VectorOperators.DIV, bv, vmask).intoArray(r, i);
 259             }
 260         }
 261 
 262         bh.consume(r);
 263     }
 264 
 265     @Benchmark
 266     public void FIRST_NONZERO(Blackhole bh) {
 267         short[] a = fa.apply(SPECIES.length());
 268         short[] b = fb.apply(SPECIES.length());
 269         short[] r = fr.apply(SPECIES.length());
 270 
 271         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 272             for (int i = 0; i < a.length; i += SPECIES.length()) {
 273                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 274                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 275                 av.lanewise(VectorOperators.FIRST_NONZERO, bv).intoArray(r, i);
 276             }
 277         }
 278 
 279         bh.consume(r);
 280     }
 281 
 282     @Benchmark
 283     public void FIRST_NONZEROMasked(Blackhole bh) {
 284         short[] a = fa.apply(SPECIES.length());
 285         short[] b = fb.apply(SPECIES.length());
 286         short[] r = fr.apply(SPECIES.length());
 287         boolean[] mask = fm.apply(SPECIES.length());
 288         VectorMask<Short> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 289 
 290         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 291             for (int i = 0; i < a.length; i += SPECIES.length()) {
 292                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 293                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 294                 av.lanewise(VectorOperators.FIRST_NONZERO, bv, vmask).intoArray(r, i);
 295             }
 296         }
 297 
 298         bh.consume(r);
 299     }
 300 
 301     @Benchmark
 302     public void AND(Blackhole bh) {
 303         short[] a = fa.apply(SPECIES.length());
 304         short[] b = fb.apply(SPECIES.length());
 305         short[] r = fr.apply(SPECIES.length());
 306 
 307         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 308             for (int i = 0; i < a.length; i += SPECIES.length()) {
 309                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 310                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 311                 av.lanewise(VectorOperators.AND, bv).intoArray(r, i);
 312             }
 313         }
 314 
 315         bh.consume(r);
 316     }
 317 
 318     @Benchmark
 319     public void ANDMasked(Blackhole bh) {
 320         short[] a = fa.apply(SPECIES.length());
 321         short[] b = fb.apply(SPECIES.length());
 322         short[] r = fr.apply(SPECIES.length());
 323         boolean[] mask = fm.apply(SPECIES.length());
 324         VectorMask<Short> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 325 
 326         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 327             for (int i = 0; i < a.length; i += SPECIES.length()) {
 328                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 329                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 330                 av.lanewise(VectorOperators.AND, bv, vmask).intoArray(r, i);
 331             }
 332         }
 333 
 334         bh.consume(r);
 335     }
 336 
 337     @Benchmark
 338     public void AND_NOT(Blackhole bh) {
 339         short[] a = fa.apply(SPECIES.length());
 340         short[] b = fb.apply(SPECIES.length());
 341         short[] r = fr.apply(SPECIES.length());
 342 
 343         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 344             for (int i = 0; i < a.length; i += SPECIES.length()) {
 345                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 346                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 347                 av.lanewise(VectorOperators.AND_NOT, bv).intoArray(r, i);
 348             }
 349         }
 350 
 351         bh.consume(r);
 352     }
 353 
 354     @Benchmark
 355     public void AND_NOTMasked(Blackhole bh) {
 356         short[] a = fa.apply(SPECIES.length());
 357         short[] b = fb.apply(SPECIES.length());
 358         short[] r = fr.apply(SPECIES.length());
 359         boolean[] mask = fm.apply(SPECIES.length());
 360         VectorMask<Short> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 361 
 362         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 363             for (int i = 0; i < a.length; i += SPECIES.length()) {
 364                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 365                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 366                 av.lanewise(VectorOperators.AND_NOT, bv, vmask).intoArray(r, i);
 367             }
 368         }
 369 
 370         bh.consume(r);
 371     }
 372 
 373     @Benchmark
 374     public void OR(Blackhole bh) {
 375         short[] a = fa.apply(SPECIES.length());
 376         short[] b = fb.apply(SPECIES.length());
 377         short[] r = fr.apply(SPECIES.length());
 378 
 379         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 380             for (int i = 0; i < a.length; i += SPECIES.length()) {
 381                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 382                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 383                 av.lanewise(VectorOperators.OR, bv).intoArray(r, i);
 384             }
 385         }
 386 
 387         bh.consume(r);
 388     }
 389 
 390     @Benchmark
 391     public void ORMasked(Blackhole bh) {
 392         short[] a = fa.apply(SPECIES.length());
 393         short[] b = fb.apply(SPECIES.length());
 394         short[] r = fr.apply(SPECIES.length());
 395         boolean[] mask = fm.apply(SPECIES.length());
 396         VectorMask<Short> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 397 
 398         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 399             for (int i = 0; i < a.length; i += SPECIES.length()) {
 400                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 401                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 402                 av.lanewise(VectorOperators.OR, bv, vmask).intoArray(r, i);
 403             }
 404         }
 405 
 406         bh.consume(r);
 407     }
 408 
 409     @Benchmark
 410     public void XOR(Blackhole bh) {
 411         short[] a = fa.apply(SPECIES.length());
 412         short[] b = fb.apply(SPECIES.length());
 413         short[] r = fr.apply(SPECIES.length());
 414 
 415         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 416             for (int i = 0; i < a.length; i += SPECIES.length()) {
 417                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 418                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 419                 av.lanewise(VectorOperators.XOR, bv).intoArray(r, i);
 420             }
 421         }
 422 
 423         bh.consume(r);
 424     }
 425 
 426     @Benchmark
 427     public void XORMasked(Blackhole bh) {
 428         short[] a = fa.apply(SPECIES.length());
 429         short[] b = fb.apply(SPECIES.length());
 430         short[] r = fr.apply(SPECIES.length());
 431         boolean[] mask = fm.apply(SPECIES.length());
 432         VectorMask<Short> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 433 
 434         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 435             for (int i = 0; i < a.length; i += SPECIES.length()) {
 436                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 437                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 438                 av.lanewise(VectorOperators.XOR, bv, vmask).intoArray(r, i);
 439             }
 440         }
 441 
 442         bh.consume(r);
 443     }
 444 
 445     @Benchmark
 446     public void LSHL(Blackhole bh) {
 447         short[] a = fa.apply(SPECIES.length());
 448         short[] b = fb.apply(SPECIES.length());
 449         short[] r = fr.apply(SPECIES.length());
 450 
 451         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 452             for (int i = 0; i < a.length; i += SPECIES.length()) {
 453                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 454                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 455                 av.lanewise(VectorOperators.LSHL, bv).intoArray(r, i);
 456             }
 457         }
 458 
 459         bh.consume(r);
 460     }
 461 
 462     @Benchmark
 463     public void LSHLMasked(Blackhole bh) {
 464         short[] a = fa.apply(SPECIES.length());
 465         short[] b = fb.apply(SPECIES.length());
 466         short[] r = fr.apply(SPECIES.length());
 467         boolean[] mask = fm.apply(SPECIES.length());
 468         VectorMask<Short> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 469 
 470         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 471             for (int i = 0; i < a.length; i += SPECIES.length()) {
 472                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 473                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 474                 av.lanewise(VectorOperators.LSHL, bv, vmask).intoArray(r, i);
 475             }
 476         }
 477 
 478         bh.consume(r);
 479     }
 480 
 481     @Benchmark
 482     public void ASHR(Blackhole bh) {
 483         short[] a = fa.apply(SPECIES.length());
 484         short[] b = fb.apply(SPECIES.length());
 485         short[] r = fr.apply(SPECIES.length());
 486 
 487         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 488             for (int i = 0; i < a.length; i += SPECIES.length()) {
 489                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 490                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 491                 av.lanewise(VectorOperators.ASHR, bv).intoArray(r, i);
 492             }
 493         }
 494 
 495         bh.consume(r);
 496     }
 497 
 498     @Benchmark
 499     public void ASHRMasked(Blackhole bh) {
 500         short[] a = fa.apply(SPECIES.length());
 501         short[] b = fb.apply(SPECIES.length());
 502         short[] r = fr.apply(SPECIES.length());
 503         boolean[] mask = fm.apply(SPECIES.length());
 504         VectorMask<Short> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 505 
 506         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 507             for (int i = 0; i < a.length; i += SPECIES.length()) {
 508                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 509                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 510                 av.lanewise(VectorOperators.ASHR, bv, vmask).intoArray(r, i);
 511             }
 512         }
 513 
 514         bh.consume(r);
 515     }
 516 
 517     @Benchmark
 518     public void LSHR(Blackhole bh) {
 519         short[] a = fa.apply(SPECIES.length());
 520         short[] b = fb.apply(SPECIES.length());
 521         short[] r = fr.apply(SPECIES.length());
 522 
 523         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 524             for (int i = 0; i < a.length; i += SPECIES.length()) {
 525                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 526                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 527                 av.lanewise(VectorOperators.LSHR, bv).intoArray(r, i);
 528             }
 529         }
 530 
 531         bh.consume(r);
 532     }
 533 
 534     @Benchmark
 535     public void LSHRMasked(Blackhole bh) {
 536         short[] a = fa.apply(SPECIES.length());
 537         short[] b = fb.apply(SPECIES.length());
 538         short[] r = fr.apply(SPECIES.length());
 539         boolean[] mask = fm.apply(SPECIES.length());
 540         VectorMask<Short> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 541 
 542         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 543             for (int i = 0; i < a.length; i += SPECIES.length()) {
 544                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 545                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 546                 av.lanewise(VectorOperators.LSHR, bv, vmask).intoArray(r, i);
 547             }
 548         }
 549 
 550         bh.consume(r);
 551     }
 552 
 553     @Benchmark
 554     public void LSHLShift(Blackhole bh) {
 555         short[] a = fa.apply(SPECIES.length());
 556         short[] b = fb.apply(SPECIES.length());
 557         short[] r = fr.apply(SPECIES.length());
 558 
 559         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 560             for (int i = 0; i < a.length; i += SPECIES.length()) {
 561                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 562                 av.lanewise(VectorOperators.LSHL, (int)b[i]).intoArray(r, i);
 563             }
 564         }
 565 
 566         bh.consume(r);
 567     }
 568 
 569     @Benchmark
 570     public void LSHLMaskedShift(Blackhole bh) {
 571         short[] a = fa.apply(SPECIES.length());
 572         short[] b = fb.apply(SPECIES.length());
 573         short[] r = fr.apply(SPECIES.length());
 574         boolean[] mask = fm.apply(SPECIES.length());
 575         VectorMask<Short> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 576 
 577         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 578             for (int i = 0; i < a.length; i += SPECIES.length()) {
 579                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 580                 av.lanewise(VectorOperators.LSHL, (int)b[i], vmask).intoArray(r, i);
 581             }
 582         }
 583 
 584         bh.consume(r);
 585     }
 586 
 587     @Benchmark
 588     public void LSHRShift(Blackhole bh) {
 589         short[] a = fa.apply(SPECIES.length());
 590         short[] b = fb.apply(SPECIES.length());
 591         short[] r = fr.apply(SPECIES.length());
 592 
 593         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 594             for (int i = 0; i < a.length; i += SPECIES.length()) {
 595                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 596                 av.lanewise(VectorOperators.LSHR, (int)b[i]).intoArray(r, i);
 597             }
 598         }
 599 
 600         bh.consume(r);
 601     }
 602 
 603     @Benchmark
 604     public void LSHRMaskedShift(Blackhole bh) {
 605         short[] a = fa.apply(SPECIES.length());
 606         short[] b = fb.apply(SPECIES.length());
 607         short[] r = fr.apply(SPECIES.length());
 608         boolean[] mask = fm.apply(SPECIES.length());
 609         VectorMask<Short> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 610 
 611         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 612             for (int i = 0; i < a.length; i += SPECIES.length()) {
 613                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 614                 av.lanewise(VectorOperators.LSHR, (int)b[i], vmask).intoArray(r, i);
 615             }
 616         }
 617 
 618         bh.consume(r);
 619     }
 620 
 621     @Benchmark
 622     public void ASHRShift(Blackhole bh) {
 623         short[] a = fa.apply(SPECIES.length());
 624         short[] b = fb.apply(SPECIES.length());
 625         short[] r = fr.apply(SPECIES.length());
 626 
 627         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 628             for (int i = 0; i < a.length; i += SPECIES.length()) {
 629                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 630                 av.lanewise(VectorOperators.ASHR, (int)b[i]).intoArray(r, i);
 631             }
 632         }
 633 
 634         bh.consume(r);
 635     }
 636 
 637     @Benchmark
 638     public void ASHRMaskedShift(Blackhole bh) {
 639         short[] a = fa.apply(SPECIES.length());
 640         short[] b = fb.apply(SPECIES.length());
 641         short[] r = fr.apply(SPECIES.length());
 642         boolean[] mask = fm.apply(SPECIES.length());
 643         VectorMask<Short> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 644 
 645         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 646             for (int i = 0; i < a.length; i += SPECIES.length()) {
 647                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 648                 av.lanewise(VectorOperators.ASHR, (int)b[i], vmask).intoArray(r, i);
 649             }
 650         }
 651 
 652         bh.consume(r);
 653     }
 654 
 655     @Benchmark
 656     public void ROR(Blackhole bh) {
 657         short[] a = fa.apply(SPECIES.length());
 658         short[] b = fb.apply(SPECIES.length());
 659         short[] r = fr.apply(SPECIES.length());
 660 
 661         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 662             for (int i = 0; i < a.length; i += SPECIES.length()) {
 663                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 664                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 665                 av.lanewise(VectorOperators.ROR, bv).intoArray(r, i);
 666             }
 667         }
 668 
 669         bh.consume(r);
 670     }
 671 
 672     @Benchmark
 673     public void RORMasked(Blackhole bh) {
 674         short[] a = fa.apply(SPECIES.length());
 675         short[] b = fb.apply(SPECIES.length());
 676         short[] r = fr.apply(SPECIES.length());
 677         boolean[] mask = fm.apply(SPECIES.length());
 678         VectorMask<Short> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 679 
 680         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 681             for (int i = 0; i < a.length; i += SPECIES.length()) {
 682                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 683                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 684                 av.lanewise(VectorOperators.ROR, bv, vmask).intoArray(r, i);
 685             }
 686         }
 687 
 688         bh.consume(r);
 689     }
 690 
 691     @Benchmark
 692     public void ROL(Blackhole bh) {
 693         short[] a = fa.apply(SPECIES.length());
 694         short[] b = fb.apply(SPECIES.length());
 695         short[] r = fr.apply(SPECIES.length());
 696 
 697         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 698             for (int i = 0; i < a.length; i += SPECIES.length()) {
 699                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 700                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 701                 av.lanewise(VectorOperators.ROL, bv).intoArray(r, i);
 702             }
 703         }
 704 
 705         bh.consume(r);
 706     }
 707 
 708     @Benchmark
 709     public void ROLMasked(Blackhole bh) {
 710         short[] a = fa.apply(SPECIES.length());
 711         short[] b = fb.apply(SPECIES.length());
 712         short[] r = fr.apply(SPECIES.length());
 713         boolean[] mask = fm.apply(SPECIES.length());
 714         VectorMask<Short> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 715 
 716         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 717             for (int i = 0; i < a.length; i += SPECIES.length()) {
 718                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 719                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 720                 av.lanewise(VectorOperators.ROL, bv, vmask).intoArray(r, i);
 721             }
 722         }
 723 
 724         bh.consume(r);
 725     }
 726 
 727     @Benchmark
 728     public void RORShift(Blackhole bh) {
 729         short[] a = fa.apply(SPECIES.length());
 730         short[] b = fb.apply(SPECIES.length());
 731         short[] r = fr.apply(SPECIES.length());
 732 
 733         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 734             for (int i = 0; i < a.length; i += SPECIES.length()) {
 735                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 736                 av.lanewise(VectorOperators.ROR, (int)b[i]).intoArray(r, i);
 737             }
 738         }
 739 
 740         bh.consume(r);
 741     }
 742 
 743     @Benchmark
 744     public void RORMaskedShift(Blackhole bh) {
 745         short[] a = fa.apply(SPECIES.length());
 746         short[] b = fb.apply(SPECIES.length());
 747         short[] r = fr.apply(SPECIES.length());
 748         boolean[] mask = fm.apply(SPECIES.length());
 749         VectorMask<Short> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 750 
 751         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 752             for (int i = 0; i < a.length; i += SPECIES.length()) {
 753                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 754                 av.lanewise(VectorOperators.ROR, (int)b[i], vmask).intoArray(r, i);
 755             }
 756         }
 757 
 758         bh.consume(r);
 759     }
 760 
 761     @Benchmark
 762     public void ROLShift(Blackhole bh) {
 763         short[] a = fa.apply(SPECIES.length());
 764         short[] b = fb.apply(SPECIES.length());
 765         short[] r = fr.apply(SPECIES.length());
 766 
 767         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 768             for (int i = 0; i < a.length; i += SPECIES.length()) {
 769                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 770                 av.lanewise(VectorOperators.ROL, (int)b[i]).intoArray(r, i);
 771             }
 772         }
 773 
 774         bh.consume(r);
 775     }
 776 
 777     @Benchmark
 778     public void ROLMaskedShift(Blackhole bh) {
 779         short[] a = fa.apply(SPECIES.length());
 780         short[] b = fb.apply(SPECIES.length());
 781         short[] r = fr.apply(SPECIES.length());
 782         boolean[] mask = fm.apply(SPECIES.length());
 783         VectorMask<Short> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 784 
 785         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 786             for (int i = 0; i < a.length; i += SPECIES.length()) {
 787                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 788                 av.lanewise(VectorOperators.ROL, (int)b[i], vmask).intoArray(r, i);
 789             }
 790         }
 791 
 792         bh.consume(r);
 793     }
 794 
 795     @Benchmark
 796     public void LSHRShiftConst(Blackhole bh) {
 797         short[] a = fa.apply(SPECIES.length());
 798         short[] r = fr.apply(SPECIES.length());
 799 
 800         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 801             for (int i = 0; i < a.length; i += SPECIES.length()) {
 802                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 803                 av.lanewise(VectorOperators.LSHR, CONST_SHIFT).intoArray(r, i);
 804             }
 805         }
 806 
 807         bh.consume(r);
 808     }
 809 
 810     @Benchmark
 811     public void LSHRMaskedShiftConst(Blackhole bh) {
 812         short[] a = fa.apply(SPECIES.length());
 813         short[] r = fr.apply(SPECIES.length());
 814         boolean[] mask = fm.apply(SPECIES.length());
 815         VectorMask<Short> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 816 
 817         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 818             for (int i = 0; i < a.length; i += SPECIES.length()) {
 819                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 820                 av.lanewise(VectorOperators.LSHR, CONST_SHIFT, vmask).intoArray(r, i);
 821             }
 822         }
 823 
 824         bh.consume(r);
 825     }
 826 
 827     @Benchmark
 828     public void LSHLShiftConst(Blackhole bh) {
 829         short[] a = fa.apply(SPECIES.length());
 830         short[] r = fr.apply(SPECIES.length());
 831 
 832         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 833             for (int i = 0; i < a.length; i += SPECIES.length()) {
 834                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 835                 av.lanewise(VectorOperators.LSHL, CONST_SHIFT).intoArray(r, i);
 836             }
 837         }
 838 
 839         bh.consume(r);
 840     }
 841 
 842     @Benchmark
 843     public void LSHLMaskedShiftConst(Blackhole bh) {
 844         short[] a = fa.apply(SPECIES.length());
 845         short[] r = fr.apply(SPECIES.length());
 846         boolean[] mask = fm.apply(SPECIES.length());
 847         VectorMask<Short> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 848 
 849         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 850             for (int i = 0; i < a.length; i += SPECIES.length()) {
 851                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 852                 av.lanewise(VectorOperators.LSHL, CONST_SHIFT, vmask).intoArray(r, i);
 853             }
 854         }
 855 
 856         bh.consume(r);
 857     }
 858 
 859     @Benchmark
 860     public void ASHRShiftConst(Blackhole bh) {
 861         short[] a = fa.apply(SPECIES.length());
 862         short[] r = fr.apply(SPECIES.length());
 863 
 864         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 865             for (int i = 0; i < a.length; i += SPECIES.length()) {
 866                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 867                 av.lanewise(VectorOperators.ASHR, CONST_SHIFT).intoArray(r, i);
 868             }
 869         }
 870 
 871         bh.consume(r);
 872     }
 873 
 874     @Benchmark
 875     public void ASHRMaskedShiftConst(Blackhole bh) {
 876         short[] a = fa.apply(SPECIES.length());
 877         short[] r = fr.apply(SPECIES.length());
 878         boolean[] mask = fm.apply(SPECIES.length());
 879         VectorMask<Short> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 880 
 881         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 882             for (int i = 0; i < a.length; i += SPECIES.length()) {
 883                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 884                 av.lanewise(VectorOperators.ASHR, CONST_SHIFT, vmask).intoArray(r, i);
 885             }
 886         }
 887 
 888         bh.consume(r);
 889     }
 890 
 891     @Benchmark
 892     public void RORShiftConst(Blackhole bh) {
 893         short[] a = fa.apply(SPECIES.length());
 894         short[] r = fr.apply(SPECIES.length());
 895 
 896         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 897             for (int i = 0; i < a.length; i += SPECIES.length()) {
 898                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 899                 av.lanewise(VectorOperators.ROR, CONST_SHIFT).intoArray(r, i);
 900             }
 901         }
 902 
 903         bh.consume(r);
 904     }
 905 
 906     @Benchmark
 907     public void RORMaskedShiftConst(Blackhole bh) {
 908         short[] a = fa.apply(SPECIES.length());
 909         short[] r = fr.apply(SPECIES.length());
 910         boolean[] mask = fm.apply(SPECIES.length());
 911         VectorMask<Short> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 912 
 913         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 914             for (int i = 0; i < a.length; i += SPECIES.length()) {
 915                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 916                 av.lanewise(VectorOperators.ROR, CONST_SHIFT, vmask).intoArray(r, i);
 917             }
 918         }
 919 
 920         bh.consume(r);
 921     }
 922 
 923     @Benchmark
 924     public void ROLShiftConst(Blackhole bh) {
 925         short[] a = fa.apply(SPECIES.length());
 926         short[] r = fr.apply(SPECIES.length());
 927 
 928         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 929             for (int i = 0; i < a.length; i += SPECIES.length()) {
 930                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 931                 av.lanewise(VectorOperators.ROL, CONST_SHIFT).intoArray(r, i);
 932             }
 933         }
 934 
 935         bh.consume(r);
 936     }
 937 
 938     @Benchmark
 939     public void ROLMaskedShiftConst(Blackhole bh) {
 940         short[] a = fa.apply(SPECIES.length());
 941         short[] r = fr.apply(SPECIES.length());
 942         boolean[] mask = fm.apply(SPECIES.length());
 943         VectorMask<Short> vmask = VectorMask.fromArray(SPECIES, mask, 0);
 944 
 945         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 946             for (int i = 0; i < a.length; i += SPECIES.length()) {
 947                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 948                 av.lanewise(VectorOperators.ROL, CONST_SHIFT, vmask).intoArray(r, i);
 949             }
 950         }
 951 
 952         bh.consume(r);
 953     }
 954 
 955     @Benchmark
 956     public void MIN(Blackhole bh) {
 957         short[] a = fa.apply(SPECIES.length());
 958         short[] b = fb.apply(SPECIES.length());
 959         short[] r = fr.apply(SPECIES.length());
 960 
 961         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 962             for (int i = 0; i < a.length; i += SPECIES.length()) {
 963                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 964                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 965                 av.lanewise(VectorOperators.MIN, bv).intoArray(r, i);
 966             }
 967         }
 968 
 969         bh.consume(r);
 970     }
 971 
 972     @Benchmark
 973     public void MAX(Blackhole bh) {
 974         short[] a = fa.apply(SPECIES.length());
 975         short[] b = fb.apply(SPECIES.length());
 976         short[] r = fr.apply(SPECIES.length());
 977 
 978         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 979             for (int i = 0; i < a.length; i += SPECIES.length()) {
 980                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 981                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 982                 av.lanewise(VectorOperators.MAX, bv).intoArray(r, i);
 983             }
 984         }
 985 
 986         bh.consume(r);
 987     }
 988 
 989     @Benchmark
 990     public void ANDLanes(Blackhole bh) {
 991         short[] a = fa.apply(SPECIES.length());
 992         short ra = -1;
 993 
 994         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 995             ra = -1;
 996             for (int i = 0; i < a.length; i += SPECIES.length()) {
 997                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 998                 ra &= av.reduceLanes(VectorOperators.AND);
 999             }
1000         }
1001         bh.consume(ra);
1002     }
1003 
1004     @Benchmark
1005     public void ANDMaskedLanes(Blackhole bh) {
1006         short[] a = fa.apply(SPECIES.length());
1007         boolean[] mask = fm.apply(SPECIES.length());
1008         VectorMask<Short> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1009         short ra = -1;
1010 
1011         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1012             ra = -1;
1013             for (int i = 0; i < a.length; i += SPECIES.length()) {
1014                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
1015                 ra &= av.reduceLanes(VectorOperators.AND, vmask);
1016             }
1017         }
1018         bh.consume(ra);
1019     }
1020 
1021     @Benchmark
1022     public void ORLanes(Blackhole bh) {
1023         short[] a = fa.apply(SPECIES.length());
1024         short ra = 0;
1025 
1026         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1027             ra = 0;
1028             for (int i = 0; i < a.length; i += SPECIES.length()) {
1029                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
1030                 ra |= av.reduceLanes(VectorOperators.OR);
1031             }
1032         }
1033         bh.consume(ra);
1034     }
1035 
1036     @Benchmark
1037     public void ORMaskedLanes(Blackhole bh) {
1038         short[] a = fa.apply(SPECIES.length());
1039         boolean[] mask = fm.apply(SPECIES.length());
1040         VectorMask<Short> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1041         short ra = 0;
1042 
1043         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1044             ra = 0;
1045             for (int i = 0; i < a.length; i += SPECIES.length()) {
1046                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
1047                 ra |= av.reduceLanes(VectorOperators.OR, vmask);
1048             }
1049         }
1050         bh.consume(ra);
1051     }
1052 
1053     @Benchmark
1054     public void XORLanes(Blackhole bh) {
1055         short[] a = fa.apply(SPECIES.length());
1056         short ra = 0;
1057 
1058         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1059             ra = 0;
1060             for (int i = 0; i < a.length; i += SPECIES.length()) {
1061                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
1062                 ra ^= av.reduceLanes(VectorOperators.XOR);
1063             }
1064         }
1065         bh.consume(ra);
1066     }
1067 
1068     @Benchmark
1069     public void XORMaskedLanes(Blackhole bh) {
1070         short[] a = fa.apply(SPECIES.length());
1071         boolean[] mask = fm.apply(SPECIES.length());
1072         VectorMask<Short> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1073         short ra = 0;
1074 
1075         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1076             ra = 0;
1077             for (int i = 0; i < a.length; i += SPECIES.length()) {
1078                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
1079                 ra ^= av.reduceLanes(VectorOperators.XOR, vmask);
1080             }
1081         }
1082         bh.consume(ra);
1083     }
1084 
1085     @Benchmark
1086     public void ADDLanes(Blackhole bh) {
1087         short[] a = fa.apply(SPECIES.length());
1088         short ra = 0;
1089 
1090         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1091             ra = 0;
1092             for (int i = 0; i < a.length; i += SPECIES.length()) {
1093                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
1094                 ra += av.reduceLanes(VectorOperators.ADD);
1095             }
1096         }
1097         bh.consume(ra);
1098     }
1099 
1100     @Benchmark
1101     public void ADDMaskedLanes(Blackhole bh) {
1102         short[] a = fa.apply(SPECIES.length());
1103         boolean[] mask = fm.apply(SPECIES.length());
1104         VectorMask<Short> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1105         short ra = 0;
1106 
1107         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1108             ra = 0;
1109             for (int i = 0; i < a.length; i += SPECIES.length()) {
1110                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
1111                 ra += av.reduceLanes(VectorOperators.ADD, vmask);
1112             }
1113         }
1114         bh.consume(ra);
1115     }
1116 
1117     @Benchmark
1118     public void MULLanes(Blackhole bh) {
1119         short[] a = fa.apply(SPECIES.length());
1120         short ra = 1;
1121 
1122         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1123             ra = 1;
1124             for (int i = 0; i < a.length; i += SPECIES.length()) {
1125                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
1126                 ra *= av.reduceLanes(VectorOperators.MUL);
1127             }
1128         }
1129         bh.consume(ra);
1130     }
1131 
1132     @Benchmark
1133     public void MULMaskedLanes(Blackhole bh) {
1134         short[] a = fa.apply(SPECIES.length());
1135         boolean[] mask = fm.apply(SPECIES.length());
1136         VectorMask<Short> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1137         short ra = 1;
1138 
1139         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1140             ra = 1;
1141             for (int i = 0; i < a.length; i += SPECIES.length()) {
1142                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
1143                 ra *= av.reduceLanes(VectorOperators.MUL, vmask);
1144             }
1145         }
1146         bh.consume(ra);
1147     }
1148 
1149     @Benchmark
1150     public void MINLanes(Blackhole bh) {
1151         short[] a = fa.apply(SPECIES.length());
1152         short ra = Short.MAX_VALUE;
1153 
1154         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1155             ra = Short.MAX_VALUE;
1156             for (int i = 0; i < a.length; i += SPECIES.length()) {
1157                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
1158                 ra = (short) Math.min(ra, av.reduceLanes(VectorOperators.MIN));
1159             }
1160         }
1161         bh.consume(ra);
1162     }
1163 
1164     @Benchmark
1165     public void MINMaskedLanes(Blackhole bh) {
1166         short[] a = fa.apply(SPECIES.length());
1167         boolean[] mask = fm.apply(SPECIES.length());
1168         VectorMask<Short> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1169         short ra = Short.MAX_VALUE;
1170 
1171         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1172             ra = Short.MAX_VALUE;
1173             for (int i = 0; i < a.length; i += SPECIES.length()) {
1174                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
1175                 ra = (short) Math.min(ra, av.reduceLanes(VectorOperators.MIN, vmask));
1176             }
1177         }
1178         bh.consume(ra);
1179     }
1180 
1181     @Benchmark
1182     public void MAXLanes(Blackhole bh) {
1183         short[] a = fa.apply(SPECIES.length());
1184         short ra = Short.MIN_VALUE;
1185 
1186         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1187             ra = Short.MIN_VALUE;
1188             for (int i = 0; i < a.length; i += SPECIES.length()) {
1189                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
1190                 ra = (short) Math.max(ra, av.reduceLanes(VectorOperators.MAX));
1191             }
1192         }
1193         bh.consume(ra);
1194     }
1195 
1196     @Benchmark
1197     public void MAXMaskedLanes(Blackhole bh) {
1198         short[] a = fa.apply(SPECIES.length());
1199         boolean[] mask = fm.apply(SPECIES.length());
1200         VectorMask<Short> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1201         short ra = Short.MIN_VALUE;
1202 
1203         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1204             ra = Short.MIN_VALUE;
1205             for (int i = 0; i < a.length; i += SPECIES.length()) {
1206                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
1207                 ra = (short) Math.max(ra, av.reduceLanes(VectorOperators.MAX, vmask));
1208             }
1209         }
1210         bh.consume(ra);
1211     }
1212 
1213     @Benchmark
1214     public void FIRST_NONZEROLanes(Blackhole bh) {
1215         short[] a = fa.apply(SPECIES.length());
1216         short ra = (short) 0;
1217 
1218         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1219             ra = (short) 0;
1220             for (int i = 0; i < a.length; i += SPECIES.length()) {
1221                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
1222                 ra = firstNonZero(ra, av.reduceLanes(VectorOperators.FIRST_NONZERO));
1223             }
1224         }
1225         bh.consume(ra);
1226     }
1227 
1228     @Benchmark
1229     public void FIRST_NONZEROMaskedLanes(Blackhole bh) {
1230         short[] a = fa.apply(SPECIES.length());
1231         boolean[] mask = fm.apply(SPECIES.length());
1232         VectorMask<Short> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1233         short ra = (short) 0;
1234 
1235         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1236             ra = (short) 0;
1237             for (int i = 0; i < a.length; i += SPECIES.length()) {
1238                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
1239                 ra = firstNonZero(ra, av.reduceLanes(VectorOperators.FIRST_NONZERO, vmask));
1240             }
1241         }
1242         bh.consume(ra);
1243     }
1244 
1245     @Benchmark
1246     public void anyTrue(Blackhole bh) {
1247         boolean[] mask = fm.apply(SPECIES.length());
1248         boolean[] r = fmr.apply(SPECIES.length());
1249 
1250         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1251             for (int i = 0; i < mask.length; i += SPECIES.length()) {
1252                 VectorMask<Short> vmask = VectorMask.fromArray(SPECIES, mask, i);
1253                 r[i] = vmask.anyTrue();
1254             }
1255         }
1256 
1257         bh.consume(r);
1258     }
1259 
1260     @Benchmark
1261     public void allTrue(Blackhole bh) {
1262         boolean[] mask = fm.apply(SPECIES.length());
1263         boolean[] r = fmr.apply(SPECIES.length());
1264 
1265         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1266             for (int i = 0; i < mask.length; i += SPECIES.length()) {
1267                 VectorMask<Short> vmask = VectorMask.fromArray(SPECIES, mask, i);
1268                 r[i] = vmask.allTrue();
1269             }
1270         }
1271 
1272         bh.consume(r);
1273     }
1274 
1275     @Benchmark
1276     public void withLane(Blackhole bh) {
1277         short[] a = fa.apply(SPECIES.length());
1278         short[] r = fr.apply(SPECIES.length());
1279 
1280         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1281             for (int i = 0, j = 0; i < a.length; i += SPECIES.length()) {
1282                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
1283                 av.withLane((j++ & (SPECIES.length()-1)), (short)(65535+i)).intoArray(r, i);
1284             }
1285         }
1286 
1287         bh.consume(r);
1288     }
1289 
1290     @Benchmark
1291     public Object IS_DEFAULT() {
1292         short[] a = fa.apply(size);
1293         boolean[] ms = fmt.apply(size);
1294         VectorMask<Short> m = VectorMask.fromArray(SPECIES, ms, 0);
1295 
1296         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1297             for (int i = 0; i < a.length; i += SPECIES.length()) {
1298                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
1299 
1300                 // accumulate results, so JIT can't eliminate relevant computations
1301                 m = m.and(av.test(VectorOperators.IS_DEFAULT));
1302             }
1303         }
1304 
1305         return m;
1306     }
1307 
1308     @Benchmark
1309     public Object IS_NEGATIVE() {
1310         short[] a = fa.apply(size);
1311         boolean[] ms = fmt.apply(size);
1312         VectorMask<Short> m = VectorMask.fromArray(SPECIES, ms, 0);
1313 
1314         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1315             for (int i = 0; i < a.length; i += SPECIES.length()) {
1316                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
1317 
1318                 // accumulate results, so JIT can't eliminate relevant computations
1319                 m = m.and(av.test(VectorOperators.IS_NEGATIVE));
1320             }
1321         }
1322 
1323         return m;
1324     }
1325     @Benchmark
1326     public Object LT() {
1327         short[] a = fa.apply(size);
1328         short[] b = fb.apply(size);
1329         boolean[] ms = fmt.apply(size);
1330         VectorMask<Short> m = VectorMask.fromArray(SPECIES, ms, 0);
1331 
1332         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1333             for (int i = 0; i < a.length; i += SPECIES.length()) {
1334                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
1335                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
1336 
1337                 // accumulate results, so JIT can't eliminate relevant computations
1338                 m = m.and(av.compare(VectorOperators.LT, bv));
1339             }
1340         }
1341 
1342         return m;
1343     }
1344     @Benchmark
1345     public Object GT() {
1346         short[] a = fa.apply(size);
1347         short[] b = fb.apply(size);
1348         boolean[] ms = fmt.apply(size);
1349         VectorMask<Short> m = VectorMask.fromArray(SPECIES, ms, 0);
1350 
1351         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1352             for (int i = 0; i < a.length; i += SPECIES.length()) {
1353                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
1354                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
1355 
1356                 // accumulate results, so JIT can't eliminate relevant computations
1357                 m = m.and(av.compare(VectorOperators.GT, bv));
1358             }
1359         }
1360 
1361         return m;
1362     }
1363     @Benchmark
1364     public Object EQ() {
1365         short[] a = fa.apply(size);
1366         short[] b = fb.apply(size);
1367         boolean[] ms = fmt.apply(size);
1368         VectorMask<Short> m = VectorMask.fromArray(SPECIES, ms, 0);
1369 
1370         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1371             for (int i = 0; i < a.length; i += SPECIES.length()) {
1372                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
1373                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
1374 
1375                 // accumulate results, so JIT can't eliminate relevant computations
1376                 m = m.and(av.compare(VectorOperators.EQ, bv));
1377             }
1378         }
1379 
1380         return m;
1381     }
1382     @Benchmark
1383     public Object NE() {
1384         short[] a = fa.apply(size);
1385         short[] b = fb.apply(size);
1386         boolean[] ms = fmt.apply(size);
1387         VectorMask<Short> m = VectorMask.fromArray(SPECIES, ms, 0);
1388 
1389         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1390             for (int i = 0; i < a.length; i += SPECIES.length()) {
1391                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
1392                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
1393 
1394                 // accumulate results, so JIT can't eliminate relevant computations
1395                 m = m.and(av.compare(VectorOperators.NE, bv));
1396             }
1397         }
1398 
1399         return m;
1400     }
1401     @Benchmark
1402     public Object LE() {
1403         short[] a = fa.apply(size);
1404         short[] b = fb.apply(size);
1405         boolean[] ms = fmt.apply(size);
1406         VectorMask<Short> m = VectorMask.fromArray(SPECIES, ms, 0);
1407 
1408         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1409             for (int i = 0; i < a.length; i += SPECIES.length()) {
1410                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
1411                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
1412 
1413                 // accumulate results, so JIT can't eliminate relevant computations
1414                 m = m.and(av.compare(VectorOperators.LE, bv));
1415             }
1416         }
1417 
1418         return m;
1419     }
1420     @Benchmark
1421     public Object GE() {
1422         short[] a = fa.apply(size);
1423         short[] b = fb.apply(size);
1424         boolean[] ms = fmt.apply(size);
1425         VectorMask<Short> m = VectorMask.fromArray(SPECIES, ms, 0);
1426 
1427         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1428             for (int i = 0; i < a.length; i += SPECIES.length()) {
1429                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
1430                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
1431 
1432                 // accumulate results, so JIT can't eliminate relevant computations
1433                 m = m.and(av.compare(VectorOperators.GE, bv));
1434             }
1435         }
1436 
1437         return m;
1438     }
1439     @Benchmark
1440     public Object UNSIGNED_LT() {
1441         short[] a = fa.apply(size);
1442         short[] b = fb.apply(size);
1443         boolean[] ms = fmt.apply(size);
1444         VectorMask<Short> m = VectorMask.fromArray(SPECIES, ms, 0);
1445 
1446         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1447             for (int i = 0; i < a.length; i += SPECIES.length()) {
1448                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
1449                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
1450 
1451                 // accumulate results, so JIT can't eliminate relevant computations
1452                 m = m.and(av.compare(VectorOperators.UNSIGNED_LT, bv));
1453             }
1454         }
1455 
1456         return m;
1457     }
1458     @Benchmark
1459     public Object UNSIGNED_GT() {
1460         short[] a = fa.apply(size);
1461         short[] b = fb.apply(size);
1462         boolean[] ms = fmt.apply(size);
1463         VectorMask<Short> m = VectorMask.fromArray(SPECIES, ms, 0);
1464 
1465         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1466             for (int i = 0; i < a.length; i += SPECIES.length()) {
1467                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
1468                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
1469 
1470                 // accumulate results, so JIT can't eliminate relevant computations
1471                 m = m.and(av.compare(VectorOperators.UNSIGNED_GT, bv));
1472             }
1473         }
1474 
1475         return m;
1476     }
1477     @Benchmark
1478     public Object UNSIGNED_LE() {
1479         short[] a = fa.apply(size);
1480         short[] b = fb.apply(size);
1481         boolean[] ms = fmt.apply(size);
1482         VectorMask<Short> m = VectorMask.fromArray(SPECIES, ms, 0);
1483 
1484         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1485             for (int i = 0; i < a.length; i += SPECIES.length()) {
1486                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
1487                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
1488 
1489                 // accumulate results, so JIT can't eliminate relevant computations
1490                 m = m.and(av.compare(VectorOperators.UNSIGNED_LE, bv));
1491             }
1492         }
1493 
1494         return m;
1495     }
1496     @Benchmark
1497     public Object UNSIGNED_GE() {
1498         short[] a = fa.apply(size);
1499         short[] b = fb.apply(size);
1500         boolean[] ms = fmt.apply(size);
1501         VectorMask<Short> m = VectorMask.fromArray(SPECIES, ms, 0);
1502 
1503         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1504             for (int i = 0; i < a.length; i += SPECIES.length()) {
1505                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
1506                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
1507 
1508                 // accumulate results, so JIT can't eliminate relevant computations
1509                 m = m.and(av.compare(VectorOperators.UNSIGNED_GE, bv));
1510             }
1511         }
1512 
1513         return m;
1514     }
1515 
1516     @Benchmark
1517     public void blend(Blackhole bh) {
1518         short[] a = fa.apply(SPECIES.length());
1519         short[] b = fb.apply(SPECIES.length());
1520         short[] r = fr.apply(SPECIES.length());
1521         boolean[] mask = fm.apply(SPECIES.length());
1522         VectorMask<Short> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1523 
1524         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1525             for (int i = 0; i < a.length; i += SPECIES.length()) {
1526                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
1527                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
1528                 av.blend(bv, vmask).intoArray(r, i);
1529             }
1530         }
1531 
1532         bh.consume(r);
1533     }
1534 
1535     @Benchmark
1536     public void rearrange(Blackhole bh) {
1537         short[] a = fa.apply(SPECIES.length());
1538         int[] order = fs.apply(a.length, SPECIES.length());
1539         short[] r = fr.apply(SPECIES.length());
1540 
1541         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1542             for (int i = 0; i < a.length; i += SPECIES.length()) {
1543                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
1544                 av.rearrange(VectorShuffle.fromArray(SPECIES, order, i)).intoArray(r, i);
1545             }
1546         }
1547 
1548         bh.consume(r);
1549     }
1550     @Benchmark
1551     public Object compress() {
1552         short[] a = fa.apply(size);
1553         short[] r = fb.apply(size);
1554         boolean[] ms = fmt.apply(size);
1555         VectorMask<Short> m = VectorMask.fromArray(SPECIES, ms, 0);
1556 
1557         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1558             for (int i = 0; i < a.length; i += SPECIES.length()) {
1559                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
1560                 av.compress(m).intoArray(r, i);
1561             }
1562         }
1563 
1564         return r;
1565     }
1566 
1567     @Benchmark
1568     public Object expand() {
1569         short[] a = fa.apply(size);
1570         short[] r = fb.apply(size);
1571         boolean[] ms = fmt.apply(size);
1572         VectorMask<Short> m = VectorMask.fromArray(SPECIES, ms, 0);
1573 
1574         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1575             for (int i = 0; i < a.length; i += SPECIES.length()) {
1576                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
1577                 av.expand(m).intoArray(r, i);
1578             }
1579         }
1580 
1581         return r;
1582     }
1583 
1584     @Benchmark
1585     public Object maskCompress() {
1586         boolean[] ms = fmt.apply(size);
1587         boolean[] rs = fmt.apply(size);
1588 
1589         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1590             for (int i = 0, j = 0; i < ms.length; i += SPECIES.length()) {
1591                 VectorMask<Short> m = VectorMask.fromArray(SPECIES, ms, i);
1592                 m.compress().intoArray(rs, j);
1593                 j += m.trueCount();
1594             }
1595         }
1596 
1597         return rs;
1598     }
1599 
    /**
     * Benchmarks {@code ShortVector.lane(int)} by copying every lane of each
     * loaded vector into the result array one element at a time.
     * For lane counts of 1, 2, 4, 8, 16, 32 and 64 the extraction is written out
     * with literal lane indices; any other lane count falls back to a loop over
     * {@code SPECIES.length()}. The whole pass is repeated {@code INVOC_COUNT} times.
     *
     * @param bh blackhole that consumes the result array after all passes
     */
    @Benchmark
    public void laneextract(Blackhole bh) {
        short[] a = fa.apply(SPECIES.length());
        short[] r = fr.apply(SPECIES.length());

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                ShortVector av = ShortVector.fromArray(SPECIES, a, i);
                int num_lanes = SPECIES.length();
                // Manually unroll because full unroll happens after intrinsification.
                // Unroll is needed because get intrinsic requires for index to be a known constant.
                if (num_lanes == 1) {
                    r[i]=av.lane(0);
                } else if (num_lanes == 2) {
                    r[i]=av.lane(0);
                    r[i+1]=av.lane(1);
                } else if (num_lanes == 4) {
                    r[i]=av.lane(0);
                    r[i+1]=av.lane(1);
                    r[i+2]=av.lane(2);
                    r[i+3]=av.lane(3);
                } else if (num_lanes == 8) {
                    r[i]=av.lane(0);
                    r[i+1]=av.lane(1);
                    r[i+2]=av.lane(2);
                    r[i+3]=av.lane(3);
                    r[i+4]=av.lane(4);
                    r[i+5]=av.lane(5);
                    r[i+6]=av.lane(6);
                    r[i+7]=av.lane(7);
                } else if (num_lanes == 16) {
                    r[i]=av.lane(0);
                    r[i+1]=av.lane(1);
                    r[i+2]=av.lane(2);
                    r[i+3]=av.lane(3);
                    r[i+4]=av.lane(4);
                    r[i+5]=av.lane(5);
                    r[i+6]=av.lane(6);
                    r[i+7]=av.lane(7);
                    r[i+8]=av.lane(8);
                    r[i+9]=av.lane(9);
                    r[i+10]=av.lane(10);
                    r[i+11]=av.lane(11);
                    r[i+12]=av.lane(12);
                    r[i+13]=av.lane(13);
                    r[i+14]=av.lane(14);
                    r[i+15]=av.lane(15);
                } else if (num_lanes == 32) {
                    r[i]=av.lane(0);
                    r[i+1]=av.lane(1);
                    r[i+2]=av.lane(2);
                    r[i+3]=av.lane(3);
                    r[i+4]=av.lane(4);
                    r[i+5]=av.lane(5);
                    r[i+6]=av.lane(6);
                    r[i+7]=av.lane(7);
                    r[i+8]=av.lane(8);
                    r[i+9]=av.lane(9);
                    r[i+10]=av.lane(10);
                    r[i+11]=av.lane(11);
                    r[i+12]=av.lane(12);
                    r[i+13]=av.lane(13);
                    r[i+14]=av.lane(14);
                    r[i+15]=av.lane(15);
                    r[i+16]=av.lane(16);
                    r[i+17]=av.lane(17);
                    r[i+18]=av.lane(18);
                    r[i+19]=av.lane(19);
                    r[i+20]=av.lane(20);
                    r[i+21]=av.lane(21);
                    r[i+22]=av.lane(22);
                    r[i+23]=av.lane(23);
                    r[i+24]=av.lane(24);
                    r[i+25]=av.lane(25);
                    r[i+26]=av.lane(26);
                    r[i+27]=av.lane(27);
                    r[i+28]=av.lane(28);
                    r[i+29]=av.lane(29);
                    r[i+30]=av.lane(30);
                    r[i+31]=av.lane(31);
                } else if (num_lanes == 64) {
                    r[i]=av.lane(0);
                    r[i+1]=av.lane(1);
                    r[i+2]=av.lane(2);
                    r[i+3]=av.lane(3);
                    r[i+4]=av.lane(4);
                    r[i+5]=av.lane(5);
                    r[i+6]=av.lane(6);
                    r[i+7]=av.lane(7);
                    r[i+8]=av.lane(8);
                    r[i+9]=av.lane(9);
                    r[i+10]=av.lane(10);
                    r[i+11]=av.lane(11);
                    r[i+12]=av.lane(12);
                    r[i+13]=av.lane(13);
                    r[i+14]=av.lane(14);
                    r[i+15]=av.lane(15);
                    r[i+16]=av.lane(16);
                    r[i+17]=av.lane(17);
                    r[i+18]=av.lane(18);
                    r[i+19]=av.lane(19);
                    r[i+20]=av.lane(20);
                    r[i+21]=av.lane(21);
                    r[i+22]=av.lane(22);
                    r[i+23]=av.lane(23);
                    r[i+24]=av.lane(24);
                    r[i+25]=av.lane(25);
                    r[i+26]=av.lane(26);
                    r[i+27]=av.lane(27);
                    r[i+28]=av.lane(28);
                    r[i+29]=av.lane(29);
                    r[i+30]=av.lane(30);
                    r[i+31]=av.lane(31);
                    r[i+32]=av.lane(32);
                    r[i+33]=av.lane(33);
                    r[i+34]=av.lane(34);
                    r[i+35]=av.lane(35);
                    r[i+36]=av.lane(36);
                    r[i+37]=av.lane(37);
                    r[i+38]=av.lane(38);
                    r[i+39]=av.lane(39);
                    r[i+40]=av.lane(40);
                    r[i+41]=av.lane(41);
                    r[i+42]=av.lane(42);
                    r[i+43]=av.lane(43);
                    r[i+44]=av.lane(44);
                    r[i+45]=av.lane(45);
                    r[i+46]=av.lane(46);
                    r[i+47]=av.lane(47);
                    r[i+48]=av.lane(48);
                    r[i+49]=av.lane(49);
                    r[i+50]=av.lane(50);
                    r[i+51]=av.lane(51);
                    r[i+52]=av.lane(52);
                    r[i+53]=av.lane(53);
                    r[i+54]=av.lane(54);
                    r[i+55]=av.lane(55);
                    r[i+56]=av.lane(56);
                    r[i+57]=av.lane(57);
                    r[i+58]=av.lane(58);
                    r[i+59]=av.lane(59);
                    r[i+60]=av.lane(60);
                    r[i+61]=av.lane(61);
                    r[i+62]=av.lane(62);
                    r[i+63]=av.lane(63);
                } else {
                    // Lane count not covered by an unrolled branch above: extract with a variable index.
                    for (int j = 0; j < SPECIES.length(); j++) {
                        r[i+j]=av.lane(j);
                    }
                }
            }
        }

        bh.consume(r);
    }
1755 
1756     @Benchmark
1757     public void broadcast(Blackhole bh) {
1758         short[] a = fa.apply(SPECIES.length());
1759         short[] r = new short[a.length];
1760 
1761         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1762             for (int i = 0; i < a.length; i += SPECIES.length()) {
1763                 ShortVector.broadcast(SPECIES, a[i]).intoArray(r, i);
1764             }
1765         }
1766 
1767         bh.consume(r);
1768     }
1769 
1770     @Benchmark
1771     public void zero(Blackhole bh) {
1772         short[] a = fa.apply(SPECIES.length());
1773         short[] r = new short[a.length];
1774 
1775         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1776             for (int i = 0; i < a.length; i += SPECIES.length()) {
1777                 ShortVector.zero(SPECIES).intoArray(a, i);
1778             }
1779         }
1780 
1781         bh.consume(r);
1782     }
1783 
1784     @Benchmark
1785     public void sliceUnary(Blackhole bh) {
1786         short[] a = fa.apply(SPECIES.length());
1787         short[] r = new short[a.length];
1788         int origin = (new java.util.Random()).nextInt(SPECIES.length());
1789         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1790             for (int i = 0; i < a.length; i += SPECIES.length()) {
1791                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
1792                 av.slice(origin).intoArray(r, i);
1793             }
1794         }
1795 
1796         bh.consume(r);
1797     }
1798 
1799     @Benchmark
1800     public void sliceBinary(Blackhole bh) {
1801         short[] a = fa.apply(SPECIES.length());
1802         short[] b = fb.apply(SPECIES.length());
1803         short[] r = new short[a.length];
1804         int origin = (new java.util.Random()).nextInt(SPECIES.length());
1805         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1806             for (int i = 0; i < a.length; i += SPECIES.length()) {
1807                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
1808                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
1809                 av.slice(origin, bv).intoArray(r, i);
1810             }
1811         }
1812 
1813         bh.consume(r);
1814     }
1815 
1816     @Benchmark
1817     public void sliceMasked(Blackhole bh) {
1818         short[] a = fa.apply(SPECIES.length());
1819         short[] b = fb.apply(SPECIES.length());
1820         boolean[] mask = fm.apply(SPECIES.length());
1821         VectorMask<Short> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1822 
1823         short[] r = new short[a.length];
1824         int origin = (new java.util.Random()).nextInt(SPECIES.length());
1825         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1826             for (int i = 0; i < a.length; i += SPECIES.length()) {
1827                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
1828                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
1829                 av.slice(origin, bv, vmask).intoArray(r, i);
1830             }
1831         }
1832 
1833         bh.consume(r);
1834     }
1835 
1836     @Benchmark
1837     public void unsliceUnary(Blackhole bh) {
1838         short[] a = fa.apply(SPECIES.length());
1839         short[] r = new short[a.length];
1840         int origin = (new java.util.Random()).nextInt(SPECIES.length());
1841         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1842             for (int i = 0; i < a.length; i += SPECIES.length()) {
1843                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
1844                 av.unslice(origin).intoArray(r, i);
1845             }
1846         }
1847 
1848         bh.consume(r);
1849     }
1850 
1851     @Benchmark
1852     public void unsliceBinary(Blackhole bh) {
1853         short[] a = fa.apply(SPECIES.length());
1854         short[] b = fb.apply(SPECIES.length());
1855         short[] r = new short[a.length];
1856         int origin = (new java.util.Random()).nextInt(SPECIES.length());
1857         int part = (new java.util.Random()).nextInt(2);
1858         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1859             for (int i = 0; i < a.length; i += SPECIES.length()) {
1860                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
1861                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
1862                 av.unslice(origin, bv, part).intoArray(r, i);
1863             }
1864         }
1865 
1866         bh.consume(r);
1867     }
1868 
1869     @Benchmark
1870     public void unsliceMasked(Blackhole bh) {
1871         short[] a = fa.apply(SPECIES.length());
1872         short[] b = fb.apply(SPECIES.length());
1873         boolean[] mask = fm.apply(SPECIES.length());
1874         VectorMask<Short> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1875         short[] r = new short[a.length];
1876         int origin = (new java.util.Random()).nextInt(SPECIES.length());
1877         int part = (new java.util.Random()).nextInt(2);
1878         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1879             for (int i = 0; i < a.length; i += SPECIES.length()) {
1880                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
1881                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
1882                 av.unslice(origin, bv, part, vmask).intoArray(r, i);
1883             }
1884         }
1885 
1886         bh.consume(r);
1887     }
1888 
1889     @Benchmark
1890     public void BITWISE_BLEND(Blackhole bh) {
1891         short[] a = fa.apply(SPECIES.length());
1892         short[] b = fb.apply(SPECIES.length());
1893         short[] c = fc.apply(SPECIES.length());
1894         short[] r = fr.apply(SPECIES.length());
1895 
1896         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1897             for (int i = 0; i < a.length; i += SPECIES.length()) {
1898                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
1899                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
1900                 ShortVector cv = ShortVector.fromArray(SPECIES, c, i);
1901                 av.lanewise(VectorOperators.BITWISE_BLEND, bv, cv).intoArray(r, i);
1902             }
1903         }
1904 
1905         bh.consume(r);
1906     }
1907 
1908     @Benchmark
1909     public void BITWISE_BLENDMasked(Blackhole bh) {
1910         short[] a = fa.apply(SPECIES.length());
1911         short[] b = fb.apply(SPECIES.length());
1912         short[] c = fc.apply(SPECIES.length());
1913         short[] r = fr.apply(SPECIES.length());
1914         boolean[] mask = fm.apply(SPECIES.length());
1915         VectorMask<Short> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1916 
1917         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1918             for (int i = 0; i < a.length; i += SPECIES.length()) {
1919                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
1920                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
1921                 ShortVector cv = ShortVector.fromArray(SPECIES, c, i);
1922                 av.lanewise(VectorOperators.BITWISE_BLEND, bv, cv, vmask).intoArray(r, i);
1923             }
1924         }
1925 
1926         bh.consume(r);
1927     }
1928 
1929     @Benchmark
1930     public void NEG(Blackhole bh) {
1931         short[] a = fa.apply(SPECIES.length());
1932         short[] r = fr.apply(SPECIES.length());
1933 
1934         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1935             for (int i = 0; i < a.length; i += SPECIES.length()) {
1936                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
1937                 av.lanewise(VectorOperators.NEG).intoArray(r, i);
1938             }
1939         }
1940 
1941         bh.consume(r);
1942     }
1943 
1944     @Benchmark
1945     public void NEGMasked(Blackhole bh) {
1946         short[] a = fa.apply(SPECIES.length());
1947         short[] r = fr.apply(SPECIES.length());
1948         boolean[] mask = fm.apply(SPECIES.length());
1949         VectorMask<Short> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1950 
1951         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1952             for (int i = 0; i < a.length; i += SPECIES.length()) {
1953                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
1954                 av.lanewise(VectorOperators.NEG, vmask).intoArray(r, i);
1955             }
1956         }
1957 
1958         bh.consume(r);
1959     }
1960 
1961     @Benchmark
1962     public void ABS(Blackhole bh) {
1963         short[] a = fa.apply(SPECIES.length());
1964         short[] r = fr.apply(SPECIES.length());
1965 
1966         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1967             for (int i = 0; i < a.length; i += SPECIES.length()) {
1968                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
1969                 av.lanewise(VectorOperators.ABS).intoArray(r, i);
1970             }
1971         }
1972 
1973         bh.consume(r);
1974     }
1975 
1976     @Benchmark
1977     public void ABSMasked(Blackhole bh) {
1978         short[] a = fa.apply(SPECIES.length());
1979         short[] r = fr.apply(SPECIES.length());
1980         boolean[] mask = fm.apply(SPECIES.length());
1981         VectorMask<Short> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1982 
1983         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1984             for (int i = 0; i < a.length; i += SPECIES.length()) {
1985                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
1986                 av.lanewise(VectorOperators.ABS, vmask).intoArray(r, i);
1987             }
1988         }
1989 
1990         bh.consume(r);
1991     }
1992 
1993     @Benchmark
1994     public void NOT(Blackhole bh) {
1995         short[] a = fa.apply(SPECIES.length());
1996         short[] r = fr.apply(SPECIES.length());
1997 
1998         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1999             for (int i = 0; i < a.length; i += SPECIES.length()) {
2000                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
2001                 av.lanewise(VectorOperators.NOT).intoArray(r, i);
2002             }
2003         }
2004 
2005         bh.consume(r);
2006     }
2007 
2008     @Benchmark
2009     public void NOTMasked(Blackhole bh) {
2010         short[] a = fa.apply(SPECIES.length());
2011         short[] r = fr.apply(SPECIES.length());
2012         boolean[] mask = fm.apply(SPECIES.length());
2013         VectorMask<Short> vmask = VectorMask.fromArray(SPECIES, mask, 0);
2014 
2015         for (int ic = 0; ic < INVOC_COUNT; ic++) {
2016             for (int i = 0; i < a.length; i += SPECIES.length()) {
2017                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
2018                 av.lanewise(VectorOperators.NOT, vmask).intoArray(r, i);
2019             }
2020         }
2021 
2022         bh.consume(r);
2023     }
2024 
2025     @Benchmark
2026     public void ZOMO(Blackhole bh) {
2027         short[] a = fa.apply(SPECIES.length());
2028         short[] r = fr.apply(SPECIES.length());
2029 
2030         for (int ic = 0; ic < INVOC_COUNT; ic++) {
2031             for (int i = 0; i < a.length; i += SPECIES.length()) {
2032                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
2033                 av.lanewise(VectorOperators.ZOMO).intoArray(r, i);
2034             }
2035         }
2036 
2037         bh.consume(r);
2038     }
2039 
2040     @Benchmark
2041     public void ZOMOMasked(Blackhole bh) {
2042         short[] a = fa.apply(SPECIES.length());
2043         short[] r = fr.apply(SPECIES.length());
2044         boolean[] mask = fm.apply(SPECIES.length());
2045         VectorMask<Short> vmask = VectorMask.fromArray(SPECIES, mask, 0);
2046 
2047         for (int ic = 0; ic < INVOC_COUNT; ic++) {
2048             for (int i = 0; i < a.length; i += SPECIES.length()) {
2049                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
2050                 av.lanewise(VectorOperators.ZOMO, vmask).intoArray(r, i);
2051             }
2052         }
2053 
2054         bh.consume(r);
2055     }
2056 
2057     @Benchmark
2058     public void BIT_COUNT(Blackhole bh) {
2059         short[] a = fa.apply(SPECIES.length());
2060         short[] r = fr.apply(SPECIES.length());
2061 
2062         for (int ic = 0; ic < INVOC_COUNT; ic++) {
2063             for (int i = 0; i < a.length; i += SPECIES.length()) {
2064                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
2065                 av.lanewise(VectorOperators.BIT_COUNT).intoArray(r, i);
2066             }
2067         }
2068 
2069         bh.consume(r);
2070     }
2071 
2072     @Benchmark
2073     public void BIT_COUNTMasked(Blackhole bh) {
2074         short[] a = fa.apply(SPECIES.length());
2075         short[] r = fr.apply(SPECIES.length());
2076         boolean[] mask = fm.apply(SPECIES.length());
2077         VectorMask<Short> vmask = VectorMask.fromArray(SPECIES, mask, 0);
2078 
2079         for (int ic = 0; ic < INVOC_COUNT; ic++) {
2080             for (int i = 0; i < a.length; i += SPECIES.length()) {
2081                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
2082                 av.lanewise(VectorOperators.BIT_COUNT, vmask).intoArray(r, i);
2083             }
2084         }
2085 
2086         bh.consume(r);
2087     }
2088 
2089     @Benchmark
2090     public void TRAILING_ZEROS_COUNT(Blackhole bh) {
2091         short[] a = fa.apply(SPECIES.length());
2092         short[] r = fr.apply(SPECIES.length());
2093 
2094         for (int ic = 0; ic < INVOC_COUNT; ic++) {
2095             for (int i = 0; i < a.length; i += SPECIES.length()) {
2096                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
2097                 av.lanewise(VectorOperators.TRAILING_ZEROS_COUNT).intoArray(r, i);
2098             }
2099         }
2100 
2101         bh.consume(r);
2102     }
2103 
2104     @Benchmark
2105     public void TRAILING_ZEROS_COUNTMasked(Blackhole bh) {
2106         short[] a = fa.apply(SPECIES.length());
2107         short[] r = fr.apply(SPECIES.length());
2108         boolean[] mask = fm.apply(SPECIES.length());
2109         VectorMask<Short> vmask = VectorMask.fromArray(SPECIES, mask, 0);
2110 
2111         for (int ic = 0; ic < INVOC_COUNT; ic++) {
2112             for (int i = 0; i < a.length; i += SPECIES.length()) {
2113                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
2114                 av.lanewise(VectorOperators.TRAILING_ZEROS_COUNT, vmask).intoArray(r, i);
2115             }
2116         }
2117 
2118         bh.consume(r);
2119     }
2120 
2121     @Benchmark
2122     public void LEADING_ZEROS_COUNT(Blackhole bh) {
2123         short[] a = fa.apply(SPECIES.length());
2124         short[] r = fr.apply(SPECIES.length());
2125 
2126         for (int ic = 0; ic < INVOC_COUNT; ic++) {
2127             for (int i = 0; i < a.length; i += SPECIES.length()) {
2128                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
2129                 av.lanewise(VectorOperators.LEADING_ZEROS_COUNT).intoArray(r, i);
2130             }
2131         }
2132 
2133         bh.consume(r);
2134     }
2135 
2136     @Benchmark
2137     public void LEADING_ZEROS_COUNTMasked(Blackhole bh) {
2138         short[] a = fa.apply(SPECIES.length());
2139         short[] r = fr.apply(SPECIES.length());
2140         boolean[] mask = fm.apply(SPECIES.length());
2141         VectorMask<Short> vmask = VectorMask.fromArray(SPECIES, mask, 0);
2142 
2143         for (int ic = 0; ic < INVOC_COUNT; ic++) {
2144             for (int i = 0; i < a.length; i += SPECIES.length()) {
2145                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
2146                 av.lanewise(VectorOperators.LEADING_ZEROS_COUNT, vmask).intoArray(r, i);
2147             }
2148         }
2149 
2150         bh.consume(r);
2151     }
2152 
2153     @Benchmark
2154     public void REVERSE(Blackhole bh) {
2155         short[] a = fa.apply(SPECIES.length());
2156         short[] r = fr.apply(SPECIES.length());
2157 
2158         for (int ic = 0; ic < INVOC_COUNT; ic++) {
2159             for (int i = 0; i < a.length; i += SPECIES.length()) {
2160                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
2161                 av.lanewise(VectorOperators.REVERSE).intoArray(r, i);
2162             }
2163         }
2164 
2165         bh.consume(r);
2166     }
2167 
2168     @Benchmark
2169     public void REVERSEMasked(Blackhole bh) {
2170         short[] a = fa.apply(SPECIES.length());
2171         short[] r = fr.apply(SPECIES.length());
2172         boolean[] mask = fm.apply(SPECIES.length());
2173         VectorMask<Short> vmask = VectorMask.fromArray(SPECIES, mask, 0);
2174 
2175         for (int ic = 0; ic < INVOC_COUNT; ic++) {
2176             for (int i = 0; i < a.length; i += SPECIES.length()) {
2177                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
2178                 av.lanewise(VectorOperators.REVERSE, vmask).intoArray(r, i);
2179             }
2180         }
2181 
2182         bh.consume(r);
2183     }
2184 
2185     @Benchmark
2186     public void REVERSE_BYTES(Blackhole bh) {
2187         short[] a = fa.apply(SPECIES.length());
2188         short[] r = fr.apply(SPECIES.length());
2189 
2190         for (int ic = 0; ic < INVOC_COUNT; ic++) {
2191             for (int i = 0; i < a.length; i += SPECIES.length()) {
2192                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
2193                 av.lanewise(VectorOperators.REVERSE_BYTES).intoArray(r, i);
2194             }
2195         }
2196 
2197         bh.consume(r);
2198     }
2199 
2200     @Benchmark
2201     public void REVERSE_BYTESMasked(Blackhole bh) {
2202         short[] a = fa.apply(SPECIES.length());
2203         short[] r = fr.apply(SPECIES.length());
2204         boolean[] mask = fm.apply(SPECIES.length());
2205         VectorMask<Short> vmask = VectorMask.fromArray(SPECIES, mask, 0);
2206 
2207         for (int ic = 0; ic < INVOC_COUNT; ic++) {
2208             for (int i = 0; i < a.length; i += SPECIES.length()) {
2209                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
2210                 av.lanewise(VectorOperators.REVERSE_BYTES, vmask).intoArray(r, i);
2211             }
2212         }
2213 
2214         bh.consume(r);
2215     }
2216 }