1 /*
   2  * Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 package org.openjdk.bench.jdk.incubator.vector.operation;
  25 
  26 // -- This file was mechanically generated: Do not edit! -- //
  27 
  28 import java.util.concurrent.TimeUnit;
  29 import java.util.function.IntFunction;
  30 import jdk.incubator.vector.VectorMath;
  31 
  32 import org.openjdk.jmh.annotations.*;
  33 import org.openjdk.jmh.infra.Blackhole;
  34 
  35 @BenchmarkMode(Mode.Throughput)
  36 @OutputTimeUnit(TimeUnit.MILLISECONDS)
  37 @State(Scope.Benchmark)
  38 @Warmup(iterations = 3, time = 1)
  39 @Measurement(iterations = 5, time = 1)
  40 @Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
  41 public class ShortScalar extends AbstractVectorBenchmark {
  42     static final int INVOC_COUNT = 1; // To align with vector benchmarks.
  43 
  44     private static final short CONST_SHIFT = Short.SIZE / 2;
  45 
  46     @Param("1024")
  47     int size;
  48 
  49     short[] fill(IntFunction<Short> f) {
  50         short[] array = new short[size];
  51         for (int i = 0; i < array.length; i++) {
  52             array[i] = f.apply(i);
  53         }
  54         return array;
  55     }
  56 
  57     static short bits(short e) {
  58         return e;
  59     }
  60 
  61     short[] as, bs, cs, rs;
  62     boolean[] ms, mt, rms;
  63     int[] ss;
  64 
  65     @Setup
  66     public void init() {
  67         as = fill(i -> (short)(2*i));
  68         bs = fill(i -> (short)(i+1));
  69         cs = fill(i -> (short)(i+5));
  70         rs = fill(i -> (short)0);
  71         ms = fillMask(size, i -> (i % 2) == 0);
  72         mt = fillMask(size, i -> true);
  73         rms = fillMask(size, i -> false);
  74 
  75         ss = fillInt(size, i -> RAND.nextInt(Math.max(i,1)));
  76     }
  77 
  78     final IntFunction<short[]> fa = vl -> as;
  79     final IntFunction<short[]> fb = vl -> bs;
  80     final IntFunction<short[]> fc = vl -> cs;
  81     final IntFunction<short[]> fr = vl -> rs;
  82     final IntFunction<boolean[]> fm = vl -> ms;
  83     final IntFunction<boolean[]> fmt = vl -> mt;
  84     final IntFunction<boolean[]> fmr = vl -> rms;
  85     final IntFunction<int[]> fs = vl -> ss;
  86 
  87     static boolean eq(short a, short b) {
  88         return a == b;
  89     }
  90 
  91     static boolean neq(short a, short b) {
  92         return a != b;
  93     }
  94 
  95     static boolean lt(short a, short b) {
  96         return a < b;
  97     }
  98 
  99     static boolean le(short a, short b) {
 100         return a <= b;
 101     }
 102 
 103     static boolean gt(short a, short b) {
 104         return a > b;
 105     }
 106 
 107     static boolean ge(short a, short b) {
 108         return a >= b;
 109     }
 110 
 111     static boolean ult(short a, short b) {
 112         return Short.compareUnsigned(a, b) < 0;
 113     }
 114 
 115     static boolean ule(short a, short b) {
 116         return Short.compareUnsigned(a, b) <= 0;
 117     }
 118 
 119     static boolean ugt(short a, short b) {
 120         return Short.compareUnsigned(a, b) > 0;
 121     }
 122 
 123     static boolean uge(short a, short b) {
 124         return Short.compareUnsigned(a, b) >= 0;
 125     }
 126 
 127     static short ROL_scalar(short a, short b) {
 128         return (short)(((((short)a) & 0xFFFF) << (b & 15)) | ((((short)a) & 0xFFFF) >>> (16 - (b & 15))));
 129     }
 130 
 131     static short ROR_scalar(short a, short b) {
 132         return (short)(((((short)a) & 0xFFFF) >>> (b & 15)) | ((((short)a) & 0xFFFF) << (16 - (b & 15))));
 133     }
 134 
 135     static short TRAILING_ZEROS_COUNT_scalar(short a) {
 136         return (short) (a != 0 ? Integer.numberOfTrailingZeros(a) : 16);
 137     }
 138 
 139     static short LEADING_ZEROS_COUNT_scalar(short a) {
 140         return (short) (a >= 0 ? Integer.numberOfLeadingZeros(a) - 16 : 0);
 141     }
 142 
 143     static short REVERSE_scalar(short a) {
 144         short b = ROL_scalar(a, (short) 8);
 145         b = (short)(((b & 0x5555) << 1) | ((b & 0xAAAA) >>> 1));
 146         b = (short)(((b & 0x3333) << 2) | ((b & 0xCCCC) >>> 2));
 147         b = (short)(((b & 0x0F0F) << 4) | ((b & 0xF0F0) >>> 4));
 148         return b;
 149     }
 150 
 151     @Benchmark
 152     public void ADD(Blackhole bh) {
 153         short[] as = fa.apply(size);
 154         short[] bs = fb.apply(size);
 155         short[] rs = fr.apply(size);
 156 
 157         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 158             for (int i = 0; i < as.length; i++) {
 159                 short a = as[i];
 160                 short b = bs[i];
 161                 rs[i] = (short)(a + b);
 162             }
 163         }
 164 
 165         bh.consume(rs);
 166     }
 167 
 168     @Benchmark
 169     public void ADDMasked(Blackhole bh) {
 170         short[] as = fa.apply(size);
 171         short[] bs = fb.apply(size);
 172         short[] rs = fr.apply(size);
 173         boolean[] ms = fm.apply(size);
 174 
 175         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 176             for (int i = 0; i < as.length; i++) {
 177                 short a = as[i];
 178                 short b = bs[i];
 179                 if (ms[i % ms.length]) {
 180                     rs[i] = (short)(a + b);
 181                 } else {
 182                     rs[i] = a;
 183                 }
 184             }
 185         }
 186         bh.consume(rs);
 187     }
 188 
 189     @Benchmark
 190     public void SUB(Blackhole bh) {
 191         short[] as = fa.apply(size);
 192         short[] bs = fb.apply(size);
 193         short[] rs = fr.apply(size);
 194 
 195         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 196             for (int i = 0; i < as.length; i++) {
 197                 short a = as[i];
 198                 short b = bs[i];
 199                 rs[i] = (short)(a - b);
 200             }
 201         }
 202 
 203         bh.consume(rs);
 204     }
 205 
 206     @Benchmark
 207     public void SUBMasked(Blackhole bh) {
 208         short[] as = fa.apply(size);
 209         short[] bs = fb.apply(size);
 210         short[] rs = fr.apply(size);
 211         boolean[] ms = fm.apply(size);
 212 
 213         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 214             for (int i = 0; i < as.length; i++) {
 215                 short a = as[i];
 216                 short b = bs[i];
 217                 if (ms[i % ms.length]) {
 218                     rs[i] = (short)(a - b);
 219                 } else {
 220                     rs[i] = a;
 221                 }
 222             }
 223         }
 224         bh.consume(rs);
 225     }
 226 
 227     @Benchmark
 228     public void MUL(Blackhole bh) {
 229         short[] as = fa.apply(size);
 230         short[] bs = fb.apply(size);
 231         short[] rs = fr.apply(size);
 232 
 233         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 234             for (int i = 0; i < as.length; i++) {
 235                 short a = as[i];
 236                 short b = bs[i];
 237                 rs[i] = (short)(a * b);
 238             }
 239         }
 240 
 241         bh.consume(rs);
 242     }
 243 
 244     @Benchmark
 245     public void MULMasked(Blackhole bh) {
 246         short[] as = fa.apply(size);
 247         short[] bs = fb.apply(size);
 248         short[] rs = fr.apply(size);
 249         boolean[] ms = fm.apply(size);
 250 
 251         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 252             for (int i = 0; i < as.length; i++) {
 253                 short a = as[i];
 254                 short b = bs[i];
 255                 if (ms[i % ms.length]) {
 256                     rs[i] = (short)(a * b);
 257                 } else {
 258                     rs[i] = a;
 259                 }
 260             }
 261         }
 262         bh.consume(rs);
 263     }
 264 
 265     @Benchmark
 266     public void FIRST_NONZERO(Blackhole bh) {
 267         short[] as = fa.apply(size);
 268         short[] bs = fb.apply(size);
 269         short[] rs = fr.apply(size);
 270 
 271         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 272             for (int i = 0; i < as.length; i++) {
 273                 short a = as[i];
 274                 short b = bs[i];
 275                 rs[i] = (short)((a)!=0?a:b);
 276             }
 277         }
 278 
 279         bh.consume(rs);
 280     }
 281 
 282     @Benchmark
 283     public void FIRST_NONZEROMasked(Blackhole bh) {
 284         short[] as = fa.apply(size);
 285         short[] bs = fb.apply(size);
 286         short[] rs = fr.apply(size);
 287         boolean[] ms = fm.apply(size);
 288 
 289         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 290             for (int i = 0; i < as.length; i++) {
 291                 short a = as[i];
 292                 short b = bs[i];
 293                 if (ms[i % ms.length]) {
 294                     rs[i] = (short)((a)!=0?a:b);
 295                 } else {
 296                     rs[i] = a;
 297                 }
 298             }
 299         }
 300         bh.consume(rs);
 301     }
 302 
 303     @Benchmark
 304     public void AND(Blackhole bh) {
 305         short[] as = fa.apply(size);
 306         short[] bs = fb.apply(size);
 307         short[] rs = fr.apply(size);
 308 
 309         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 310             for (int i = 0; i < as.length; i++) {
 311                 short a = as[i];
 312                 short b = bs[i];
 313                 rs[i] = (short)(a & b);
 314             }
 315         }
 316 
 317         bh.consume(rs);
 318     }
 319 
 320     @Benchmark
 321     public void ANDMasked(Blackhole bh) {
 322         short[] as = fa.apply(size);
 323         short[] bs = fb.apply(size);
 324         short[] rs = fr.apply(size);
 325         boolean[] ms = fm.apply(size);
 326 
 327         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 328             for (int i = 0; i < as.length; i++) {
 329                 short a = as[i];
 330                 short b = bs[i];
 331                 if (ms[i % ms.length]) {
 332                     rs[i] = (short)(a & b);
 333                 } else {
 334                     rs[i] = a;
 335                 }
 336             }
 337         }
 338         bh.consume(rs);
 339     }
 340 
 341     @Benchmark
 342     public void AND_NOT(Blackhole bh) {
 343         short[] as = fa.apply(size);
 344         short[] bs = fb.apply(size);
 345         short[] rs = fr.apply(size);
 346 
 347         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 348             for (int i = 0; i < as.length; i++) {
 349                 short a = as[i];
 350                 short b = bs[i];
 351                 rs[i] = (short)(a & ~b);
 352             }
 353         }
 354 
 355         bh.consume(rs);
 356     }
 357 
 358     @Benchmark
 359     public void AND_NOTMasked(Blackhole bh) {
 360         short[] as = fa.apply(size);
 361         short[] bs = fb.apply(size);
 362         short[] rs = fr.apply(size);
 363         boolean[] ms = fm.apply(size);
 364 
 365         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 366             for (int i = 0; i < as.length; i++) {
 367                 short a = as[i];
 368                 short b = bs[i];
 369                 if (ms[i % ms.length]) {
 370                     rs[i] = (short)(a & ~b);
 371                 } else {
 372                     rs[i] = a;
 373                 }
 374             }
 375         }
 376         bh.consume(rs);
 377     }
 378 
 379     @Benchmark
 380     public void OR(Blackhole bh) {
 381         short[] as = fa.apply(size);
 382         short[] bs = fb.apply(size);
 383         short[] rs = fr.apply(size);
 384 
 385         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 386             for (int i = 0; i < as.length; i++) {
 387                 short a = as[i];
 388                 short b = bs[i];
 389                 rs[i] = (short)(a | b);
 390             }
 391         }
 392 
 393         bh.consume(rs);
 394     }
 395 
 396     @Benchmark
 397     public void ORMasked(Blackhole bh) {
 398         short[] as = fa.apply(size);
 399         short[] bs = fb.apply(size);
 400         short[] rs = fr.apply(size);
 401         boolean[] ms = fm.apply(size);
 402 
 403         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 404             for (int i = 0; i < as.length; i++) {
 405                 short a = as[i];
 406                 short b = bs[i];
 407                 if (ms[i % ms.length]) {
 408                     rs[i] = (short)(a | b);
 409                 } else {
 410                     rs[i] = a;
 411                 }
 412             }
 413         }
 414         bh.consume(rs);
 415     }
 416 
 417     @Benchmark
 418     public void XOR(Blackhole bh) {
 419         short[] as = fa.apply(size);
 420         short[] bs = fb.apply(size);
 421         short[] rs = fr.apply(size);
 422 
 423         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 424             for (int i = 0; i < as.length; i++) {
 425                 short a = as[i];
 426                 short b = bs[i];
 427                 rs[i] = (short)(a ^ b);
 428             }
 429         }
 430 
 431         bh.consume(rs);
 432     }
 433 
 434     @Benchmark
 435     public void XORMasked(Blackhole bh) {
 436         short[] as = fa.apply(size);
 437         short[] bs = fb.apply(size);
 438         short[] rs = fr.apply(size);
 439         boolean[] ms = fm.apply(size);
 440 
 441         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 442             for (int i = 0; i < as.length; i++) {
 443                 short a = as[i];
 444                 short b = bs[i];
 445                 if (ms[i % ms.length]) {
 446                     rs[i] = (short)(a ^ b);
 447                 } else {
 448                     rs[i] = a;
 449                 }
 450             }
 451         }
 452         bh.consume(rs);
 453     }
 454 
 455     @Benchmark
 456     public void LSHL(Blackhole bh) {
 457         short[] as = fa.apply(size);
 458         short[] bs = fb.apply(size);
 459         short[] rs = fr.apply(size);
 460 
 461         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 462             for (int i = 0; i < as.length; i++) {
 463                 short a = as[i];
 464                 short b = bs[i];
 465                 rs[i] = (short)((a << (b & 0xF)));
 466             }
 467         }
 468 
 469         bh.consume(rs);
 470     }
 471 
 472     @Benchmark
 473     public void LSHLMasked(Blackhole bh) {
 474         short[] as = fa.apply(size);
 475         short[] bs = fb.apply(size);
 476         short[] rs = fr.apply(size);
 477         boolean[] ms = fm.apply(size);
 478 
 479         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 480             for (int i = 0; i < as.length; i++) {
 481                 short a = as[i];
 482                 short b = bs[i];
 483                 if (ms[i % ms.length]) {
 484                     rs[i] = (short)((a << (b & 0xF)));
 485                 } else {
 486                     rs[i] = a;
 487                 }
 488             }
 489         }
 490         bh.consume(rs);
 491     }
 492 
 493     @Benchmark
 494     public void ASHR(Blackhole bh) {
 495         short[] as = fa.apply(size);
 496         short[] bs = fb.apply(size);
 497         short[] rs = fr.apply(size);
 498 
 499         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 500             for (int i = 0; i < as.length; i++) {
 501                 short a = as[i];
 502                 short b = bs[i];
 503                 rs[i] = (short)((a >> (b & 0xF)));
 504             }
 505         }
 506 
 507         bh.consume(rs);
 508     }
 509 
 510     @Benchmark
 511     public void ASHRMasked(Blackhole bh) {
 512         short[] as = fa.apply(size);
 513         short[] bs = fb.apply(size);
 514         short[] rs = fr.apply(size);
 515         boolean[] ms = fm.apply(size);
 516 
 517         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 518             for (int i = 0; i < as.length; i++) {
 519                 short a = as[i];
 520                 short b = bs[i];
 521                 if (ms[i % ms.length]) {
 522                     rs[i] = (short)((a >> (b & 0xF)));
 523                 } else {
 524                     rs[i] = a;
 525                 }
 526             }
 527         }
 528         bh.consume(rs);
 529     }
 530 
 531     @Benchmark
 532     public void LSHR(Blackhole bh) {
 533         short[] as = fa.apply(size);
 534         short[] bs = fb.apply(size);
 535         short[] rs = fr.apply(size);
 536 
 537         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 538             for (int i = 0; i < as.length; i++) {
 539                 short a = as[i];
 540                 short b = bs[i];
 541                 rs[i] = (short)(((a & 0xFFFF) >>> (b & 0xF)));
 542             }
 543         }
 544 
 545         bh.consume(rs);
 546     }
 547 
 548     @Benchmark
 549     public void LSHRMasked(Blackhole bh) {
 550         short[] as = fa.apply(size);
 551         short[] bs = fb.apply(size);
 552         short[] rs = fr.apply(size);
 553         boolean[] ms = fm.apply(size);
 554 
 555         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 556             for (int i = 0; i < as.length; i++) {
 557                 short a = as[i];
 558                 short b = bs[i];
 559                 if (ms[i % ms.length]) {
 560                     rs[i] = (short)(((a & 0xFFFF) >>> (b & 0xF)));
 561                 } else {
 562                     rs[i] = a;
 563                 }
 564             }
 565         }
 566         bh.consume(rs);
 567     }
 568 
 569     @Benchmark
 570     public void LSHLShift(Blackhole bh) {
 571         short[] as = fa.apply(size);
 572         short[] bs = fb.apply(size);
 573         short[] rs = fr.apply(size);
 574 
 575         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 576             for (int i = 0; i < as.length; i++) {
 577                 short a = as[i];
 578                 short b = bs[i];
 579                 rs[i] = (short)((a << (b & 15)));
 580             }
 581         }
 582 
 583         bh.consume(rs);
 584     }
 585 
 586     @Benchmark
 587     public void LSHLMaskedShift(Blackhole bh) {
 588         short[] as = fa.apply(size);
 589         short[] bs = fb.apply(size);
 590         short[] rs = fr.apply(size);
 591         boolean[] ms = fm.apply(size);
 592 
 593         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 594             for (int i = 0; i < as.length; i++) {
 595                 short a = as[i];
 596                 short b = bs[i];
 597                 boolean m = ms[i % ms.length];
 598                 rs[i] = (m ? (short)((a << (b & 15))) : a);
 599             }
 600         }
 601 
 602         bh.consume(rs);
 603     }
 604 
 605     @Benchmark
 606     public void LSHRShift(Blackhole bh) {
 607         short[] as = fa.apply(size);
 608         short[] bs = fb.apply(size);
 609         short[] rs = fr.apply(size);
 610 
 611         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 612             for (int i = 0; i < as.length; i++) {
 613                 short a = as[i];
 614                 short b = bs[i];
 615                 rs[i] = (short)(((a & 0xFFFF) >>> (b & 15)));
 616             }
 617         }
 618 
 619         bh.consume(rs);
 620     }
 621 
 622     @Benchmark
 623     public void LSHRMaskedShift(Blackhole bh) {
 624         short[] as = fa.apply(size);
 625         short[] bs = fb.apply(size);
 626         short[] rs = fr.apply(size);
 627         boolean[] ms = fm.apply(size);
 628 
 629         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 630             for (int i = 0; i < as.length; i++) {
 631                 short a = as[i];
 632                 short b = bs[i];
 633                 boolean m = ms[i % ms.length];
 634                 rs[i] = (m ? (short)(((a & 0xFFFF) >>> (b & 15))) : a);
 635             }
 636         }
 637 
 638         bh.consume(rs);
 639     }
 640 
 641     @Benchmark
 642     public void ASHRShift(Blackhole bh) {
 643         short[] as = fa.apply(size);
 644         short[] bs = fb.apply(size);
 645         short[] rs = fr.apply(size);
 646 
 647         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 648             for (int i = 0; i < as.length; i++) {
 649                 short a = as[i];
 650                 short b = bs[i];
 651                 rs[i] = (short)((a >> (b & 15)));
 652             }
 653         }
 654 
 655         bh.consume(rs);
 656     }
 657 
 658     @Benchmark
 659     public void ASHRMaskedShift(Blackhole bh) {
 660         short[] as = fa.apply(size);
 661         short[] bs = fb.apply(size);
 662         short[] rs = fr.apply(size);
 663         boolean[] ms = fm.apply(size);
 664 
 665         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 666             for (int i = 0; i < as.length; i++) {
 667                 short a = as[i];
 668                 short b = bs[i];
 669                 boolean m = ms[i % ms.length];
 670                 rs[i] = (m ? (short)((a >> (b & 15))) : a);
 671             }
 672         }
 673 
 674         bh.consume(rs);
 675     }
 676 
 677     @Benchmark
 678     public void ROR(Blackhole bh) {
 679         short[] as = fa.apply(size);
 680         short[] bs = fb.apply(size);
 681         short[] rs = fr.apply(size);
 682 
 683         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 684             for (int i = 0; i < as.length; i++) {
 685                 short a = as[i];
 686                 short b = bs[i];
 687                 rs[i] = (short)(ROR_scalar(a,b));
 688             }
 689         }
 690 
 691         bh.consume(rs);
 692     }
 693 
 694     @Benchmark
 695     public void RORMasked(Blackhole bh) {
 696         short[] as = fa.apply(size);
 697         short[] bs = fb.apply(size);
 698         short[] rs = fr.apply(size);
 699         boolean[] ms = fm.apply(size);
 700 
 701         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 702             for (int i = 0; i < as.length; i++) {
 703                 short a = as[i];
 704                 short b = bs[i];
 705                 if (ms[i % ms.length]) {
 706                     rs[i] = (short)(ROR_scalar(a,b));
 707                 } else {
 708                     rs[i] = a;
 709                 }
 710             }
 711         }
 712         bh.consume(rs);
 713     }
 714 
 715     @Benchmark
 716     public void ROL(Blackhole bh) {
 717         short[] as = fa.apply(size);
 718         short[] bs = fb.apply(size);
 719         short[] rs = fr.apply(size);
 720 
 721         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 722             for (int i = 0; i < as.length; i++) {
 723                 short a = as[i];
 724                 short b = bs[i];
 725                 rs[i] = (short)(ROL_scalar(a,b));
 726             }
 727         }
 728 
 729         bh.consume(rs);
 730     }
 731 
 732     @Benchmark
 733     public void ROLMasked(Blackhole bh) {
 734         short[] as = fa.apply(size);
 735         short[] bs = fb.apply(size);
 736         short[] rs = fr.apply(size);
 737         boolean[] ms = fm.apply(size);
 738 
 739         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 740             for (int i = 0; i < as.length; i++) {
 741                 short a = as[i];
 742                 short b = bs[i];
 743                 if (ms[i % ms.length]) {
 744                     rs[i] = (short)(ROL_scalar(a,b));
 745                 } else {
 746                     rs[i] = a;
 747                 }
 748             }
 749         }
 750         bh.consume(rs);
 751     }
 752 
 753     @Benchmark
 754     public void RORShift(Blackhole bh) {
 755         short[] as = fa.apply(size);
 756         short[] bs = fb.apply(size);
 757         short[] rs = fr.apply(size);
 758 
 759         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 760             for (int i = 0; i < as.length; i++) {
 761                 short a = as[i];
 762                 short b = bs[i];
 763                 rs[i] = (short)(ROR_scalar(a, b));
 764             }
 765         }
 766 
 767         bh.consume(rs);
 768     }
 769 
 770     @Benchmark
 771     public void RORMaskedShift(Blackhole bh) {
 772         short[] as = fa.apply(size);
 773         short[] bs = fb.apply(size);
 774         short[] rs = fr.apply(size);
 775         boolean[] ms = fm.apply(size);
 776 
 777         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 778             for (int i = 0; i < as.length; i++) {
 779                 short a = as[i];
 780                 short b = bs[i];
 781                 boolean m = ms[i % ms.length];
 782                 rs[i] = (m ? (short)(ROR_scalar(a, b)) : a);
 783             }
 784         }
 785 
 786         bh.consume(rs);
 787     }
 788 
 789     @Benchmark
 790     public void ROLShift(Blackhole bh) {
 791         short[] as = fa.apply(size);
 792         short[] bs = fb.apply(size);
 793         short[] rs = fr.apply(size);
 794 
 795         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 796             for (int i = 0; i < as.length; i++) {
 797                 short a = as[i];
 798                 short b = bs[i];
 799                 rs[i] = (short)(ROL_scalar(a, b));
 800             }
 801         }
 802 
 803         bh.consume(rs);
 804     }
 805 
 806     @Benchmark
 807     public void ROLMaskedShift(Blackhole bh) {
 808         short[] as = fa.apply(size);
 809         short[] bs = fb.apply(size);
 810         short[] rs = fr.apply(size);
 811         boolean[] ms = fm.apply(size);
 812 
 813         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 814             for (int i = 0; i < as.length; i++) {
 815                 short a = as[i];
 816                 short b = bs[i];
 817                 boolean m = ms[i % ms.length];
 818                 rs[i] = (m ? (short)(ROL_scalar(a, b)) : a);
 819             }
 820         }
 821 
 822         bh.consume(rs);
 823     }
 824 
 825     @Benchmark
 826     public void LSHRShiftConst(Blackhole bh) {
 827         short[] as = fa.apply(size);
 828         short[] bs = fb.apply(size);
 829         short[] rs = fr.apply(size);
 830 
 831         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 832             for (int i = 0; i < as.length; i++) {
 833                 short a = as[i];
 834                 short b = bs[i];
 835                 rs[i] = (short)(((a & 0xFFFF) >>> CONST_SHIFT));
 836             }
 837         }
 838 
 839         bh.consume(rs);
 840     }
 841 
 842     @Benchmark
 843     public void LSHRMaskedShiftConst(Blackhole bh) {
 844         short[] as = fa.apply(size);
 845         short[] bs = fb.apply(size);
 846         short[] rs = fr.apply(size);
 847         boolean[] ms = fm.apply(size);
 848 
 849         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 850             for (int i = 0; i < as.length; i++) {
 851                 short a = as[i];
 852                 short b = bs[i];
 853                 boolean m = ms[i % ms.length];
 854                 rs[i] = (m ? (short)(((a & 0xFFFF) >>> CONST_SHIFT)) : a);
 855             }
 856         }
 857 
 858         bh.consume(rs);
 859     }
 860 
 861     @Benchmark
 862     public void LSHLShiftConst(Blackhole bh) {
 863         short[] as = fa.apply(size);
 864         short[] bs = fb.apply(size);
 865         short[] rs = fr.apply(size);
 866 
 867         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 868             for (int i = 0; i < as.length; i++) {
 869                 short a = as[i];
 870                 short b = bs[i];
 871                 rs[i] = (short)((a << CONST_SHIFT));
 872             }
 873         }
 874 
 875         bh.consume(rs);
 876     }
 877 
 878     @Benchmark
 879     public void LSHLMaskedShiftConst(Blackhole bh) {
 880         short[] as = fa.apply(size);
 881         short[] bs = fb.apply(size);
 882         short[] rs = fr.apply(size);
 883         boolean[] ms = fm.apply(size);
 884 
 885         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 886             for (int i = 0; i < as.length; i++) {
 887                 short a = as[i];
 888                 short b = bs[i];
 889                 boolean m = ms[i % ms.length];
 890                 rs[i] = (m ? (short)((a << CONST_SHIFT)) : a);
 891             }
 892         }
 893 
 894         bh.consume(rs);
 895     }
 896 
 897     @Benchmark
 898     public void ASHRShiftConst(Blackhole bh) {
 899         short[] as = fa.apply(size);
 900         short[] bs = fb.apply(size);
 901         short[] rs = fr.apply(size);
 902 
 903         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 904             for (int i = 0; i < as.length; i++) {
 905                 short a = as[i];
 906                 short b = bs[i];
 907                 rs[i] = (short)((a >> CONST_SHIFT));
 908             }
 909         }
 910 
 911         bh.consume(rs);
 912     }
 913 
 914     @Benchmark
 915     public void ASHRMaskedShiftConst(Blackhole bh) {
 916         short[] as = fa.apply(size);
 917         short[] bs = fb.apply(size);
 918         short[] rs = fr.apply(size);
 919         boolean[] ms = fm.apply(size);
 920 
 921         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 922             for (int i = 0; i < as.length; i++) {
 923                 short a = as[i];
 924                 short b = bs[i];
 925                 boolean m = ms[i % ms.length];
 926                 rs[i] = (m ? (short)((a >> CONST_SHIFT)) : a);
 927             }
 928         }
 929 
 930         bh.consume(rs);
 931     }
 932 
 933     @Benchmark
 934     public void RORShiftConst(Blackhole bh) {
 935         short[] as = fa.apply(size);
 936         short[] bs = fb.apply(size);
 937         short[] rs = fr.apply(size);
 938 
 939         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 940             for (int i = 0; i < as.length; i++) {
 941                 short a = as[i];
 942                 short b = bs[i];
 943                 rs[i] = (short)(ROR_scalar(a, CONST_SHIFT));
 944             }
 945         }
 946 
 947         bh.consume(rs);
 948     }
 949 
 950     @Benchmark
 951     public void RORMaskedShiftConst(Blackhole bh) {
 952         short[] as = fa.apply(size);
 953         short[] bs = fb.apply(size);
 954         short[] rs = fr.apply(size);
 955         boolean[] ms = fm.apply(size);
 956 
 957         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 958             for (int i = 0; i < as.length; i++) {
 959                 short a = as[i];
 960                 short b = bs[i];
 961                 boolean m = ms[i % ms.length];
 962                 rs[i] = (m ? (short)(ROR_scalar(a, CONST_SHIFT)) : a);
 963             }
 964         }
 965 
 966         bh.consume(rs);
 967     }
 968 
 969     @Benchmark
 970     public void ROLShiftConst(Blackhole bh) {
 971         short[] as = fa.apply(size);
 972         short[] bs = fb.apply(size);
 973         short[] rs = fr.apply(size);
 974 
 975         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 976             for (int i = 0; i < as.length; i++) {
 977                 short a = as[i];
 978                 short b = bs[i];
 979                 rs[i] = (short)(ROL_scalar(a, CONST_SHIFT));
 980             }
 981         }
 982 
 983         bh.consume(rs);
 984     }
 985 
 986     @Benchmark
 987     public void ROLMaskedShiftConst(Blackhole bh) {
 988         short[] as = fa.apply(size);
 989         short[] bs = fb.apply(size);
 990         short[] rs = fr.apply(size);
 991         boolean[] ms = fm.apply(size);
 992 
 993         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 994             for (int i = 0; i < as.length; i++) {
 995                 short a = as[i];
 996                 short b = bs[i];
 997                 boolean m = ms[i % ms.length];
 998                 rs[i] = (m ? (short)(ROL_scalar(a, CONST_SHIFT)) : a);
 999             }
1000         }
1001 
1002         bh.consume(rs);
1003     }
1004 
1005     @Benchmark
1006     public void MIN(Blackhole bh) {
1007         short[] as = fa.apply(size);
1008         short[] bs = fb.apply(size);
1009         short[] rs = fr.apply(size);
1010 
1011         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1012             for (int i = 0; i < as.length; i++) {
1013                 short a = as[i];
1014                 short b = bs[i];
1015                 rs[i] = (short)(Math.min(a, b));
1016             }
1017         }
1018 
1019         bh.consume(rs);
1020     }
1021 
1022     @Benchmark
1023     public void MAX(Blackhole bh) {
1024         short[] as = fa.apply(size);
1025         short[] bs = fb.apply(size);
1026         short[] rs = fr.apply(size);
1027 
1028         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1029             for (int i = 0; i < as.length; i++) {
1030                 short a = as[i];
1031                 short b = bs[i];
1032                 rs[i] = (short)(Math.max(a, b));
1033             }
1034         }
1035 
1036         bh.consume(rs);
1037     }
1038 
1039     @Benchmark
1040     public void UMIN(Blackhole bh) {
1041         short[] as = fa.apply(size);
1042         short[] bs = fb.apply(size);
1043         short[] rs = fr.apply(size);
1044 
1045         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1046             for (int i = 0; i < as.length; i++) {
1047                 short a = as[i];
1048                 short b = bs[i];
1049                 rs[i] = (short)(VectorMath.minUnsigned(a, b));
1050             }
1051         }
1052 
1053         bh.consume(rs);
1054     }
1055 
1056     @Benchmark
1057     public void UMINMasked(Blackhole bh) {
1058         short[] as = fa.apply(size);
1059         short[] bs = fb.apply(size);
1060         short[] rs = fr.apply(size);
1061         boolean[] ms = fm.apply(size);
1062 
1063         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1064             for (int i = 0; i < as.length; i++) {
1065                 short a = as[i];
1066                 short b = bs[i];
1067                 if (ms[i % ms.length]) {
1068                     rs[i] = (short)(VectorMath.minUnsigned(a, b));
1069                 } else {
1070                     rs[i] = a;
1071                 }
1072             }
1073         }
1074         bh.consume(rs);
1075     }
1076 
1077     @Benchmark
1078     public void UMAX(Blackhole bh) {
1079         short[] as = fa.apply(size);
1080         short[] bs = fb.apply(size);
1081         short[] rs = fr.apply(size);
1082 
1083         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1084             for (int i = 0; i < as.length; i++) {
1085                 short a = as[i];
1086                 short b = bs[i];
1087                 rs[i] = (short)(VectorMath.maxUnsigned(a, b));
1088             }
1089         }
1090 
1091         bh.consume(rs);
1092     }
1093 
1094     @Benchmark
1095     public void UMAXMasked(Blackhole bh) {
1096         short[] as = fa.apply(size);
1097         short[] bs = fb.apply(size);
1098         short[] rs = fr.apply(size);
1099         boolean[] ms = fm.apply(size);
1100 
1101         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1102             for (int i = 0; i < as.length; i++) {
1103                 short a = as[i];
1104                 short b = bs[i];
1105                 if (ms[i % ms.length]) {
1106                     rs[i] = (short)(VectorMath.maxUnsigned(a, b));
1107                 } else {
1108                     rs[i] = a;
1109                 }
1110             }
1111         }
1112         bh.consume(rs);
1113     }
1114 
1115     @Benchmark
1116     public void ANDLanes(Blackhole bh) {
1117         short[] as = fa.apply(size);
1118         short r = -1;
1119         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1120             r = -1;
1121             for (int i = 0; i < as.length; i++) {
1122                 r &= as[i];
1123             }
1124         }
1125         bh.consume(r);
1126     }
1127 
1128     @Benchmark
1129     public void ANDMaskedLanes(Blackhole bh) {
1130         short[] as = fa.apply(size);
1131         boolean[] ms = fm.apply(size);
1132         short r = -1;
1133         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1134             r = -1;
1135             for (int i = 0; i < as.length; i++) {
1136                 if (ms[i % ms.length])
1137                     r &= as[i];
1138             }
1139         }
1140         bh.consume(r);
1141     }
1142 
1143     @Benchmark
1144     public void ORLanes(Blackhole bh) {
1145         short[] as = fa.apply(size);
1146         short r = 0;
1147         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1148             r = 0;
1149             for (int i = 0; i < as.length; i++) {
1150                 r |= as[i];
1151             }
1152         }
1153         bh.consume(r);
1154     }
1155 
1156     @Benchmark
1157     public void ORMaskedLanes(Blackhole bh) {
1158         short[] as = fa.apply(size);
1159         boolean[] ms = fm.apply(size);
1160         short r = 0;
1161         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1162             r = 0;
1163             for (int i = 0; i < as.length; i++) {
1164                 if (ms[i % ms.length])
1165                     r |= as[i];
1166             }
1167         }
1168         bh.consume(r);
1169     }
1170 
1171     @Benchmark
1172     public void XORLanes(Blackhole bh) {
1173         short[] as = fa.apply(size);
1174         short r = 0;
1175         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1176             r = 0;
1177             for (int i = 0; i < as.length; i++) {
1178                 r ^= as[i];
1179             }
1180         }
1181         bh.consume(r);
1182     }
1183 
1184     @Benchmark
1185     public void XORMaskedLanes(Blackhole bh) {
1186         short[] as = fa.apply(size);
1187         boolean[] ms = fm.apply(size);
1188         short r = 0;
1189         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1190             r = 0;
1191             for (int i = 0; i < as.length; i++) {
1192                 if (ms[i % ms.length])
1193                     r ^= as[i];
1194             }
1195         }
1196         bh.consume(r);
1197     }
1198 
1199     @Benchmark
1200     public void ADDLanes(Blackhole bh) {
1201         short[] as = fa.apply(size);
1202         short r = 0;
1203         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1204             r = 0;
1205             for (int i = 0; i < as.length; i++) {
1206                 r += as[i];
1207             }
1208         }
1209         bh.consume(r);
1210     }
1211 
1212     @Benchmark
1213     public void ADDMaskedLanes(Blackhole bh) {
1214         short[] as = fa.apply(size);
1215         boolean[] ms = fm.apply(size);
1216         short r = 0;
1217         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1218             r = 0;
1219             for (int i = 0; i < as.length; i++) {
1220                 if (ms[i % ms.length])
1221                     r += as[i];
1222             }
1223         }
1224         bh.consume(r);
1225     }
1226 
1227     @Benchmark
1228     public void MULLanes(Blackhole bh) {
1229         short[] as = fa.apply(size);
1230         short r = 1;
1231         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1232             r = 1;
1233             for (int i = 0; i < as.length; i++) {
1234                 r *= as[i];
1235             }
1236         }
1237         bh.consume(r);
1238     }
1239 
1240     @Benchmark
1241     public void MULMaskedLanes(Blackhole bh) {
1242         short[] as = fa.apply(size);
1243         boolean[] ms = fm.apply(size);
1244         short r = 1;
1245         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1246             r = 1;
1247             for (int i = 0; i < as.length; i++) {
1248                 if (ms[i % ms.length])
1249                     r *= as[i];
1250             }
1251         }
1252         bh.consume(r);
1253     }
1254 
1255     @Benchmark
1256     public void anyTrue(Blackhole bh) {
1257         boolean[] ms = fm.apply(size);
1258         boolean r = false;
1259         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1260             r = false;
1261             for (int i = 0; i < ms.length; i++) {
1262                 r |= ms[i];
1263             }
1264         }
1265         bh.consume(r);
1266     }
1267 
1268     @Benchmark
1269     public void allTrue(Blackhole bh) {
1270         boolean[] ms = fm.apply(size);
1271         boolean r = true;
1272         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1273             r = true;
1274             for (int i = 0; i < ms.length; i++) {
1275                 r &= ms[i];
1276             }
1277         }
1278         bh.consume(r);
1279     }
1280 
1281     @Benchmark
1282     public void IS_DEFAULT(Blackhole bh) {
1283         short[] as = fa.apply(size);
1284         boolean r = true;
1285 
1286         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1287             for (int i = 0; i < as.length; i++) {
1288                 short a = as[i];
1289                 r &= (bits(a)==0); // accumulate so JIT can't eliminate the computation
1290             }
1291         }
1292 
1293         bh.consume(r);
1294     }
1295 
1296     @Benchmark
1297     public void IS_NEGATIVE(Blackhole bh) {
1298         short[] as = fa.apply(size);
1299         boolean r = true;
1300 
1301         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1302             for (int i = 0; i < as.length; i++) {
1303                 short a = as[i];
1304                 r &= (bits(a)<0); // accumulate so JIT can't eliminate the computation
1305             }
1306         }
1307 
1308         bh.consume(r);
1309     }
1310 
1311     @Benchmark
1312     public void LT(Blackhole bh) {
1313         short[] as = fa.apply(size);
1314         short[] bs = fb.apply(size);
1315         boolean r = true;
1316 
1317         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1318             for (int i = 0; i < as.length; i++) {
1319                 r &= lt(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1320             }
1321         }
1322 
1323         bh.consume(r);
1324     }
1325 
1326     @Benchmark
1327     public void GT(Blackhole bh) {
1328         short[] as = fa.apply(size);
1329         short[] bs = fb.apply(size);
1330         boolean r = true;
1331 
1332         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1333             for (int i = 0; i < as.length; i++) {
1334                 r &= gt(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1335             }
1336         }
1337 
1338         bh.consume(r);
1339     }
1340 
1341     @Benchmark
1342     public void EQ(Blackhole bh) {
1343         short[] as = fa.apply(size);
1344         short[] bs = fb.apply(size);
1345         boolean r = true;
1346 
1347         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1348             for (int i = 0; i < as.length; i++) {
1349                 r &= eq(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1350             }
1351         }
1352 
1353         bh.consume(r);
1354     }
1355 
1356     @Benchmark
1357     public void NE(Blackhole bh) {
1358         short[] as = fa.apply(size);
1359         short[] bs = fb.apply(size);
1360         boolean r = true;
1361 
1362         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1363             for (int i = 0; i < as.length; i++) {
1364                 r &= neq(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1365             }
1366         }
1367 
1368         bh.consume(r);
1369     }
1370 
1371     @Benchmark
1372     public void LE(Blackhole bh) {
1373         short[] as = fa.apply(size);
1374         short[] bs = fb.apply(size);
1375         boolean r = true;
1376 
1377         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1378             for (int i = 0; i < as.length; i++) {
1379                 r &= le(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1380             }
1381         }
1382 
1383         bh.consume(r);
1384     }
1385 
1386     @Benchmark
1387     public void GE(Blackhole bh) {
1388         short[] as = fa.apply(size);
1389         short[] bs = fb.apply(size);
1390         boolean r = true;
1391 
1392         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1393             for (int i = 0; i < as.length; i++) {
1394                 r &= ge(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1395             }
1396         }
1397 
1398         bh.consume(r);
1399     }
1400 
1401     @Benchmark
1402     public void ULT(Blackhole bh) {
1403         short[] as = fa.apply(size);
1404         short[] bs = fb.apply(size);
1405         boolean r = true;
1406 
1407         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1408             for (int i = 0; i < as.length; i++) {
1409                 r &= ult(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1410             }
1411         }
1412 
1413         bh.consume(r);
1414     }
1415 
1416     @Benchmark
1417     public void UGT(Blackhole bh) {
1418         short[] as = fa.apply(size);
1419         short[] bs = fb.apply(size);
1420         boolean r = true;
1421 
1422         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1423             for (int i = 0; i < as.length; i++) {
1424                 r &= ugt(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1425             }
1426         }
1427 
1428         bh.consume(r);
1429     }
1430 
1431     @Benchmark
1432     public void ULE(Blackhole bh) {
1433         short[] as = fa.apply(size);
1434         short[] bs = fb.apply(size);
1435         boolean r = true;
1436 
1437         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1438             for (int i = 0; i < as.length; i++) {
1439                 r &= ule(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1440             }
1441         }
1442 
1443         bh.consume(r);
1444     }
1445 
1446     @Benchmark
1447     public void UGE(Blackhole bh) {
1448         short[] as = fa.apply(size);
1449         short[] bs = fb.apply(size);
1450         boolean r = true;
1451 
1452         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1453             for (int i = 0; i < as.length; i++) {
1454                 r &= uge(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1455             }
1456         }
1457 
1458         bh.consume(r);
1459     }
1460 
1461     @Benchmark
1462     public void blend(Blackhole bh) {
1463         short[] as = fa.apply(size);
1464         short[] bs = fb.apply(size);
1465         short[] rs = fr.apply(size);
1466         boolean[] ms = fm.apply(size);
1467 
1468         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1469             for (int i = 0; i < as.length; i++) {
1470                 short a = as[i];
1471                 short b = bs[i];
1472                 boolean m = ms[i % ms.length];
1473                 rs[i] = (m ? b : a);
1474             }
1475         }
1476 
1477         bh.consume(rs);
1478     }
1479 
1480     void rearrangeShared(int window, Blackhole bh) {
1481         short[] as = fa.apply(size);
1482         int[] order = fs.apply(size);
1483         short[] rs = fr.apply(size);
1484 
1485         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1486             for (int i = 0; i < as.length; i += window) {
1487                 for (int j = 0; j < window; j++) {
1488                     short a = as[i+j];
1489                     int pos = order[j];
1490                     rs[i + pos] = a;
1491                 }
1492             }
1493         }
1494 
1495         bh.consume(rs);
1496     }
1497 
1498     @Benchmark
1499     public void rearrange064(Blackhole bh) {
1500         int window = 64 / Short.SIZE;
1501         rearrangeShared(window, bh);
1502     }
1503 
1504     @Benchmark
1505     public void rearrange128(Blackhole bh) {
1506         int window = 128 / Short.SIZE;
1507         rearrangeShared(window, bh);
1508     }
1509 
1510     @Benchmark
1511     public void rearrange256(Blackhole bh) {
1512         int window = 256 / Short.SIZE;
1513         rearrangeShared(window, bh);
1514     }
1515 
1516     @Benchmark
1517     public void rearrange512(Blackhole bh) {
1518         int window = 512 / Short.SIZE;
1519         rearrangeShared(window, bh);
1520     }
1521 
1522     @Benchmark
1523     public void compressScalar(Blackhole bh) {
1524         short[] as = fa.apply(size);
1525         short[] rs = new short[size];
1526         boolean[] im = fmt.apply(size);
1527 
1528         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1529             for (int i = 0, j = 0; i < as.length; i++) {
1530                 if (im[i]) {
1531                     rs[j++] = as[i];
1532                 }
1533             }
1534         }
1535 
1536         bh.consume(rs);
1537     }
1538 
1539     @Benchmark
1540     public void expandScalar(Blackhole bh) {
1541         short[] as = fa.apply(size);
1542         short[] rs = new short[size];
1543         boolean[] im = fmt.apply(size);
1544 
1545         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1546             for (int i = 0, j = 0; i < as.length; i++) {
1547                 if (im[i]) {
1548                     rs[i++] = as[j++];
1549                 }
1550             }
1551         }
1552 
1553         bh.consume(rs);
1554     }
1555 
1556     @Benchmark
1557     public void maskCompressScalar(Blackhole bh) {
1558         boolean[] im = fmt.apply(size);
1559         boolean[] rm = new boolean[size];
1560 
1561         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1562             for (int i = 0, j = 0; i < im.length; i++) {
1563                 if (im[i]) {
1564                     rm[j++] = im[i];
1565                 }
1566             }
1567         }
1568 
1569         bh.consume(rm);
1570     }
1571 
1572     void broadcastShared(int window, Blackhole bh) {
1573         short[] as = fa.apply(size);
1574         short[] rs = fr.apply(size);
1575 
1576         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1577             for (int i = 0; i < as.length; i += window) {
1578                 int idx = i;
1579                 for (int j = 0; j < window; j++) {
1580                     rs[j] = as[idx];
1581                 }
1582             }
1583         }
1584 
1585         bh.consume(rs);
1586     }
1587 
1588     @Benchmark
1589     public void broadcast064(Blackhole bh) {
1590         int window = 64 / Short.SIZE;
1591         broadcastShared(window, bh);
1592     }
1593 
1594     @Benchmark
1595     public void broadcast128(Blackhole bh) {
1596         int window = 128 / Short.SIZE;
1597         broadcastShared(window, bh);
1598     }
1599 
1600     @Benchmark
1601     public void broadcast256(Blackhole bh) {
1602         int window = 256 / Short.SIZE;
1603         broadcastShared(window, bh);
1604     }
1605 
1606     @Benchmark
1607     public void broadcast512(Blackhole bh) {
1608         int window = 512 / Short.SIZE;
1609         broadcastShared(window, bh);
1610     }
1611 
1612     @Benchmark
1613     public void zero(Blackhole bh) {
1614         short[] as = fa.apply(size);
1615 
1616         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1617             for (int i = 0; i < as.length; i++) {
1618                 as[i] = (short)0;
1619             }
1620         }
1621 
1622         bh.consume(as);
1623     }
1624 
1625     @Benchmark
1626     public void BITWISE_BLEND(Blackhole bh) {
1627         short[] as = fa.apply(size);
1628         short[] bs = fb.apply(size);
1629         short[] cs = fc.apply(size);
1630         short[] rs = fr.apply(size);
1631 
1632         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1633             for (int i = 0; i < as.length; i++) {
1634                 short a = as[i];
1635                 short b = bs[i];
1636                 short c = cs[i];
1637                 rs[i] = (short)((a&~(c))|(b&c));
1638             }
1639         }
1640 
1641         bh.consume(rs);
1642     }
1643 
1644     @Benchmark
1645     public void BITWISE_BLENDMasked(Blackhole bh) {
1646         short[] as = fa.apply(size);
1647         short[] bs = fb.apply(size);
1648         short[] cs = fc.apply(size);
1649         short[] rs = fr.apply(size);
1650         boolean[] ms = fm.apply(size);
1651 
1652         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1653             for (int i = 0; i < as.length; i++) {
1654                 short a = as[i];
1655                 short b = bs[i];
1656                 short c = cs[i];
1657                 if (ms[i % ms.length]) {
1658                     rs[i] = (short)((a&~(c))|(b&c));
1659                 } else {
1660                     rs[i] = a;
1661                 }
1662             }
1663         }
1664         bh.consume(rs);
1665     }
1666     @Benchmark
1667     public void NEG(Blackhole bh) {
1668         short[] as = fa.apply(size);
1669         short[] rs = fr.apply(size);
1670 
1671         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1672             for (int i = 0; i < as.length; i++) {
1673                 short a = as[i];
1674                 rs[i] = (short)(-((short)a));
1675             }
1676         }
1677 
1678         bh.consume(rs);
1679     }
1680 
1681     @Benchmark
1682     public void NEGMasked(Blackhole bh) {
1683         short[] as = fa.apply(size);
1684         short[] rs = fr.apply(size);
1685         boolean[] ms = fm.apply(size);
1686 
1687         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1688             for (int i = 0; i < as.length; i++) {
1689                 short a = as[i];
1690                 boolean m = ms[i % ms.length];
1691                 rs[i] = (m ? (short)(-((short)a)) : a);
1692             }
1693         }
1694 
1695         bh.consume(rs);
1696     }
1697     @Benchmark
1698     public void ABS(Blackhole bh) {
1699         short[] as = fa.apply(size);
1700         short[] rs = fr.apply(size);
1701 
1702         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1703             for (int i = 0; i < as.length; i++) {
1704                 short a = as[i];
1705                 rs[i] = (short)(Math.abs((short)a));
1706             }
1707         }
1708 
1709         bh.consume(rs);
1710     }
1711 
1712     @Benchmark
1713     public void ABSMasked(Blackhole bh) {
1714         short[] as = fa.apply(size);
1715         short[] rs = fr.apply(size);
1716         boolean[] ms = fm.apply(size);
1717 
1718         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1719             for (int i = 0; i < as.length; i++) {
1720                 short a = as[i];
1721                 boolean m = ms[i % ms.length];
1722                 rs[i] = (m ? (short)(Math.abs((short)a)) : a);
1723             }
1724         }
1725 
1726         bh.consume(rs);
1727     }
1728     @Benchmark
1729     public void NOT(Blackhole bh) {
1730         short[] as = fa.apply(size);
1731         short[] rs = fr.apply(size);
1732 
1733         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1734             for (int i = 0; i < as.length; i++) {
1735                 short a = as[i];
1736                 rs[i] = (short)(~((short)a));
1737             }
1738         }
1739 
1740         bh.consume(rs);
1741     }
1742 
1743     @Benchmark
1744     public void NOTMasked(Blackhole bh) {
1745         short[] as = fa.apply(size);
1746         short[] rs = fr.apply(size);
1747         boolean[] ms = fm.apply(size);
1748 
1749         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1750             for (int i = 0; i < as.length; i++) {
1751                 short a = as[i];
1752                 boolean m = ms[i % ms.length];
1753                 rs[i] = (m ? (short)(~((short)a)) : a);
1754             }
1755         }
1756 
1757         bh.consume(rs);
1758     }
1759     @Benchmark
1760     public void ZOMO(Blackhole bh) {
1761         short[] as = fa.apply(size);
1762         short[] rs = fr.apply(size);
1763 
1764         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1765             for (int i = 0; i < as.length; i++) {
1766                 short a = as[i];
1767                 rs[i] = (short)((a==0?0:-1));
1768             }
1769         }
1770 
1771         bh.consume(rs);
1772     }
1773 
1774     @Benchmark
1775     public void ZOMOMasked(Blackhole bh) {
1776         short[] as = fa.apply(size);
1777         short[] rs = fr.apply(size);
1778         boolean[] ms = fm.apply(size);
1779 
1780         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1781             for (int i = 0; i < as.length; i++) {
1782                 short a = as[i];
1783                 boolean m = ms[i % ms.length];
1784                 rs[i] = (m ? (short)((a==0?0:-1)) : a);
1785             }
1786         }
1787 
1788         bh.consume(rs);
1789     }
1790     @Benchmark
1791     public void BIT_COUNT(Blackhole bh) {
1792         short[] as = fa.apply(size);
1793         short[] rs = fr.apply(size);
1794 
1795         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1796             for (int i = 0; i < as.length; i++) {
1797                 short a = as[i];
1798                 rs[i] = (short)(Integer.bitCount((int)a & 0xFFFF));
1799             }
1800         }
1801 
1802         bh.consume(rs);
1803     }
1804 
1805     @Benchmark
1806     public void BIT_COUNTMasked(Blackhole bh) {
1807         short[] as = fa.apply(size);
1808         short[] rs = fr.apply(size);
1809         boolean[] ms = fm.apply(size);
1810 
1811         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1812             for (int i = 0; i < as.length; i++) {
1813                 short a = as[i];
1814                 boolean m = ms[i % ms.length];
1815                 rs[i] = (m ? (short)(Integer.bitCount((int)a & 0xFFFF)) : a);
1816             }
1817         }
1818 
1819         bh.consume(rs);
1820     }
1821     @Benchmark
1822     public void TRAILING_ZEROS_COUNT(Blackhole bh) {
1823         short[] as = fa.apply(size);
1824         short[] rs = fr.apply(size);
1825 
1826         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1827             for (int i = 0; i < as.length; i++) {
1828                 short a = as[i];
1829                 rs[i] = (short)(TRAILING_ZEROS_COUNT_scalar(a));
1830             }
1831         }
1832 
1833         bh.consume(rs);
1834     }
1835 
1836     @Benchmark
1837     public void TRAILING_ZEROS_COUNTMasked(Blackhole bh) {
1838         short[] as = fa.apply(size);
1839         short[] rs = fr.apply(size);
1840         boolean[] ms = fm.apply(size);
1841 
1842         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1843             for (int i = 0; i < as.length; i++) {
1844                 short a = as[i];
1845                 boolean m = ms[i % ms.length];
1846                 rs[i] = (m ? (short)(TRAILING_ZEROS_COUNT_scalar(a)) : a);
1847             }
1848         }
1849 
1850         bh.consume(rs);
1851     }
1852     @Benchmark
1853     public void LEADING_ZEROS_COUNT(Blackhole bh) {
1854         short[] as = fa.apply(size);
1855         short[] rs = fr.apply(size);
1856 
1857         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1858             for (int i = 0; i < as.length; i++) {
1859                 short a = as[i];
1860                 rs[i] = (short)(LEADING_ZEROS_COUNT_scalar(a));
1861             }
1862         }
1863 
1864         bh.consume(rs);
1865     }
1866 
1867     @Benchmark
1868     public void LEADING_ZEROS_COUNTMasked(Blackhole bh) {
1869         short[] as = fa.apply(size);
1870         short[] rs = fr.apply(size);
1871         boolean[] ms = fm.apply(size);
1872 
1873         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1874             for (int i = 0; i < as.length; i++) {
1875                 short a = as[i];
1876                 boolean m = ms[i % ms.length];
1877                 rs[i] = (m ? (short)(LEADING_ZEROS_COUNT_scalar(a)) : a);
1878             }
1879         }
1880 
1881         bh.consume(rs);
1882     }
1883     @Benchmark
1884     public void REVERSE(Blackhole bh) {
1885         short[] as = fa.apply(size);
1886         short[] rs = fr.apply(size);
1887 
1888         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1889             for (int i = 0; i < as.length; i++) {
1890                 short a = as[i];
1891                 rs[i] = (short)(REVERSE_scalar(a));
1892             }
1893         }
1894 
1895         bh.consume(rs);
1896     }
1897 
1898     @Benchmark
1899     public void REVERSEMasked(Blackhole bh) {
1900         short[] as = fa.apply(size);
1901         short[] rs = fr.apply(size);
1902         boolean[] ms = fm.apply(size);
1903 
1904         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1905             for (int i = 0; i < as.length; i++) {
1906                 short a = as[i];
1907                 boolean m = ms[i % ms.length];
1908                 rs[i] = (m ? (short)(REVERSE_scalar(a)) : a);
1909             }
1910         }
1911 
1912         bh.consume(rs);
1913     }
1914     @Benchmark
1915     public void REVERSE_BYTES(Blackhole bh) {
1916         short[] as = fa.apply(size);
1917         short[] rs = fr.apply(size);
1918 
1919         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1920             for (int i = 0; i < as.length; i++) {
1921                 short a = as[i];
1922                 rs[i] = (short)(Short.reverseBytes(a));
1923             }
1924         }
1925 
1926         bh.consume(rs);
1927     }
1928 
1929     @Benchmark
1930     public void REVERSE_BYTESMasked(Blackhole bh) {
1931         short[] as = fa.apply(size);
1932         short[] rs = fr.apply(size);
1933         boolean[] ms = fm.apply(size);
1934 
1935         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1936             for (int i = 0; i < as.length; i++) {
1937                 short a = as[i];
1938                 boolean m = ms[i % ms.length];
1939                 rs[i] = (m ? (short)(Short.reverseBytes(a)) : a);
1940             }
1941         }
1942 
1943         bh.consume(rs);
1944     }
1945 }