1 /*
   2  * Copyright (c) 2018, 2022, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 package org.openjdk.bench.jdk.incubator.vector.operation;
  25 
  26 // -- This file was mechanically generated: Do not edit! -- //
  27 
  28 import java.util.concurrent.TimeUnit;
  29 import java.util.function.IntFunction;
  30 
  31 import org.openjdk.jmh.annotations.*;
  32 import org.openjdk.jmh.infra.Blackhole;
  33 
  34 @BenchmarkMode(Mode.Throughput)
  35 @OutputTimeUnit(TimeUnit.MILLISECONDS)
  36 @State(Scope.Benchmark)
  37 @Warmup(iterations = 3, time = 1)
  38 @Measurement(iterations = 5, time = 1)
  39 @Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
  40 public class ShortScalar extends AbstractVectorBenchmark {
  41     static final int INVOC_COUNT = 1; // To align with vector benchmarks.
  42 
  43     private static final short CONST_SHIFT = Short.SIZE / 2;
  44 
  45     @Param("1024")
  46     int size;
  47 
  48     short[] fill(IntFunction<Short> f) {
  49         short[] array = new short[size];
  50         for (int i = 0; i < array.length; i++) {
  51             array[i] = f.apply(i);
  52         }
  53         return array;
  54     }
  55 
  56     static short bits(short e) {
  57         return e;
  58     }
  59 
  60     short[] as, bs, cs, rs;
  61     boolean[] ms, mt, rms;
  62     int[] ss;
  63 
  64     @Setup
  65     public void init() {
  66         as = fill(i -> (short)(2*i));
  67         bs = fill(i -> (short)(i+1));
  68         cs = fill(i -> (short)(i+5));
  69         rs = fill(i -> (short)0);
  70         ms = fillMask(size, i -> (i % 2) == 0);
  71         mt = fillMask(size, i -> true);
  72         rms = fillMask(size, i -> false);
  73 
  74         ss = fillInt(size, i -> RANDOM.nextInt(Math.max(i,1)));
  75     }
  76 
  77     final IntFunction<short[]> fa = vl -> as;
  78     final IntFunction<short[]> fb = vl -> bs;
  79     final IntFunction<short[]> fc = vl -> cs;
  80     final IntFunction<short[]> fr = vl -> rs;
  81     final IntFunction<boolean[]> fm = vl -> ms;
  82     final IntFunction<boolean[]> fmt = vl -> mt;
  83     final IntFunction<boolean[]> fmr = vl -> rms;
  84     final IntFunction<int[]> fs = vl -> ss;
  85 
  86     static boolean eq(short a, short b) {
  87         return a == b;
  88     }
  89 
  90     static boolean neq(short a, short b) {
  91         return a != b;
  92     }
  93 
  94     static boolean lt(short a, short b) {
  95         return a < b;
  96     }
  97 
  98     static boolean le(short a, short b) {
  99         return a <= b;
 100     }
 101 
 102     static boolean gt(short a, short b) {
 103         return a > b;
 104     }
 105 
 106     static boolean ge(short a, short b) {
 107         return a >= b;
 108     }
 109 
 110     static boolean ult(short a, short b) {
 111         return Short.compareUnsigned(a, b) < 0;
 112     }
 113 
 114     static boolean ule(short a, short b) {
 115         return Short.compareUnsigned(a, b) <= 0;
 116     }
 117 
 118     static boolean ugt(short a, short b) {
 119         return Short.compareUnsigned(a, b) > 0;
 120     }
 121 
 122     static boolean uge(short a, short b) {
 123         return Short.compareUnsigned(a, b) >= 0;
 124     }
 125 
 126     static short ROL_scalar(short a, short b) {
 127         return (short)(((((short)a) & 0xFFFF) << (b & 15)) | ((((short)a) & 0xFFFF) >>> (16 - (b & 15))));
 128     }
 129 
 130     static short ROR_scalar(short a, short b) {
 131         return (short)(((((short)a) & 0xFFFF) >>> (b & 15)) | ((((short)a) & 0xFFFF) << (16 - (b & 15))));
 132     }
 133 
 134     static short TRAILING_ZEROS_COUNT_scalar(short a) {
 135         return (short) (a != 0 ? Integer.numberOfTrailingZeros(a) : 16);
 136     }
 137 
 138     static short LEADING_ZEROS_COUNT_scalar(short a) {
 139         return (short) (a >= 0 ? Integer.numberOfLeadingZeros(a) - 16 : 0);
 140     }
 141 
 142     static short REVERSE_scalar(short a) {
 143         short b = ROL_scalar(a, (short) 8);
 144         b = (short)(((b & 0x5555) << 1) | ((b & 0xAAAA) >>> 1));
 145         b = (short)(((b & 0x3333) << 2) | ((b & 0xCCCC) >>> 2));
 146         b = (short)(((b & 0x0F0F) << 4) | ((b & 0xF0F0) >>> 4));
 147         return b;
 148     }
 149 
 150     @Benchmark
 151     public void ADD(Blackhole bh) {
 152         short[] as = fa.apply(size);
 153         short[] bs = fb.apply(size);
 154         short[] rs = fr.apply(size);
 155 
 156         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 157             for (int i = 0; i < as.length; i++) {
 158                 short a = as[i];
 159                 short b = bs[i];
 160                 rs[i] = (short)(a + b);
 161             }
 162         }
 163 
 164         bh.consume(rs);
 165     }
 166 
 167     @Benchmark
 168     public void ADDMasked(Blackhole bh) {
 169         short[] as = fa.apply(size);
 170         short[] bs = fb.apply(size);
 171         short[] rs = fr.apply(size);
 172         boolean[] ms = fm.apply(size);
 173 
 174         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 175             for (int i = 0; i < as.length; i++) {
 176                 short a = as[i];
 177                 short b = bs[i];
 178                 if (ms[i % ms.length]) {
 179                     rs[i] = (short)(a + b);
 180                 } else {
 181                     rs[i] = a;
 182                 }
 183             }
 184         }
 185         bh.consume(rs);
 186     }
 187 
 188     @Benchmark
 189     public void SUB(Blackhole bh) {
 190         short[] as = fa.apply(size);
 191         short[] bs = fb.apply(size);
 192         short[] rs = fr.apply(size);
 193 
 194         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 195             for (int i = 0; i < as.length; i++) {
 196                 short a = as[i];
 197                 short b = bs[i];
 198                 rs[i] = (short)(a - b);
 199             }
 200         }
 201 
 202         bh.consume(rs);
 203     }
 204 
 205     @Benchmark
 206     public void SUBMasked(Blackhole bh) {
 207         short[] as = fa.apply(size);
 208         short[] bs = fb.apply(size);
 209         short[] rs = fr.apply(size);
 210         boolean[] ms = fm.apply(size);
 211 
 212         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 213             for (int i = 0; i < as.length; i++) {
 214                 short a = as[i];
 215                 short b = bs[i];
 216                 if (ms[i % ms.length]) {
 217                     rs[i] = (short)(a - b);
 218                 } else {
 219                     rs[i] = a;
 220                 }
 221             }
 222         }
 223         bh.consume(rs);
 224     }
 225 
 226     @Benchmark
 227     public void MUL(Blackhole bh) {
 228         short[] as = fa.apply(size);
 229         short[] bs = fb.apply(size);
 230         short[] rs = fr.apply(size);
 231 
 232         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 233             for (int i = 0; i < as.length; i++) {
 234                 short a = as[i];
 235                 short b = bs[i];
 236                 rs[i] = (short)(a * b);
 237             }
 238         }
 239 
 240         bh.consume(rs);
 241     }
 242 
 243     @Benchmark
 244     public void MULMasked(Blackhole bh) {
 245         short[] as = fa.apply(size);
 246         short[] bs = fb.apply(size);
 247         short[] rs = fr.apply(size);
 248         boolean[] ms = fm.apply(size);
 249 
 250         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 251             for (int i = 0; i < as.length; i++) {
 252                 short a = as[i];
 253                 short b = bs[i];
 254                 if (ms[i % ms.length]) {
 255                     rs[i] = (short)(a * b);
 256                 } else {
 257                     rs[i] = a;
 258                 }
 259             }
 260         }
 261         bh.consume(rs);
 262     }
 263 
 264     @Benchmark
 265     public void FIRST_NONZERO(Blackhole bh) {
 266         short[] as = fa.apply(size);
 267         short[] bs = fb.apply(size);
 268         short[] rs = fr.apply(size);
 269 
 270         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 271             for (int i = 0; i < as.length; i++) {
 272                 short a = as[i];
 273                 short b = bs[i];
 274                 rs[i] = (short)((a)!=0?a:b);
 275             }
 276         }
 277 
 278         bh.consume(rs);
 279     }
 280 
 281     @Benchmark
 282     public void FIRST_NONZEROMasked(Blackhole bh) {
 283         short[] as = fa.apply(size);
 284         short[] bs = fb.apply(size);
 285         short[] rs = fr.apply(size);
 286         boolean[] ms = fm.apply(size);
 287 
 288         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 289             for (int i = 0; i < as.length; i++) {
 290                 short a = as[i];
 291                 short b = bs[i];
 292                 if (ms[i % ms.length]) {
 293                     rs[i] = (short)((a)!=0?a:b);
 294                 } else {
 295                     rs[i] = a;
 296                 }
 297             }
 298         }
 299         bh.consume(rs);
 300     }
 301 
 302     @Benchmark
 303     public void AND(Blackhole bh) {
 304         short[] as = fa.apply(size);
 305         short[] bs = fb.apply(size);
 306         short[] rs = fr.apply(size);
 307 
 308         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 309             for (int i = 0; i < as.length; i++) {
 310                 short a = as[i];
 311                 short b = bs[i];
 312                 rs[i] = (short)(a & b);
 313             }
 314         }
 315 
 316         bh.consume(rs);
 317     }
 318 
 319     @Benchmark
 320     public void ANDMasked(Blackhole bh) {
 321         short[] as = fa.apply(size);
 322         short[] bs = fb.apply(size);
 323         short[] rs = fr.apply(size);
 324         boolean[] ms = fm.apply(size);
 325 
 326         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 327             for (int i = 0; i < as.length; i++) {
 328                 short a = as[i];
 329                 short b = bs[i];
 330                 if (ms[i % ms.length]) {
 331                     rs[i] = (short)(a & b);
 332                 } else {
 333                     rs[i] = a;
 334                 }
 335             }
 336         }
 337         bh.consume(rs);
 338     }
 339 
 340     @Benchmark
 341     public void AND_NOT(Blackhole bh) {
 342         short[] as = fa.apply(size);
 343         short[] bs = fb.apply(size);
 344         short[] rs = fr.apply(size);
 345 
 346         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 347             for (int i = 0; i < as.length; i++) {
 348                 short a = as[i];
 349                 short b = bs[i];
 350                 rs[i] = (short)(a & ~b);
 351             }
 352         }
 353 
 354         bh.consume(rs);
 355     }
 356 
 357     @Benchmark
 358     public void AND_NOTMasked(Blackhole bh) {
 359         short[] as = fa.apply(size);
 360         short[] bs = fb.apply(size);
 361         short[] rs = fr.apply(size);
 362         boolean[] ms = fm.apply(size);
 363 
 364         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 365             for (int i = 0; i < as.length; i++) {
 366                 short a = as[i];
 367                 short b = bs[i];
 368                 if (ms[i % ms.length]) {
 369                     rs[i] = (short)(a & ~b);
 370                 } else {
 371                     rs[i] = a;
 372                 }
 373             }
 374         }
 375         bh.consume(rs);
 376     }
 377 
 378     @Benchmark
 379     public void OR(Blackhole bh) {
 380         short[] as = fa.apply(size);
 381         short[] bs = fb.apply(size);
 382         short[] rs = fr.apply(size);
 383 
 384         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 385             for (int i = 0; i < as.length; i++) {
 386                 short a = as[i];
 387                 short b = bs[i];
 388                 rs[i] = (short)(a | b);
 389             }
 390         }
 391 
 392         bh.consume(rs);
 393     }
 394 
 395     @Benchmark
 396     public void ORMasked(Blackhole bh) {
 397         short[] as = fa.apply(size);
 398         short[] bs = fb.apply(size);
 399         short[] rs = fr.apply(size);
 400         boolean[] ms = fm.apply(size);
 401 
 402         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 403             for (int i = 0; i < as.length; i++) {
 404                 short a = as[i];
 405                 short b = bs[i];
 406                 if (ms[i % ms.length]) {
 407                     rs[i] = (short)(a | b);
 408                 } else {
 409                     rs[i] = a;
 410                 }
 411             }
 412         }
 413         bh.consume(rs);
 414     }
 415 
 416     @Benchmark
 417     public void XOR(Blackhole bh) {
 418         short[] as = fa.apply(size);
 419         short[] bs = fb.apply(size);
 420         short[] rs = fr.apply(size);
 421 
 422         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 423             for (int i = 0; i < as.length; i++) {
 424                 short a = as[i];
 425                 short b = bs[i];
 426                 rs[i] = (short)(a ^ b);
 427             }
 428         }
 429 
 430         bh.consume(rs);
 431     }
 432 
 433     @Benchmark
 434     public void XORMasked(Blackhole bh) {
 435         short[] as = fa.apply(size);
 436         short[] bs = fb.apply(size);
 437         short[] rs = fr.apply(size);
 438         boolean[] ms = fm.apply(size);
 439 
 440         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 441             for (int i = 0; i < as.length; i++) {
 442                 short a = as[i];
 443                 short b = bs[i];
 444                 if (ms[i % ms.length]) {
 445                     rs[i] = (short)(a ^ b);
 446                 } else {
 447                     rs[i] = a;
 448                 }
 449             }
 450         }
 451         bh.consume(rs);
 452     }
 453 
 454     @Benchmark
 455     public void LSHL(Blackhole bh) {
 456         short[] as = fa.apply(size);
 457         short[] bs = fb.apply(size);
 458         short[] rs = fr.apply(size);
 459 
 460         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 461             for (int i = 0; i < as.length; i++) {
 462                 short a = as[i];
 463                 short b = bs[i];
 464                 rs[i] = (short)((a << (b & 0xF)));
 465             }
 466         }
 467 
 468         bh.consume(rs);
 469     }
 470 
 471     @Benchmark
 472     public void LSHLMasked(Blackhole bh) {
 473         short[] as = fa.apply(size);
 474         short[] bs = fb.apply(size);
 475         short[] rs = fr.apply(size);
 476         boolean[] ms = fm.apply(size);
 477 
 478         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 479             for (int i = 0; i < as.length; i++) {
 480                 short a = as[i];
 481                 short b = bs[i];
 482                 if (ms[i % ms.length]) {
 483                     rs[i] = (short)((a << (b & 0xF)));
 484                 } else {
 485                     rs[i] = a;
 486                 }
 487             }
 488         }
 489         bh.consume(rs);
 490     }
 491 
 492     @Benchmark
 493     public void ASHR(Blackhole bh) {
 494         short[] as = fa.apply(size);
 495         short[] bs = fb.apply(size);
 496         short[] rs = fr.apply(size);
 497 
 498         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 499             for (int i = 0; i < as.length; i++) {
 500                 short a = as[i];
 501                 short b = bs[i];
 502                 rs[i] = (short)((a >> (b & 0xF)));
 503             }
 504         }
 505 
 506         bh.consume(rs);
 507     }
 508 
 509     @Benchmark
 510     public void ASHRMasked(Blackhole bh) {
 511         short[] as = fa.apply(size);
 512         short[] bs = fb.apply(size);
 513         short[] rs = fr.apply(size);
 514         boolean[] ms = fm.apply(size);
 515 
 516         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 517             for (int i = 0; i < as.length; i++) {
 518                 short a = as[i];
 519                 short b = bs[i];
 520                 if (ms[i % ms.length]) {
 521                     rs[i] = (short)((a >> (b & 0xF)));
 522                 } else {
 523                     rs[i] = a;
 524                 }
 525             }
 526         }
 527         bh.consume(rs);
 528     }
 529 
 530     @Benchmark
 531     public void LSHR(Blackhole bh) {
 532         short[] as = fa.apply(size);
 533         short[] bs = fb.apply(size);
 534         short[] rs = fr.apply(size);
 535 
 536         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 537             for (int i = 0; i < as.length; i++) {
 538                 short a = as[i];
 539                 short b = bs[i];
 540                 rs[i] = (short)(((a & 0xFFFF) >>> (b & 0xF)));
 541             }
 542         }
 543 
 544         bh.consume(rs);
 545     }
 546 
 547     @Benchmark
 548     public void LSHRMasked(Blackhole bh) {
 549         short[] as = fa.apply(size);
 550         short[] bs = fb.apply(size);
 551         short[] rs = fr.apply(size);
 552         boolean[] ms = fm.apply(size);
 553 
 554         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 555             for (int i = 0; i < as.length; i++) {
 556                 short a = as[i];
 557                 short b = bs[i];
 558                 if (ms[i % ms.length]) {
 559                     rs[i] = (short)(((a & 0xFFFF) >>> (b & 0xF)));
 560                 } else {
 561                     rs[i] = a;
 562                 }
 563             }
 564         }
 565         bh.consume(rs);
 566     }
 567 
 568     @Benchmark
 569     public void LSHLShift(Blackhole bh) {
 570         short[] as = fa.apply(size);
 571         short[] bs = fb.apply(size);
 572         short[] rs = fr.apply(size);
 573 
 574         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 575             for (int i = 0; i < as.length; i++) {
 576                 short a = as[i];
 577                 short b = bs[i];
 578                 rs[i] = (short)((a << (b & 15)));
 579             }
 580         }
 581 
 582         bh.consume(rs);
 583     }
 584 
 585     @Benchmark
 586     public void LSHLMaskedShift(Blackhole bh) {
 587         short[] as = fa.apply(size);
 588         short[] bs = fb.apply(size);
 589         short[] rs = fr.apply(size);
 590         boolean[] ms = fm.apply(size);
 591 
 592         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 593             for (int i = 0; i < as.length; i++) {
 594                 short a = as[i];
 595                 short b = bs[i];
 596                 boolean m = ms[i % ms.length];
 597                 rs[i] = (m ? (short)((a << (b & 15))) : a);
 598             }
 599         }
 600 
 601         bh.consume(rs);
 602     }
 603 
 604     @Benchmark
 605     public void LSHRShift(Blackhole bh) {
 606         short[] as = fa.apply(size);
 607         short[] bs = fb.apply(size);
 608         short[] rs = fr.apply(size);
 609 
 610         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 611             for (int i = 0; i < as.length; i++) {
 612                 short a = as[i];
 613                 short b = bs[i];
 614                 rs[i] = (short)(((a & 0xFFFF) >>> (b & 15)));
 615             }
 616         }
 617 
 618         bh.consume(rs);
 619     }
 620 
 621     @Benchmark
 622     public void LSHRMaskedShift(Blackhole bh) {
 623         short[] as = fa.apply(size);
 624         short[] bs = fb.apply(size);
 625         short[] rs = fr.apply(size);
 626         boolean[] ms = fm.apply(size);
 627 
 628         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 629             for (int i = 0; i < as.length; i++) {
 630                 short a = as[i];
 631                 short b = bs[i];
 632                 boolean m = ms[i % ms.length];
 633                 rs[i] = (m ? (short)(((a & 0xFFFF) >>> (b & 15))) : a);
 634             }
 635         }
 636 
 637         bh.consume(rs);
 638     }
 639 
 640     @Benchmark
 641     public void ASHRShift(Blackhole bh) {
 642         short[] as = fa.apply(size);
 643         short[] bs = fb.apply(size);
 644         short[] rs = fr.apply(size);
 645 
 646         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 647             for (int i = 0; i < as.length; i++) {
 648                 short a = as[i];
 649                 short b = bs[i];
 650                 rs[i] = (short)((a >> (b & 15)));
 651             }
 652         }
 653 
 654         bh.consume(rs);
 655     }
 656 
 657     @Benchmark
 658     public void ASHRMaskedShift(Blackhole bh) {
 659         short[] as = fa.apply(size);
 660         short[] bs = fb.apply(size);
 661         short[] rs = fr.apply(size);
 662         boolean[] ms = fm.apply(size);
 663 
 664         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 665             for (int i = 0; i < as.length; i++) {
 666                 short a = as[i];
 667                 short b = bs[i];
 668                 boolean m = ms[i % ms.length];
 669                 rs[i] = (m ? (short)((a >> (b & 15))) : a);
 670             }
 671         }
 672 
 673         bh.consume(rs);
 674     }
 675 
 676     @Benchmark
 677     public void ROR(Blackhole bh) {
 678         short[] as = fa.apply(size);
 679         short[] bs = fb.apply(size);
 680         short[] rs = fr.apply(size);
 681 
 682         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 683             for (int i = 0; i < as.length; i++) {
 684                 short a = as[i];
 685                 short b = bs[i];
 686                 rs[i] = (short)(ROR_scalar(a,b));
 687             }
 688         }
 689 
 690         bh.consume(rs);
 691     }
 692 
 693     @Benchmark
 694     public void RORMasked(Blackhole bh) {
 695         short[] as = fa.apply(size);
 696         short[] bs = fb.apply(size);
 697         short[] rs = fr.apply(size);
 698         boolean[] ms = fm.apply(size);
 699 
 700         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 701             for (int i = 0; i < as.length; i++) {
 702                 short a = as[i];
 703                 short b = bs[i];
 704                 if (ms[i % ms.length]) {
 705                     rs[i] = (short)(ROR_scalar(a,b));
 706                 } else {
 707                     rs[i] = a;
 708                 }
 709             }
 710         }
 711         bh.consume(rs);
 712     }
 713 
 714     @Benchmark
 715     public void ROL(Blackhole bh) {
 716         short[] as = fa.apply(size);
 717         short[] bs = fb.apply(size);
 718         short[] rs = fr.apply(size);
 719 
 720         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 721             for (int i = 0; i < as.length; i++) {
 722                 short a = as[i];
 723                 short b = bs[i];
 724                 rs[i] = (short)(ROL_scalar(a,b));
 725             }
 726         }
 727 
 728         bh.consume(rs);
 729     }
 730 
 731     @Benchmark
 732     public void ROLMasked(Blackhole bh) {
 733         short[] as = fa.apply(size);
 734         short[] bs = fb.apply(size);
 735         short[] rs = fr.apply(size);
 736         boolean[] ms = fm.apply(size);
 737 
 738         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 739             for (int i = 0; i < as.length; i++) {
 740                 short a = as[i];
 741                 short b = bs[i];
 742                 if (ms[i % ms.length]) {
 743                     rs[i] = (short)(ROL_scalar(a,b));
 744                 } else {
 745                     rs[i] = a;
 746                 }
 747             }
 748         }
 749         bh.consume(rs);
 750     }
 751 
 752     @Benchmark
 753     public void RORShift(Blackhole bh) {
 754         short[] as = fa.apply(size);
 755         short[] bs = fb.apply(size);
 756         short[] rs = fr.apply(size);
 757 
 758         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 759             for (int i = 0; i < as.length; i++) {
 760                 short a = as[i];
 761                 short b = bs[i];
 762                 rs[i] = (short)(ROR_scalar(a, b));
 763             }
 764         }
 765 
 766         bh.consume(rs);
 767     }
 768 
 769     @Benchmark
 770     public void RORMaskedShift(Blackhole bh) {
 771         short[] as = fa.apply(size);
 772         short[] bs = fb.apply(size);
 773         short[] rs = fr.apply(size);
 774         boolean[] ms = fm.apply(size);
 775 
 776         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 777             for (int i = 0; i < as.length; i++) {
 778                 short a = as[i];
 779                 short b = bs[i];
 780                 boolean m = ms[i % ms.length];
 781                 rs[i] = (m ? (short)(ROR_scalar(a, b)) : a);
 782             }
 783         }
 784 
 785         bh.consume(rs);
 786     }
 787 
 788     @Benchmark
 789     public void ROLShift(Blackhole bh) {
 790         short[] as = fa.apply(size);
 791         short[] bs = fb.apply(size);
 792         short[] rs = fr.apply(size);
 793 
 794         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 795             for (int i = 0; i < as.length; i++) {
 796                 short a = as[i];
 797                 short b = bs[i];
 798                 rs[i] = (short)(ROL_scalar(a, b));
 799             }
 800         }
 801 
 802         bh.consume(rs);
 803     }
 804 
 805     @Benchmark
 806     public void ROLMaskedShift(Blackhole bh) {
 807         short[] as = fa.apply(size);
 808         short[] bs = fb.apply(size);
 809         short[] rs = fr.apply(size);
 810         boolean[] ms = fm.apply(size);
 811 
 812         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 813             for (int i = 0; i < as.length; i++) {
 814                 short a = as[i];
 815                 short b = bs[i];
 816                 boolean m = ms[i % ms.length];
 817                 rs[i] = (m ? (short)(ROL_scalar(a, b)) : a);
 818             }
 819         }
 820 
 821         bh.consume(rs);
 822     }
 823 
 824     @Benchmark
 825     public void LSHRShiftConst(Blackhole bh) {
 826         short[] as = fa.apply(size);
 827         short[] bs = fb.apply(size);
 828         short[] rs = fr.apply(size);
 829 
 830         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 831             for (int i = 0; i < as.length; i++) {
 832                 short a = as[i];
 833                 short b = bs[i];
 834                 rs[i] = (short)(((a & 0xFFFF) >>> CONST_SHIFT));
 835             }
 836         }
 837 
 838         bh.consume(rs);
 839     }
 840 
 841     @Benchmark
 842     public void LSHRMaskedShiftConst(Blackhole bh) {
 843         short[] as = fa.apply(size);
 844         short[] bs = fb.apply(size);
 845         short[] rs = fr.apply(size);
 846         boolean[] ms = fm.apply(size);
 847 
 848         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 849             for (int i = 0; i < as.length; i++) {
 850                 short a = as[i];
 851                 short b = bs[i];
 852                 boolean m = ms[i % ms.length];
 853                 rs[i] = (m ? (short)(((a & 0xFFFF) >>> CONST_SHIFT)) : a);
 854             }
 855         }
 856 
 857         bh.consume(rs);
 858     }
 859 
 860     @Benchmark
 861     public void LSHLShiftConst(Blackhole bh) {
 862         short[] as = fa.apply(size);
 863         short[] bs = fb.apply(size);
 864         short[] rs = fr.apply(size);
 865 
 866         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 867             for (int i = 0; i < as.length; i++) {
 868                 short a = as[i];
 869                 short b = bs[i];
 870                 rs[i] = (short)((a << CONST_SHIFT));
 871             }
 872         }
 873 
 874         bh.consume(rs);
 875     }
 876 
 877     @Benchmark
 878     public void LSHLMaskedShiftConst(Blackhole bh) {
 879         short[] as = fa.apply(size);
 880         short[] bs = fb.apply(size);
 881         short[] rs = fr.apply(size);
 882         boolean[] ms = fm.apply(size);
 883 
 884         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 885             for (int i = 0; i < as.length; i++) {
 886                 short a = as[i];
 887                 short b = bs[i];
 888                 boolean m = ms[i % ms.length];
 889                 rs[i] = (m ? (short)((a << CONST_SHIFT)) : a);
 890             }
 891         }
 892 
 893         bh.consume(rs);
 894     }
 895 
 896     @Benchmark
 897     public void ASHRShiftConst(Blackhole bh) {
 898         short[] as = fa.apply(size);
 899         short[] bs = fb.apply(size);
 900         short[] rs = fr.apply(size);
 901 
 902         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 903             for (int i = 0; i < as.length; i++) {
 904                 short a = as[i];
 905                 short b = bs[i];
 906                 rs[i] = (short)((a >> CONST_SHIFT));
 907             }
 908         }
 909 
 910         bh.consume(rs);
 911     }
 912 
 913     @Benchmark
 914     public void ASHRMaskedShiftConst(Blackhole bh) {
 915         short[] as = fa.apply(size);
 916         short[] bs = fb.apply(size);
 917         short[] rs = fr.apply(size);
 918         boolean[] ms = fm.apply(size);
 919 
 920         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 921             for (int i = 0; i < as.length; i++) {
 922                 short a = as[i];
 923                 short b = bs[i];
 924                 boolean m = ms[i % ms.length];
 925                 rs[i] = (m ? (short)((a >> CONST_SHIFT)) : a);
 926             }
 927         }
 928 
 929         bh.consume(rs);
 930     }
 931 
 932     @Benchmark
 933     public void RORShiftConst(Blackhole bh) {
 934         short[] as = fa.apply(size);
 935         short[] bs = fb.apply(size);
 936         short[] rs = fr.apply(size);
 937 
 938         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 939             for (int i = 0; i < as.length; i++) {
 940                 short a = as[i];
 941                 short b = bs[i];
 942                 rs[i] = (short)(ROR_scalar(a, CONST_SHIFT));
 943             }
 944         }
 945 
 946         bh.consume(rs);
 947     }
 948 
 949     @Benchmark
 950     public void RORMaskedShiftConst(Blackhole bh) {
 951         short[] as = fa.apply(size);
 952         short[] bs = fb.apply(size);
 953         short[] rs = fr.apply(size);
 954         boolean[] ms = fm.apply(size);
 955 
 956         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 957             for (int i = 0; i < as.length; i++) {
 958                 short a = as[i];
 959                 short b = bs[i];
 960                 boolean m = ms[i % ms.length];
 961                 rs[i] = (m ? (short)(ROR_scalar(a, CONST_SHIFT)) : a);
 962             }
 963         }
 964 
 965         bh.consume(rs);
 966     }
 967 
 968     @Benchmark
 969     public void ROLShiftConst(Blackhole bh) {
 970         short[] as = fa.apply(size);
 971         short[] bs = fb.apply(size);
 972         short[] rs = fr.apply(size);
 973 
 974         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 975             for (int i = 0; i < as.length; i++) {
 976                 short a = as[i];
 977                 short b = bs[i];
 978                 rs[i] = (short)(ROL_scalar(a, CONST_SHIFT));
 979             }
 980         }
 981 
 982         bh.consume(rs);
 983     }
 984 
 985     @Benchmark
 986     public void ROLMaskedShiftConst(Blackhole bh) {
 987         short[] as = fa.apply(size);
 988         short[] bs = fb.apply(size);
 989         short[] rs = fr.apply(size);
 990         boolean[] ms = fm.apply(size);
 991 
 992         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 993             for (int i = 0; i < as.length; i++) {
 994                 short a = as[i];
 995                 short b = bs[i];
 996                 boolean m = ms[i % ms.length];
 997                 rs[i] = (m ? (short)(ROL_scalar(a, CONST_SHIFT)) : a);
 998             }
 999         }
1000 
1001         bh.consume(rs);
1002     }
1003 
1004     @Benchmark
1005     public void MIN(Blackhole bh) {
1006         short[] as = fa.apply(size);
1007         short[] bs = fb.apply(size);
1008         short[] rs = fr.apply(size);
1009 
1010         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1011             for (int i = 0; i < as.length; i++) {
1012                 short a = as[i];
1013                 short b = bs[i];
1014                 rs[i] = (short)(Math.min(a, b));
1015             }
1016         }
1017 
1018         bh.consume(rs);
1019     }
1020 
1021     @Benchmark
1022     public void MAX(Blackhole bh) {
1023         short[] as = fa.apply(size);
1024         short[] bs = fb.apply(size);
1025         short[] rs = fr.apply(size);
1026 
1027         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1028             for (int i = 0; i < as.length; i++) {
1029                 short a = as[i];
1030                 short b = bs[i];
1031                 rs[i] = (short)(Math.max(a, b));
1032             }
1033         }
1034 
1035         bh.consume(rs);
1036     }
1037 
1038     @Benchmark
1039     public void ANDLanes(Blackhole bh) {
1040         short[] as = fa.apply(size);
1041         short r = -1;
1042         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1043             r = -1;
1044             for (int i = 0; i < as.length; i++) {
1045                 r &= as[i];
1046             }
1047         }
1048         bh.consume(r);
1049     }
1050 
1051     @Benchmark
1052     public void ANDMaskedLanes(Blackhole bh) {
1053         short[] as = fa.apply(size);
1054         boolean[] ms = fm.apply(size);
1055         short r = -1;
1056         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1057             r = -1;
1058             for (int i = 0; i < as.length; i++) {
1059                 if (ms[i % ms.length])
1060                     r &= as[i];
1061             }
1062         }
1063         bh.consume(r);
1064     }
1065 
1066     @Benchmark
1067     public void ORLanes(Blackhole bh) {
1068         short[] as = fa.apply(size);
1069         short r = 0;
1070         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1071             r = 0;
1072             for (int i = 0; i < as.length; i++) {
1073                 r |= as[i];
1074             }
1075         }
1076         bh.consume(r);
1077     }
1078 
1079     @Benchmark
1080     public void ORMaskedLanes(Blackhole bh) {
1081         short[] as = fa.apply(size);
1082         boolean[] ms = fm.apply(size);
1083         short r = 0;
1084         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1085             r = 0;
1086             for (int i = 0; i < as.length; i++) {
1087                 if (ms[i % ms.length])
1088                     r |= as[i];
1089             }
1090         }
1091         bh.consume(r);
1092     }
1093 
1094     @Benchmark
1095     public void XORLanes(Blackhole bh) {
1096         short[] as = fa.apply(size);
1097         short r = 0;
1098         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1099             r = 0;
1100             for (int i = 0; i < as.length; i++) {
1101                 r ^= as[i];
1102             }
1103         }
1104         bh.consume(r);
1105     }
1106 
1107     @Benchmark
1108     public void XORMaskedLanes(Blackhole bh) {
1109         short[] as = fa.apply(size);
1110         boolean[] ms = fm.apply(size);
1111         short r = 0;
1112         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1113             r = 0;
1114             for (int i = 0; i < as.length; i++) {
1115                 if (ms[i % ms.length])
1116                     r ^= as[i];
1117             }
1118         }
1119         bh.consume(r);
1120     }
1121 
1122     @Benchmark
1123     public void ADDLanes(Blackhole bh) {
1124         short[] as = fa.apply(size);
1125         short r = 0;
1126         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1127             r = 0;
1128             for (int i = 0; i < as.length; i++) {
1129                 r += as[i];
1130             }
1131         }
1132         bh.consume(r);
1133     }
1134 
1135     @Benchmark
1136     public void ADDMaskedLanes(Blackhole bh) {
1137         short[] as = fa.apply(size);
1138         boolean[] ms = fm.apply(size);
1139         short r = 0;
1140         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1141             r = 0;
1142             for (int i = 0; i < as.length; i++) {
1143                 if (ms[i % ms.length])
1144                     r += as[i];
1145             }
1146         }
1147         bh.consume(r);
1148     }
1149 
1150     @Benchmark
1151     public void MULLanes(Blackhole bh) {
1152         short[] as = fa.apply(size);
1153         short r = 1;
1154         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1155             r = 1;
1156             for (int i = 0; i < as.length; i++) {
1157                 r *= as[i];
1158             }
1159         }
1160         bh.consume(r);
1161     }
1162 
1163     @Benchmark
1164     public void MULMaskedLanes(Blackhole bh) {
1165         short[] as = fa.apply(size);
1166         boolean[] ms = fm.apply(size);
1167         short r = 1;
1168         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1169             r = 1;
1170             for (int i = 0; i < as.length; i++) {
1171                 if (ms[i % ms.length])
1172                     r *= as[i];
1173             }
1174         }
1175         bh.consume(r);
1176     }
1177 
1178     @Benchmark
1179     public void anyTrue(Blackhole bh) {
1180         boolean[] ms = fm.apply(size);
1181         boolean r = false;
1182         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1183             r = false;
1184             for (int i = 0; i < ms.length; i++) {
1185                 r |= ms[i];
1186             }
1187         }
1188         bh.consume(r);
1189     }
1190 
1191     @Benchmark
1192     public void allTrue(Blackhole bh) {
1193         boolean[] ms = fm.apply(size);
1194         boolean r = true;
1195         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1196             r = true;
1197             for (int i = 0; i < ms.length; i++) {
1198                 r &= ms[i];
1199             }
1200         }
1201         bh.consume(r);
1202     }
1203 
1204     @Benchmark
1205     public void IS_DEFAULT(Blackhole bh) {
1206         short[] as = fa.apply(size);
1207         boolean r = true;
1208 
1209         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1210             for (int i = 0; i < as.length; i++) {
1211                 short a = as[i];
1212                 r &= (bits(a)==0); // accumulate so JIT can't eliminate the computation
1213             }
1214         }
1215 
1216         bh.consume(r);
1217     }
1218 
1219     @Benchmark
1220     public void IS_NEGATIVE(Blackhole bh) {
1221         short[] as = fa.apply(size);
1222         boolean r = true;
1223 
1224         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1225             for (int i = 0; i < as.length; i++) {
1226                 short a = as[i];
1227                 r &= (bits(a)<0); // accumulate so JIT can't eliminate the computation
1228             }
1229         }
1230 
1231         bh.consume(r);
1232     }
1233 
1234     @Benchmark
1235     public void LT(Blackhole bh) {
1236         short[] as = fa.apply(size);
1237         short[] bs = fb.apply(size);
1238         boolean r = true;
1239 
1240         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1241             for (int i = 0; i < as.length; i++) {
1242                 r &= lt(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1243             }
1244         }
1245 
1246         bh.consume(r);
1247     }
1248 
1249     @Benchmark
1250     public void GT(Blackhole bh) {
1251         short[] as = fa.apply(size);
1252         short[] bs = fb.apply(size);
1253         boolean r = true;
1254 
1255         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1256             for (int i = 0; i < as.length; i++) {
1257                 r &= gt(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1258             }
1259         }
1260 
1261         bh.consume(r);
1262     }
1263 
1264     @Benchmark
1265     public void EQ(Blackhole bh) {
1266         short[] as = fa.apply(size);
1267         short[] bs = fb.apply(size);
1268         boolean r = true;
1269 
1270         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1271             for (int i = 0; i < as.length; i++) {
1272                 r &= eq(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1273             }
1274         }
1275 
1276         bh.consume(r);
1277     }
1278 
1279     @Benchmark
1280     public void NE(Blackhole bh) {
1281         short[] as = fa.apply(size);
1282         short[] bs = fb.apply(size);
1283         boolean r = true;
1284 
1285         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1286             for (int i = 0; i < as.length; i++) {
1287                 r &= neq(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1288             }
1289         }
1290 
1291         bh.consume(r);
1292     }
1293 
1294     @Benchmark
1295     public void LE(Blackhole bh) {
1296         short[] as = fa.apply(size);
1297         short[] bs = fb.apply(size);
1298         boolean r = true;
1299 
1300         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1301             for (int i = 0; i < as.length; i++) {
1302                 r &= le(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1303             }
1304         }
1305 
1306         bh.consume(r);
1307     }
1308 
1309     @Benchmark
1310     public void GE(Blackhole bh) {
1311         short[] as = fa.apply(size);
1312         short[] bs = fb.apply(size);
1313         boolean r = true;
1314 
1315         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1316             for (int i = 0; i < as.length; i++) {
1317                 r &= ge(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1318             }
1319         }
1320 
1321         bh.consume(r);
1322     }
1323 
1324     @Benchmark
1325     public void UNSIGNED_LT(Blackhole bh) {
1326         short[] as = fa.apply(size);
1327         short[] bs = fb.apply(size);
1328         boolean r = true;
1329 
1330         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1331             for (int i = 0; i < as.length; i++) {
1332                 r &= ult(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1333             }
1334         }
1335 
1336         bh.consume(r);
1337     }
1338 
1339     @Benchmark
1340     public void UNSIGNED_GT(Blackhole bh) {
1341         short[] as = fa.apply(size);
1342         short[] bs = fb.apply(size);
1343         boolean r = true;
1344 
1345         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1346             for (int i = 0; i < as.length; i++) {
1347                 r &= ugt(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1348             }
1349         }
1350 
1351         bh.consume(r);
1352     }
1353 
1354     @Benchmark
1355     public void UNSIGNED_LE(Blackhole bh) {
1356         short[] as = fa.apply(size);
1357         short[] bs = fb.apply(size);
1358         boolean r = true;
1359 
1360         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1361             for (int i = 0; i < as.length; i++) {
1362                 r &= ule(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1363             }
1364         }
1365 
1366         bh.consume(r);
1367     }
1368 
1369     @Benchmark
1370     public void UNSIGNED_GE(Blackhole bh) {
1371         short[] as = fa.apply(size);
1372         short[] bs = fb.apply(size);
1373         boolean r = true;
1374 
1375         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1376             for (int i = 0; i < as.length; i++) {
1377                 r &= uge(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1378             }
1379         }
1380 
1381         bh.consume(r);
1382     }
1383 
1384     @Benchmark
1385     public void blend(Blackhole bh) {
1386         short[] as = fa.apply(size);
1387         short[] bs = fb.apply(size);
1388         short[] rs = fr.apply(size);
1389         boolean[] ms = fm.apply(size);
1390 
1391         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1392             for (int i = 0; i < as.length; i++) {
1393                 short a = as[i];
1394                 short b = bs[i];
1395                 boolean m = ms[i % ms.length];
1396                 rs[i] = (m ? b : a);
1397             }
1398         }
1399 
1400         bh.consume(rs);
1401     }
1402 
1403     void rearrangeShared(int window, Blackhole bh) {
1404         short[] as = fa.apply(size);
1405         int[] order = fs.apply(size);
1406         short[] rs = fr.apply(size);
1407 
1408         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1409             for (int i = 0; i < as.length; i += window) {
1410                 for (int j = 0; j < window; j++) {
1411                     short a = as[i+j];
1412                     int pos = order[j];
1413                     rs[i + pos] = a;
1414                 }
1415             }
1416         }
1417 
1418         bh.consume(rs);
1419     }
1420 
1421     @Benchmark
1422     public void rearrange064(Blackhole bh) {
1423         int window = 64 / Short.SIZE;
1424         rearrangeShared(window, bh);
1425     }
1426 
1427     @Benchmark
1428     public void rearrange128(Blackhole bh) {
1429         int window = 128 / Short.SIZE;
1430         rearrangeShared(window, bh);
1431     }
1432 
1433     @Benchmark
1434     public void rearrange256(Blackhole bh) {
1435         int window = 256 / Short.SIZE;
1436         rearrangeShared(window, bh);
1437     }
1438 
1439     @Benchmark
1440     public void rearrange512(Blackhole bh) {
1441         int window = 512 / Short.SIZE;
1442         rearrangeShared(window, bh);
1443     }
1444 
1445     @Benchmark
1446     public void compressScalar(Blackhole bh) {
1447         short[] as = fa.apply(size);
1448         short[] rs = new short[size];
1449         boolean[] im = fmt.apply(size);
1450 
1451         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1452             for (int i = 0, j = 0; i < as.length; i++) {
1453                 if (im[i]) {
1454                     rs[j++] = as[i];
1455                 }
1456             }
1457         }
1458 
1459         bh.consume(rs);
1460     }
1461 
1462     @Benchmark
1463     public void expandScalar(Blackhole bh) {
1464         short[] as = fa.apply(size);
1465         short[] rs = new short[size];
1466         boolean[] im = fmt.apply(size);
1467 
1468         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1469             for (int i = 0, j = 0; i < as.length; i++) {
1470                 if (im[i]) {
1471                     rs[i++] = as[j++];
1472                 }
1473             }
1474         }
1475 
1476         bh.consume(rs);
1477     }
1478 
1479     @Benchmark
1480     public void maskCompressScalar(Blackhole bh) {
1481         boolean[] im = fmt.apply(size);
1482         boolean[] rm = new boolean[size];
1483 
1484         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1485             for (int i = 0, j = 0; i < im.length; i++) {
1486                 if (im[i]) {
1487                     rm[j++] = im[i];
1488                 }
1489             }
1490         }
1491 
1492         bh.consume(rm);
1493     }
1494 
1495     void broadcastShared(int window, Blackhole bh) {
1496         short[] as = fa.apply(size);
1497         short[] rs = fr.apply(size);
1498 
1499         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1500             for (int i = 0; i < as.length; i += window) {
1501                 int idx = i;
1502                 for (int j = 0; j < window; j++) {
1503                     rs[j] = as[idx];
1504                 }
1505             }
1506         }
1507 
1508         bh.consume(rs);
1509     }
1510 
1511     @Benchmark
1512     public void broadcast064(Blackhole bh) {
1513         int window = 64 / Short.SIZE;
1514         broadcastShared(window, bh);
1515     }
1516 
1517     @Benchmark
1518     public void broadcast128(Blackhole bh) {
1519         int window = 128 / Short.SIZE;
1520         broadcastShared(window, bh);
1521     }
1522 
1523     @Benchmark
1524     public void broadcast256(Blackhole bh) {
1525         int window = 256 / Short.SIZE;
1526         broadcastShared(window, bh);
1527     }
1528 
1529     @Benchmark
1530     public void broadcast512(Blackhole bh) {
1531         int window = 512 / Short.SIZE;
1532         broadcastShared(window, bh);
1533     }
1534 
1535     @Benchmark
1536     public void zero(Blackhole bh) {
1537         short[] as = fa.apply(size);
1538 
1539         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1540             for (int i = 0; i < as.length; i++) {
1541                 as[i] = (short)0;
1542             }
1543         }
1544 
1545         bh.consume(as);
1546     }
1547 
1548     @Benchmark
1549     public void BITWISE_BLEND(Blackhole bh) {
1550         short[] as = fa.apply(size);
1551         short[] bs = fb.apply(size);
1552         short[] cs = fc.apply(size);
1553         short[] rs = fr.apply(size);
1554 
1555         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1556             for (int i = 0; i < as.length; i++) {
1557                 short a = as[i];
1558                 short b = bs[i];
1559                 short c = cs[i];
1560                 rs[i] = (short)((a&~(c))|(b&c));
1561             }
1562         }
1563 
1564         bh.consume(rs);
1565     }
1566 
1567     @Benchmark
1568     public void BITWISE_BLENDMasked(Blackhole bh) {
1569         short[] as = fa.apply(size);
1570         short[] bs = fb.apply(size);
1571         short[] cs = fc.apply(size);
1572         short[] rs = fr.apply(size);
1573         boolean[] ms = fm.apply(size);
1574 
1575         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1576             for (int i = 0; i < as.length; i++) {
1577                 short a = as[i];
1578                 short b = bs[i];
1579                 short c = cs[i];
1580                 if (ms[i % ms.length]) {
1581                     rs[i] = (short)((a&~(c))|(b&c));
1582                 } else {
1583                     rs[i] = a;
1584                 }
1585             }
1586         }
1587         bh.consume(rs);
1588     }
1589     @Benchmark
1590     public void NEG(Blackhole bh) {
1591         short[] as = fa.apply(size);
1592         short[] rs = fr.apply(size);
1593 
1594         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1595             for (int i = 0; i < as.length; i++) {
1596                 short a = as[i];
1597                 rs[i] = (short)(-((short)a));
1598             }
1599         }
1600 
1601         bh.consume(rs);
1602     }
1603 
1604     @Benchmark
1605     public void NEGMasked(Blackhole bh) {
1606         short[] as = fa.apply(size);
1607         short[] rs = fr.apply(size);
1608         boolean[] ms = fm.apply(size);
1609 
1610         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1611             for (int i = 0; i < as.length; i++) {
1612                 short a = as[i];
1613                 boolean m = ms[i % ms.length];
1614                 rs[i] = (m ? (short)(-((short)a)) : a);
1615             }
1616         }
1617 
1618         bh.consume(rs);
1619     }
1620     @Benchmark
1621     public void ABS(Blackhole bh) {
1622         short[] as = fa.apply(size);
1623         short[] rs = fr.apply(size);
1624 
1625         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1626             for (int i = 0; i < as.length; i++) {
1627                 short a = as[i];
1628                 rs[i] = (short)(Math.abs((short)a));
1629             }
1630         }
1631 
1632         bh.consume(rs);
1633     }
1634 
1635     @Benchmark
1636     public void ABSMasked(Blackhole bh) {
1637         short[] as = fa.apply(size);
1638         short[] rs = fr.apply(size);
1639         boolean[] ms = fm.apply(size);
1640 
1641         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1642             for (int i = 0; i < as.length; i++) {
1643                 short a = as[i];
1644                 boolean m = ms[i % ms.length];
1645                 rs[i] = (m ? (short)(Math.abs((short)a)) : a);
1646             }
1647         }
1648 
1649         bh.consume(rs);
1650     }
1651     @Benchmark
1652     public void NOT(Blackhole bh) {
1653         short[] as = fa.apply(size);
1654         short[] rs = fr.apply(size);
1655 
1656         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1657             for (int i = 0; i < as.length; i++) {
1658                 short a = as[i];
1659                 rs[i] = (short)(~((short)a));
1660             }
1661         }
1662 
1663         bh.consume(rs);
1664     }
1665 
1666     @Benchmark
1667     public void NOTMasked(Blackhole bh) {
1668         short[] as = fa.apply(size);
1669         short[] rs = fr.apply(size);
1670         boolean[] ms = fm.apply(size);
1671 
1672         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1673             for (int i = 0; i < as.length; i++) {
1674                 short a = as[i];
1675                 boolean m = ms[i % ms.length];
1676                 rs[i] = (m ? (short)(~((short)a)) : a);
1677             }
1678         }
1679 
1680         bh.consume(rs);
1681     }
1682     @Benchmark
1683     public void ZOMO(Blackhole bh) {
1684         short[] as = fa.apply(size);
1685         short[] rs = fr.apply(size);
1686 
1687         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1688             for (int i = 0; i < as.length; i++) {
1689                 short a = as[i];
1690                 rs[i] = (short)((a==0?0:-1));
1691             }
1692         }
1693 
1694         bh.consume(rs);
1695     }
1696 
1697     @Benchmark
1698     public void ZOMOMasked(Blackhole bh) {
1699         short[] as = fa.apply(size);
1700         short[] rs = fr.apply(size);
1701         boolean[] ms = fm.apply(size);
1702 
1703         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1704             for (int i = 0; i < as.length; i++) {
1705                 short a = as[i];
1706                 boolean m = ms[i % ms.length];
1707                 rs[i] = (m ? (short)((a==0?0:-1)) : a);
1708             }
1709         }
1710 
1711         bh.consume(rs);
1712     }
1713     @Benchmark
1714     public void BIT_COUNT(Blackhole bh) {
1715         short[] as = fa.apply(size);
1716         short[] rs = fr.apply(size);
1717 
1718         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1719             for (int i = 0; i < as.length; i++) {
1720                 short a = as[i];
1721                 rs[i] = (short)(Integer.bitCount((int)a & 0xFFFF));
1722             }
1723         }
1724 
1725         bh.consume(rs);
1726     }
1727 
1728     @Benchmark
1729     public void BIT_COUNTMasked(Blackhole bh) {
1730         short[] as = fa.apply(size);
1731         short[] rs = fr.apply(size);
1732         boolean[] ms = fm.apply(size);
1733 
1734         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1735             for (int i = 0; i < as.length; i++) {
1736                 short a = as[i];
1737                 boolean m = ms[i % ms.length];
1738                 rs[i] = (m ? (short)(Integer.bitCount((int)a & 0xFFFF)) : a);
1739             }
1740         }
1741 
1742         bh.consume(rs);
1743     }
1744     @Benchmark
1745     public void TRAILING_ZEROS_COUNT(Blackhole bh) {
1746         short[] as = fa.apply(size);
1747         short[] rs = fr.apply(size);
1748 
1749         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1750             for (int i = 0; i < as.length; i++) {
1751                 short a = as[i];
1752                 rs[i] = (short)(TRAILING_ZEROS_COUNT_scalar(a));
1753             }
1754         }
1755 
1756         bh.consume(rs);
1757     }
1758 
1759     @Benchmark
1760     public void TRAILING_ZEROS_COUNTMasked(Blackhole bh) {
1761         short[] as = fa.apply(size);
1762         short[] rs = fr.apply(size);
1763         boolean[] ms = fm.apply(size);
1764 
1765         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1766             for (int i = 0; i < as.length; i++) {
1767                 short a = as[i];
1768                 boolean m = ms[i % ms.length];
1769                 rs[i] = (m ? (short)(TRAILING_ZEROS_COUNT_scalar(a)) : a);
1770             }
1771         }
1772 
1773         bh.consume(rs);
1774     }
1775     @Benchmark
1776     public void LEADING_ZEROS_COUNT(Blackhole bh) {
1777         short[] as = fa.apply(size);
1778         short[] rs = fr.apply(size);
1779 
1780         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1781             for (int i = 0; i < as.length; i++) {
1782                 short a = as[i];
1783                 rs[i] = (short)(LEADING_ZEROS_COUNT_scalar(a));
1784             }
1785         }
1786 
1787         bh.consume(rs);
1788     }
1789 
1790     @Benchmark
1791     public void LEADING_ZEROS_COUNTMasked(Blackhole bh) {
1792         short[] as = fa.apply(size);
1793         short[] rs = fr.apply(size);
1794         boolean[] ms = fm.apply(size);
1795 
1796         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1797             for (int i = 0; i < as.length; i++) {
1798                 short a = as[i];
1799                 boolean m = ms[i % ms.length];
1800                 rs[i] = (m ? (short)(LEADING_ZEROS_COUNT_scalar(a)) : a);
1801             }
1802         }
1803 
1804         bh.consume(rs);
1805     }
1806     @Benchmark
1807     public void REVERSE(Blackhole bh) {
1808         short[] as = fa.apply(size);
1809         short[] rs = fr.apply(size);
1810 
1811         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1812             for (int i = 0; i < as.length; i++) {
1813                 short a = as[i];
1814                 rs[i] = (short)(REVERSE_scalar(a));
1815             }
1816         }
1817 
1818         bh.consume(rs);
1819     }
1820 
1821     @Benchmark
1822     public void REVERSEMasked(Blackhole bh) {
1823         short[] as = fa.apply(size);
1824         short[] rs = fr.apply(size);
1825         boolean[] ms = fm.apply(size);
1826 
1827         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1828             for (int i = 0; i < as.length; i++) {
1829                 short a = as[i];
1830                 boolean m = ms[i % ms.length];
1831                 rs[i] = (m ? (short)(REVERSE_scalar(a)) : a);
1832             }
1833         }
1834 
1835         bh.consume(rs);
1836     }
1837     @Benchmark
1838     public void REVERSE_BYTES(Blackhole bh) {
1839         short[] as = fa.apply(size);
1840         short[] rs = fr.apply(size);
1841 
1842         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1843             for (int i = 0; i < as.length; i++) {
1844                 short a = as[i];
1845                 rs[i] = (short)(Short.reverseBytes(a));
1846             }
1847         }
1848 
1849         bh.consume(rs);
1850     }
1851 
1852     @Benchmark
1853     public void REVERSE_BYTESMasked(Blackhole bh) {
1854         short[] as = fa.apply(size);
1855         short[] rs = fr.apply(size);
1856         boolean[] ms = fm.apply(size);
1857 
1858         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1859             for (int i = 0; i < as.length; i++) {
1860                 short a = as[i];
1861                 boolean m = ms[i % ms.length];
1862                 rs[i] = (m ? (short)(Short.reverseBytes(a)) : a);
1863             }
1864         }
1865 
1866         bh.consume(rs);
1867     }
1868 }