1 /*
   2  * Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 package org.openjdk.bench.jdk.incubator.vector.operation;
  25 
  26 // -- This file was mechanically generated: Do not edit! -- //
  27 
  28 import java.util.concurrent.TimeUnit;
  29 import java.util.function.IntFunction;
  30 import jdk.incubator.vector.VectorMath;
  31 
  32 import org.openjdk.jmh.annotations.*;
  33 import org.openjdk.jmh.infra.Blackhole;
  34 
  35 @BenchmarkMode(Mode.Throughput)
  36 @OutputTimeUnit(TimeUnit.MILLISECONDS)
  37 @State(Scope.Benchmark)
  38 @Warmup(iterations = 3, time = 1)
  39 @Measurement(iterations = 5, time = 1)
  40 @Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
  41 public class LongScalar extends AbstractVectorBenchmark {
  42     static final int INVOC_COUNT = 1; // To align with vector benchmarks.
  43 
  44     private static final long CONST_SHIFT = Long.SIZE / 2;
  45 
  46     @Param("1024")
  47     int size;
  48 
  49     long[] fill(IntFunction<Long> f) {
  50         long[] array = new long[size];
  51         for (int i = 0; i < array.length; i++) {
  52             array[i] = f.apply(i);
  53         }
  54         return array;
  55     }
  56 
  57     static long bits(long e) {
  58         return e;
  59     }
  60 
  61     long[] as, bs, cs, rs;
  62     boolean[] ms, mt, rms;
  63     int[] ss;
  64 
  65     @Setup
  66     public void init() {
  67         as = fill(i -> (long)(2*i));
  68         bs = fill(i -> (long)(i+1));
  69         cs = fill(i -> (long)(i+5));
  70         rs = fill(i -> (long)0);
  71         ms = fillMask(size, i -> (i % 2) == 0);
  72         mt = fillMask(size, i -> true);
  73         rms = fillMask(size, i -> false);
  74 
  75         ss = fillInt(size, i -> RAND.nextInt(Math.max(i,1)));
  76     }
  77 
  78     final IntFunction<long[]> fa = vl -> as;
  79     final IntFunction<long[]> fb = vl -> bs;
  80     final IntFunction<long[]> fc = vl -> cs;
  81     final IntFunction<long[]> fr = vl -> rs;
  82     final IntFunction<boolean[]> fm = vl -> ms;
  83     final IntFunction<boolean[]> fmt = vl -> mt;
  84     final IntFunction<boolean[]> fmr = vl -> rms;
  85     final IntFunction<int[]> fs = vl -> ss;
  86 
  87     static boolean eq(long a, long b) {
  88         return a == b;
  89     }
  90 
  91     static boolean neq(long a, long b) {
  92         return a != b;
  93     }
  94 
  95     static boolean lt(long a, long b) {
  96         return a < b;
  97     }
  98 
  99     static boolean le(long a, long b) {
 100         return a <= b;
 101     }
 102 
 103     static boolean gt(long a, long b) {
 104         return a > b;
 105     }
 106 
 107     static boolean ge(long a, long b) {
 108         return a >= b;
 109     }
 110 
 111     static boolean ult(long a, long b) {
 112         return Long.compareUnsigned(a, b) < 0;
 113     }
 114 
 115     static boolean ule(long a, long b) {
 116         return Long.compareUnsigned(a, b) <= 0;
 117     }
 118 
 119     static boolean ugt(long a, long b) {
 120         return Long.compareUnsigned(a, b) > 0;
 121     }
 122 
 123     static boolean uge(long a, long b) {
 124         return Long.compareUnsigned(a, b) >= 0;
 125     }
 126 
 127     static long ROL_scalar(long a, long b) {
 128         return Long.rotateLeft(a, ((int)b));
 129     }
 130 
 131     static long ROR_scalar(long a, long b) {
 132         return Long.rotateRight(a, ((int)b));
 133     }
 134 
 135     static long TRAILING_ZEROS_COUNT_scalar(long a) {
 136         return Long.numberOfTrailingZeros(a);
 137     }
 138 
 139     static long LEADING_ZEROS_COUNT_scalar(long a) {
 140         return Long.numberOfLeadingZeros(a);
 141     }
 142 
 143     static long REVERSE_scalar(long a) {
 144         return Long.reverse(a);
 145     }
 146 
 147     @Benchmark
 148     public void ADD(Blackhole bh) {
 149         long[] as = fa.apply(size);
 150         long[] bs = fb.apply(size);
 151         long[] rs = fr.apply(size);
 152 
 153         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 154             for (int i = 0; i < as.length; i++) {
 155                 long a = as[i];
 156                 long b = bs[i];
 157                 rs[i] = (long)(a + b);
 158             }
 159         }
 160 
 161         bh.consume(rs);
 162     }
 163 
 164     @Benchmark
 165     public void ADDMasked(Blackhole bh) {
 166         long[] as = fa.apply(size);
 167         long[] bs = fb.apply(size);
 168         long[] rs = fr.apply(size);
 169         boolean[] ms = fm.apply(size);
 170 
 171         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 172             for (int i = 0; i < as.length; i++) {
 173                 long a = as[i];
 174                 long b = bs[i];
 175                 if (ms[i % ms.length]) {
 176                     rs[i] = (long)(a + b);
 177                 } else {
 178                     rs[i] = a;
 179                 }
 180             }
 181         }
 182         bh.consume(rs);
 183     }
 184 
 185     @Benchmark
 186     public void SUB(Blackhole bh) {
 187         long[] as = fa.apply(size);
 188         long[] bs = fb.apply(size);
 189         long[] rs = fr.apply(size);
 190 
 191         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 192             for (int i = 0; i < as.length; i++) {
 193                 long a = as[i];
 194                 long b = bs[i];
 195                 rs[i] = (long)(a - b);
 196             }
 197         }
 198 
 199         bh.consume(rs);
 200     }
 201 
 202     @Benchmark
 203     public void SUBMasked(Blackhole bh) {
 204         long[] as = fa.apply(size);
 205         long[] bs = fb.apply(size);
 206         long[] rs = fr.apply(size);
 207         boolean[] ms = fm.apply(size);
 208 
 209         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 210             for (int i = 0; i < as.length; i++) {
 211                 long a = as[i];
 212                 long b = bs[i];
 213                 if (ms[i % ms.length]) {
 214                     rs[i] = (long)(a - b);
 215                 } else {
 216                     rs[i] = a;
 217                 }
 218             }
 219         }
 220         bh.consume(rs);
 221     }
 222 
 223     @Benchmark
 224     public void MUL(Blackhole bh) {
 225         long[] as = fa.apply(size);
 226         long[] bs = fb.apply(size);
 227         long[] rs = fr.apply(size);
 228 
 229         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 230             for (int i = 0; i < as.length; i++) {
 231                 long a = as[i];
 232                 long b = bs[i];
 233                 rs[i] = (long)(a * b);
 234             }
 235         }
 236 
 237         bh.consume(rs);
 238     }
 239 
 240     @Benchmark
 241     public void MULMasked(Blackhole bh) {
 242         long[] as = fa.apply(size);
 243         long[] bs = fb.apply(size);
 244         long[] rs = fr.apply(size);
 245         boolean[] ms = fm.apply(size);
 246 
 247         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 248             for (int i = 0; i < as.length; i++) {
 249                 long a = as[i];
 250                 long b = bs[i];
 251                 if (ms[i % ms.length]) {
 252                     rs[i] = (long)(a * b);
 253                 } else {
 254                     rs[i] = a;
 255                 }
 256             }
 257         }
 258         bh.consume(rs);
 259     }
 260 
 261     @Benchmark
 262     public void FIRST_NONZERO(Blackhole bh) {
 263         long[] as = fa.apply(size);
 264         long[] bs = fb.apply(size);
 265         long[] rs = fr.apply(size);
 266 
 267         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 268             for (int i = 0; i < as.length; i++) {
 269                 long a = as[i];
 270                 long b = bs[i];
 271                 rs[i] = (long)((a)!=0?a:b);
 272             }
 273         }
 274 
 275         bh.consume(rs);
 276     }
 277 
 278     @Benchmark
 279     public void FIRST_NONZEROMasked(Blackhole bh) {
 280         long[] as = fa.apply(size);
 281         long[] bs = fb.apply(size);
 282         long[] rs = fr.apply(size);
 283         boolean[] ms = fm.apply(size);
 284 
 285         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 286             for (int i = 0; i < as.length; i++) {
 287                 long a = as[i];
 288                 long b = bs[i];
 289                 if (ms[i % ms.length]) {
 290                     rs[i] = (long)((a)!=0?a:b);
 291                 } else {
 292                     rs[i] = a;
 293                 }
 294             }
 295         }
 296         bh.consume(rs);
 297     }
 298 
 299     @Benchmark
 300     public void AND(Blackhole bh) {
 301         long[] as = fa.apply(size);
 302         long[] bs = fb.apply(size);
 303         long[] rs = fr.apply(size);
 304 
 305         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 306             for (int i = 0; i < as.length; i++) {
 307                 long a = as[i];
 308                 long b = bs[i];
 309                 rs[i] = (long)(a & b);
 310             }
 311         }
 312 
 313         bh.consume(rs);
 314     }
 315 
 316     @Benchmark
 317     public void ANDMasked(Blackhole bh) {
 318         long[] as = fa.apply(size);
 319         long[] bs = fb.apply(size);
 320         long[] rs = fr.apply(size);
 321         boolean[] ms = fm.apply(size);
 322 
 323         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 324             for (int i = 0; i < as.length; i++) {
 325                 long a = as[i];
 326                 long b = bs[i];
 327                 if (ms[i % ms.length]) {
 328                     rs[i] = (long)(a & b);
 329                 } else {
 330                     rs[i] = a;
 331                 }
 332             }
 333         }
 334         bh.consume(rs);
 335     }
 336 
 337     @Benchmark
 338     public void AND_NOT(Blackhole bh) {
 339         long[] as = fa.apply(size);
 340         long[] bs = fb.apply(size);
 341         long[] rs = fr.apply(size);
 342 
 343         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 344             for (int i = 0; i < as.length; i++) {
 345                 long a = as[i];
 346                 long b = bs[i];
 347                 rs[i] = (long)(a & ~b);
 348             }
 349         }
 350 
 351         bh.consume(rs);
 352     }
 353 
 354     @Benchmark
 355     public void AND_NOTMasked(Blackhole bh) {
 356         long[] as = fa.apply(size);
 357         long[] bs = fb.apply(size);
 358         long[] rs = fr.apply(size);
 359         boolean[] ms = fm.apply(size);
 360 
 361         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 362             for (int i = 0; i < as.length; i++) {
 363                 long a = as[i];
 364                 long b = bs[i];
 365                 if (ms[i % ms.length]) {
 366                     rs[i] = (long)(a & ~b);
 367                 } else {
 368                     rs[i] = a;
 369                 }
 370             }
 371         }
 372         bh.consume(rs);
 373     }
 374 
 375     @Benchmark
 376     public void OR(Blackhole bh) {
 377         long[] as = fa.apply(size);
 378         long[] bs = fb.apply(size);
 379         long[] rs = fr.apply(size);
 380 
 381         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 382             for (int i = 0; i < as.length; i++) {
 383                 long a = as[i];
 384                 long b = bs[i];
 385                 rs[i] = (long)(a | b);
 386             }
 387         }
 388 
 389         bh.consume(rs);
 390     }
 391 
 392     @Benchmark
 393     public void ORMasked(Blackhole bh) {
 394         long[] as = fa.apply(size);
 395         long[] bs = fb.apply(size);
 396         long[] rs = fr.apply(size);
 397         boolean[] ms = fm.apply(size);
 398 
 399         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 400             for (int i = 0; i < as.length; i++) {
 401                 long a = as[i];
 402                 long b = bs[i];
 403                 if (ms[i % ms.length]) {
 404                     rs[i] = (long)(a | b);
 405                 } else {
 406                     rs[i] = a;
 407                 }
 408             }
 409         }
 410         bh.consume(rs);
 411     }
 412 
 413     @Benchmark
 414     public void XOR(Blackhole bh) {
 415         long[] as = fa.apply(size);
 416         long[] bs = fb.apply(size);
 417         long[] rs = fr.apply(size);
 418 
 419         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 420             for (int i = 0; i < as.length; i++) {
 421                 long a = as[i];
 422                 long b = bs[i];
 423                 rs[i] = (long)(a ^ b);
 424             }
 425         }
 426 
 427         bh.consume(rs);
 428     }
 429 
 430     @Benchmark
 431     public void XORMasked(Blackhole bh) {
 432         long[] as = fa.apply(size);
 433         long[] bs = fb.apply(size);
 434         long[] rs = fr.apply(size);
 435         boolean[] ms = fm.apply(size);
 436 
 437         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 438             for (int i = 0; i < as.length; i++) {
 439                 long a = as[i];
 440                 long b = bs[i];
 441                 if (ms[i % ms.length]) {
 442                     rs[i] = (long)(a ^ b);
 443                 } else {
 444                     rs[i] = a;
 445                 }
 446             }
 447         }
 448         bh.consume(rs);
 449     }
 450 
 451     @Benchmark
 452     public void COMPRESS_BITS(Blackhole bh) {
 453         long[] as = fa.apply(size);
 454         long[] bs = fb.apply(size);
 455         long[] rs = fr.apply(size);
 456 
 457         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 458             for (int i = 0; i < as.length; i++) {
 459                 long a = as[i];
 460                 long b = bs[i];
 461                 rs[i] = (long)(Long.compress(a, b));
 462             }
 463         }
 464 
 465         bh.consume(rs);
 466     }
 467 
 468     @Benchmark
 469     public void COMPRESS_BITSMasked(Blackhole bh) {
 470         long[] as = fa.apply(size);
 471         long[] bs = fb.apply(size);
 472         long[] rs = fr.apply(size);
 473         boolean[] ms = fm.apply(size);
 474 
 475         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 476             for (int i = 0; i < as.length; i++) {
 477                 long a = as[i];
 478                 long b = bs[i];
 479                 if (ms[i % ms.length]) {
 480                     rs[i] = (long)(Long.compress(a, b));
 481                 } else {
 482                     rs[i] = a;
 483                 }
 484             }
 485         }
 486         bh.consume(rs);
 487     }
 488 
 489     @Benchmark
 490     public void EXPAND_BITS(Blackhole bh) {
 491         long[] as = fa.apply(size);
 492         long[] bs = fb.apply(size);
 493         long[] rs = fr.apply(size);
 494 
 495         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 496             for (int i = 0; i < as.length; i++) {
 497                 long a = as[i];
 498                 long b = bs[i];
 499                 rs[i] = (long)(Long.expand(a, b));
 500             }
 501         }
 502 
 503         bh.consume(rs);
 504     }
 505 
 506     @Benchmark
 507     public void EXPAND_BITSMasked(Blackhole bh) {
 508         long[] as = fa.apply(size);
 509         long[] bs = fb.apply(size);
 510         long[] rs = fr.apply(size);
 511         boolean[] ms = fm.apply(size);
 512 
 513         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 514             for (int i = 0; i < as.length; i++) {
 515                 long a = as[i];
 516                 long b = bs[i];
 517                 if (ms[i % ms.length]) {
 518                     rs[i] = (long)(Long.expand(a, b));
 519                 } else {
 520                     rs[i] = a;
 521                 }
 522             }
 523         }
 524         bh.consume(rs);
 525     }
 526 
 527     @Benchmark
 528     public void LSHL(Blackhole bh) {
 529         long[] as = fa.apply(size);
 530         long[] bs = fb.apply(size);
 531         long[] rs = fr.apply(size);
 532 
 533         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 534             for (int i = 0; i < as.length; i++) {
 535                 long a = as[i];
 536                 long b = bs[i];
 537                 rs[i] = (long)((a << b));
 538             }
 539         }
 540 
 541         bh.consume(rs);
 542     }
 543 
 544     @Benchmark
 545     public void LSHLMasked(Blackhole bh) {
 546         long[] as = fa.apply(size);
 547         long[] bs = fb.apply(size);
 548         long[] rs = fr.apply(size);
 549         boolean[] ms = fm.apply(size);
 550 
 551         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 552             for (int i = 0; i < as.length; i++) {
 553                 long a = as[i];
 554                 long b = bs[i];
 555                 if (ms[i % ms.length]) {
 556                     rs[i] = (long)((a << b));
 557                 } else {
 558                     rs[i] = a;
 559                 }
 560             }
 561         }
 562         bh.consume(rs);
 563     }
 564 
 565     @Benchmark
 566     public void ASHR(Blackhole bh) {
 567         long[] as = fa.apply(size);
 568         long[] bs = fb.apply(size);
 569         long[] rs = fr.apply(size);
 570 
 571         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 572             for (int i = 0; i < as.length; i++) {
 573                 long a = as[i];
 574                 long b = bs[i];
 575                 rs[i] = (long)((a >> b));
 576             }
 577         }
 578 
 579         bh.consume(rs);
 580     }
 581 
 582     @Benchmark
 583     public void ASHRMasked(Blackhole bh) {
 584         long[] as = fa.apply(size);
 585         long[] bs = fb.apply(size);
 586         long[] rs = fr.apply(size);
 587         boolean[] ms = fm.apply(size);
 588 
 589         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 590             for (int i = 0; i < as.length; i++) {
 591                 long a = as[i];
 592                 long b = bs[i];
 593                 if (ms[i % ms.length]) {
 594                     rs[i] = (long)((a >> b));
 595                 } else {
 596                     rs[i] = a;
 597                 }
 598             }
 599         }
 600         bh.consume(rs);
 601     }
 602 
 603     @Benchmark
 604     public void LSHR(Blackhole bh) {
 605         long[] as = fa.apply(size);
 606         long[] bs = fb.apply(size);
 607         long[] rs = fr.apply(size);
 608 
 609         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 610             for (int i = 0; i < as.length; i++) {
 611                 long a = as[i];
 612                 long b = bs[i];
 613                 rs[i] = (long)((a >>> b));
 614             }
 615         }
 616 
 617         bh.consume(rs);
 618     }
 619 
 620     @Benchmark
 621     public void LSHRMasked(Blackhole bh) {
 622         long[] as = fa.apply(size);
 623         long[] bs = fb.apply(size);
 624         long[] rs = fr.apply(size);
 625         boolean[] ms = fm.apply(size);
 626 
 627         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 628             for (int i = 0; i < as.length; i++) {
 629                 long a = as[i];
 630                 long b = bs[i];
 631                 if (ms[i % ms.length]) {
 632                     rs[i] = (long)((a >>> b));
 633                 } else {
 634                     rs[i] = a;
 635                 }
 636             }
 637         }
 638         bh.consume(rs);
 639     }
 640 
 641     @Benchmark
 642     public void LSHLShift(Blackhole bh) {
 643         long[] as = fa.apply(size);
 644         long[] bs = fb.apply(size);
 645         long[] rs = fr.apply(size);
 646 
 647         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 648             for (int i = 0; i < as.length; i++) {
 649                 long a = as[i];
 650                 long b = bs[i];
 651                 rs[i] = (long)((a << b));
 652             }
 653         }
 654 
 655         bh.consume(rs);
 656     }
 657 
 658     @Benchmark
 659     public void LSHLMaskedShift(Blackhole bh) {
 660         long[] as = fa.apply(size);
 661         long[] bs = fb.apply(size);
 662         long[] rs = fr.apply(size);
 663         boolean[] ms = fm.apply(size);
 664 
 665         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 666             for (int i = 0; i < as.length; i++) {
 667                 long a = as[i];
 668                 long b = bs[i];
 669                 boolean m = ms[i % ms.length];
 670                 rs[i] = (m ? (long)((a << b)) : a);
 671             }
 672         }
 673 
 674         bh.consume(rs);
 675     }
 676 
 677     @Benchmark
 678     public void LSHRShift(Blackhole bh) {
 679         long[] as = fa.apply(size);
 680         long[] bs = fb.apply(size);
 681         long[] rs = fr.apply(size);
 682 
 683         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 684             for (int i = 0; i < as.length; i++) {
 685                 long a = as[i];
 686                 long b = bs[i];
 687                 rs[i] = (long)((a >>> b));
 688             }
 689         }
 690 
 691         bh.consume(rs);
 692     }
 693 
 694     @Benchmark
 695     public void LSHRMaskedShift(Blackhole bh) {
 696         long[] as = fa.apply(size);
 697         long[] bs = fb.apply(size);
 698         long[] rs = fr.apply(size);
 699         boolean[] ms = fm.apply(size);
 700 
 701         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 702             for (int i = 0; i < as.length; i++) {
 703                 long a = as[i];
 704                 long b = bs[i];
 705                 boolean m = ms[i % ms.length];
 706                 rs[i] = (m ? (long)((a >>> b)) : a);
 707             }
 708         }
 709 
 710         bh.consume(rs);
 711     }
 712 
 713     @Benchmark
 714     public void ASHRShift(Blackhole bh) {
 715         long[] as = fa.apply(size);
 716         long[] bs = fb.apply(size);
 717         long[] rs = fr.apply(size);
 718 
 719         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 720             for (int i = 0; i < as.length; i++) {
 721                 long a = as[i];
 722                 long b = bs[i];
 723                 rs[i] = (long)((a >> b));
 724             }
 725         }
 726 
 727         bh.consume(rs);
 728     }
 729 
 730     @Benchmark
 731     public void ASHRMaskedShift(Blackhole bh) {
 732         long[] as = fa.apply(size);
 733         long[] bs = fb.apply(size);
 734         long[] rs = fr.apply(size);
 735         boolean[] ms = fm.apply(size);
 736 
 737         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 738             for (int i = 0; i < as.length; i++) {
 739                 long a = as[i];
 740                 long b = bs[i];
 741                 boolean m = ms[i % ms.length];
 742                 rs[i] = (m ? (long)((a >> b)) : a);
 743             }
 744         }
 745 
 746         bh.consume(rs);
 747     }
 748 
 749     @Benchmark
 750     public void ROR(Blackhole bh) {
 751         long[] as = fa.apply(size);
 752         long[] bs = fb.apply(size);
 753         long[] rs = fr.apply(size);
 754 
 755         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 756             for (int i = 0; i < as.length; i++) {
 757                 long a = as[i];
 758                 long b = bs[i];
 759                 rs[i] = (long)(ROR_scalar(a,b));
 760             }
 761         }
 762 
 763         bh.consume(rs);
 764     }
 765 
 766     @Benchmark
 767     public void RORMasked(Blackhole bh) {
 768         long[] as = fa.apply(size);
 769         long[] bs = fb.apply(size);
 770         long[] rs = fr.apply(size);
 771         boolean[] ms = fm.apply(size);
 772 
 773         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 774             for (int i = 0; i < as.length; i++) {
 775                 long a = as[i];
 776                 long b = bs[i];
 777                 if (ms[i % ms.length]) {
 778                     rs[i] = (long)(ROR_scalar(a,b));
 779                 } else {
 780                     rs[i] = a;
 781                 }
 782             }
 783         }
 784         bh.consume(rs);
 785     }
 786 
 787     @Benchmark
 788     public void ROL(Blackhole bh) {
 789         long[] as = fa.apply(size);
 790         long[] bs = fb.apply(size);
 791         long[] rs = fr.apply(size);
 792 
 793         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 794             for (int i = 0; i < as.length; i++) {
 795                 long a = as[i];
 796                 long b = bs[i];
 797                 rs[i] = (long)(ROL_scalar(a,b));
 798             }
 799         }
 800 
 801         bh.consume(rs);
 802     }
 803 
 804     @Benchmark
 805     public void ROLMasked(Blackhole bh) {
 806         long[] as = fa.apply(size);
 807         long[] bs = fb.apply(size);
 808         long[] rs = fr.apply(size);
 809         boolean[] ms = fm.apply(size);
 810 
 811         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 812             for (int i = 0; i < as.length; i++) {
 813                 long a = as[i];
 814                 long b = bs[i];
 815                 if (ms[i % ms.length]) {
 816                     rs[i] = (long)(ROL_scalar(a,b));
 817                 } else {
 818                     rs[i] = a;
 819                 }
 820             }
 821         }
 822         bh.consume(rs);
 823     }
 824 
 825     @Benchmark
 826     public void RORShift(Blackhole bh) {
 827         long[] as = fa.apply(size);
 828         long[] bs = fb.apply(size);
 829         long[] rs = fr.apply(size);
 830 
 831         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 832             for (int i = 0; i < as.length; i++) {
 833                 long a = as[i];
 834                 long b = bs[i];
 835                 rs[i] = (long)(ROR_scalar(a, b));
 836             }
 837         }
 838 
 839         bh.consume(rs);
 840     }
 841 
 842     @Benchmark
 843     public void RORMaskedShift(Blackhole bh) {
 844         long[] as = fa.apply(size);
 845         long[] bs = fb.apply(size);
 846         long[] rs = fr.apply(size);
 847         boolean[] ms = fm.apply(size);
 848 
 849         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 850             for (int i = 0; i < as.length; i++) {
 851                 long a = as[i];
 852                 long b = bs[i];
 853                 boolean m = ms[i % ms.length];
 854                 rs[i] = (m ? (long)(ROR_scalar(a, b)) : a);
 855             }
 856         }
 857 
 858         bh.consume(rs);
 859     }
 860 
 861     @Benchmark
 862     public void ROLShift(Blackhole bh) {
 863         long[] as = fa.apply(size);
 864         long[] bs = fb.apply(size);
 865         long[] rs = fr.apply(size);
 866 
 867         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 868             for (int i = 0; i < as.length; i++) {
 869                 long a = as[i];
 870                 long b = bs[i];
 871                 rs[i] = (long)(ROL_scalar(a, b));
 872             }
 873         }
 874 
 875         bh.consume(rs);
 876     }
 877 
 878     @Benchmark
 879     public void ROLMaskedShift(Blackhole bh) {
 880         long[] as = fa.apply(size);
 881         long[] bs = fb.apply(size);
 882         long[] rs = fr.apply(size);
 883         boolean[] ms = fm.apply(size);
 884 
 885         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 886             for (int i = 0; i < as.length; i++) {
 887                 long a = as[i];
 888                 long b = bs[i];
 889                 boolean m = ms[i % ms.length];
 890                 rs[i] = (m ? (long)(ROL_scalar(a, b)) : a);
 891             }
 892         }
 893 
 894         bh.consume(rs);
 895     }
 896 
 897     @Benchmark
 898     public void LSHRShiftConst(Blackhole bh) {
 899         long[] as = fa.apply(size);
 900         long[] bs = fb.apply(size);
 901         long[] rs = fr.apply(size);
 902 
 903         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 904             for (int i = 0; i < as.length; i++) {
 905                 long a = as[i];
 906                 long b = bs[i];
 907                 rs[i] = (long)((a >>> CONST_SHIFT));
 908             }
 909         }
 910 
 911         bh.consume(rs);
 912     }
 913 
 914     @Benchmark
 915     public void LSHRMaskedShiftConst(Blackhole bh) {
 916         long[] as = fa.apply(size);
 917         long[] bs = fb.apply(size);
 918         long[] rs = fr.apply(size);
 919         boolean[] ms = fm.apply(size);
 920 
 921         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 922             for (int i = 0; i < as.length; i++) {
 923                 long a = as[i];
 924                 long b = bs[i];
 925                 boolean m = ms[i % ms.length];
 926                 rs[i] = (m ? (long)((a >>> CONST_SHIFT)) : a);
 927             }
 928         }
 929 
 930         bh.consume(rs);
 931     }
 932 
 933     @Benchmark
 934     public void LSHLShiftConst(Blackhole bh) {
 935         long[] as = fa.apply(size);
 936         long[] bs = fb.apply(size);
 937         long[] rs = fr.apply(size);
 938 
 939         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 940             for (int i = 0; i < as.length; i++) {
 941                 long a = as[i];
 942                 long b = bs[i];
 943                 rs[i] = (long)((a << CONST_SHIFT));
 944             }
 945         }
 946 
 947         bh.consume(rs);
 948     }
 949 
 950     @Benchmark
 951     public void LSHLMaskedShiftConst(Blackhole bh) {
 952         long[] as = fa.apply(size);
 953         long[] bs = fb.apply(size);
 954         long[] rs = fr.apply(size);
 955         boolean[] ms = fm.apply(size);
 956 
 957         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 958             for (int i = 0; i < as.length; i++) {
 959                 long a = as[i];
 960                 long b = bs[i];
 961                 boolean m = ms[i % ms.length];
 962                 rs[i] = (m ? (long)((a << CONST_SHIFT)) : a);
 963             }
 964         }
 965 
 966         bh.consume(rs);
 967     }
 968 
 969     @Benchmark
 970     public void ASHRShiftConst(Blackhole bh) {
 971         long[] as = fa.apply(size);
 972         long[] bs = fb.apply(size);
 973         long[] rs = fr.apply(size);
 974 
 975         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 976             for (int i = 0; i < as.length; i++) {
 977                 long a = as[i];
 978                 long b = bs[i];
 979                 rs[i] = (long)((a >> CONST_SHIFT));
 980             }
 981         }
 982 
 983         bh.consume(rs);
 984     }
 985 
 986     @Benchmark
 987     public void ASHRMaskedShiftConst(Blackhole bh) {
 988         long[] as = fa.apply(size);
 989         long[] bs = fb.apply(size);
 990         long[] rs = fr.apply(size);
 991         boolean[] ms = fm.apply(size);
 992 
 993         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 994             for (int i = 0; i < as.length; i++) {
 995                 long a = as[i];
 996                 long b = bs[i];
 997                 boolean m = ms[i % ms.length];
 998                 rs[i] = (m ? (long)((a >> CONST_SHIFT)) : a);
 999             }
1000         }
1001 
1002         bh.consume(rs);
1003     }
1004 
1005     @Benchmark
1006     public void RORShiftConst(Blackhole bh) {
1007         long[] as = fa.apply(size);
1008         long[] bs = fb.apply(size);
1009         long[] rs = fr.apply(size);
1010 
1011         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1012             for (int i = 0; i < as.length; i++) {
1013                 long a = as[i];
1014                 long b = bs[i];
1015                 rs[i] = (long)(ROR_scalar(a, CONST_SHIFT));
1016             }
1017         }
1018 
1019         bh.consume(rs);
1020     }
1021 
1022     @Benchmark
1023     public void RORMaskedShiftConst(Blackhole bh) {
1024         long[] as = fa.apply(size);
1025         long[] bs = fb.apply(size);
1026         long[] rs = fr.apply(size);
1027         boolean[] ms = fm.apply(size);
1028 
1029         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1030             for (int i = 0; i < as.length; i++) {
1031                 long a = as[i];
1032                 long b = bs[i];
1033                 boolean m = ms[i % ms.length];
1034                 rs[i] = (m ? (long)(ROR_scalar(a, CONST_SHIFT)) : a);
1035             }
1036         }
1037 
1038         bh.consume(rs);
1039     }
1040 
1041     @Benchmark
1042     public void ROLShiftConst(Blackhole bh) {
1043         long[] as = fa.apply(size);
1044         long[] bs = fb.apply(size);
1045         long[] rs = fr.apply(size);
1046 
1047         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1048             for (int i = 0; i < as.length; i++) {
1049                 long a = as[i];
1050                 long b = bs[i];
1051                 rs[i] = (long)(ROL_scalar(a, CONST_SHIFT));
1052             }
1053         }
1054 
1055         bh.consume(rs);
1056     }
1057 
1058     @Benchmark
1059     public void ROLMaskedShiftConst(Blackhole bh) {
1060         long[] as = fa.apply(size);
1061         long[] bs = fb.apply(size);
1062         long[] rs = fr.apply(size);
1063         boolean[] ms = fm.apply(size);
1064 
1065         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1066             for (int i = 0; i < as.length; i++) {
1067                 long a = as[i];
1068                 long b = bs[i];
1069                 boolean m = ms[i % ms.length];
1070                 rs[i] = (m ? (long)(ROL_scalar(a, CONST_SHIFT)) : a);
1071             }
1072         }
1073 
1074         bh.consume(rs);
1075     }
1076 
1077     @Benchmark
1078     public void MIN(Blackhole bh) {
1079         long[] as = fa.apply(size);
1080         long[] bs = fb.apply(size);
1081         long[] rs = fr.apply(size);
1082 
1083         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1084             for (int i = 0; i < as.length; i++) {
1085                 long a = as[i];
1086                 long b = bs[i];
1087                 rs[i] = (long)(Math.min(a, b));
1088             }
1089         }
1090 
1091         bh.consume(rs);
1092     }
1093 
1094     @Benchmark
1095     public void MAX(Blackhole bh) {
1096         long[] as = fa.apply(size);
1097         long[] bs = fb.apply(size);
1098         long[] rs = fr.apply(size);
1099 
1100         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1101             for (int i = 0; i < as.length; i++) {
1102                 long a = as[i];
1103                 long b = bs[i];
1104                 rs[i] = (long)(Math.max(a, b));
1105             }
1106         }
1107 
1108         bh.consume(rs);
1109     }
1110 
1111     @Benchmark
1112     public void UMIN(Blackhole bh) {
1113         long[] as = fa.apply(size);
1114         long[] bs = fb.apply(size);
1115         long[] rs = fr.apply(size);
1116 
1117         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1118             for (int i = 0; i < as.length; i++) {
1119                 long a = as[i];
1120                 long b = bs[i];
1121                 rs[i] = (long)(VectorMath.minUnsigned(a, b));
1122             }
1123         }
1124 
1125         bh.consume(rs);
1126     }
1127 
1128     @Benchmark
1129     public void UMINMasked(Blackhole bh) {
1130         long[] as = fa.apply(size);
1131         long[] bs = fb.apply(size);
1132         long[] rs = fr.apply(size);
1133         boolean[] ms = fm.apply(size);
1134 
1135         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1136             for (int i = 0; i < as.length; i++) {
1137                 long a = as[i];
1138                 long b = bs[i];
1139                 if (ms[i % ms.length]) {
1140                     rs[i] = (long)(VectorMath.minUnsigned(a, b));
1141                 } else {
1142                     rs[i] = a;
1143                 }
1144             }
1145         }
1146         bh.consume(rs);
1147     }
1148 
1149     @Benchmark
1150     public void UMAX(Blackhole bh) {
1151         long[] as = fa.apply(size);
1152         long[] bs = fb.apply(size);
1153         long[] rs = fr.apply(size);
1154 
1155         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1156             for (int i = 0; i < as.length; i++) {
1157                 long a = as[i];
1158                 long b = bs[i];
1159                 rs[i] = (long)(VectorMath.maxUnsigned(a, b));
1160             }
1161         }
1162 
1163         bh.consume(rs);
1164     }
1165 
1166     @Benchmark
1167     public void UMAXMasked(Blackhole bh) {
1168         long[] as = fa.apply(size);
1169         long[] bs = fb.apply(size);
1170         long[] rs = fr.apply(size);
1171         boolean[] ms = fm.apply(size);
1172 
1173         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1174             for (int i = 0; i < as.length; i++) {
1175                 long a = as[i];
1176                 long b = bs[i];
1177                 if (ms[i % ms.length]) {
1178                     rs[i] = (long)(VectorMath.maxUnsigned(a, b));
1179                 } else {
1180                     rs[i] = a;
1181                 }
1182             }
1183         }
1184         bh.consume(rs);
1185     }
1186 
1187     @Benchmark
1188     public void ANDLanes(Blackhole bh) {
1189         long[] as = fa.apply(size);
1190         long r = -1;
1191         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1192             r = -1;
1193             for (int i = 0; i < as.length; i++) {
1194                 r &= as[i];
1195             }
1196         }
1197         bh.consume(r);
1198     }
1199 
1200     @Benchmark
1201     public void ANDMaskedLanes(Blackhole bh) {
1202         long[] as = fa.apply(size);
1203         boolean[] ms = fm.apply(size);
1204         long r = -1;
1205         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1206             r = -1;
1207             for (int i = 0; i < as.length; i++) {
1208                 if (ms[i % ms.length])
1209                     r &= as[i];
1210             }
1211         }
1212         bh.consume(r);
1213     }
1214 
1215     @Benchmark
1216     public void ORLanes(Blackhole bh) {
1217         long[] as = fa.apply(size);
1218         long r = 0;
1219         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1220             r = 0;
1221             for (int i = 0; i < as.length; i++) {
1222                 r |= as[i];
1223             }
1224         }
1225         bh.consume(r);
1226     }
1227 
1228     @Benchmark
1229     public void ORMaskedLanes(Blackhole bh) {
1230         long[] as = fa.apply(size);
1231         boolean[] ms = fm.apply(size);
1232         long r = 0;
1233         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1234             r = 0;
1235             for (int i = 0; i < as.length; i++) {
1236                 if (ms[i % ms.length])
1237                     r |= as[i];
1238             }
1239         }
1240         bh.consume(r);
1241     }
1242 
1243     @Benchmark
1244     public void XORLanes(Blackhole bh) {
1245         long[] as = fa.apply(size);
1246         long r = 0;
1247         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1248             r = 0;
1249             for (int i = 0; i < as.length; i++) {
1250                 r ^= as[i];
1251             }
1252         }
1253         bh.consume(r);
1254     }
1255 
1256     @Benchmark
1257     public void XORMaskedLanes(Blackhole bh) {
1258         long[] as = fa.apply(size);
1259         boolean[] ms = fm.apply(size);
1260         long r = 0;
1261         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1262             r = 0;
1263             for (int i = 0; i < as.length; i++) {
1264                 if (ms[i % ms.length])
1265                     r ^= as[i];
1266             }
1267         }
1268         bh.consume(r);
1269     }
1270 
1271     @Benchmark
1272     public void ADDLanes(Blackhole bh) {
1273         long[] as = fa.apply(size);
1274         long r = 0;
1275         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1276             r = 0;
1277             for (int i = 0; i < as.length; i++) {
1278                 r += as[i];
1279             }
1280         }
1281         bh.consume(r);
1282     }
1283 
1284     @Benchmark
1285     public void ADDMaskedLanes(Blackhole bh) {
1286         long[] as = fa.apply(size);
1287         boolean[] ms = fm.apply(size);
1288         long r = 0;
1289         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1290             r = 0;
1291             for (int i = 0; i < as.length; i++) {
1292                 if (ms[i % ms.length])
1293                     r += as[i];
1294             }
1295         }
1296         bh.consume(r);
1297     }
1298 
1299     @Benchmark
1300     public void MULLanes(Blackhole bh) {
1301         long[] as = fa.apply(size);
1302         long r = 1;
1303         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1304             r = 1;
1305             for (int i = 0; i < as.length; i++) {
1306                 r *= as[i];
1307             }
1308         }
1309         bh.consume(r);
1310     }
1311 
1312     @Benchmark
1313     public void MULMaskedLanes(Blackhole bh) {
1314         long[] as = fa.apply(size);
1315         boolean[] ms = fm.apply(size);
1316         long r = 1;
1317         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1318             r = 1;
1319             for (int i = 0; i < as.length; i++) {
1320                 if (ms[i % ms.length])
1321                     r *= as[i];
1322             }
1323         }
1324         bh.consume(r);
1325     }
1326 
1327     @Benchmark
1328     public void anyTrue(Blackhole bh) {
1329         boolean[] ms = fm.apply(size);
1330         boolean r = false;
1331         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1332             r = false;
1333             for (int i = 0; i < ms.length; i++) {
1334                 r |= ms[i];
1335             }
1336         }
1337         bh.consume(r);
1338     }
1339 
1340     @Benchmark
1341     public void allTrue(Blackhole bh) {
1342         boolean[] ms = fm.apply(size);
1343         boolean r = true;
1344         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1345             r = true;
1346             for (int i = 0; i < ms.length; i++) {
1347                 r &= ms[i];
1348             }
1349         }
1350         bh.consume(r);
1351     }
1352 
1353     @Benchmark
1354     public void IS_DEFAULT(Blackhole bh) {
1355         long[] as = fa.apply(size);
1356         boolean r = true;
1357 
1358         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1359             for (int i = 0; i < as.length; i++) {
1360                 long a = as[i];
1361                 r &= (bits(a)==0); // accumulate so JIT can't eliminate the computation
1362             }
1363         }
1364 
1365         bh.consume(r);
1366     }
1367 
1368     @Benchmark
1369     public void IS_NEGATIVE(Blackhole bh) {
1370         long[] as = fa.apply(size);
1371         boolean r = true;
1372 
1373         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1374             for (int i = 0; i < as.length; i++) {
1375                 long a = as[i];
1376                 r &= (bits(a)<0); // accumulate so JIT can't eliminate the computation
1377             }
1378         }
1379 
1380         bh.consume(r);
1381     }
1382 
1383     @Benchmark
1384     public void LT(Blackhole bh) {
1385         long[] as = fa.apply(size);
1386         long[] bs = fb.apply(size);
1387         boolean r = true;
1388 
1389         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1390             for (int i = 0; i < as.length; i++) {
1391                 r &= lt(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1392             }
1393         }
1394 
1395         bh.consume(r);
1396     }
1397 
1398     @Benchmark
1399     public void GT(Blackhole bh) {
1400         long[] as = fa.apply(size);
1401         long[] bs = fb.apply(size);
1402         boolean r = true;
1403 
1404         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1405             for (int i = 0; i < as.length; i++) {
1406                 r &= gt(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1407             }
1408         }
1409 
1410         bh.consume(r);
1411     }
1412 
1413     @Benchmark
1414     public void EQ(Blackhole bh) {
1415         long[] as = fa.apply(size);
1416         long[] bs = fb.apply(size);
1417         boolean r = true;
1418 
1419         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1420             for (int i = 0; i < as.length; i++) {
1421                 r &= eq(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1422             }
1423         }
1424 
1425         bh.consume(r);
1426     }
1427 
1428     @Benchmark
1429     public void NE(Blackhole bh) {
1430         long[] as = fa.apply(size);
1431         long[] bs = fb.apply(size);
1432         boolean r = true;
1433 
1434         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1435             for (int i = 0; i < as.length; i++) {
1436                 r &= neq(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1437             }
1438         }
1439 
1440         bh.consume(r);
1441     }
1442 
1443     @Benchmark
1444     public void LE(Blackhole bh) {
1445         long[] as = fa.apply(size);
1446         long[] bs = fb.apply(size);
1447         boolean r = true;
1448 
1449         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1450             for (int i = 0; i < as.length; i++) {
1451                 r &= le(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1452             }
1453         }
1454 
1455         bh.consume(r);
1456     }
1457 
1458     @Benchmark
1459     public void GE(Blackhole bh) {
1460         long[] as = fa.apply(size);
1461         long[] bs = fb.apply(size);
1462         boolean r = true;
1463 
1464         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1465             for (int i = 0; i < as.length; i++) {
1466                 r &= ge(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1467             }
1468         }
1469 
1470         bh.consume(r);
1471     }
1472 
1473     @Benchmark
1474     public void ULT(Blackhole bh) {
1475         long[] as = fa.apply(size);
1476         long[] bs = fb.apply(size);
1477         boolean r = true;
1478 
1479         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1480             for (int i = 0; i < as.length; i++) {
1481                 r &= ult(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1482             }
1483         }
1484 
1485         bh.consume(r);
1486     }
1487 
1488     @Benchmark
1489     public void UGT(Blackhole bh) {
1490         long[] as = fa.apply(size);
1491         long[] bs = fb.apply(size);
1492         boolean r = true;
1493 
1494         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1495             for (int i = 0; i < as.length; i++) {
1496                 r &= ugt(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1497             }
1498         }
1499 
1500         bh.consume(r);
1501     }
1502 
1503     @Benchmark
1504     public void ULE(Blackhole bh) {
1505         long[] as = fa.apply(size);
1506         long[] bs = fb.apply(size);
1507         boolean r = true;
1508 
1509         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1510             for (int i = 0; i < as.length; i++) {
1511                 r &= ule(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1512             }
1513         }
1514 
1515         bh.consume(r);
1516     }
1517 
1518     @Benchmark
1519     public void UGE(Blackhole bh) {
1520         long[] as = fa.apply(size);
1521         long[] bs = fb.apply(size);
1522         boolean r = true;
1523 
1524         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1525             for (int i = 0; i < as.length; i++) {
1526                 r &= uge(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1527             }
1528         }
1529 
1530         bh.consume(r);
1531     }
1532 
1533     @Benchmark
1534     public void blend(Blackhole bh) {
1535         long[] as = fa.apply(size);
1536         long[] bs = fb.apply(size);
1537         long[] rs = fr.apply(size);
1538         boolean[] ms = fm.apply(size);
1539 
1540         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1541             for (int i = 0; i < as.length; i++) {
1542                 long a = as[i];
1543                 long b = bs[i];
1544                 boolean m = ms[i % ms.length];
1545                 rs[i] = (m ? b : a);
1546             }
1547         }
1548 
1549         bh.consume(rs);
1550     }
1551 
1552     void rearrangeShared(int window, Blackhole bh) {
1553         long[] as = fa.apply(size);
1554         int[] order = fs.apply(size);
1555         long[] rs = fr.apply(size);
1556 
1557         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1558             for (int i = 0; i < as.length; i += window) {
1559                 for (int j = 0; j < window; j++) {
1560                     long a = as[i+j];
1561                     int pos = order[j];
1562                     rs[i + pos] = a;
1563                 }
1564             }
1565         }
1566 
1567         bh.consume(rs);
1568     }
1569 
1570     @Benchmark
1571     public void rearrange064(Blackhole bh) {
1572         int window = 64 / Long.SIZE;
1573         rearrangeShared(window, bh);
1574     }
1575 
1576     @Benchmark
1577     public void rearrange128(Blackhole bh) {
1578         int window = 128 / Long.SIZE;
1579         rearrangeShared(window, bh);
1580     }
1581 
1582     @Benchmark
1583     public void rearrange256(Blackhole bh) {
1584         int window = 256 / Long.SIZE;
1585         rearrangeShared(window, bh);
1586     }
1587 
1588     @Benchmark
1589     public void rearrange512(Blackhole bh) {
1590         int window = 512 / Long.SIZE;
1591         rearrangeShared(window, bh);
1592     }
1593 
1594     @Benchmark
1595     public void compressScalar(Blackhole bh) {
1596         long[] as = fa.apply(size);
1597         long[] rs = new long[size];
1598         boolean[] im = fmt.apply(size);
1599 
1600         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1601             for (int i = 0, j = 0; i < as.length; i++) {
1602                 if (im[i]) {
1603                     rs[j++] = as[i];
1604                 }
1605             }
1606         }
1607 
1608         bh.consume(rs);
1609     }
1610 
1611     @Benchmark
1612     public void expandScalar(Blackhole bh) {
1613         long[] as = fa.apply(size);
1614         long[] rs = new long[size];
1615         boolean[] im = fmt.apply(size);
1616 
1617         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1618             for (int i = 0, j = 0; i < as.length; i++) {
1619                 if (im[i]) {
1620                     rs[i++] = as[j++];
1621                 }
1622             }
1623         }
1624 
1625         bh.consume(rs);
1626     }
1627 
1628     @Benchmark
1629     public void maskCompressScalar(Blackhole bh) {
1630         boolean[] im = fmt.apply(size);
1631         boolean[] rm = new boolean[size];
1632 
1633         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1634             for (int i = 0, j = 0; i < im.length; i++) {
1635                 if (im[i]) {
1636                     rm[j++] = im[i];
1637                 }
1638             }
1639         }
1640 
1641         bh.consume(rm);
1642     }
1643 
1644     void broadcastShared(int window, Blackhole bh) {
1645         long[] as = fa.apply(size);
1646         long[] rs = fr.apply(size);
1647 
1648         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1649             for (int i = 0; i < as.length; i += window) {
1650                 int idx = i;
1651                 for (int j = 0; j < window; j++) {
1652                     rs[j] = as[idx];
1653                 }
1654             }
1655         }
1656 
1657         bh.consume(rs);
1658     }
1659 
1660     @Benchmark
1661     public void broadcast064(Blackhole bh) {
1662         int window = 64 / Long.SIZE;
1663         broadcastShared(window, bh);
1664     }
1665 
1666     @Benchmark
1667     public void broadcast128(Blackhole bh) {
1668         int window = 128 / Long.SIZE;
1669         broadcastShared(window, bh);
1670     }
1671 
1672     @Benchmark
1673     public void broadcast256(Blackhole bh) {
1674         int window = 256 / Long.SIZE;
1675         broadcastShared(window, bh);
1676     }
1677 
1678     @Benchmark
1679     public void broadcast512(Blackhole bh) {
1680         int window = 512 / Long.SIZE;
1681         broadcastShared(window, bh);
1682     }
1683 
1684     @Benchmark
1685     public void zero(Blackhole bh) {
1686         long[] as = fa.apply(size);
1687 
1688         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1689             for (int i = 0; i < as.length; i++) {
1690                 as[i] = (long)0;
1691             }
1692         }
1693 
1694         bh.consume(as);
1695     }
1696 
1697     @Benchmark
1698     public void BITWISE_BLEND(Blackhole bh) {
1699         long[] as = fa.apply(size);
1700         long[] bs = fb.apply(size);
1701         long[] cs = fc.apply(size);
1702         long[] rs = fr.apply(size);
1703 
1704         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1705             for (int i = 0; i < as.length; i++) {
1706                 long a = as[i];
1707                 long b = bs[i];
1708                 long c = cs[i];
1709                 rs[i] = (long)((a&~(c))|(b&c));
1710             }
1711         }
1712 
1713         bh.consume(rs);
1714     }
1715 
1716     @Benchmark
1717     public void BITWISE_BLENDMasked(Blackhole bh) {
1718         long[] as = fa.apply(size);
1719         long[] bs = fb.apply(size);
1720         long[] cs = fc.apply(size);
1721         long[] rs = fr.apply(size);
1722         boolean[] ms = fm.apply(size);
1723 
1724         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1725             for (int i = 0; i < as.length; i++) {
1726                 long a = as[i];
1727                 long b = bs[i];
1728                 long c = cs[i];
1729                 if (ms[i % ms.length]) {
1730                     rs[i] = (long)((a&~(c))|(b&c));
1731                 } else {
1732                     rs[i] = a;
1733                 }
1734             }
1735         }
1736         bh.consume(rs);
1737     }
1738     @Benchmark
1739     public void NEG(Blackhole bh) {
1740         long[] as = fa.apply(size);
1741         long[] rs = fr.apply(size);
1742 
1743         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1744             for (int i = 0; i < as.length; i++) {
1745                 long a = as[i];
1746                 rs[i] = (long)(-((long)a));
1747             }
1748         }
1749 
1750         bh.consume(rs);
1751     }
1752 
1753     @Benchmark
1754     public void NEGMasked(Blackhole bh) {
1755         long[] as = fa.apply(size);
1756         long[] rs = fr.apply(size);
1757         boolean[] ms = fm.apply(size);
1758 
1759         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1760             for (int i = 0; i < as.length; i++) {
1761                 long a = as[i];
1762                 boolean m = ms[i % ms.length];
1763                 rs[i] = (m ? (long)(-((long)a)) : a);
1764             }
1765         }
1766 
1767         bh.consume(rs);
1768     }
1769     @Benchmark
1770     public void ABS(Blackhole bh) {
1771         long[] as = fa.apply(size);
1772         long[] rs = fr.apply(size);
1773 
1774         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1775             for (int i = 0; i < as.length; i++) {
1776                 long a = as[i];
1777                 rs[i] = (long)(Math.abs((long)a));
1778             }
1779         }
1780 
1781         bh.consume(rs);
1782     }
1783 
1784     @Benchmark
1785     public void ABSMasked(Blackhole bh) {
1786         long[] as = fa.apply(size);
1787         long[] rs = fr.apply(size);
1788         boolean[] ms = fm.apply(size);
1789 
1790         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1791             for (int i = 0; i < as.length; i++) {
1792                 long a = as[i];
1793                 boolean m = ms[i % ms.length];
1794                 rs[i] = (m ? (long)(Math.abs((long)a)) : a);
1795             }
1796         }
1797 
1798         bh.consume(rs);
1799     }
1800     @Benchmark
1801     public void NOT(Blackhole bh) {
1802         long[] as = fa.apply(size);
1803         long[] rs = fr.apply(size);
1804 
1805         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1806             for (int i = 0; i < as.length; i++) {
1807                 long a = as[i];
1808                 rs[i] = (long)(~((long)a));
1809             }
1810         }
1811 
1812         bh.consume(rs);
1813     }
1814 
1815     @Benchmark
1816     public void NOTMasked(Blackhole bh) {
1817         long[] as = fa.apply(size);
1818         long[] rs = fr.apply(size);
1819         boolean[] ms = fm.apply(size);
1820 
1821         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1822             for (int i = 0; i < as.length; i++) {
1823                 long a = as[i];
1824                 boolean m = ms[i % ms.length];
1825                 rs[i] = (m ? (long)(~((long)a)) : a);
1826             }
1827         }
1828 
1829         bh.consume(rs);
1830     }
1831     @Benchmark
1832     public void ZOMO(Blackhole bh) {
1833         long[] as = fa.apply(size);
1834         long[] rs = fr.apply(size);
1835 
1836         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1837             for (int i = 0; i < as.length; i++) {
1838                 long a = as[i];
1839                 rs[i] = (long)((a==0?0:-1));
1840             }
1841         }
1842 
1843         bh.consume(rs);
1844     }
1845 
1846     @Benchmark
1847     public void ZOMOMasked(Blackhole bh) {
1848         long[] as = fa.apply(size);
1849         long[] rs = fr.apply(size);
1850         boolean[] ms = fm.apply(size);
1851 
1852         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1853             for (int i = 0; i < as.length; i++) {
1854                 long a = as[i];
1855                 boolean m = ms[i % ms.length];
1856                 rs[i] = (m ? (long)((a==0?0:-1)) : a);
1857             }
1858         }
1859 
1860         bh.consume(rs);
1861     }
1862     @Benchmark
1863     public void BIT_COUNT(Blackhole bh) {
1864         long[] as = fa.apply(size);
1865         long[] rs = fr.apply(size);
1866 
1867         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1868             for (int i = 0; i < as.length; i++) {
1869                 long a = as[i];
1870                 rs[i] = (long)(Long.bitCount(a));
1871             }
1872         }
1873 
1874         bh.consume(rs);
1875     }
1876 
1877     @Benchmark
1878     public void BIT_COUNTMasked(Blackhole bh) {
1879         long[] as = fa.apply(size);
1880         long[] rs = fr.apply(size);
1881         boolean[] ms = fm.apply(size);
1882 
1883         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1884             for (int i = 0; i < as.length; i++) {
1885                 long a = as[i];
1886                 boolean m = ms[i % ms.length];
1887                 rs[i] = (m ? (long)(Long.bitCount(a)) : a);
1888             }
1889         }
1890 
1891         bh.consume(rs);
1892     }
1893     @Benchmark
1894     public void TRAILING_ZEROS_COUNT(Blackhole bh) {
1895         long[] as = fa.apply(size);
1896         long[] rs = fr.apply(size);
1897 
1898         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1899             for (int i = 0; i < as.length; i++) {
1900                 long a = as[i];
1901                 rs[i] = (long)(TRAILING_ZEROS_COUNT_scalar(a));
1902             }
1903         }
1904 
1905         bh.consume(rs);
1906     }
1907 
1908     @Benchmark
1909     public void TRAILING_ZEROS_COUNTMasked(Blackhole bh) {
1910         long[] as = fa.apply(size);
1911         long[] rs = fr.apply(size);
1912         boolean[] ms = fm.apply(size);
1913 
1914         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1915             for (int i = 0; i < as.length; i++) {
1916                 long a = as[i];
1917                 boolean m = ms[i % ms.length];
1918                 rs[i] = (m ? (long)(TRAILING_ZEROS_COUNT_scalar(a)) : a);
1919             }
1920         }
1921 
1922         bh.consume(rs);
1923     }
1924     @Benchmark
1925     public void LEADING_ZEROS_COUNT(Blackhole bh) {
1926         long[] as = fa.apply(size);
1927         long[] rs = fr.apply(size);
1928 
1929         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1930             for (int i = 0; i < as.length; i++) {
1931                 long a = as[i];
1932                 rs[i] = (long)(LEADING_ZEROS_COUNT_scalar(a));
1933             }
1934         }
1935 
1936         bh.consume(rs);
1937     }
1938 
1939     @Benchmark
1940     public void LEADING_ZEROS_COUNTMasked(Blackhole bh) {
1941         long[] as = fa.apply(size);
1942         long[] rs = fr.apply(size);
1943         boolean[] ms = fm.apply(size);
1944 
1945         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1946             for (int i = 0; i < as.length; i++) {
1947                 long a = as[i];
1948                 boolean m = ms[i % ms.length];
1949                 rs[i] = (m ? (long)(LEADING_ZEROS_COUNT_scalar(a)) : a);
1950             }
1951         }
1952 
1953         bh.consume(rs);
1954     }
1955     @Benchmark
1956     public void REVERSE(Blackhole bh) {
1957         long[] as = fa.apply(size);
1958         long[] rs = fr.apply(size);
1959 
1960         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1961             for (int i = 0; i < as.length; i++) {
1962                 long a = as[i];
1963                 rs[i] = (long)(REVERSE_scalar(a));
1964             }
1965         }
1966 
1967         bh.consume(rs);
1968     }
1969 
1970     @Benchmark
1971     public void REVERSEMasked(Blackhole bh) {
1972         long[] as = fa.apply(size);
1973         long[] rs = fr.apply(size);
1974         boolean[] ms = fm.apply(size);
1975 
1976         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1977             for (int i = 0; i < as.length; i++) {
1978                 long a = as[i];
1979                 boolean m = ms[i % ms.length];
1980                 rs[i] = (m ? (long)(REVERSE_scalar(a)) : a);
1981             }
1982         }
1983 
1984         bh.consume(rs);
1985     }
1986     @Benchmark
1987     public void REVERSE_BYTES(Blackhole bh) {
1988         long[] as = fa.apply(size);
1989         long[] rs = fr.apply(size);
1990 
1991         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1992             for (int i = 0; i < as.length; i++) {
1993                 long a = as[i];
1994                 rs[i] = (long)(Long.reverseBytes(a));
1995             }
1996         }
1997 
1998         bh.consume(rs);
1999     }
2000 
2001     @Benchmark
2002     public void REVERSE_BYTESMasked(Blackhole bh) {
2003         long[] as = fa.apply(size);
2004         long[] rs = fr.apply(size);
2005         boolean[] ms = fm.apply(size);
2006 
2007         for (int ic = 0; ic < INVOC_COUNT; ic++) {
2008             for (int i = 0; i < as.length; i++) {
2009                 long a = as[i];
2010                 boolean m = ms[i % ms.length];
2011                 rs[i] = (m ? (long)(Long.reverseBytes(a)) : a);
2012             }
2013         }
2014 
2015         bh.consume(rs);
2016     }
2017 }