1 /*
   2  * Copyright (c) 2018, 2022, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 package org.openjdk.bench.jdk.incubator.vector.operation;
  25 
  26 // -- This file was mechanically generated: Do not edit! -- //
  27 
  28 import java.util.concurrent.TimeUnit;
  29 import java.util.function.IntFunction;
  30 
  31 import org.openjdk.jmh.annotations.*;
  32 import org.openjdk.jmh.infra.Blackhole;
  33 
  34 @BenchmarkMode(Mode.Throughput)
  35 @OutputTimeUnit(TimeUnit.MILLISECONDS)
  36 @State(Scope.Benchmark)
  37 @Warmup(iterations = 3, time = 1)
  38 @Measurement(iterations = 5, time = 1)
  39 @Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
  40 public class LongScalar extends AbstractVectorBenchmark {
  41     static final int INVOC_COUNT = 1; // To align with vector benchmarks.
  42 
  43     private static final long CONST_SHIFT = Long.SIZE / 2;
  44 
  45     @Param("1024")
  46     int size;
  47 
  48     long[] fill(IntFunction<Long> f) {
  49         long[] array = new long[size];
  50         for (int i = 0; i < array.length; i++) {
  51             array[i] = f.apply(i);
  52         }
  53         return array;
  54     }
  55 
  56     static long bits(long e) {
  57         return e;
  58     }
  59 
  60     long[] as, bs, cs, rs;
  61     boolean[] ms, mt, rms;
  62     int[] ss;
  63 
  64     @Setup
  65     public void init() {
  66         as = fill(i -> (long)(2*i));
  67         bs = fill(i -> (long)(i+1));
  68         cs = fill(i -> (long)(i+5));
  69         rs = fill(i -> (long)0);
  70         ms = fillMask(size, i -> (i % 2) == 0);
  71         mt = fillMask(size, i -> true);
  72         rms = fillMask(size, i -> false);
  73 
  74         ss = fillInt(size, i -> RANDOM.nextInt(Math.max(i,1)));
  75     }
  76 
  77     final IntFunction<long[]> fa = vl -> as;
  78     final IntFunction<long[]> fb = vl -> bs;
  79     final IntFunction<long[]> fc = vl -> cs;
  80     final IntFunction<long[]> fr = vl -> rs;
  81     final IntFunction<boolean[]> fm = vl -> ms;
  82     final IntFunction<boolean[]> fmt = vl -> mt;
  83     final IntFunction<boolean[]> fmr = vl -> rms;
  84     final IntFunction<int[]> fs = vl -> ss;
  85 
  86     static boolean eq(long a, long b) {
  87         return a == b;
  88     }
  89 
  90     static boolean neq(long a, long b) {
  91         return a != b;
  92     }
  93 
  94     static boolean lt(long a, long b) {
  95         return a < b;
  96     }
  97 
  98     static boolean le(long a, long b) {
  99         return a <= b;
 100     }
 101 
 102     static boolean gt(long a, long b) {
 103         return a > b;
 104     }
 105 
 106     static boolean ge(long a, long b) {
 107         return a >= b;
 108     }
 109 
 110     static boolean ult(long a, long b) {
 111         return Long.compareUnsigned(a, b) < 0;
 112     }
 113 
 114     static boolean ule(long a, long b) {
 115         return Long.compareUnsigned(a, b) <= 0;
 116     }
 117 
 118     static boolean ugt(long a, long b) {
 119         return Long.compareUnsigned(a, b) > 0;
 120     }
 121 
 122     static boolean uge(long a, long b) {
 123         return Long.compareUnsigned(a, b) >= 0;
 124     }
 125 
 126     static long ROL_scalar(long a, long b) {
 127         return Long.rotateLeft(a, ((int)b));
 128     }
 129 
 130     static long ROR_scalar(long a, long b) {
 131         return Long.rotateRight(a, ((int)b));
 132     }
 133 
 134     static long TRAILING_ZEROS_COUNT_scalar(long a) {
 135         return Long.numberOfTrailingZeros(a);
 136     }
 137 
 138     static long LEADING_ZEROS_COUNT_scalar(long a) {
 139         return Long.numberOfLeadingZeros(a);
 140     }
 141 
 142     static long REVERSE_scalar(long a) {
 143         return Long.reverse(a);
 144     }
 145 
 146     @Benchmark
 147     public void ADD(Blackhole bh) {
 148         long[] as = fa.apply(size);
 149         long[] bs = fb.apply(size);
 150         long[] rs = fr.apply(size);
 151 
 152         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 153             for (int i = 0; i < as.length; i++) {
 154                 long a = as[i];
 155                 long b = bs[i];
 156                 rs[i] = (long)(a + b);
 157             }
 158         }
 159 
 160         bh.consume(rs);
 161     }
 162 
 163     @Benchmark
 164     public void ADDMasked(Blackhole bh) {
 165         long[] as = fa.apply(size);
 166         long[] bs = fb.apply(size);
 167         long[] rs = fr.apply(size);
 168         boolean[] ms = fm.apply(size);
 169 
 170         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 171             for (int i = 0; i < as.length; i++) {
 172                 long a = as[i];
 173                 long b = bs[i];
 174                 if (ms[i % ms.length]) {
 175                     rs[i] = (long)(a + b);
 176                 } else {
 177                     rs[i] = a;
 178                 }
 179             }
 180         }
 181         bh.consume(rs);
 182     }
 183 
 184     @Benchmark
 185     public void SUB(Blackhole bh) {
 186         long[] as = fa.apply(size);
 187         long[] bs = fb.apply(size);
 188         long[] rs = fr.apply(size);
 189 
 190         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 191             for (int i = 0; i < as.length; i++) {
 192                 long a = as[i];
 193                 long b = bs[i];
 194                 rs[i] = (long)(a - b);
 195             }
 196         }
 197 
 198         bh.consume(rs);
 199     }
 200 
 201     @Benchmark
 202     public void SUBMasked(Blackhole bh) {
 203         long[] as = fa.apply(size);
 204         long[] bs = fb.apply(size);
 205         long[] rs = fr.apply(size);
 206         boolean[] ms = fm.apply(size);
 207 
 208         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 209             for (int i = 0; i < as.length; i++) {
 210                 long a = as[i];
 211                 long b = bs[i];
 212                 if (ms[i % ms.length]) {
 213                     rs[i] = (long)(a - b);
 214                 } else {
 215                     rs[i] = a;
 216                 }
 217             }
 218         }
 219         bh.consume(rs);
 220     }
 221 
 222     @Benchmark
 223     public void MUL(Blackhole bh) {
 224         long[] as = fa.apply(size);
 225         long[] bs = fb.apply(size);
 226         long[] rs = fr.apply(size);
 227 
 228         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 229             for (int i = 0; i < as.length; i++) {
 230                 long a = as[i];
 231                 long b = bs[i];
 232                 rs[i] = (long)(a * b);
 233             }
 234         }
 235 
 236         bh.consume(rs);
 237     }
 238 
 239     @Benchmark
 240     public void MULMasked(Blackhole bh) {
 241         long[] as = fa.apply(size);
 242         long[] bs = fb.apply(size);
 243         long[] rs = fr.apply(size);
 244         boolean[] ms = fm.apply(size);
 245 
 246         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 247             for (int i = 0; i < as.length; i++) {
 248                 long a = as[i];
 249                 long b = bs[i];
 250                 if (ms[i % ms.length]) {
 251                     rs[i] = (long)(a * b);
 252                 } else {
 253                     rs[i] = a;
 254                 }
 255             }
 256         }
 257         bh.consume(rs);
 258     }
 259 
 260     @Benchmark
 261     public void FIRST_NONZERO(Blackhole bh) {
 262         long[] as = fa.apply(size);
 263         long[] bs = fb.apply(size);
 264         long[] rs = fr.apply(size);
 265 
 266         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 267             for (int i = 0; i < as.length; i++) {
 268                 long a = as[i];
 269                 long b = bs[i];
 270                 rs[i] = (long)((a)!=0?a:b);
 271             }
 272         }
 273 
 274         bh.consume(rs);
 275     }
 276 
 277     @Benchmark
 278     public void FIRST_NONZEROMasked(Blackhole bh) {
 279         long[] as = fa.apply(size);
 280         long[] bs = fb.apply(size);
 281         long[] rs = fr.apply(size);
 282         boolean[] ms = fm.apply(size);
 283 
 284         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 285             for (int i = 0; i < as.length; i++) {
 286                 long a = as[i];
 287                 long b = bs[i];
 288                 if (ms[i % ms.length]) {
 289                     rs[i] = (long)((a)!=0?a:b);
 290                 } else {
 291                     rs[i] = a;
 292                 }
 293             }
 294         }
 295         bh.consume(rs);
 296     }
 297 
 298     @Benchmark
 299     public void AND(Blackhole bh) {
 300         long[] as = fa.apply(size);
 301         long[] bs = fb.apply(size);
 302         long[] rs = fr.apply(size);
 303 
 304         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 305             for (int i = 0; i < as.length; i++) {
 306                 long a = as[i];
 307                 long b = bs[i];
 308                 rs[i] = (long)(a & b);
 309             }
 310         }
 311 
 312         bh.consume(rs);
 313     }
 314 
 315     @Benchmark
 316     public void ANDMasked(Blackhole bh) {
 317         long[] as = fa.apply(size);
 318         long[] bs = fb.apply(size);
 319         long[] rs = fr.apply(size);
 320         boolean[] ms = fm.apply(size);
 321 
 322         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 323             for (int i = 0; i < as.length; i++) {
 324                 long a = as[i];
 325                 long b = bs[i];
 326                 if (ms[i % ms.length]) {
 327                     rs[i] = (long)(a & b);
 328                 } else {
 329                     rs[i] = a;
 330                 }
 331             }
 332         }
 333         bh.consume(rs);
 334     }
 335 
 336     @Benchmark
 337     public void AND_NOT(Blackhole bh) {
 338         long[] as = fa.apply(size);
 339         long[] bs = fb.apply(size);
 340         long[] rs = fr.apply(size);
 341 
 342         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 343             for (int i = 0; i < as.length; i++) {
 344                 long a = as[i];
 345                 long b = bs[i];
 346                 rs[i] = (long)(a & ~b);
 347             }
 348         }
 349 
 350         bh.consume(rs);
 351     }
 352 
 353     @Benchmark
 354     public void AND_NOTMasked(Blackhole bh) {
 355         long[] as = fa.apply(size);
 356         long[] bs = fb.apply(size);
 357         long[] rs = fr.apply(size);
 358         boolean[] ms = fm.apply(size);
 359 
 360         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 361             for (int i = 0; i < as.length; i++) {
 362                 long a = as[i];
 363                 long b = bs[i];
 364                 if (ms[i % ms.length]) {
 365                     rs[i] = (long)(a & ~b);
 366                 } else {
 367                     rs[i] = a;
 368                 }
 369             }
 370         }
 371         bh.consume(rs);
 372     }
 373 
 374     @Benchmark
 375     public void OR(Blackhole bh) {
 376         long[] as = fa.apply(size);
 377         long[] bs = fb.apply(size);
 378         long[] rs = fr.apply(size);
 379 
 380         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 381             for (int i = 0; i < as.length; i++) {
 382                 long a = as[i];
 383                 long b = bs[i];
 384                 rs[i] = (long)(a | b);
 385             }
 386         }
 387 
 388         bh.consume(rs);
 389     }
 390 
 391     @Benchmark
 392     public void ORMasked(Blackhole bh) {
 393         long[] as = fa.apply(size);
 394         long[] bs = fb.apply(size);
 395         long[] rs = fr.apply(size);
 396         boolean[] ms = fm.apply(size);
 397 
 398         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 399             for (int i = 0; i < as.length; i++) {
 400                 long a = as[i];
 401                 long b = bs[i];
 402                 if (ms[i % ms.length]) {
 403                     rs[i] = (long)(a | b);
 404                 } else {
 405                     rs[i] = a;
 406                 }
 407             }
 408         }
 409         bh.consume(rs);
 410     }
 411 
 412     @Benchmark
 413     public void XOR(Blackhole bh) {
 414         long[] as = fa.apply(size);
 415         long[] bs = fb.apply(size);
 416         long[] rs = fr.apply(size);
 417 
 418         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 419             for (int i = 0; i < as.length; i++) {
 420                 long a = as[i];
 421                 long b = bs[i];
 422                 rs[i] = (long)(a ^ b);
 423             }
 424         }
 425 
 426         bh.consume(rs);
 427     }
 428 
 429     @Benchmark
 430     public void XORMasked(Blackhole bh) {
 431         long[] as = fa.apply(size);
 432         long[] bs = fb.apply(size);
 433         long[] rs = fr.apply(size);
 434         boolean[] ms = fm.apply(size);
 435 
 436         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 437             for (int i = 0; i < as.length; i++) {
 438                 long a = as[i];
 439                 long b = bs[i];
 440                 if (ms[i % ms.length]) {
 441                     rs[i] = (long)(a ^ b);
 442                 } else {
 443                     rs[i] = a;
 444                 }
 445             }
 446         }
 447         bh.consume(rs);
 448     }
 449 
 450     @Benchmark
 451     public void COMPRESS_BITS(Blackhole bh) {
 452         long[] as = fa.apply(size);
 453         long[] bs = fb.apply(size);
 454         long[] rs = fr.apply(size);
 455 
 456         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 457             for (int i = 0; i < as.length; i++) {
 458                 long a = as[i];
 459                 long b = bs[i];
 460                 rs[i] = (long)(Long.compress(a, b));
 461             }
 462         }
 463 
 464         bh.consume(rs);
 465     }
 466 
 467     @Benchmark
 468     public void COMPRESS_BITSMasked(Blackhole bh) {
 469         long[] as = fa.apply(size);
 470         long[] bs = fb.apply(size);
 471         long[] rs = fr.apply(size);
 472         boolean[] ms = fm.apply(size);
 473 
 474         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 475             for (int i = 0; i < as.length; i++) {
 476                 long a = as[i];
 477                 long b = bs[i];
 478                 if (ms[i % ms.length]) {
 479                     rs[i] = (long)(Long.compress(a, b));
 480                 } else {
 481                     rs[i] = a;
 482                 }
 483             }
 484         }
 485         bh.consume(rs);
 486     }
 487 
 488     @Benchmark
 489     public void EXPAND_BITS(Blackhole bh) {
 490         long[] as = fa.apply(size);
 491         long[] bs = fb.apply(size);
 492         long[] rs = fr.apply(size);
 493 
 494         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 495             for (int i = 0; i < as.length; i++) {
 496                 long a = as[i];
 497                 long b = bs[i];
 498                 rs[i] = (long)(Long.expand(a, b));
 499             }
 500         }
 501 
 502         bh.consume(rs);
 503     }
 504 
 505     @Benchmark
 506     public void EXPAND_BITSMasked(Blackhole bh) {
 507         long[] as = fa.apply(size);
 508         long[] bs = fb.apply(size);
 509         long[] rs = fr.apply(size);
 510         boolean[] ms = fm.apply(size);
 511 
 512         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 513             for (int i = 0; i < as.length; i++) {
 514                 long a = as[i];
 515                 long b = bs[i];
 516                 if (ms[i % ms.length]) {
 517                     rs[i] = (long)(Long.expand(a, b));
 518                 } else {
 519                     rs[i] = a;
 520                 }
 521             }
 522         }
 523         bh.consume(rs);
 524     }
 525 
 526     @Benchmark
 527     public void LSHL(Blackhole bh) {
 528         long[] as = fa.apply(size);
 529         long[] bs = fb.apply(size);
 530         long[] rs = fr.apply(size);
 531 
 532         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 533             for (int i = 0; i < as.length; i++) {
 534                 long a = as[i];
 535                 long b = bs[i];
 536                 rs[i] = (long)((a << b));
 537             }
 538         }
 539 
 540         bh.consume(rs);
 541     }
 542 
 543     @Benchmark
 544     public void LSHLMasked(Blackhole bh) {
 545         long[] as = fa.apply(size);
 546         long[] bs = fb.apply(size);
 547         long[] rs = fr.apply(size);
 548         boolean[] ms = fm.apply(size);
 549 
 550         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 551             for (int i = 0; i < as.length; i++) {
 552                 long a = as[i];
 553                 long b = bs[i];
 554                 if (ms[i % ms.length]) {
 555                     rs[i] = (long)((a << b));
 556                 } else {
 557                     rs[i] = a;
 558                 }
 559             }
 560         }
 561         bh.consume(rs);
 562     }
 563 
 564     @Benchmark
 565     public void ASHR(Blackhole bh) {
 566         long[] as = fa.apply(size);
 567         long[] bs = fb.apply(size);
 568         long[] rs = fr.apply(size);
 569 
 570         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 571             for (int i = 0; i < as.length; i++) {
 572                 long a = as[i];
 573                 long b = bs[i];
 574                 rs[i] = (long)((a >> b));
 575             }
 576         }
 577 
 578         bh.consume(rs);
 579     }
 580 
 581     @Benchmark
 582     public void ASHRMasked(Blackhole bh) {
 583         long[] as = fa.apply(size);
 584         long[] bs = fb.apply(size);
 585         long[] rs = fr.apply(size);
 586         boolean[] ms = fm.apply(size);
 587 
 588         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 589             for (int i = 0; i < as.length; i++) {
 590                 long a = as[i];
 591                 long b = bs[i];
 592                 if (ms[i % ms.length]) {
 593                     rs[i] = (long)((a >> b));
 594                 } else {
 595                     rs[i] = a;
 596                 }
 597             }
 598         }
 599         bh.consume(rs);
 600     }
 601 
 602     @Benchmark
 603     public void LSHR(Blackhole bh) {
 604         long[] as = fa.apply(size);
 605         long[] bs = fb.apply(size);
 606         long[] rs = fr.apply(size);
 607 
 608         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 609             for (int i = 0; i < as.length; i++) {
 610                 long a = as[i];
 611                 long b = bs[i];
 612                 rs[i] = (long)((a >>> b));
 613             }
 614         }
 615 
 616         bh.consume(rs);
 617     }
 618 
 619     @Benchmark
 620     public void LSHRMasked(Blackhole bh) {
 621         long[] as = fa.apply(size);
 622         long[] bs = fb.apply(size);
 623         long[] rs = fr.apply(size);
 624         boolean[] ms = fm.apply(size);
 625 
 626         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 627             for (int i = 0; i < as.length; i++) {
 628                 long a = as[i];
 629                 long b = bs[i];
 630                 if (ms[i % ms.length]) {
 631                     rs[i] = (long)((a >>> b));
 632                 } else {
 633                     rs[i] = a;
 634                 }
 635             }
 636         }
 637         bh.consume(rs);
 638     }
 639 
 640     @Benchmark
 641     public void LSHLShift(Blackhole bh) {
 642         long[] as = fa.apply(size);
 643         long[] bs = fb.apply(size);
 644         long[] rs = fr.apply(size);
 645 
 646         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 647             for (int i = 0; i < as.length; i++) {
 648                 long a = as[i];
 649                 long b = bs[i];
 650                 rs[i] = (long)((a << b));
 651             }
 652         }
 653 
 654         bh.consume(rs);
 655     }
 656 
 657     @Benchmark
 658     public void LSHLMaskedShift(Blackhole bh) {
 659         long[] as = fa.apply(size);
 660         long[] bs = fb.apply(size);
 661         long[] rs = fr.apply(size);
 662         boolean[] ms = fm.apply(size);
 663 
 664         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 665             for (int i = 0; i < as.length; i++) {
 666                 long a = as[i];
 667                 long b = bs[i];
 668                 boolean m = ms[i % ms.length];
 669                 rs[i] = (m ? (long)((a << b)) : a);
 670             }
 671         }
 672 
 673         bh.consume(rs);
 674     }
 675 
 676     @Benchmark
 677     public void LSHRShift(Blackhole bh) {
 678         long[] as = fa.apply(size);
 679         long[] bs = fb.apply(size);
 680         long[] rs = fr.apply(size);
 681 
 682         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 683             for (int i = 0; i < as.length; i++) {
 684                 long a = as[i];
 685                 long b = bs[i];
 686                 rs[i] = (long)((a >>> b));
 687             }
 688         }
 689 
 690         bh.consume(rs);
 691     }
 692 
 693     @Benchmark
 694     public void LSHRMaskedShift(Blackhole bh) {
 695         long[] as = fa.apply(size);
 696         long[] bs = fb.apply(size);
 697         long[] rs = fr.apply(size);
 698         boolean[] ms = fm.apply(size);
 699 
 700         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 701             for (int i = 0; i < as.length; i++) {
 702                 long a = as[i];
 703                 long b = bs[i];
 704                 boolean m = ms[i % ms.length];
 705                 rs[i] = (m ? (long)((a >>> b)) : a);
 706             }
 707         }
 708 
 709         bh.consume(rs);
 710     }
 711 
 712     @Benchmark
 713     public void ASHRShift(Blackhole bh) {
 714         long[] as = fa.apply(size);
 715         long[] bs = fb.apply(size);
 716         long[] rs = fr.apply(size);
 717 
 718         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 719             for (int i = 0; i < as.length; i++) {
 720                 long a = as[i];
 721                 long b = bs[i];
 722                 rs[i] = (long)((a >> b));
 723             }
 724         }
 725 
 726         bh.consume(rs);
 727     }
 728 
 729     @Benchmark
 730     public void ASHRMaskedShift(Blackhole bh) {
 731         long[] as = fa.apply(size);
 732         long[] bs = fb.apply(size);
 733         long[] rs = fr.apply(size);
 734         boolean[] ms = fm.apply(size);
 735 
 736         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 737             for (int i = 0; i < as.length; i++) {
 738                 long a = as[i];
 739                 long b = bs[i];
 740                 boolean m = ms[i % ms.length];
 741                 rs[i] = (m ? (long)((a >> b)) : a);
 742             }
 743         }
 744 
 745         bh.consume(rs);
 746     }
 747 
 748     @Benchmark
 749     public void ROR(Blackhole bh) {
 750         long[] as = fa.apply(size);
 751         long[] bs = fb.apply(size);
 752         long[] rs = fr.apply(size);
 753 
 754         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 755             for (int i = 0; i < as.length; i++) {
 756                 long a = as[i];
 757                 long b = bs[i];
 758                 rs[i] = (long)(ROR_scalar(a,b));
 759             }
 760         }
 761 
 762         bh.consume(rs);
 763     }
 764 
 765     @Benchmark
 766     public void RORMasked(Blackhole bh) {
 767         long[] as = fa.apply(size);
 768         long[] bs = fb.apply(size);
 769         long[] rs = fr.apply(size);
 770         boolean[] ms = fm.apply(size);
 771 
 772         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 773             for (int i = 0; i < as.length; i++) {
 774                 long a = as[i];
 775                 long b = bs[i];
 776                 if (ms[i % ms.length]) {
 777                     rs[i] = (long)(ROR_scalar(a,b));
 778                 } else {
 779                     rs[i] = a;
 780                 }
 781             }
 782         }
 783         bh.consume(rs);
 784     }
 785 
 786     @Benchmark
 787     public void ROL(Blackhole bh) {
 788         long[] as = fa.apply(size);
 789         long[] bs = fb.apply(size);
 790         long[] rs = fr.apply(size);
 791 
 792         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 793             for (int i = 0; i < as.length; i++) {
 794                 long a = as[i];
 795                 long b = bs[i];
 796                 rs[i] = (long)(ROL_scalar(a,b));
 797             }
 798         }
 799 
 800         bh.consume(rs);
 801     }
 802 
 803     @Benchmark
 804     public void ROLMasked(Blackhole bh) {
 805         long[] as = fa.apply(size);
 806         long[] bs = fb.apply(size);
 807         long[] rs = fr.apply(size);
 808         boolean[] ms = fm.apply(size);
 809 
 810         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 811             for (int i = 0; i < as.length; i++) {
 812                 long a = as[i];
 813                 long b = bs[i];
 814                 if (ms[i % ms.length]) {
 815                     rs[i] = (long)(ROL_scalar(a,b));
 816                 } else {
 817                     rs[i] = a;
 818                 }
 819             }
 820         }
 821         bh.consume(rs);
 822     }
 823 
 824     @Benchmark
 825     public void RORShift(Blackhole bh) {
 826         long[] as = fa.apply(size);
 827         long[] bs = fb.apply(size);
 828         long[] rs = fr.apply(size);
 829 
 830         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 831             for (int i = 0; i < as.length; i++) {
 832                 long a = as[i];
 833                 long b = bs[i];
 834                 rs[i] = (long)(ROR_scalar(a, b));
 835             }
 836         }
 837 
 838         bh.consume(rs);
 839     }
 840 
 841     @Benchmark
 842     public void RORMaskedShift(Blackhole bh) {
 843         long[] as = fa.apply(size);
 844         long[] bs = fb.apply(size);
 845         long[] rs = fr.apply(size);
 846         boolean[] ms = fm.apply(size);
 847 
 848         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 849             for (int i = 0; i < as.length; i++) {
 850                 long a = as[i];
 851                 long b = bs[i];
 852                 boolean m = ms[i % ms.length];
 853                 rs[i] = (m ? (long)(ROR_scalar(a, b)) : a);
 854             }
 855         }
 856 
 857         bh.consume(rs);
 858     }
 859 
 860     @Benchmark
 861     public void ROLShift(Blackhole bh) {
 862         long[] as = fa.apply(size);
 863         long[] bs = fb.apply(size);
 864         long[] rs = fr.apply(size);
 865 
 866         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 867             for (int i = 0; i < as.length; i++) {
 868                 long a = as[i];
 869                 long b = bs[i];
 870                 rs[i] = (long)(ROL_scalar(a, b));
 871             }
 872         }
 873 
 874         bh.consume(rs);
 875     }
 876 
 877     @Benchmark
 878     public void ROLMaskedShift(Blackhole bh) {
 879         long[] as = fa.apply(size);
 880         long[] bs = fb.apply(size);
 881         long[] rs = fr.apply(size);
 882         boolean[] ms = fm.apply(size);
 883 
 884         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 885             for (int i = 0; i < as.length; i++) {
 886                 long a = as[i];
 887                 long b = bs[i];
 888                 boolean m = ms[i % ms.length];
 889                 rs[i] = (m ? (long)(ROL_scalar(a, b)) : a);
 890             }
 891         }
 892 
 893         bh.consume(rs);
 894     }
 895 
 896     @Benchmark
 897     public void LSHRShiftConst(Blackhole bh) {
 898         long[] as = fa.apply(size);
 899         long[] bs = fb.apply(size);
 900         long[] rs = fr.apply(size);
 901 
 902         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 903             for (int i = 0; i < as.length; i++) {
 904                 long a = as[i];
 905                 long b = bs[i];
 906                 rs[i] = (long)((a >>> CONST_SHIFT));
 907             }
 908         }
 909 
 910         bh.consume(rs);
 911     }
 912 
 913     @Benchmark
 914     public void LSHRMaskedShiftConst(Blackhole bh) {
 915         long[] as = fa.apply(size);
 916         long[] bs = fb.apply(size);
 917         long[] rs = fr.apply(size);
 918         boolean[] ms = fm.apply(size);
 919 
 920         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 921             for (int i = 0; i < as.length; i++) {
 922                 long a = as[i];
 923                 long b = bs[i];
 924                 boolean m = ms[i % ms.length];
 925                 rs[i] = (m ? (long)((a >>> CONST_SHIFT)) : a);
 926             }
 927         }
 928 
 929         bh.consume(rs);
 930     }
 931 
 932     @Benchmark
 933     public void LSHLShiftConst(Blackhole bh) {
 934         long[] as = fa.apply(size);
 935         long[] bs = fb.apply(size);
 936         long[] rs = fr.apply(size);
 937 
 938         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 939             for (int i = 0; i < as.length; i++) {
 940                 long a = as[i];
 941                 long b = bs[i];
 942                 rs[i] = (long)((a << CONST_SHIFT));
 943             }
 944         }
 945 
 946         bh.consume(rs);
 947     }
 948 
 949     @Benchmark
 950     public void LSHLMaskedShiftConst(Blackhole bh) {
 951         long[] as = fa.apply(size);
 952         long[] bs = fb.apply(size);
 953         long[] rs = fr.apply(size);
 954         boolean[] ms = fm.apply(size);
 955 
 956         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 957             for (int i = 0; i < as.length; i++) {
 958                 long a = as[i];
 959                 long b = bs[i];
 960                 boolean m = ms[i % ms.length];
 961                 rs[i] = (m ? (long)((a << CONST_SHIFT)) : a);
 962             }
 963         }
 964 
 965         bh.consume(rs);
 966     }
 967 
 968     @Benchmark
 969     public void ASHRShiftConst(Blackhole bh) {
 970         long[] as = fa.apply(size);
 971         long[] bs = fb.apply(size);
 972         long[] rs = fr.apply(size);
 973 
 974         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 975             for (int i = 0; i < as.length; i++) {
 976                 long a = as[i];
 977                 long b = bs[i];
 978                 rs[i] = (long)((a >> CONST_SHIFT));
 979             }
 980         }
 981 
 982         bh.consume(rs);
 983     }
 984 
 985     @Benchmark
 986     public void ASHRMaskedShiftConst(Blackhole bh) {
 987         long[] as = fa.apply(size);
 988         long[] bs = fb.apply(size);
 989         long[] rs = fr.apply(size);
 990         boolean[] ms = fm.apply(size);
 991 
 992         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 993             for (int i = 0; i < as.length; i++) {
 994                 long a = as[i];
 995                 long b = bs[i];
 996                 boolean m = ms[i % ms.length];
 997                 rs[i] = (m ? (long)((a >> CONST_SHIFT)) : a);
 998             }
 999         }
1000 
1001         bh.consume(rs);
1002     }
1003 
1004     @Benchmark
1005     public void RORShiftConst(Blackhole bh) {
1006         long[] as = fa.apply(size);
1007         long[] bs = fb.apply(size);
1008         long[] rs = fr.apply(size);
1009 
1010         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1011             for (int i = 0; i < as.length; i++) {
1012                 long a = as[i];
1013                 long b = bs[i];
1014                 rs[i] = (long)(ROR_scalar(a, CONST_SHIFT));
1015             }
1016         }
1017 
1018         bh.consume(rs);
1019     }
1020 
1021     @Benchmark
1022     public void RORMaskedShiftConst(Blackhole bh) {
1023         long[] as = fa.apply(size);
1024         long[] bs = fb.apply(size);
1025         long[] rs = fr.apply(size);
1026         boolean[] ms = fm.apply(size);
1027 
1028         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1029             for (int i = 0; i < as.length; i++) {
1030                 long a = as[i];
1031                 long b = bs[i];
1032                 boolean m = ms[i % ms.length];
1033                 rs[i] = (m ? (long)(ROR_scalar(a, CONST_SHIFT)) : a);
1034             }
1035         }
1036 
1037         bh.consume(rs);
1038     }
1039 
1040     @Benchmark
1041     public void ROLShiftConst(Blackhole bh) {
1042         long[] as = fa.apply(size);
1043         long[] bs = fb.apply(size);
1044         long[] rs = fr.apply(size);
1045 
1046         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1047             for (int i = 0; i < as.length; i++) {
1048                 long a = as[i];
1049                 long b = bs[i];
1050                 rs[i] = (long)(ROL_scalar(a, CONST_SHIFT));
1051             }
1052         }
1053 
1054         bh.consume(rs);
1055     }
1056 
1057     @Benchmark
1058     public void ROLMaskedShiftConst(Blackhole bh) {
1059         long[] as = fa.apply(size);
1060         long[] bs = fb.apply(size);
1061         long[] rs = fr.apply(size);
1062         boolean[] ms = fm.apply(size);
1063 
1064         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1065             for (int i = 0; i < as.length; i++) {
1066                 long a = as[i];
1067                 long b = bs[i];
1068                 boolean m = ms[i % ms.length];
1069                 rs[i] = (m ? (long)(ROL_scalar(a, CONST_SHIFT)) : a);
1070             }
1071         }
1072 
1073         bh.consume(rs);
1074     }
1075 
1076     @Benchmark
1077     public void MIN(Blackhole bh) {
1078         long[] as = fa.apply(size);
1079         long[] bs = fb.apply(size);
1080         long[] rs = fr.apply(size);
1081 
1082         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1083             for (int i = 0; i < as.length; i++) {
1084                 long a = as[i];
1085                 long b = bs[i];
1086                 rs[i] = (long)(Math.min(a, b));
1087             }
1088         }
1089 
1090         bh.consume(rs);
1091     }
1092 
1093     @Benchmark
1094     public void MAX(Blackhole bh) {
1095         long[] as = fa.apply(size);
1096         long[] bs = fb.apply(size);
1097         long[] rs = fr.apply(size);
1098 
1099         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1100             for (int i = 0; i < as.length; i++) {
1101                 long a = as[i];
1102                 long b = bs[i];
1103                 rs[i] = (long)(Math.max(a, b));
1104             }
1105         }
1106 
1107         bh.consume(rs);
1108     }
1109 
1110     @Benchmark
1111     public void ANDLanes(Blackhole bh) {
1112         long[] as = fa.apply(size);
1113         long r = -1;
1114         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1115             r = -1;
1116             for (int i = 0; i < as.length; i++) {
1117                 r &= as[i];
1118             }
1119         }
1120         bh.consume(r);
1121     }
1122 
1123     @Benchmark
1124     public void ANDMaskedLanes(Blackhole bh) {
1125         long[] as = fa.apply(size);
1126         boolean[] ms = fm.apply(size);
1127         long r = -1;
1128         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1129             r = -1;
1130             for (int i = 0; i < as.length; i++) {
1131                 if (ms[i % ms.length])
1132                     r &= as[i];
1133             }
1134         }
1135         bh.consume(r);
1136     }
1137 
1138     @Benchmark
1139     public void ORLanes(Blackhole bh) {
1140         long[] as = fa.apply(size);
1141         long r = 0;
1142         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1143             r = 0;
1144             for (int i = 0; i < as.length; i++) {
1145                 r |= as[i];
1146             }
1147         }
1148         bh.consume(r);
1149     }
1150 
1151     @Benchmark
1152     public void ORMaskedLanes(Blackhole bh) {
1153         long[] as = fa.apply(size);
1154         boolean[] ms = fm.apply(size);
1155         long r = 0;
1156         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1157             r = 0;
1158             for (int i = 0; i < as.length; i++) {
1159                 if (ms[i % ms.length])
1160                     r |= as[i];
1161             }
1162         }
1163         bh.consume(r);
1164     }
1165 
1166     @Benchmark
1167     public void XORLanes(Blackhole bh) {
1168         long[] as = fa.apply(size);
1169         long r = 0;
1170         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1171             r = 0;
1172             for (int i = 0; i < as.length; i++) {
1173                 r ^= as[i];
1174             }
1175         }
1176         bh.consume(r);
1177     }
1178 
1179     @Benchmark
1180     public void XORMaskedLanes(Blackhole bh) {
1181         long[] as = fa.apply(size);
1182         boolean[] ms = fm.apply(size);
1183         long r = 0;
1184         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1185             r = 0;
1186             for (int i = 0; i < as.length; i++) {
1187                 if (ms[i % ms.length])
1188                     r ^= as[i];
1189             }
1190         }
1191         bh.consume(r);
1192     }
1193 
1194     @Benchmark
1195     public void ADDLanes(Blackhole bh) {
1196         long[] as = fa.apply(size);
1197         long r = 0;
1198         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1199             r = 0;
1200             for (int i = 0; i < as.length; i++) {
1201                 r += as[i];
1202             }
1203         }
1204         bh.consume(r);
1205     }
1206 
1207     @Benchmark
1208     public void ADDMaskedLanes(Blackhole bh) {
1209         long[] as = fa.apply(size);
1210         boolean[] ms = fm.apply(size);
1211         long r = 0;
1212         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1213             r = 0;
1214             for (int i = 0; i < as.length; i++) {
1215                 if (ms[i % ms.length])
1216                     r += as[i];
1217             }
1218         }
1219         bh.consume(r);
1220     }
1221 
1222     @Benchmark
1223     public void MULLanes(Blackhole bh) {
1224         long[] as = fa.apply(size);
1225         long r = 1;
1226         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1227             r = 1;
1228             for (int i = 0; i < as.length; i++) {
1229                 r *= as[i];
1230             }
1231         }
1232         bh.consume(r);
1233     }
1234 
1235     @Benchmark
1236     public void MULMaskedLanes(Blackhole bh) {
1237         long[] as = fa.apply(size);
1238         boolean[] ms = fm.apply(size);
1239         long r = 1;
1240         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1241             r = 1;
1242             for (int i = 0; i < as.length; i++) {
1243                 if (ms[i % ms.length])
1244                     r *= as[i];
1245             }
1246         }
1247         bh.consume(r);
1248     }
1249 
1250     @Benchmark
1251     public void anyTrue(Blackhole bh) {
1252         boolean[] ms = fm.apply(size);
1253         boolean r = false;
1254         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1255             r = false;
1256             for (int i = 0; i < ms.length; i++) {
1257                 r |= ms[i];
1258             }
1259         }
1260         bh.consume(r);
1261     }
1262 
1263     @Benchmark
1264     public void allTrue(Blackhole bh) {
1265         boolean[] ms = fm.apply(size);
1266         boolean r = true;
1267         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1268             r = true;
1269             for (int i = 0; i < ms.length; i++) {
1270                 r &= ms[i];
1271             }
1272         }
1273         bh.consume(r);
1274     }
1275 
1276     @Benchmark
1277     public void IS_DEFAULT(Blackhole bh) {
1278         long[] as = fa.apply(size);
1279         boolean r = true;
1280 
1281         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1282             for (int i = 0; i < as.length; i++) {
1283                 long a = as[i];
1284                 r &= (bits(a)==0); // accumulate so JIT can't eliminate the computation
1285             }
1286         }
1287 
1288         bh.consume(r);
1289     }
1290 
1291     @Benchmark
1292     public void IS_NEGATIVE(Blackhole bh) {
1293         long[] as = fa.apply(size);
1294         boolean r = true;
1295 
1296         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1297             for (int i = 0; i < as.length; i++) {
1298                 long a = as[i];
1299                 r &= (bits(a)<0); // accumulate so JIT can't eliminate the computation
1300             }
1301         }
1302 
1303         bh.consume(r);
1304     }
1305 
1306     @Benchmark
1307     public void LT(Blackhole bh) {
1308         long[] as = fa.apply(size);
1309         long[] bs = fb.apply(size);
1310         boolean r = true;
1311 
1312         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1313             for (int i = 0; i < as.length; i++) {
1314                 r &= lt(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1315             }
1316         }
1317 
1318         bh.consume(r);
1319     }
1320 
1321     @Benchmark
1322     public void GT(Blackhole bh) {
1323         long[] as = fa.apply(size);
1324         long[] bs = fb.apply(size);
1325         boolean r = true;
1326 
1327         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1328             for (int i = 0; i < as.length; i++) {
1329                 r &= gt(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1330             }
1331         }
1332 
1333         bh.consume(r);
1334     }
1335 
1336     @Benchmark
1337     public void EQ(Blackhole bh) {
1338         long[] as = fa.apply(size);
1339         long[] bs = fb.apply(size);
1340         boolean r = true;
1341 
1342         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1343             for (int i = 0; i < as.length; i++) {
1344                 r &= eq(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1345             }
1346         }
1347 
1348         bh.consume(r);
1349     }
1350 
1351     @Benchmark
1352     public void NE(Blackhole bh) {
1353         long[] as = fa.apply(size);
1354         long[] bs = fb.apply(size);
1355         boolean r = true;
1356 
1357         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1358             for (int i = 0; i < as.length; i++) {
1359                 r &= neq(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1360             }
1361         }
1362 
1363         bh.consume(r);
1364     }
1365 
1366     @Benchmark
1367     public void LE(Blackhole bh) {
1368         long[] as = fa.apply(size);
1369         long[] bs = fb.apply(size);
1370         boolean r = true;
1371 
1372         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1373             for (int i = 0; i < as.length; i++) {
1374                 r &= le(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1375             }
1376         }
1377 
1378         bh.consume(r);
1379     }
1380 
1381     @Benchmark
1382     public void GE(Blackhole bh) {
1383         long[] as = fa.apply(size);
1384         long[] bs = fb.apply(size);
1385         boolean r = true;
1386 
1387         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1388             for (int i = 0; i < as.length; i++) {
1389                 r &= ge(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1390             }
1391         }
1392 
1393         bh.consume(r);
1394     }
1395 
1396     @Benchmark
1397     public void UNSIGNED_LT(Blackhole bh) {
1398         long[] as = fa.apply(size);
1399         long[] bs = fb.apply(size);
1400         boolean r = true;
1401 
1402         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1403             for (int i = 0; i < as.length; i++) {
1404                 r &= ult(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1405             }
1406         }
1407 
1408         bh.consume(r);
1409     }
1410 
1411     @Benchmark
1412     public void UNSIGNED_GT(Blackhole bh) {
1413         long[] as = fa.apply(size);
1414         long[] bs = fb.apply(size);
1415         boolean r = true;
1416 
1417         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1418             for (int i = 0; i < as.length; i++) {
1419                 r &= ugt(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1420             }
1421         }
1422 
1423         bh.consume(r);
1424     }
1425 
1426     @Benchmark
1427     public void UNSIGNED_LE(Blackhole bh) {
1428         long[] as = fa.apply(size);
1429         long[] bs = fb.apply(size);
1430         boolean r = true;
1431 
1432         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1433             for (int i = 0; i < as.length; i++) {
1434                 r &= ule(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1435             }
1436         }
1437 
1438         bh.consume(r);
1439     }
1440 
1441     @Benchmark
1442     public void UNSIGNED_GE(Blackhole bh) {
1443         long[] as = fa.apply(size);
1444         long[] bs = fb.apply(size);
1445         boolean r = true;
1446 
1447         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1448             for (int i = 0; i < as.length; i++) {
1449                 r &= uge(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1450             }
1451         }
1452 
1453         bh.consume(r);
1454     }
1455 
1456     @Benchmark
1457     public void blend(Blackhole bh) {
1458         long[] as = fa.apply(size);
1459         long[] bs = fb.apply(size);
1460         long[] rs = fr.apply(size);
1461         boolean[] ms = fm.apply(size);
1462 
1463         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1464             for (int i = 0; i < as.length; i++) {
1465                 long a = as[i];
1466                 long b = bs[i];
1467                 boolean m = ms[i % ms.length];
1468                 rs[i] = (m ? b : a);
1469             }
1470         }
1471 
1472         bh.consume(rs);
1473     }
1474 
1475     void rearrangeShared(int window, Blackhole bh) {
1476         long[] as = fa.apply(size);
1477         int[] order = fs.apply(size);
1478         long[] rs = fr.apply(size);
1479 
1480         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1481             for (int i = 0; i < as.length; i += window) {
1482                 for (int j = 0; j < window; j++) {
1483                     long a = as[i+j];
1484                     int pos = order[j];
1485                     rs[i + pos] = a;
1486                 }
1487             }
1488         }
1489 
1490         bh.consume(rs);
1491     }
1492 
1493     @Benchmark
1494     public void rearrange064(Blackhole bh) {
1495         int window = 64 / Long.SIZE;
1496         rearrangeShared(window, bh);
1497     }
1498 
1499     @Benchmark
1500     public void rearrange128(Blackhole bh) {
1501         int window = 128 / Long.SIZE;
1502         rearrangeShared(window, bh);
1503     }
1504 
1505     @Benchmark
1506     public void rearrange256(Blackhole bh) {
1507         int window = 256 / Long.SIZE;
1508         rearrangeShared(window, bh);
1509     }
1510 
1511     @Benchmark
1512     public void rearrange512(Blackhole bh) {
1513         int window = 512 / Long.SIZE;
1514         rearrangeShared(window, bh);
1515     }
1516 
1517     @Benchmark
1518     public void compressScalar(Blackhole bh) {
1519         long[] as = fa.apply(size);
1520         long[] rs = new long[size];
1521         boolean[] im = fmt.apply(size);
1522 
1523         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1524             for (int i = 0, j = 0; i < as.length; i++) {
1525                 if (im[i]) {
1526                     rs[j++] = as[i];
1527                 }
1528             }
1529         }
1530 
1531         bh.consume(rs);
1532     }
1533 
1534     @Benchmark
1535     public void expandScalar(Blackhole bh) {
1536         long[] as = fa.apply(size);
1537         long[] rs = new long[size];
1538         boolean[] im = fmt.apply(size);
1539 
1540         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1541             for (int i = 0, j = 0; i < as.length; i++) {
1542                 if (im[i]) {
1543                     rs[i++] = as[j++];
1544                 }
1545             }
1546         }
1547 
1548         bh.consume(rs);
1549     }
1550 
1551     @Benchmark
1552     public void maskCompressScalar(Blackhole bh) {
1553         boolean[] im = fmt.apply(size);
1554         boolean[] rm = new boolean[size];
1555 
1556         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1557             for (int i = 0, j = 0; i < im.length; i++) {
1558                 if (im[i]) {
1559                     rm[j++] = im[i];
1560                 }
1561             }
1562         }
1563 
1564         bh.consume(rm);
1565     }
1566 
1567     void broadcastShared(int window, Blackhole bh) {
1568         long[] as = fa.apply(size);
1569         long[] rs = fr.apply(size);
1570 
1571         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1572             for (int i = 0; i < as.length; i += window) {
1573                 int idx = i;
1574                 for (int j = 0; j < window; j++) {
1575                     rs[j] = as[idx];
1576                 }
1577             }
1578         }
1579 
1580         bh.consume(rs);
1581     }
1582 
1583     @Benchmark
1584     public void broadcast064(Blackhole bh) {
1585         int window = 64 / Long.SIZE;
1586         broadcastShared(window, bh);
1587     }
1588 
1589     @Benchmark
1590     public void broadcast128(Blackhole bh) {
1591         int window = 128 / Long.SIZE;
1592         broadcastShared(window, bh);
1593     }
1594 
1595     @Benchmark
1596     public void broadcast256(Blackhole bh) {
1597         int window = 256 / Long.SIZE;
1598         broadcastShared(window, bh);
1599     }
1600 
1601     @Benchmark
1602     public void broadcast512(Blackhole bh) {
1603         int window = 512 / Long.SIZE;
1604         broadcastShared(window, bh);
1605     }
1606 
1607     @Benchmark
1608     public void zero(Blackhole bh) {
1609         long[] as = fa.apply(size);
1610 
1611         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1612             for (int i = 0; i < as.length; i++) {
1613                 as[i] = (long)0;
1614             }
1615         }
1616 
1617         bh.consume(as);
1618     }
1619 
1620     @Benchmark
1621     public void BITWISE_BLEND(Blackhole bh) {
1622         long[] as = fa.apply(size);
1623         long[] bs = fb.apply(size);
1624         long[] cs = fc.apply(size);
1625         long[] rs = fr.apply(size);
1626 
1627         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1628             for (int i = 0; i < as.length; i++) {
1629                 long a = as[i];
1630                 long b = bs[i];
1631                 long c = cs[i];
1632                 rs[i] = (long)((a&~(c))|(b&c));
1633             }
1634         }
1635 
1636         bh.consume(rs);
1637     }
1638 
1639     @Benchmark
1640     public void BITWISE_BLENDMasked(Blackhole bh) {
1641         long[] as = fa.apply(size);
1642         long[] bs = fb.apply(size);
1643         long[] cs = fc.apply(size);
1644         long[] rs = fr.apply(size);
1645         boolean[] ms = fm.apply(size);
1646 
1647         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1648             for (int i = 0; i < as.length; i++) {
1649                 long a = as[i];
1650                 long b = bs[i];
1651                 long c = cs[i];
1652                 if (ms[i % ms.length]) {
1653                     rs[i] = (long)((a&~(c))|(b&c));
1654                 } else {
1655                     rs[i] = a;
1656                 }
1657             }
1658         }
1659         bh.consume(rs);
1660     }
1661     @Benchmark
1662     public void NEG(Blackhole bh) {
1663         long[] as = fa.apply(size);
1664         long[] rs = fr.apply(size);
1665 
1666         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1667             for (int i = 0; i < as.length; i++) {
1668                 long a = as[i];
1669                 rs[i] = (long)(-((long)a));
1670             }
1671         }
1672 
1673         bh.consume(rs);
1674     }
1675 
1676     @Benchmark
1677     public void NEGMasked(Blackhole bh) {
1678         long[] as = fa.apply(size);
1679         long[] rs = fr.apply(size);
1680         boolean[] ms = fm.apply(size);
1681 
1682         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1683             for (int i = 0; i < as.length; i++) {
1684                 long a = as[i];
1685                 boolean m = ms[i % ms.length];
1686                 rs[i] = (m ? (long)(-((long)a)) : a);
1687             }
1688         }
1689 
1690         bh.consume(rs);
1691     }
1692     @Benchmark
1693     public void ABS(Blackhole bh) {
1694         long[] as = fa.apply(size);
1695         long[] rs = fr.apply(size);
1696 
1697         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1698             for (int i = 0; i < as.length; i++) {
1699                 long a = as[i];
1700                 rs[i] = (long)(Math.abs((long)a));
1701             }
1702         }
1703 
1704         bh.consume(rs);
1705     }
1706 
1707     @Benchmark
1708     public void ABSMasked(Blackhole bh) {
1709         long[] as = fa.apply(size);
1710         long[] rs = fr.apply(size);
1711         boolean[] ms = fm.apply(size);
1712 
1713         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1714             for (int i = 0; i < as.length; i++) {
1715                 long a = as[i];
1716                 boolean m = ms[i % ms.length];
1717                 rs[i] = (m ? (long)(Math.abs((long)a)) : a);
1718             }
1719         }
1720 
1721         bh.consume(rs);
1722     }
1723     @Benchmark
1724     public void NOT(Blackhole bh) {
1725         long[] as = fa.apply(size);
1726         long[] rs = fr.apply(size);
1727 
1728         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1729             for (int i = 0; i < as.length; i++) {
1730                 long a = as[i];
1731                 rs[i] = (long)(~((long)a));
1732             }
1733         }
1734 
1735         bh.consume(rs);
1736     }
1737 
1738     @Benchmark
1739     public void NOTMasked(Blackhole bh) {
1740         long[] as = fa.apply(size);
1741         long[] rs = fr.apply(size);
1742         boolean[] ms = fm.apply(size);
1743 
1744         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1745             for (int i = 0; i < as.length; i++) {
1746                 long a = as[i];
1747                 boolean m = ms[i % ms.length];
1748                 rs[i] = (m ? (long)(~((long)a)) : a);
1749             }
1750         }
1751 
1752         bh.consume(rs);
1753     }
1754     @Benchmark
1755     public void ZOMO(Blackhole bh) {
1756         long[] as = fa.apply(size);
1757         long[] rs = fr.apply(size);
1758 
1759         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1760             for (int i = 0; i < as.length; i++) {
1761                 long a = as[i];
1762                 rs[i] = (long)((a==0?0:-1));
1763             }
1764         }
1765 
1766         bh.consume(rs);
1767     }
1768 
1769     @Benchmark
1770     public void ZOMOMasked(Blackhole bh) {
1771         long[] as = fa.apply(size);
1772         long[] rs = fr.apply(size);
1773         boolean[] ms = fm.apply(size);
1774 
1775         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1776             for (int i = 0; i < as.length; i++) {
1777                 long a = as[i];
1778                 boolean m = ms[i % ms.length];
1779                 rs[i] = (m ? (long)((a==0?0:-1)) : a);
1780             }
1781         }
1782 
1783         bh.consume(rs);
1784     }
1785     @Benchmark
1786     public void BIT_COUNT(Blackhole bh) {
1787         long[] as = fa.apply(size);
1788         long[] rs = fr.apply(size);
1789 
1790         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1791             for (int i = 0; i < as.length; i++) {
1792                 long a = as[i];
1793                 rs[i] = (long)(Long.bitCount(a));
1794             }
1795         }
1796 
1797         bh.consume(rs);
1798     }
1799 
1800     @Benchmark
1801     public void BIT_COUNTMasked(Blackhole bh) {
1802         long[] as = fa.apply(size);
1803         long[] rs = fr.apply(size);
1804         boolean[] ms = fm.apply(size);
1805 
1806         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1807             for (int i = 0; i < as.length; i++) {
1808                 long a = as[i];
1809                 boolean m = ms[i % ms.length];
1810                 rs[i] = (m ? (long)(Long.bitCount(a)) : a);
1811             }
1812         }
1813 
1814         bh.consume(rs);
1815     }
1816     @Benchmark
1817     public void TRAILING_ZEROS_COUNT(Blackhole bh) {
1818         long[] as = fa.apply(size);
1819         long[] rs = fr.apply(size);
1820 
1821         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1822             for (int i = 0; i < as.length; i++) {
1823                 long a = as[i];
1824                 rs[i] = (long)(TRAILING_ZEROS_COUNT_scalar(a));
1825             }
1826         }
1827 
1828         bh.consume(rs);
1829     }
1830 
1831     @Benchmark
1832     public void TRAILING_ZEROS_COUNTMasked(Blackhole bh) {
1833         long[] as = fa.apply(size);
1834         long[] rs = fr.apply(size);
1835         boolean[] ms = fm.apply(size);
1836 
1837         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1838             for (int i = 0; i < as.length; i++) {
1839                 long a = as[i];
1840                 boolean m = ms[i % ms.length];
1841                 rs[i] = (m ? (long)(TRAILING_ZEROS_COUNT_scalar(a)) : a);
1842             }
1843         }
1844 
1845         bh.consume(rs);
1846     }
1847     @Benchmark
1848     public void LEADING_ZEROS_COUNT(Blackhole bh) {
1849         long[] as = fa.apply(size);
1850         long[] rs = fr.apply(size);
1851 
1852         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1853             for (int i = 0; i < as.length; i++) {
1854                 long a = as[i];
1855                 rs[i] = (long)(LEADING_ZEROS_COUNT_scalar(a));
1856             }
1857         }
1858 
1859         bh.consume(rs);
1860     }
1861 
1862     @Benchmark
1863     public void LEADING_ZEROS_COUNTMasked(Blackhole bh) {
1864         long[] as = fa.apply(size);
1865         long[] rs = fr.apply(size);
1866         boolean[] ms = fm.apply(size);
1867 
1868         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1869             for (int i = 0; i < as.length; i++) {
1870                 long a = as[i];
1871                 boolean m = ms[i % ms.length];
1872                 rs[i] = (m ? (long)(LEADING_ZEROS_COUNT_scalar(a)) : a);
1873             }
1874         }
1875 
1876         bh.consume(rs);
1877     }
1878     @Benchmark
1879     public void REVERSE(Blackhole bh) {
1880         long[] as = fa.apply(size);
1881         long[] rs = fr.apply(size);
1882 
1883         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1884             for (int i = 0; i < as.length; i++) {
1885                 long a = as[i];
1886                 rs[i] = (long)(REVERSE_scalar(a));
1887             }
1888         }
1889 
1890         bh.consume(rs);
1891     }
1892 
1893     @Benchmark
1894     public void REVERSEMasked(Blackhole bh) {
1895         long[] as = fa.apply(size);
1896         long[] rs = fr.apply(size);
1897         boolean[] ms = fm.apply(size);
1898 
1899         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1900             for (int i = 0; i < as.length; i++) {
1901                 long a = as[i];
1902                 boolean m = ms[i % ms.length];
1903                 rs[i] = (m ? (long)(REVERSE_scalar(a)) : a);
1904             }
1905         }
1906 
1907         bh.consume(rs);
1908     }
1909     @Benchmark
1910     public void REVERSE_BYTES(Blackhole bh) {
1911         long[] as = fa.apply(size);
1912         long[] rs = fr.apply(size);
1913 
1914         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1915             for (int i = 0; i < as.length; i++) {
1916                 long a = as[i];
1917                 rs[i] = (long)(Long.reverseBytes(a));
1918             }
1919         }
1920 
1921         bh.consume(rs);
1922     }
1923 
1924     @Benchmark
1925     public void REVERSE_BYTESMasked(Blackhole bh) {
1926         long[] as = fa.apply(size);
1927         long[] rs = fr.apply(size);
1928         boolean[] ms = fm.apply(size);
1929 
1930         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1931             for (int i = 0; i < as.length; i++) {
1932                 long a = as[i];
1933                 boolean m = ms[i % ms.length];
1934                 rs[i] = (m ? (long)(Long.reverseBytes(a)) : a);
1935             }
1936         }
1937 
1938         bh.consume(rs);
1939     }
1940 }