1 /*
   2  * Copyright (c) 2018, 2021, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 package org.openjdk.bench.jdk.incubator.vector.operation;
  25 
  26 // -- This file was mechanically generated: Do not edit! -- //
  27 
  28 import java.util.concurrent.TimeUnit;
  29 import java.util.function.IntFunction;
  30 
  31 import org.openjdk.jmh.annotations.*;
  32 import org.openjdk.jmh.infra.Blackhole;
  33 
  34 @BenchmarkMode(Mode.Throughput)
  35 @OutputTimeUnit(TimeUnit.MILLISECONDS)
  36 @State(Scope.Benchmark)
  37 @Warmup(iterations = 3, time = 1)
  38 @Measurement(iterations = 5, time = 1)
  39 @Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
  40 public class LongScalar extends AbstractVectorBenchmark {
  41     static final int INVOC_COUNT = 1; // To align with vector benchmarks.
  42 
  43     @Param("1024")
  44     int size;
  45 
  46     long[] fill(IntFunction<Long> f) {
  47         long[] array = new long[size];
  48         for (int i = 0; i < array.length; i++) {
  49             array[i] = f.apply(i);
  50         }
  51         return array;
  52     }
  53 
  54     static long bits(long e) {
  55         return e;
  56     }
  57 
  58     long[] as, bs, cs, rs;
  59     boolean[] ms, mt, rms;
  60     int[] ss;
  61 
  62     @Setup
  63     public void init() {
  64         as = fill(i -> (long)(2*i));
  65         bs = fill(i -> (long)(i+1));
  66         cs = fill(i -> (long)(i+5));
  67         rs = fill(i -> (long)0);
  68         ms = fillMask(size, i -> (i % 2) == 0);
  69         mt = fillMask(size, i -> true);
  70         rms = fillMask(size, i -> false);
  71 
  72         ss = fillInt(size, i -> RANDOM.nextInt(Math.max(i,1)));
  73     }
  74 
  75     final IntFunction<long[]> fa = vl -> as;
  76     final IntFunction<long[]> fb = vl -> bs;
  77     final IntFunction<long[]> fc = vl -> cs;
  78     final IntFunction<long[]> fr = vl -> rs;
  79     final IntFunction<boolean[]> fm = vl -> ms;
  80     final IntFunction<boolean[]> fmt = vl -> mt;
  81     final IntFunction<boolean[]> fmr = vl -> rms;
  82     final IntFunction<int[]> fs = vl -> ss;
  83 
  84     static boolean eq(long a, long b) {
  85         return a == b;
  86     }
  87 
  88     static boolean neq(long a, long b) {
  89         return a != b;
  90     }
  91 
  92     static boolean lt(long a, long b) {
  93         return a < b;
  94     }
  95 
  96     static boolean le(long a, long b) {
  97         return a <= b;
  98     }
  99 
 100     static boolean gt(long a, long b) {
 101         return a > b;
 102     }
 103 
 104     static boolean ge(long a, long b) {
 105         return a >= b;
 106     }
 107 
 108     static boolean ult(long a, long b) {
 109         return Long.compareUnsigned(a, b) < 0;
 110     }
 111 
 112     static boolean ule(long a, long b) {
 113         return Long.compareUnsigned(a, b) <= 0;
 114     }
 115 
 116     static boolean ugt(long a, long b) {
 117         return Long.compareUnsigned(a, b) > 0;
 118     }
 119 
 120     static boolean uge(long a, long b) {
 121         return Long.compareUnsigned(a, b) >= 0;
 122     }
 123 
 124     @Benchmark
 125     public void ADD(Blackhole bh) {
 126         long[] as = fa.apply(size);
 127         long[] bs = fb.apply(size);
 128         long[] rs = fr.apply(size);
 129 
 130         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 131             for (int i = 0; i < as.length; i++) {
 132                 long a = as[i];
 133                 long b = bs[i];
 134                 rs[i] = (long)(a + b);
 135             }
 136         }
 137 
 138         bh.consume(rs);
 139     }
 140 
 141     @Benchmark
 142     public void ADDMasked(Blackhole bh) {
 143         long[] as = fa.apply(size);
 144         long[] bs = fb.apply(size);
 145         long[] rs = fr.apply(size);
 146         boolean[] ms = fm.apply(size);
 147 
 148         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 149             for (int i = 0; i < as.length; i++) {
 150                 long a = as[i];
 151                 long b = bs[i];
 152                 if (ms[i % ms.length]) {
 153                     rs[i] = (long)(a + b);
 154                 } else {
 155                     rs[i] = a;
 156                 }
 157             }
 158         }
 159         bh.consume(rs);
 160     }
 161 
 162     @Benchmark
 163     public void SUB(Blackhole bh) {
 164         long[] as = fa.apply(size);
 165         long[] bs = fb.apply(size);
 166         long[] rs = fr.apply(size);
 167 
 168         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 169             for (int i = 0; i < as.length; i++) {
 170                 long a = as[i];
 171                 long b = bs[i];
 172                 rs[i] = (long)(a - b);
 173             }
 174         }
 175 
 176         bh.consume(rs);
 177     }
 178 
 179     @Benchmark
 180     public void SUBMasked(Blackhole bh) {
 181         long[] as = fa.apply(size);
 182         long[] bs = fb.apply(size);
 183         long[] rs = fr.apply(size);
 184         boolean[] ms = fm.apply(size);
 185 
 186         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 187             for (int i = 0; i < as.length; i++) {
 188                 long a = as[i];
 189                 long b = bs[i];
 190                 if (ms[i % ms.length]) {
 191                     rs[i] = (long)(a - b);
 192                 } else {
 193                     rs[i] = a;
 194                 }
 195             }
 196         }
 197         bh.consume(rs);
 198     }
 199 
 200     @Benchmark
 201     public void MUL(Blackhole bh) {
 202         long[] as = fa.apply(size);
 203         long[] bs = fb.apply(size);
 204         long[] rs = fr.apply(size);
 205 
 206         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 207             for (int i = 0; i < as.length; i++) {
 208                 long a = as[i];
 209                 long b = bs[i];
 210                 rs[i] = (long)(a * b);
 211             }
 212         }
 213 
 214         bh.consume(rs);
 215     }
 216 
 217     @Benchmark
 218     public void MULMasked(Blackhole bh) {
 219         long[] as = fa.apply(size);
 220         long[] bs = fb.apply(size);
 221         long[] rs = fr.apply(size);
 222         boolean[] ms = fm.apply(size);
 223 
 224         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 225             for (int i = 0; i < as.length; i++) {
 226                 long a = as[i];
 227                 long b = bs[i];
 228                 if (ms[i % ms.length]) {
 229                     rs[i] = (long)(a * b);
 230                 } else {
 231                     rs[i] = a;
 232                 }
 233             }
 234         }
 235         bh.consume(rs);
 236     }
 237 
 238 
 239 
 240     @Benchmark
 241     public void FIRST_NONZERO(Blackhole bh) {
 242         long[] as = fa.apply(size);
 243         long[] bs = fb.apply(size);
 244         long[] rs = fr.apply(size);
 245 
 246         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 247             for (int i = 0; i < as.length; i++) {
 248                 long a = as[i];
 249                 long b = bs[i];
 250                 rs[i] = (long)((a)!=0?a:b);
 251             }
 252         }
 253 
 254         bh.consume(rs);
 255     }
 256 
 257     @Benchmark
 258     public void FIRST_NONZEROMasked(Blackhole bh) {
 259         long[] as = fa.apply(size);
 260         long[] bs = fb.apply(size);
 261         long[] rs = fr.apply(size);
 262         boolean[] ms = fm.apply(size);
 263 
 264         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 265             for (int i = 0; i < as.length; i++) {
 266                 long a = as[i];
 267                 long b = bs[i];
 268                 if (ms[i % ms.length]) {
 269                     rs[i] = (long)((a)!=0?a:b);
 270                 } else {
 271                     rs[i] = a;
 272                 }
 273             }
 274         }
 275         bh.consume(rs);
 276     }
 277 
 278 
 279     @Benchmark
 280     public void AND(Blackhole bh) {
 281         long[] as = fa.apply(size);
 282         long[] bs = fb.apply(size);
 283         long[] rs = fr.apply(size);
 284 
 285         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 286             for (int i = 0; i < as.length; i++) {
 287                 long a = as[i];
 288                 long b = bs[i];
 289                 rs[i] = (long)(a & b);
 290             }
 291         }
 292 
 293         bh.consume(rs);
 294     }
 295 
 296 
 297 
 298     @Benchmark
 299     public void ANDMasked(Blackhole bh) {
 300         long[] as = fa.apply(size);
 301         long[] bs = fb.apply(size);
 302         long[] rs = fr.apply(size);
 303         boolean[] ms = fm.apply(size);
 304 
 305         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 306             for (int i = 0; i < as.length; i++) {
 307                 long a = as[i];
 308                 long b = bs[i];
 309                 if (ms[i % ms.length]) {
 310                     rs[i] = (long)(a & b);
 311                 } else {
 312                     rs[i] = a;
 313                 }
 314             }
 315         }
 316         bh.consume(rs);
 317     }
 318 
 319 
 320 
 321     @Benchmark
 322     public void AND_NOT(Blackhole bh) {
 323         long[] as = fa.apply(size);
 324         long[] bs = fb.apply(size);
 325         long[] rs = fr.apply(size);
 326 
 327         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 328             for (int i = 0; i < as.length; i++) {
 329                 long a = as[i];
 330                 long b = bs[i];
 331                 rs[i] = (long)(a & ~b);
 332             }
 333         }
 334 
 335         bh.consume(rs);
 336     }
 337 
 338 
 339 
 340     @Benchmark
 341     public void AND_NOTMasked(Blackhole bh) {
 342         long[] as = fa.apply(size);
 343         long[] bs = fb.apply(size);
 344         long[] rs = fr.apply(size);
 345         boolean[] ms = fm.apply(size);
 346 
 347         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 348             for (int i = 0; i < as.length; i++) {
 349                 long a = as[i];
 350                 long b = bs[i];
 351                 if (ms[i % ms.length]) {
 352                     rs[i] = (long)(a & ~b);
 353                 } else {
 354                     rs[i] = a;
 355                 }
 356             }
 357         }
 358         bh.consume(rs);
 359     }
 360 
 361 
 362 
 363     @Benchmark
 364     public void OR(Blackhole bh) {
 365         long[] as = fa.apply(size);
 366         long[] bs = fb.apply(size);
 367         long[] rs = fr.apply(size);
 368 
 369         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 370             for (int i = 0; i < as.length; i++) {
 371                 long a = as[i];
 372                 long b = bs[i];
 373                 rs[i] = (long)(a | b);
 374             }
 375         }
 376 
 377         bh.consume(rs);
 378     }
 379 
 380 
 381 
 382     @Benchmark
 383     public void ORMasked(Blackhole bh) {
 384         long[] as = fa.apply(size);
 385         long[] bs = fb.apply(size);
 386         long[] rs = fr.apply(size);
 387         boolean[] ms = fm.apply(size);
 388 
 389         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 390             for (int i = 0; i < as.length; i++) {
 391                 long a = as[i];
 392                 long b = bs[i];
 393                 if (ms[i % ms.length]) {
 394                     rs[i] = (long)(a | b);
 395                 } else {
 396                     rs[i] = a;
 397                 }
 398             }
 399         }
 400         bh.consume(rs);
 401     }
 402 
 403 
 404 
 405     @Benchmark
 406     public void XOR(Blackhole bh) {
 407         long[] as = fa.apply(size);
 408         long[] bs = fb.apply(size);
 409         long[] rs = fr.apply(size);
 410 
 411         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 412             for (int i = 0; i < as.length; i++) {
 413                 long a = as[i];
 414                 long b = bs[i];
 415                 rs[i] = (long)(a ^ b);
 416             }
 417         }
 418 
 419         bh.consume(rs);
 420     }
 421 
 422 
 423 
 424     @Benchmark
 425     public void XORMasked(Blackhole bh) {
 426         long[] as = fa.apply(size);
 427         long[] bs = fb.apply(size);
 428         long[] rs = fr.apply(size);
 429         boolean[] ms = fm.apply(size);
 430 
 431         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 432             for (int i = 0; i < as.length; i++) {
 433                 long a = as[i];
 434                 long b = bs[i];
 435                 if (ms[i % ms.length]) {
 436                     rs[i] = (long)(a ^ b);
 437                 } else {
 438                     rs[i] = a;
 439                 }
 440             }
 441         }
 442         bh.consume(rs);
 443     }
 444 
 445 
 446 
 447     @Benchmark
 448     public void LSHL(Blackhole bh) {
 449         long[] as = fa.apply(size);
 450         long[] bs = fb.apply(size);
 451         long[] rs = fr.apply(size);
 452 
 453         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 454             for (int i = 0; i < as.length; i++) {
 455                 long a = as[i];
 456                 long b = bs[i];
 457                 rs[i] = (long)((a << b));
 458             }
 459         }
 460 
 461         bh.consume(rs);
 462     }
 463 
 464 
 465 
 466     @Benchmark
 467     public void LSHLMasked(Blackhole bh) {
 468         long[] as = fa.apply(size);
 469         long[] bs = fb.apply(size);
 470         long[] rs = fr.apply(size);
 471         boolean[] ms = fm.apply(size);
 472 
 473         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 474             for (int i = 0; i < as.length; i++) {
 475                 long a = as[i];
 476                 long b = bs[i];
 477                 if (ms[i % ms.length]) {
 478                     rs[i] = (long)((a << b));
 479                 } else {
 480                     rs[i] = a;
 481                 }
 482             }
 483         }
 484         bh.consume(rs);
 485     }
 486 
 487 
 488 
 489 
 490 
 491 
 492 
 493     @Benchmark
 494     public void ASHR(Blackhole bh) {
 495         long[] as = fa.apply(size);
 496         long[] bs = fb.apply(size);
 497         long[] rs = fr.apply(size);
 498 
 499         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 500             for (int i = 0; i < as.length; i++) {
 501                 long a = as[i];
 502                 long b = bs[i];
 503                 rs[i] = (long)((a >> b));
 504             }
 505         }
 506 
 507         bh.consume(rs);
 508     }
 509 
 510 
 511 
 512     @Benchmark
 513     public void ASHRMasked(Blackhole bh) {
 514         long[] as = fa.apply(size);
 515         long[] bs = fb.apply(size);
 516         long[] rs = fr.apply(size);
 517         boolean[] ms = fm.apply(size);
 518 
 519         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 520             for (int i = 0; i < as.length; i++) {
 521                 long a = as[i];
 522                 long b = bs[i];
 523                 if (ms[i % ms.length]) {
 524                     rs[i] = (long)((a >> b));
 525                 } else {
 526                     rs[i] = a;
 527                 }
 528             }
 529         }
 530         bh.consume(rs);
 531     }
 532 
 533 
 534 
 535 
 536 
 537 
 538 
 539     @Benchmark
 540     public void LSHR(Blackhole bh) {
 541         long[] as = fa.apply(size);
 542         long[] bs = fb.apply(size);
 543         long[] rs = fr.apply(size);
 544 
 545         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 546             for (int i = 0; i < as.length; i++) {
 547                 long a = as[i];
 548                 long b = bs[i];
 549                 rs[i] = (long)((a >>> b));
 550             }
 551         }
 552 
 553         bh.consume(rs);
 554     }
 555 
 556 
 557 
 558     @Benchmark
 559     public void LSHRMasked(Blackhole bh) {
 560         long[] as = fa.apply(size);
 561         long[] bs = fb.apply(size);
 562         long[] rs = fr.apply(size);
 563         boolean[] ms = fm.apply(size);
 564 
 565         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 566             for (int i = 0; i < as.length; i++) {
 567                 long a = as[i];
 568                 long b = bs[i];
 569                 if (ms[i % ms.length]) {
 570                     rs[i] = (long)((a >>> b));
 571                 } else {
 572                     rs[i] = a;
 573                 }
 574             }
 575         }
 576         bh.consume(rs);
 577     }
 578 
 579 
 580 
 581 
 582 
 583 
 584 
 585     @Benchmark
 586     public void LSHLShift(Blackhole bh) {
 587         long[] as = fa.apply(size);
 588         long[] bs = fb.apply(size);
 589         long[] rs = fr.apply(size);
 590 
 591         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 592             for (int i = 0; i < as.length; i++) {
 593                 long a = as[i];
 594                 long b = bs[i];
 595                 rs[i] = (long)((a << b));
 596             }
 597         }
 598 
 599         bh.consume(rs);
 600     }
 601 
 602 
 603 
 604     @Benchmark
 605     public void LSHLMaskedShift(Blackhole bh) {
 606         long[] as = fa.apply(size);
 607         long[] bs = fb.apply(size);
 608         long[] rs = fr.apply(size);
 609         boolean[] ms = fm.apply(size);
 610 
 611         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 612             for (int i = 0; i < as.length; i++) {
 613                 long a = as[i];
 614                 long b = bs[i];
 615                 boolean m = ms[i % ms.length];
 616                 rs[i] = (m ? (long)((a << b)) : a);
 617             }
 618         }
 619 
 620         bh.consume(rs);
 621     }
 622 
 623 
 624 
 625 
 626 
 627 
 628 
 629     @Benchmark
 630     public void LSHRShift(Blackhole bh) {
 631         long[] as = fa.apply(size);
 632         long[] bs = fb.apply(size);
 633         long[] rs = fr.apply(size);
 634 
 635         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 636             for (int i = 0; i < as.length; i++) {
 637                 long a = as[i];
 638                 long b = bs[i];
 639                 rs[i] = (long)((a >>> b));
 640             }
 641         }
 642 
 643         bh.consume(rs);
 644     }
 645 
 646 
 647 
 648     @Benchmark
 649     public void LSHRMaskedShift(Blackhole bh) {
 650         long[] as = fa.apply(size);
 651         long[] bs = fb.apply(size);
 652         long[] rs = fr.apply(size);
 653         boolean[] ms = fm.apply(size);
 654 
 655         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 656             for (int i = 0; i < as.length; i++) {
 657                 long a = as[i];
 658                 long b = bs[i];
 659                 boolean m = ms[i % ms.length];
 660                 rs[i] = (m ? (long)((a >>> b)) : a);
 661             }
 662         }
 663 
 664         bh.consume(rs);
 665     }
 666 
 667 
 668 
 669 
 670 
 671 
 672 
 673     @Benchmark
 674     public void ASHRShift(Blackhole bh) {
 675         long[] as = fa.apply(size);
 676         long[] bs = fb.apply(size);
 677         long[] rs = fr.apply(size);
 678 
 679         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 680             for (int i = 0; i < as.length; i++) {
 681                 long a = as[i];
 682                 long b = bs[i];
 683                 rs[i] = (long)((a >> b));
 684             }
 685         }
 686 
 687         bh.consume(rs);
 688     }
 689 
 690 
 691 
 692     @Benchmark
 693     public void ASHRMaskedShift(Blackhole bh) {
 694         long[] as = fa.apply(size);
 695         long[] bs = fb.apply(size);
 696         long[] rs = fr.apply(size);
 697         boolean[] ms = fm.apply(size);
 698 
 699         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 700             for (int i = 0; i < as.length; i++) {
 701                 long a = as[i];
 702                 long b = bs[i];
 703                 boolean m = ms[i % ms.length];
 704                 rs[i] = (m ? (long)((a >> b)) : a);
 705             }
 706         }
 707 
 708         bh.consume(rs);
 709     }
 710 
 711 
 712 
 713 
 714 
 715 
 716     @Benchmark
 717     public void MIN(Blackhole bh) {
 718         long[] as = fa.apply(size);
 719         long[] bs = fb.apply(size);
 720         long[] rs = fr.apply(size);
 721 
 722         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 723             for (int i = 0; i < as.length; i++) {
 724                 long a = as[i];
 725                 long b = bs[i];
 726                 rs[i] = (long)(Math.min(a, b));
 727             }
 728         }
 729 
 730         bh.consume(rs);
 731     }
 732 
 733     @Benchmark
 734     public void MAX(Blackhole bh) {
 735         long[] as = fa.apply(size);
 736         long[] bs = fb.apply(size);
 737         long[] rs = fr.apply(size);
 738 
 739         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 740             for (int i = 0; i < as.length; i++) {
 741                 long a = as[i];
 742                 long b = bs[i];
 743                 rs[i] = (long)(Math.max(a, b));
 744             }
 745         }
 746 
 747         bh.consume(rs);
 748     }
 749 
 750 
 751     @Benchmark
 752     public void ANDLanes(Blackhole bh) {
 753         long[] as = fa.apply(size);
 754         long r = -1;
 755         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 756             r = -1;
 757             for (int i = 0; i < as.length; i++) {
 758                 r &= as[i];
 759             }
 760         }
 761         bh.consume(r);
 762     }
 763 
 764 
 765 
 766     @Benchmark
 767     public void ANDMaskedLanes(Blackhole bh) {
 768         long[] as = fa.apply(size);
 769         boolean[] ms = fm.apply(size);
 770         long r = -1;
 771         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 772             r = -1;
 773             for (int i = 0; i < as.length; i++) {
 774                 if (ms[i % ms.length])
 775                     r &= as[i];
 776             }
 777         }
 778         bh.consume(r);
 779     }
 780 
 781 
 782 
 783     @Benchmark
 784     public void ORLanes(Blackhole bh) {
 785         long[] as = fa.apply(size);
 786         long r = 0;
 787         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 788             r = 0;
 789             for (int i = 0; i < as.length; i++) {
 790                 r |= as[i];
 791             }
 792         }
 793         bh.consume(r);
 794     }
 795 
 796 
 797 
 798     @Benchmark
 799     public void ORMaskedLanes(Blackhole bh) {
 800         long[] as = fa.apply(size);
 801         boolean[] ms = fm.apply(size);
 802         long r = 0;
 803         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 804             r = 0;
 805             for (int i = 0; i < as.length; i++) {
 806                 if (ms[i % ms.length])
 807                     r |= as[i];
 808             }
 809         }
 810         bh.consume(r);
 811     }
 812 
 813 
 814 
 815     @Benchmark
 816     public void XORLanes(Blackhole bh) {
 817         long[] as = fa.apply(size);
 818         long r = 0;
 819         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 820             r = 0;
 821             for (int i = 0; i < as.length; i++) {
 822                 r ^= as[i];
 823             }
 824         }
 825         bh.consume(r);
 826     }
 827 
 828 
 829 
 830     @Benchmark
 831     public void XORMaskedLanes(Blackhole bh) {
 832         long[] as = fa.apply(size);
 833         boolean[] ms = fm.apply(size);
 834         long r = 0;
 835         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 836             r = 0;
 837             for (int i = 0; i < as.length; i++) {
 838                 if (ms[i % ms.length])
 839                     r ^= as[i];
 840             }
 841         }
 842         bh.consume(r);
 843     }
 844 
 845 
 846     @Benchmark
 847     public void ADDLanes(Blackhole bh) {
 848         long[] as = fa.apply(size);
 849         long r = 0;
 850         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 851             r = 0;
 852             for (int i = 0; i < as.length; i++) {
 853                 r += as[i];
 854             }
 855         }
 856         bh.consume(r);
 857     }
 858 
 859     @Benchmark
 860     public void ADDMaskedLanes(Blackhole bh) {
 861         long[] as = fa.apply(size);
 862         boolean[] ms = fm.apply(size);
 863         long r = 0;
 864         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 865             r = 0;
 866             for (int i = 0; i < as.length; i++) {
 867                 if (ms[i % ms.length])
 868                     r += as[i];
 869             }
 870         }
 871         bh.consume(r);
 872     }
 873 
 874     @Benchmark
 875     public void MULLanes(Blackhole bh) {
 876         long[] as = fa.apply(size);
 877         long r = 1;
 878         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 879             r = 1;
 880             for (int i = 0; i < as.length; i++) {
 881                 r *= as[i];
 882             }
 883         }
 884         bh.consume(r);
 885     }
 886 
 887     @Benchmark
 888     public void MULMaskedLanes(Blackhole bh) {
 889         long[] as = fa.apply(size);
 890         boolean[] ms = fm.apply(size);
 891         long r = 1;
 892         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 893             r = 1;
 894             for (int i = 0; i < as.length; i++) {
 895                 if (ms[i % ms.length])
 896                     r *= as[i];
 897             }
 898         }
 899         bh.consume(r);
 900     }
 901 
 902     @Benchmark
 903     public void MINLanes(Blackhole bh) {
 904         long[] as = fa.apply(size);
 905         long r = Long.MAX_VALUE;
 906         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 907             r = Long.MAX_VALUE;
 908             for (int i = 0; i < as.length; i++) {
 909                 r = (long)Math.min(r, as[i]);
 910             }
 911         }
 912         bh.consume(r);
 913     }
 914 
 915     @Benchmark
 916     public void MINMaskedLanes(Blackhole bh) {
 917         long[] as = fa.apply(size);
 918         boolean[] ms = fm.apply(size);
 919         long r = Long.MAX_VALUE;
 920         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 921             r = Long.MAX_VALUE;
 922             for (int i = 0; i < as.length; i++) {
 923                 if (ms[i % ms.length])
 924                     r = (long)Math.min(r, as[i]);
 925             }
 926         }
 927         bh.consume(r);
 928     }
 929 
 930     @Benchmark
 931     public void MAXLanes(Blackhole bh) {
 932         long[] as = fa.apply(size);
 933         long r = Long.MIN_VALUE;
 934         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 935             r = Long.MIN_VALUE;
 936             for (int i = 0; i < as.length; i++) {
 937                 r = (long)Math.max(r, as[i]);
 938             }
 939         }
 940         bh.consume(r);
 941     }
 942 
 943     @Benchmark
 944     public void MAXMaskedLanes(Blackhole bh) {
 945         long[] as = fa.apply(size);
 946         boolean[] ms = fm.apply(size);
 947         long r = Long.MIN_VALUE;
 948         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 949             r = Long.MIN_VALUE;
 950             for (int i = 0; i < as.length; i++) {
 951                 if (ms[i % ms.length])
 952                     r = (long)Math.max(r, as[i]);
 953             }
 954         }
 955         bh.consume(r);
 956     }
 957 
 958 
 959     @Benchmark
 960     public void anyTrue(Blackhole bh) {
 961         boolean[] ms = fm.apply(size);
 962         boolean r = false;
 963         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 964             r = false;
 965             for (int i = 0; i < ms.length; i++) {
 966                 r |= ms[i];
 967             }
 968         }
 969         bh.consume(r);
 970     }
 971 
 972 
 973 
 974     @Benchmark
 975     public void allTrue(Blackhole bh) {
 976         boolean[] ms = fm.apply(size);
 977         boolean r = true;
 978         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 979             r = true;
 980             for (int i = 0; i < ms.length; i++) {
 981                 r &= ms[i];
 982             }
 983         }
 984         bh.consume(r);
 985     }
 986 
 987 
 988     @Benchmark
 989     public void IS_DEFAULT(Blackhole bh) {
 990         long[] as = fa.apply(size);
 991         boolean r = true;
 992 
 993         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 994             for (int i = 0; i < as.length; i++) {
 995                 long a = as[i];
 996                 r &= (bits(a)==0); // accumulate so JIT can't eliminate the computation
 997             }
 998         }
 999 
1000         bh.consume(r);
1001     }
1002 
1003     @Benchmark
1004     public void IS_NEGATIVE(Blackhole bh) {
1005         long[] as = fa.apply(size);
1006         boolean r = true;
1007 
1008         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1009             for (int i = 0; i < as.length; i++) {
1010                 long a = as[i];
1011                 r &= (bits(a)<0); // accumulate so JIT can't eliminate the computation
1012             }
1013         }
1014 
1015         bh.consume(r);
1016     }
1017 
1018 
1019 
1020 
1021     @Benchmark
1022     public void LT(Blackhole bh) {
1023         long[] as = fa.apply(size);
1024         long[] bs = fb.apply(size);
1025         boolean r = true;
1026 
1027         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1028             for (int i = 0; i < as.length; i++) {
1029                 r &= lt(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1030             }
1031         }
1032 
1033         bh.consume(r);
1034     }
1035 
1036     @Benchmark
1037     public void GT(Blackhole bh) {
1038         long[] as = fa.apply(size);
1039         long[] bs = fb.apply(size);
1040         boolean r = true;
1041 
1042         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1043             for (int i = 0; i < as.length; i++) {
1044                 r &= gt(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1045             }
1046         }
1047 
1048         bh.consume(r);
1049     }
1050 
1051     @Benchmark
1052     public void EQ(Blackhole bh) {
1053         long[] as = fa.apply(size);
1054         long[] bs = fb.apply(size);
1055         boolean r = true;
1056 
1057         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1058             for (int i = 0; i < as.length; i++) {
1059                 r &= eq(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1060             }
1061         }
1062 
1063         bh.consume(r);
1064     }
1065 
1066     @Benchmark
1067     public void NE(Blackhole bh) {
1068         long[] as = fa.apply(size);
1069         long[] bs = fb.apply(size);
1070         boolean r = true;
1071 
1072         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1073             for (int i = 0; i < as.length; i++) {
1074                 r &= neq(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1075             }
1076         }
1077 
1078         bh.consume(r);
1079     }
1080 
1081     @Benchmark
1082     public void LE(Blackhole bh) {
1083         long[] as = fa.apply(size);
1084         long[] bs = fb.apply(size);
1085         boolean r = true;
1086 
1087         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1088             for (int i = 0; i < as.length; i++) {
1089                 r &= le(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1090             }
1091         }
1092 
1093         bh.consume(r);
1094     }
1095 
1096     @Benchmark
1097     public void GE(Blackhole bh) {
1098         long[] as = fa.apply(size);
1099         long[] bs = fb.apply(size);
1100         boolean r = true;
1101 
1102         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1103             for (int i = 0; i < as.length; i++) {
1104                 r &= ge(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1105             }
1106         }
1107 
1108         bh.consume(r);
1109     }
1110 
1111 
1112     @Benchmark
1113     public void UNSIGNED_LT(Blackhole bh) {
1114         long[] as = fa.apply(size);
1115         long[] bs = fb.apply(size);
1116         boolean r = true;
1117 
1118         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1119             for (int i = 0; i < as.length; i++) {
1120                 r &= ult(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1121             }
1122         }
1123 
1124         bh.consume(r);
1125     }
1126 
1127 
1128 
1129     @Benchmark
1130     public void UNSIGNED_GT(Blackhole bh) {
1131         long[] as = fa.apply(size);
1132         long[] bs = fb.apply(size);
1133         boolean r = true;
1134 
1135         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1136             for (int i = 0; i < as.length; i++) {
1137                 r &= ugt(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1138             }
1139         }
1140 
1141         bh.consume(r);
1142     }
1143 
1144 
1145 
1146     @Benchmark
1147     public void UNSIGNED_LE(Blackhole bh) {
1148         long[] as = fa.apply(size);
1149         long[] bs = fb.apply(size);
1150         boolean r = true;
1151 
1152         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1153             for (int i = 0; i < as.length; i++) {
1154                 r &= ule(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1155             }
1156         }
1157 
1158         bh.consume(r);
1159     }
1160 
1161 
1162 
1163     @Benchmark
1164     public void UNSIGNED_GE(Blackhole bh) {
1165         long[] as = fa.apply(size);
1166         long[] bs = fb.apply(size);
1167         boolean r = true;
1168 
1169         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1170             for (int i = 0; i < as.length; i++) {
1171                 r &= uge(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1172             }
1173         }
1174 
1175         bh.consume(r);
1176     }
1177 
1178 
1179     @Benchmark
1180     public void blend(Blackhole bh) {
1181         long[] as = fa.apply(size);
1182         long[] bs = fb.apply(size);
1183         long[] rs = fr.apply(size);
1184         boolean[] ms = fm.apply(size);
1185 
1186         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1187             for (int i = 0; i < as.length; i++) {
1188                 long a = as[i];
1189                 long b = bs[i];
1190                 boolean m = ms[i % ms.length];
1191                 rs[i] = (m ? b : a);
1192             }
1193         }
1194 
1195         bh.consume(rs);
1196     }
1197     void rearrangeShared(int window, Blackhole bh) {
1198         long[] as = fa.apply(size);
1199         int[] order = fs.apply(size);
1200         long[] rs = fr.apply(size);
1201 
1202         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1203             for (int i = 0; i < as.length; i += window) {
1204                 for (int j = 0; j < window; j++) {
1205                     long a = as[i+j];
1206                     int pos = order[j];
1207                     rs[i + pos] = a;
1208                 }
1209             }
1210         }
1211 
1212         bh.consume(rs);
1213     }
1214 
1215     @Benchmark
1216     public void rearrange064(Blackhole bh) {
1217         int window = 64 / Long.SIZE;
1218         rearrangeShared(window, bh);
1219     }
1220 
1221     @Benchmark
1222     public void rearrange128(Blackhole bh) {
1223         int window = 128 / Long.SIZE;
1224         rearrangeShared(window, bh);
1225     }
1226 
1227     @Benchmark
1228     public void rearrange256(Blackhole bh) {
1229         int window = 256 / Long.SIZE;
1230         rearrangeShared(window, bh);
1231     }
1232 
1233     @Benchmark
1234     public void rearrange512(Blackhole bh) {
1235         int window = 512 / Long.SIZE;
1236         rearrangeShared(window, bh);
1237     }
1238     void broadcastShared(int window, Blackhole bh) {
1239         long[] as = fa.apply(size);
1240         long[] rs = fr.apply(size);
1241 
1242         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1243             for (int i = 0; i < as.length; i += window) {
1244                 int idx = i;
1245                 for (int j = 0; j < window; j++) {
1246                     rs[j] = as[idx];
1247                 }
1248             }
1249         }
1250 
1251         bh.consume(rs);
1252     }
1253 
1254     @Benchmark
1255     public void broadcast064(Blackhole bh) {
1256         int window = 64 / Long.SIZE;
1257         broadcastShared(window, bh);
1258     }
1259 
1260     @Benchmark
1261     public void broadcast128(Blackhole bh) {
1262         int window = 128 / Long.SIZE;
1263         broadcastShared(window, bh);
1264     }
1265 
1266     @Benchmark
1267     public void broadcast256(Blackhole bh) {
1268         int window = 256 / Long.SIZE;
1269         broadcastShared(window, bh);
1270     }
1271 
1272     @Benchmark
1273     public void broadcast512(Blackhole bh) {
1274         int window = 512 / Long.SIZE;
1275         broadcastShared(window, bh);
1276     }
1277 
1278     @Benchmark
1279     public void zero(Blackhole bh) {
1280         long[] as = fa.apply(size);
1281 
1282         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1283             for (int i = 0; i < as.length; i++) {
1284                 as[i] = (long)0;
1285             }
1286         }
1287 
1288         bh.consume(as);
1289     }
1290 
1291 
1292 
1293 
1294 
1295 
1296 
1297 
1298 
1299 
1300 
1301 
1302 
1303 
1304 
1305 
1306 
1307 
1308 
1309 
1310 
1311 
1312     @Benchmark
1313     public void BITWISE_BLEND(Blackhole bh) {
1314         long[] as = fa.apply(size);
1315         long[] bs = fb.apply(size);
1316         long[] cs = fc.apply(size);
1317         long[] rs = fr.apply(size);
1318 
1319         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1320             for (int i = 0; i < as.length; i++) {
1321                 long a = as[i];
1322                 long b = bs[i];
1323                 long c = cs[i];
1324                 rs[i] = (long)((a&~(c))|(b&c));
1325             }
1326         }
1327 
1328         bh.consume(rs);
1329     }
1330 
1331 
1332 
1333 
1334     @Benchmark
1335     public void BITWISE_BLENDMasked(Blackhole bh) {
1336         long[] as = fa.apply(size);
1337         long[] bs = fb.apply(size);
1338         long[] cs = fc.apply(size);
1339         long[] rs = fr.apply(size);
1340         boolean[] ms = fm.apply(size);
1341 
1342         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1343             for (int i = 0; i < as.length; i++) {
1344                 long a = as[i];
1345                 long b = bs[i];
1346                 long c = cs[i];
1347                 if (ms[i % ms.length]) {
1348                     rs[i] = (long)((a&~(c))|(b&c));
1349                 } else {
1350                     rs[i] = a;
1351                 }
1352             }
1353         }
1354         bh.consume(rs);
1355     }
1356 
1357 
1358     @Benchmark
1359     public void NEG(Blackhole bh) {
1360         long[] as = fa.apply(size);
1361         long[] rs = fr.apply(size);
1362 
1363         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1364             for (int i = 0; i < as.length; i++) {
1365                 long a = as[i];
1366                 rs[i] = (long)(-((long)a));
1367             }
1368         }
1369 
1370         bh.consume(rs);
1371     }
1372 
1373     @Benchmark
1374     public void NEGMasked(Blackhole bh) {
1375         long[] as = fa.apply(size);
1376         long[] rs = fr.apply(size);
1377         boolean[] ms = fm.apply(size);
1378 
1379         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1380             for (int i = 0; i < as.length; i++) {
1381                 long a = as[i];
1382                 boolean m = ms[i % ms.length];
1383                 rs[i] = (m ? (long)(-((long)a)) : a);
1384             }
1385         }
1386 
1387         bh.consume(rs);
1388     }
1389 
1390     @Benchmark
1391     public void ABS(Blackhole bh) {
1392         long[] as = fa.apply(size);
1393         long[] rs = fr.apply(size);
1394 
1395         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1396             for (int i = 0; i < as.length; i++) {
1397                 long a = as[i];
1398                 rs[i] = (long)(Math.abs((long)a));
1399             }
1400         }
1401 
1402         bh.consume(rs);
1403     }
1404 
1405     @Benchmark
1406     public void ABSMasked(Blackhole bh) {
1407         long[] as = fa.apply(size);
1408         long[] rs = fr.apply(size);
1409         boolean[] ms = fm.apply(size);
1410 
1411         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1412             for (int i = 0; i < as.length; i++) {
1413                 long a = as[i];
1414                 boolean m = ms[i % ms.length];
1415                 rs[i] = (m ? (long)(Math.abs((long)a)) : a);
1416             }
1417         }
1418 
1419         bh.consume(rs);
1420     }
1421 
1422 
1423     @Benchmark
1424     public void NOT(Blackhole bh) {
1425         long[] as = fa.apply(size);
1426         long[] rs = fr.apply(size);
1427 
1428         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1429             for (int i = 0; i < as.length; i++) {
1430                 long a = as[i];
1431                 rs[i] = (long)(~((long)a));
1432             }
1433         }
1434 
1435         bh.consume(rs);
1436     }
1437 
1438 
1439 
1440     @Benchmark
1441     public void NOTMasked(Blackhole bh) {
1442         long[] as = fa.apply(size);
1443         long[] rs = fr.apply(size);
1444         boolean[] ms = fm.apply(size);
1445 
1446         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1447             for (int i = 0; i < as.length; i++) {
1448                 long a = as[i];
1449                 boolean m = ms[i % ms.length];
1450                 rs[i] = (m ? (long)(~((long)a)) : a);
1451             }
1452         }
1453 
1454         bh.consume(rs);
1455     }
1456 
1457 
1458 
1459     @Benchmark
1460     public void ZOMO(Blackhole bh) {
1461         long[] as = fa.apply(size);
1462         long[] rs = fr.apply(size);
1463 
1464         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1465             for (int i = 0; i < as.length; i++) {
1466                 long a = as[i];
1467                 rs[i] = (long)((a==0?0:-1));
1468             }
1469         }
1470 
1471         bh.consume(rs);
1472     }
1473 
1474 
1475 
1476     @Benchmark
1477     public void ZOMOMasked(Blackhole bh) {
1478         long[] as = fa.apply(size);
1479         long[] rs = fr.apply(size);
1480         boolean[] ms = fm.apply(size);
1481 
1482         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1483             for (int i = 0; i < as.length; i++) {
1484                 long a = as[i];
1485                 boolean m = ms[i % ms.length];
1486                 rs[i] = (m ? (long)((a==0?0:-1)) : a);
1487             }
1488         }
1489 
1490         bh.consume(rs);
1491     }
1492 
1493 
1494 
1495     @Benchmark
1496     public void gatherBase0(Blackhole bh) {
1497         long[] as = fa.apply(size);
1498         int[] is    = fs.apply(size);
1499         long[] rs = fr.apply(size);
1500 
1501         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1502             for (int i = 0; i < as.length; i++) {
1503                 int ix = 0 + is[i];
1504                 rs[i] = as[ix];
1505             }
1506         }
1507 
1508         bh.consume(rs);
1509     }
1510 
1511 
1512     void gather(int window, Blackhole bh) {
1513         long[] as = fa.apply(size);
1514         int[] is    = fs.apply(size);
1515         long[] rs = fr.apply(size);
1516 
1517         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1518             for (int i = 0; i < as.length; i += window) {
1519                 for (int j = 0; j < window; j++) {
1520                     int ix = is[i + j];
1521                     rs[i + j] = as[ix];
1522                 }
1523             }
1524         }
1525 
1526         bh.consume(rs);
1527     }
1528 
1529     @Benchmark
1530     public void gather064(Blackhole bh) {
1531         int window = 64 / Long.SIZE;
1532         gather(window, bh);
1533     }
1534 
1535     @Benchmark
1536     public void gather128(Blackhole bh) {
1537         int window = 128 / Long.SIZE;
1538         gather(window, bh);
1539     }
1540 
1541     @Benchmark
1542     public void gather256(Blackhole bh) {
1543         int window = 256 / Long.SIZE;
1544         gather(window, bh);
1545     }
1546 
1547     @Benchmark
1548     public void gather512(Blackhole bh) {
1549         int window = 512 / Long.SIZE;
1550         gather(window, bh);
1551     }
1552 
1553     @Benchmark
1554     public void scatterBase0(Blackhole bh) {
1555         long[] as = fa.apply(size);
1556         int[] is    = fs.apply(size);
1557         long[] rs = fr.apply(size);
1558 
1559         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1560             for (int i = 0; i < as.length; i++) {
1561                 int ix = 0 + is[i];
1562                 rs[ix] = as[i];
1563             }
1564         }
1565 
1566         bh.consume(rs);
1567     }
1568 
1569     void scatter(int window, Blackhole bh) {
1570         long[] as = fa.apply(size);
1571         int[] is    = fs.apply(size);
1572         long[] rs = fr.apply(size);
1573 
1574         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1575             for (int i = 0; i < as.length; i += window) {
1576                 for (int j = 0; j < window; j++) {
1577                     int ix = is[i + j];
1578                     rs[ix] = as[i + j];
1579                 }
1580             }
1581         }
1582 
1583         bh.consume(rs);
1584     }
1585 
1586     @Benchmark
1587     public void scatter064(Blackhole bh) {
1588         int window = 64 / Long.SIZE;
1589         scatter(window, bh);
1590     }
1591 
1592     @Benchmark
1593     public void scatter128(Blackhole bh) {
1594         int window = 128 / Long.SIZE;
1595         scatter(window, bh);
1596     }
1597 
1598     @Benchmark
1599     public void scatter256(Blackhole bh) {
1600         int window = 256 / Long.SIZE;
1601         scatter(window, bh);
1602     }
1603 
1604     @Benchmark
1605     public void scatter512(Blackhole bh) {
1606         int window = 512 / Long.SIZE;
1607         scatter(window, bh);
1608     }
1609 }
1610