1 /*
   2  * Copyright (c) 2018, 2021, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 package org.openjdk.bench.jdk.incubator.vector.operation;
  25 
  26 // -- This file was mechanically generated: Do not edit! -- //
  27 
  28 import java.util.concurrent.TimeUnit;
  29 import java.util.function.IntFunction;
  30 
  31 import org.openjdk.jmh.annotations.*;
  32 import org.openjdk.jmh.infra.Blackhole;
  33 
  34 @BenchmarkMode(Mode.Throughput)
  35 @OutputTimeUnit(TimeUnit.MILLISECONDS)
  36 @State(Scope.Benchmark)
  37 @Warmup(iterations = 3, time = 1)
  38 @Measurement(iterations = 5, time = 1)
  39 @Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
  40 public class ByteScalar extends AbstractVectorBenchmark {
    /**
     * Number of times every benchmark method repeats its kernel loop per
     * invocation (it is the bound of each method's outer loop).
     */
    static final int INVOC_COUNT = 1; // To align with vector benchmarks.
  42 
    /**
     * Element count used when the input arrays are allocated in {@code init()};
     * a JMH parameter whose only listed value is 1024.
     */
    @Param("1024")
    int size;
  45 
  46     byte[] fill(IntFunction<Byte> f) {
  47         byte[] array = new byte[size];
  48         for (int i = 0; i < array.length; i++) {
  49             array[i] = f.apply(i);
  50         }
  51         return array;
  52     }
  53 
  54     static byte bits(byte e) {
  55         return e;
  56     }
  57 
  58     byte[] as, bs, cs, rs;
  59     boolean[] ms, mt, rms;
  60     int[] ss;
  61 
  62     @Setup
  63     public void init() {
  64         as = fill(i -> (byte)(2*i));
  65         bs = fill(i -> (byte)(i+1));
  66         cs = fill(i -> (byte)(i+5));
  67         rs = fill(i -> (byte)0);
  68         ms = fillMask(size, i -> (i % 2) == 0);
  69         mt = fillMask(size, i -> true);
  70         rms = fillMask(size, i -> false);
  71 
  72         ss = fillInt(size, i -> RANDOM.nextInt(Math.max(i,1)));
  73     }
  74 
  75     final IntFunction<byte[]> fa = vl -> as;
  76     final IntFunction<byte[]> fb = vl -> bs;
  77     final IntFunction<byte[]> fc = vl -> cs;
  78     final IntFunction<byte[]> fr = vl -> rs;
  79     final IntFunction<boolean[]> fm = vl -> ms;
  80     final IntFunction<boolean[]> fmt = vl -> mt;
  81     final IntFunction<boolean[]> fmr = vl -> rms;
  82     final IntFunction<int[]> fs = vl -> ss;
  83 
  84     static boolean eq(byte a, byte b) {
  85         return a == b;
  86     }
  87 
  88     static boolean neq(byte a, byte b) {
  89         return a != b;
  90     }
  91 
  92     static boolean lt(byte a, byte b) {
  93         return a < b;
  94     }
  95 
  96     static boolean le(byte a, byte b) {
  97         return a <= b;
  98     }
  99 
 100     static boolean gt(byte a, byte b) {
 101         return a > b;
 102     }
 103 
 104     static boolean ge(byte a, byte b) {
 105         return a >= b;
 106     }
 107 
 108     static boolean ult(byte a, byte b) {
 109         return Byte.compareUnsigned(a, b) < 0;
 110     }
 111 
 112     static boolean ule(byte a, byte b) {
 113         return Byte.compareUnsigned(a, b) <= 0;
 114     }
 115 
 116     static boolean ugt(byte a, byte b) {
 117         return Byte.compareUnsigned(a, b) > 0;
 118     }
 119 
 120     static boolean uge(byte a, byte b) {
 121         return Byte.compareUnsigned(a, b) >= 0;
 122     }
 123 
 124     @Benchmark
 125     public void ADD(Blackhole bh) {
 126         byte[] as = fa.apply(size);
 127         byte[] bs = fb.apply(size);
 128         byte[] rs = fr.apply(size);
 129 
 130         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 131             for (int i = 0; i < as.length; i++) {
 132                 byte a = as[i];
 133                 byte b = bs[i];
 134                 rs[i] = (byte)(a + b);
 135             }
 136         }
 137 
 138         bh.consume(rs);
 139     }
 140 
 141     @Benchmark
 142     public void ADDMasked(Blackhole bh) {
 143         byte[] as = fa.apply(size);
 144         byte[] bs = fb.apply(size);
 145         byte[] rs = fr.apply(size);
 146         boolean[] ms = fm.apply(size);
 147 
 148         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 149             for (int i = 0; i < as.length; i++) {
 150                 byte a = as[i];
 151                 byte b = bs[i];
 152                 if (ms[i % ms.length]) {
 153                     rs[i] = (byte)(a + b);
 154                 } else {
 155                     rs[i] = a;
 156                 }
 157             }
 158         }
 159         bh.consume(rs);
 160     }
 161 
 162     @Benchmark
 163     public void SUB(Blackhole bh) {
 164         byte[] as = fa.apply(size);
 165         byte[] bs = fb.apply(size);
 166         byte[] rs = fr.apply(size);
 167 
 168         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 169             for (int i = 0; i < as.length; i++) {
 170                 byte a = as[i];
 171                 byte b = bs[i];
 172                 rs[i] = (byte)(a - b);
 173             }
 174         }
 175 
 176         bh.consume(rs);
 177     }
 178 
 179     @Benchmark
 180     public void SUBMasked(Blackhole bh) {
 181         byte[] as = fa.apply(size);
 182         byte[] bs = fb.apply(size);
 183         byte[] rs = fr.apply(size);
 184         boolean[] ms = fm.apply(size);
 185 
 186         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 187             for (int i = 0; i < as.length; i++) {
 188                 byte a = as[i];
 189                 byte b = bs[i];
 190                 if (ms[i % ms.length]) {
 191                     rs[i] = (byte)(a - b);
 192                 } else {
 193                     rs[i] = a;
 194                 }
 195             }
 196         }
 197         bh.consume(rs);
 198     }
 199 
 200     @Benchmark
 201     public void MUL(Blackhole bh) {
 202         byte[] as = fa.apply(size);
 203         byte[] bs = fb.apply(size);
 204         byte[] rs = fr.apply(size);
 205 
 206         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 207             for (int i = 0; i < as.length; i++) {
 208                 byte a = as[i];
 209                 byte b = bs[i];
 210                 rs[i] = (byte)(a * b);
 211             }
 212         }
 213 
 214         bh.consume(rs);
 215     }
 216 
 217     @Benchmark
 218     public void MULMasked(Blackhole bh) {
 219         byte[] as = fa.apply(size);
 220         byte[] bs = fb.apply(size);
 221         byte[] rs = fr.apply(size);
 222         boolean[] ms = fm.apply(size);
 223 
 224         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 225             for (int i = 0; i < as.length; i++) {
 226                 byte a = as[i];
 227                 byte b = bs[i];
 228                 if (ms[i % ms.length]) {
 229                     rs[i] = (byte)(a * b);
 230                 } else {
 231                     rs[i] = a;
 232                 }
 233             }
 234         }
 235         bh.consume(rs);
 236     }
 237 
 238 
 239 
 240     @Benchmark
 241     public void FIRST_NONZERO(Blackhole bh) {
 242         byte[] as = fa.apply(size);
 243         byte[] bs = fb.apply(size);
 244         byte[] rs = fr.apply(size);
 245 
 246         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 247             for (int i = 0; i < as.length; i++) {
 248                 byte a = as[i];
 249                 byte b = bs[i];
 250                 rs[i] = (byte)((a)!=0?a:b);
 251             }
 252         }
 253 
 254         bh.consume(rs);
 255     }
 256 
 257     @Benchmark
 258     public void FIRST_NONZEROMasked(Blackhole bh) {
 259         byte[] as = fa.apply(size);
 260         byte[] bs = fb.apply(size);
 261         byte[] rs = fr.apply(size);
 262         boolean[] ms = fm.apply(size);
 263 
 264         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 265             for (int i = 0; i < as.length; i++) {
 266                 byte a = as[i];
 267                 byte b = bs[i];
 268                 if (ms[i % ms.length]) {
 269                     rs[i] = (byte)((a)!=0?a:b);
 270                 } else {
 271                     rs[i] = a;
 272                 }
 273             }
 274         }
 275         bh.consume(rs);
 276     }
 277 
 278 
 279     @Benchmark
 280     public void AND(Blackhole bh) {
 281         byte[] as = fa.apply(size);
 282         byte[] bs = fb.apply(size);
 283         byte[] rs = fr.apply(size);
 284 
 285         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 286             for (int i = 0; i < as.length; i++) {
 287                 byte a = as[i];
 288                 byte b = bs[i];
 289                 rs[i] = (byte)(a & b);
 290             }
 291         }
 292 
 293         bh.consume(rs);
 294     }
 295 
 296 
 297 
 298     @Benchmark
 299     public void ANDMasked(Blackhole bh) {
 300         byte[] as = fa.apply(size);
 301         byte[] bs = fb.apply(size);
 302         byte[] rs = fr.apply(size);
 303         boolean[] ms = fm.apply(size);
 304 
 305         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 306             for (int i = 0; i < as.length; i++) {
 307                 byte a = as[i];
 308                 byte b = bs[i];
 309                 if (ms[i % ms.length]) {
 310                     rs[i] = (byte)(a & b);
 311                 } else {
 312                     rs[i] = a;
 313                 }
 314             }
 315         }
 316         bh.consume(rs);
 317     }
 318 
 319 
 320 
 321     @Benchmark
 322     public void AND_NOT(Blackhole bh) {
 323         byte[] as = fa.apply(size);
 324         byte[] bs = fb.apply(size);
 325         byte[] rs = fr.apply(size);
 326 
 327         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 328             for (int i = 0; i < as.length; i++) {
 329                 byte a = as[i];
 330                 byte b = bs[i];
 331                 rs[i] = (byte)(a & ~b);
 332             }
 333         }
 334 
 335         bh.consume(rs);
 336     }
 337 
 338 
 339 
 340     @Benchmark
 341     public void AND_NOTMasked(Blackhole bh) {
 342         byte[] as = fa.apply(size);
 343         byte[] bs = fb.apply(size);
 344         byte[] rs = fr.apply(size);
 345         boolean[] ms = fm.apply(size);
 346 
 347         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 348             for (int i = 0; i < as.length; i++) {
 349                 byte a = as[i];
 350                 byte b = bs[i];
 351                 if (ms[i % ms.length]) {
 352                     rs[i] = (byte)(a & ~b);
 353                 } else {
 354                     rs[i] = a;
 355                 }
 356             }
 357         }
 358         bh.consume(rs);
 359     }
 360 
 361 
 362 
 363     @Benchmark
 364     public void OR(Blackhole bh) {
 365         byte[] as = fa.apply(size);
 366         byte[] bs = fb.apply(size);
 367         byte[] rs = fr.apply(size);
 368 
 369         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 370             for (int i = 0; i < as.length; i++) {
 371                 byte a = as[i];
 372                 byte b = bs[i];
 373                 rs[i] = (byte)(a | b);
 374             }
 375         }
 376 
 377         bh.consume(rs);
 378     }
 379 
 380 
 381 
 382     @Benchmark
 383     public void ORMasked(Blackhole bh) {
 384         byte[] as = fa.apply(size);
 385         byte[] bs = fb.apply(size);
 386         byte[] rs = fr.apply(size);
 387         boolean[] ms = fm.apply(size);
 388 
 389         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 390             for (int i = 0; i < as.length; i++) {
 391                 byte a = as[i];
 392                 byte b = bs[i];
 393                 if (ms[i % ms.length]) {
 394                     rs[i] = (byte)(a | b);
 395                 } else {
 396                     rs[i] = a;
 397                 }
 398             }
 399         }
 400         bh.consume(rs);
 401     }
 402 
 403 
 404 
 405     @Benchmark
 406     public void XOR(Blackhole bh) {
 407         byte[] as = fa.apply(size);
 408         byte[] bs = fb.apply(size);
 409         byte[] rs = fr.apply(size);
 410 
 411         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 412             for (int i = 0; i < as.length; i++) {
 413                 byte a = as[i];
 414                 byte b = bs[i];
 415                 rs[i] = (byte)(a ^ b);
 416             }
 417         }
 418 
 419         bh.consume(rs);
 420     }
 421 
 422 
 423 
 424     @Benchmark
 425     public void XORMasked(Blackhole bh) {
 426         byte[] as = fa.apply(size);
 427         byte[] bs = fb.apply(size);
 428         byte[] rs = fr.apply(size);
 429         boolean[] ms = fm.apply(size);
 430 
 431         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 432             for (int i = 0; i < as.length; i++) {
 433                 byte a = as[i];
 434                 byte b = bs[i];
 435                 if (ms[i % ms.length]) {
 436                     rs[i] = (byte)(a ^ b);
 437                 } else {
 438                     rs[i] = a;
 439                 }
 440             }
 441         }
 442         bh.consume(rs);
 443     }
 444 
 445 
 446 
 447 
 448 
 449     @Benchmark
 450     public void LSHL(Blackhole bh) {
 451         byte[] as = fa.apply(size);
 452         byte[] bs = fb.apply(size);
 453         byte[] rs = fr.apply(size);
 454 
 455         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 456             for (int i = 0; i < as.length; i++) {
 457                 byte a = as[i];
 458                 byte b = bs[i];
 459                 rs[i] = (byte)((a << (b & 0x7)));
 460             }
 461         }
 462 
 463         bh.consume(rs);
 464     }
 465 
 466 
 467 
 468     @Benchmark
 469     public void LSHLMasked(Blackhole bh) {
 470         byte[] as = fa.apply(size);
 471         byte[] bs = fb.apply(size);
 472         byte[] rs = fr.apply(size);
 473         boolean[] ms = fm.apply(size);
 474 
 475         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 476             for (int i = 0; i < as.length; i++) {
 477                 byte a = as[i];
 478                 byte b = bs[i];
 479                 if (ms[i % ms.length]) {
 480                     rs[i] = (byte)((a << (b & 0x7)));
 481                 } else {
 482                     rs[i] = a;
 483                 }
 484             }
 485         }
 486         bh.consume(rs);
 487     }
 488 
 489 
 490 
 491 
 492 
 493 
 494 
 495     @Benchmark
 496     public void ASHR(Blackhole bh) {
 497         byte[] as = fa.apply(size);
 498         byte[] bs = fb.apply(size);
 499         byte[] rs = fr.apply(size);
 500 
 501         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 502             for (int i = 0; i < as.length; i++) {
 503                 byte a = as[i];
 504                 byte b = bs[i];
 505                 rs[i] = (byte)((a >> (b & 0x7)));
 506             }
 507         }
 508 
 509         bh.consume(rs);
 510     }
 511 
 512 
 513 
 514     @Benchmark
 515     public void ASHRMasked(Blackhole bh) {
 516         byte[] as = fa.apply(size);
 517         byte[] bs = fb.apply(size);
 518         byte[] rs = fr.apply(size);
 519         boolean[] ms = fm.apply(size);
 520 
 521         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 522             for (int i = 0; i < as.length; i++) {
 523                 byte a = as[i];
 524                 byte b = bs[i];
 525                 if (ms[i % ms.length]) {
 526                     rs[i] = (byte)((a >> (b & 0x7)));
 527                 } else {
 528                     rs[i] = a;
 529                 }
 530             }
 531         }
 532         bh.consume(rs);
 533     }
 534 
 535 
 536 
 537 
 538 
 539 
 540 
 541     @Benchmark
 542     public void LSHR(Blackhole bh) {
 543         byte[] as = fa.apply(size);
 544         byte[] bs = fb.apply(size);
 545         byte[] rs = fr.apply(size);
 546 
 547         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 548             for (int i = 0; i < as.length; i++) {
 549                 byte a = as[i];
 550                 byte b = bs[i];
 551                 rs[i] = (byte)(((a & 0xFF) >>> (b & 0x7)));
 552             }
 553         }
 554 
 555         bh.consume(rs);
 556     }
 557 
 558 
 559 
 560     @Benchmark
 561     public void LSHRMasked(Blackhole bh) {
 562         byte[] as = fa.apply(size);
 563         byte[] bs = fb.apply(size);
 564         byte[] rs = fr.apply(size);
 565         boolean[] ms = fm.apply(size);
 566 
 567         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 568             for (int i = 0; i < as.length; i++) {
 569                 byte a = as[i];
 570                 byte b = bs[i];
 571                 if (ms[i % ms.length]) {
 572                     rs[i] = (byte)(((a & 0xFF) >>> (b & 0x7)));
 573                 } else {
 574                     rs[i] = a;
 575                 }
 576             }
 577         }
 578         bh.consume(rs);
 579     }
 580 
 581 
 582 
 583 
 584 
 585 
 586 
 587     @Benchmark
 588     public void LSHLShift(Blackhole bh) {
 589         byte[] as = fa.apply(size);
 590         byte[] bs = fb.apply(size);
 591         byte[] rs = fr.apply(size);
 592 
 593         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 594             for (int i = 0; i < as.length; i++) {
 595                 byte a = as[i];
 596                 byte b = bs[i];
 597                 rs[i] = (byte)((a << (b & 7)));
 598             }
 599         }
 600 
 601         bh.consume(rs);
 602     }
 603 
 604 
 605 
 606     @Benchmark
 607     public void LSHLMaskedShift(Blackhole bh) {
 608         byte[] as = fa.apply(size);
 609         byte[] bs = fb.apply(size);
 610         byte[] rs = fr.apply(size);
 611         boolean[] ms = fm.apply(size);
 612 
 613         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 614             for (int i = 0; i < as.length; i++) {
 615                 byte a = as[i];
 616                 byte b = bs[i];
 617                 boolean m = ms[i % ms.length];
 618                 rs[i] = (m ? (byte)((a << (b & 7))) : a);
 619             }
 620         }
 621 
 622         bh.consume(rs);
 623     }
 624 
 625 
 626 
 627 
 628 
 629 
 630 
 631     @Benchmark
 632     public void LSHRShift(Blackhole bh) {
 633         byte[] as = fa.apply(size);
 634         byte[] bs = fb.apply(size);
 635         byte[] rs = fr.apply(size);
 636 
 637         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 638             for (int i = 0; i < as.length; i++) {
 639                 byte a = as[i];
 640                 byte b = bs[i];
 641                 rs[i] = (byte)(((a & 0xFF) >>> (b & 7)));
 642             }
 643         }
 644 
 645         bh.consume(rs);
 646     }
 647 
 648 
 649 
 650     @Benchmark
 651     public void LSHRMaskedShift(Blackhole bh) {
 652         byte[] as = fa.apply(size);
 653         byte[] bs = fb.apply(size);
 654         byte[] rs = fr.apply(size);
 655         boolean[] ms = fm.apply(size);
 656 
 657         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 658             for (int i = 0; i < as.length; i++) {
 659                 byte a = as[i];
 660                 byte b = bs[i];
 661                 boolean m = ms[i % ms.length];
 662                 rs[i] = (m ? (byte)(((a & 0xFF) >>> (b & 7))) : a);
 663             }
 664         }
 665 
 666         bh.consume(rs);
 667     }
 668 
 669 
 670 
 671 
 672 
 673 
 674 
 675     @Benchmark
 676     public void ASHRShift(Blackhole bh) {
 677         byte[] as = fa.apply(size);
 678         byte[] bs = fb.apply(size);
 679         byte[] rs = fr.apply(size);
 680 
 681         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 682             for (int i = 0; i < as.length; i++) {
 683                 byte a = as[i];
 684                 byte b = bs[i];
 685                 rs[i] = (byte)((a >> (b & 7)));
 686             }
 687         }
 688 
 689         bh.consume(rs);
 690     }
 691 
 692 
 693 
 694     @Benchmark
 695     public void ASHRMaskedShift(Blackhole bh) {
 696         byte[] as = fa.apply(size);
 697         byte[] bs = fb.apply(size);
 698         byte[] rs = fr.apply(size);
 699         boolean[] ms = fm.apply(size);
 700 
 701         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 702             for (int i = 0; i < as.length; i++) {
 703                 byte a = as[i];
 704                 byte b = bs[i];
 705                 boolean m = ms[i % ms.length];
 706                 rs[i] = (m ? (byte)((a >> (b & 7))) : a);
 707             }
 708         }
 709 
 710         bh.consume(rs);
 711     }
 712 
 713 
 714 
 715 
 716     @Benchmark
 717     public void MIN(Blackhole bh) {
 718         byte[] as = fa.apply(size);
 719         byte[] bs = fb.apply(size);
 720         byte[] rs = fr.apply(size);
 721 
 722         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 723             for (int i = 0; i < as.length; i++) {
 724                 byte a = as[i];
 725                 byte b = bs[i];
 726                 rs[i] = (byte)(Math.min(a, b));
 727             }
 728         }
 729 
 730         bh.consume(rs);
 731     }
 732 
 733     @Benchmark
 734     public void MAX(Blackhole bh) {
 735         byte[] as = fa.apply(size);
 736         byte[] bs = fb.apply(size);
 737         byte[] rs = fr.apply(size);
 738 
 739         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 740             for (int i = 0; i < as.length; i++) {
 741                 byte a = as[i];
 742                 byte b = bs[i];
 743                 rs[i] = (byte)(Math.max(a, b));
 744             }
 745         }
 746 
 747         bh.consume(rs);
 748     }
 749 
 750 
 751     @Benchmark
 752     public void ANDLanes(Blackhole bh) {
 753         byte[] as = fa.apply(size);
 754         byte r = -1;
 755         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 756             r = -1;
 757             for (int i = 0; i < as.length; i++) {
 758                 r &= as[i];
 759             }
 760         }
 761         bh.consume(r);
 762     }
 763 
 764 
 765 
 766     @Benchmark
 767     public void ANDMaskedLanes(Blackhole bh) {
 768         byte[] as = fa.apply(size);
 769         boolean[] ms = fm.apply(size);
 770         byte r = -1;
 771         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 772             r = -1;
 773             for (int i = 0; i < as.length; i++) {
 774                 if (ms[i % ms.length])
 775                     r &= as[i];
 776             }
 777         }
 778         bh.consume(r);
 779     }
 780 
 781 
 782 
 783     @Benchmark
 784     public void ORLanes(Blackhole bh) {
 785         byte[] as = fa.apply(size);
 786         byte r = 0;
 787         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 788             r = 0;
 789             for (int i = 0; i < as.length; i++) {
 790                 r |= as[i];
 791             }
 792         }
 793         bh.consume(r);
 794     }
 795 
 796 
 797 
 798     @Benchmark
 799     public void ORMaskedLanes(Blackhole bh) {
 800         byte[] as = fa.apply(size);
 801         boolean[] ms = fm.apply(size);
 802         byte r = 0;
 803         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 804             r = 0;
 805             for (int i = 0; i < as.length; i++) {
 806                 if (ms[i % ms.length])
 807                     r |= as[i];
 808             }
 809         }
 810         bh.consume(r);
 811     }
 812 
 813 
 814 
 815     @Benchmark
 816     public void XORLanes(Blackhole bh) {
 817         byte[] as = fa.apply(size);
 818         byte r = 0;
 819         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 820             r = 0;
 821             for (int i = 0; i < as.length; i++) {
 822                 r ^= as[i];
 823             }
 824         }
 825         bh.consume(r);
 826     }
 827 
 828 
 829 
 830     @Benchmark
 831     public void XORMaskedLanes(Blackhole bh) {
 832         byte[] as = fa.apply(size);
 833         boolean[] ms = fm.apply(size);
 834         byte r = 0;
 835         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 836             r = 0;
 837             for (int i = 0; i < as.length; i++) {
 838                 if (ms[i % ms.length])
 839                     r ^= as[i];
 840             }
 841         }
 842         bh.consume(r);
 843     }
 844 
 845 
 846     @Benchmark
 847     public void ADDLanes(Blackhole bh) {
 848         byte[] as = fa.apply(size);
 849         byte r = 0;
 850         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 851             r = 0;
 852             for (int i = 0; i < as.length; i++) {
 853                 r += as[i];
 854             }
 855         }
 856         bh.consume(r);
 857     }
 858 
 859     @Benchmark
 860     public void ADDMaskedLanes(Blackhole bh) {
 861         byte[] as = fa.apply(size);
 862         boolean[] ms = fm.apply(size);
 863         byte r = 0;
 864         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 865             r = 0;
 866             for (int i = 0; i < as.length; i++) {
 867                 if (ms[i % ms.length])
 868                     r += as[i];
 869             }
 870         }
 871         bh.consume(r);
 872     }
 873 
 874     @Benchmark
 875     public void MULLanes(Blackhole bh) {
 876         byte[] as = fa.apply(size);
 877         byte r = 1;
 878         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 879             r = 1;
 880             for (int i = 0; i < as.length; i++) {
 881                 r *= as[i];
 882             }
 883         }
 884         bh.consume(r);
 885     }
 886 
 887     @Benchmark
 888     public void MULMaskedLanes(Blackhole bh) {
 889         byte[] as = fa.apply(size);
 890         boolean[] ms = fm.apply(size);
 891         byte r = 1;
 892         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 893             r = 1;
 894             for (int i = 0; i < as.length; i++) {
 895                 if (ms[i % ms.length])
 896                     r *= as[i];
 897             }
 898         }
 899         bh.consume(r);
 900     }
 901 
 902     @Benchmark
 903     public void MINLanes(Blackhole bh) {
 904         byte[] as = fa.apply(size);
 905         byte r = Byte.MAX_VALUE;
 906         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 907             r = Byte.MAX_VALUE;
 908             for (int i = 0; i < as.length; i++) {
 909                 r = (byte)Math.min(r, as[i]);
 910             }
 911         }
 912         bh.consume(r);
 913     }
 914 
 915     @Benchmark
 916     public void MINMaskedLanes(Blackhole bh) {
 917         byte[] as = fa.apply(size);
 918         boolean[] ms = fm.apply(size);
 919         byte r = Byte.MAX_VALUE;
 920         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 921             r = Byte.MAX_VALUE;
 922             for (int i = 0; i < as.length; i++) {
 923                 if (ms[i % ms.length])
 924                     r = (byte)Math.min(r, as[i]);
 925             }
 926         }
 927         bh.consume(r);
 928     }
 929 
 930     @Benchmark
 931     public void MAXLanes(Blackhole bh) {
 932         byte[] as = fa.apply(size);
 933         byte r = Byte.MIN_VALUE;
 934         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 935             r = Byte.MIN_VALUE;
 936             for (int i = 0; i < as.length; i++) {
 937                 r = (byte)Math.max(r, as[i]);
 938             }
 939         }
 940         bh.consume(r);
 941     }
 942 
 943     @Benchmark
 944     public void MAXMaskedLanes(Blackhole bh) {
 945         byte[] as = fa.apply(size);
 946         boolean[] ms = fm.apply(size);
 947         byte r = Byte.MIN_VALUE;
 948         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 949             r = Byte.MIN_VALUE;
 950             for (int i = 0; i < as.length; i++) {
 951                 if (ms[i % ms.length])
 952                     r = (byte)Math.max(r, as[i]);
 953             }
 954         }
 955         bh.consume(r);
 956     }
 957 
 958 
 959     @Benchmark
 960     public void anyTrue(Blackhole bh) {
 961         boolean[] ms = fm.apply(size);
 962         boolean r = false;
 963         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 964             r = false;
 965             for (int i = 0; i < ms.length; i++) {
 966                 r |= ms[i];
 967             }
 968         }
 969         bh.consume(r);
 970     }
 971 
 972 
 973 
 974     @Benchmark
 975     public void allTrue(Blackhole bh) {
 976         boolean[] ms = fm.apply(size);
 977         boolean r = true;
 978         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 979             r = true;
 980             for (int i = 0; i < ms.length; i++) {
 981                 r &= ms[i];
 982             }
 983         }
 984         bh.consume(r);
 985     }
 986 
 987 
 988     @Benchmark
 989     public void IS_DEFAULT(Blackhole bh) {
 990         byte[] as = fa.apply(size);
 991         boolean r = true;
 992 
 993         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 994             for (int i = 0; i < as.length; i++) {
 995                 byte a = as[i];
 996                 r &= (bits(a)==0); // accumulate so JIT can't eliminate the computation
 997             }
 998         }
 999 
1000         bh.consume(r);
1001     }
1002 
1003     @Benchmark
1004     public void IS_NEGATIVE(Blackhole bh) {
1005         byte[] as = fa.apply(size);
1006         boolean r = true;
1007 
1008         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1009             for (int i = 0; i < as.length; i++) {
1010                 byte a = as[i];
1011                 r &= (bits(a)<0); // accumulate so JIT can't eliminate the computation
1012             }
1013         }
1014 
1015         bh.consume(r);
1016     }
1017 
1018 
1019 
1020 
1021     @Benchmark
1022     public void LT(Blackhole bh) {
1023         byte[] as = fa.apply(size);
1024         byte[] bs = fb.apply(size);
1025         boolean r = true;
1026 
1027         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1028             for (int i = 0; i < as.length; i++) {
1029                 r &= lt(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1030             }
1031         }
1032 
1033         bh.consume(r);
1034     }
1035 
1036     @Benchmark
1037     public void GT(Blackhole bh) {
1038         byte[] as = fa.apply(size);
1039         byte[] bs = fb.apply(size);
1040         boolean r = true;
1041 
1042         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1043             for (int i = 0; i < as.length; i++) {
1044                 r &= gt(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1045             }
1046         }
1047 
1048         bh.consume(r);
1049     }
1050 
1051     @Benchmark
1052     public void EQ(Blackhole bh) {
1053         byte[] as = fa.apply(size);
1054         byte[] bs = fb.apply(size);
1055         boolean r = true;
1056 
1057         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1058             for (int i = 0; i < as.length; i++) {
1059                 r &= eq(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1060             }
1061         }
1062 
1063         bh.consume(r);
1064     }
1065 
1066     @Benchmark
1067     public void NE(Blackhole bh) {
1068         byte[] as = fa.apply(size);
1069         byte[] bs = fb.apply(size);
1070         boolean r = true;
1071 
1072         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1073             for (int i = 0; i < as.length; i++) {
1074                 r &= neq(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1075             }
1076         }
1077 
1078         bh.consume(r);
1079     }
1080 
1081     @Benchmark
1082     public void LE(Blackhole bh) {
1083         byte[] as = fa.apply(size);
1084         byte[] bs = fb.apply(size);
1085         boolean r = true;
1086 
1087         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1088             for (int i = 0; i < as.length; i++) {
1089                 r &= le(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1090             }
1091         }
1092 
1093         bh.consume(r);
1094     }
1095 
1096     @Benchmark
1097     public void GE(Blackhole bh) {
1098         byte[] as = fa.apply(size);
1099         byte[] bs = fb.apply(size);
1100         boolean r = true;
1101 
1102         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1103             for (int i = 0; i < as.length; i++) {
1104                 r &= ge(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1105             }
1106         }
1107 
1108         bh.consume(r);
1109     }
1110 
1111 
1112     @Benchmark
1113     public void UNSIGNED_LT(Blackhole bh) {
1114         byte[] as = fa.apply(size);
1115         byte[] bs = fb.apply(size);
1116         boolean r = true;
1117 
1118         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1119             for (int i = 0; i < as.length; i++) {
1120                 r &= ult(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1121             }
1122         }
1123 
1124         bh.consume(r);
1125     }
1126 
1127 
1128 
1129     @Benchmark
1130     public void UNSIGNED_GT(Blackhole bh) {
1131         byte[] as = fa.apply(size);
1132         byte[] bs = fb.apply(size);
1133         boolean r = true;
1134 
1135         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1136             for (int i = 0; i < as.length; i++) {
1137                 r &= ugt(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1138             }
1139         }
1140 
1141         bh.consume(r);
1142     }
1143 
1144 
1145 
1146     @Benchmark
1147     public void UNSIGNED_LE(Blackhole bh) {
1148         byte[] as = fa.apply(size);
1149         byte[] bs = fb.apply(size);
1150         boolean r = true;
1151 
1152         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1153             for (int i = 0; i < as.length; i++) {
1154                 r &= ule(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1155             }
1156         }
1157 
1158         bh.consume(r);
1159     }
1160 
1161 
1162 
1163     @Benchmark
1164     public void UNSIGNED_GE(Blackhole bh) {
1165         byte[] as = fa.apply(size);
1166         byte[] bs = fb.apply(size);
1167         boolean r = true;
1168 
1169         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1170             for (int i = 0; i < as.length; i++) {
1171                 r &= uge(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1172             }
1173         }
1174 
1175         bh.consume(r);
1176     }
1177 
1178 
1179     @Benchmark
1180     public void blend(Blackhole bh) {
1181         byte[] as = fa.apply(size);
1182         byte[] bs = fb.apply(size);
1183         byte[] rs = fr.apply(size);
1184         boolean[] ms = fm.apply(size);
1185 
1186         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1187             for (int i = 0; i < as.length; i++) {
1188                 byte a = as[i];
1189                 byte b = bs[i];
1190                 boolean m = ms[i % ms.length];
1191                 rs[i] = (m ? b : a);
1192             }
1193         }
1194 
1195         bh.consume(rs);
1196     }
1197     void rearrangeShared(int window, Blackhole bh) {
1198         byte[] as = fa.apply(size);
1199         int[] order = fs.apply(size);
1200         byte[] rs = fr.apply(size);
1201 
1202         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1203             for (int i = 0; i < as.length; i += window) {
1204                 for (int j = 0; j < window; j++) {
1205                     byte a = as[i+j];
1206                     int pos = order[j];
1207                     rs[i + pos] = a;
1208                 }
1209             }
1210         }
1211 
1212         bh.consume(rs);
1213     }
1214 
1215     @Benchmark
1216     public void rearrange064(Blackhole bh) {
1217         int window = 64 / Byte.SIZE;
1218         rearrangeShared(window, bh);
1219     }
1220 
1221     @Benchmark
1222     public void rearrange128(Blackhole bh) {
1223         int window = 128 / Byte.SIZE;
1224         rearrangeShared(window, bh);
1225     }
1226 
1227     @Benchmark
1228     public void rearrange256(Blackhole bh) {
1229         int window = 256 / Byte.SIZE;
1230         rearrangeShared(window, bh);
1231     }
1232 
1233     @Benchmark
1234     public void rearrange512(Blackhole bh) {
1235         int window = 512 / Byte.SIZE;
1236         rearrangeShared(window, bh);
1237     }
1238     void broadcastShared(int window, Blackhole bh) {
1239         byte[] as = fa.apply(size);
1240         byte[] rs = fr.apply(size);
1241 
1242         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1243             for (int i = 0; i < as.length; i += window) {
1244                 int idx = i;
1245                 for (int j = 0; j < window; j++) {
1246                     rs[j] = as[idx];
1247                 }
1248             }
1249         }
1250 
1251         bh.consume(rs);
1252     }
1253 
1254     @Benchmark
1255     public void broadcast064(Blackhole bh) {
1256         int window = 64 / Byte.SIZE;
1257         broadcastShared(window, bh);
1258     }
1259 
1260     @Benchmark
1261     public void broadcast128(Blackhole bh) {
1262         int window = 128 / Byte.SIZE;
1263         broadcastShared(window, bh);
1264     }
1265 
1266     @Benchmark
1267     public void broadcast256(Blackhole bh) {
1268         int window = 256 / Byte.SIZE;
1269         broadcastShared(window, bh);
1270     }
1271 
1272     @Benchmark
1273     public void broadcast512(Blackhole bh) {
1274         int window = 512 / Byte.SIZE;
1275         broadcastShared(window, bh);
1276     }
1277 
1278     @Benchmark
1279     public void zero(Blackhole bh) {
1280         byte[] as = fa.apply(size);
1281 
1282         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1283             for (int i = 0; i < as.length; i++) {
1284                 as[i] = (byte)0;
1285             }
1286         }
1287 
1288         bh.consume(as);
1289     }
1290 
1291 
1292 
1293 
1294 
1295 
1296 
1297 
1298 
1299 
1300 
1301 
1302 
1303 
1304 
1305 
1306 
1307 
1308 
1309 
1310 
1311 
1312     @Benchmark
1313     public void BITWISE_BLEND(Blackhole bh) {
1314         byte[] as = fa.apply(size);
1315         byte[] bs = fb.apply(size);
1316         byte[] cs = fc.apply(size);
1317         byte[] rs = fr.apply(size);
1318 
1319         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1320             for (int i = 0; i < as.length; i++) {
1321                 byte a = as[i];
1322                 byte b = bs[i];
1323                 byte c = cs[i];
1324                 rs[i] = (byte)((a&~(c))|(b&c));
1325             }
1326         }
1327 
1328         bh.consume(rs);
1329     }
1330 
1331 
1332 
1333 
1334     @Benchmark
1335     public void BITWISE_BLENDMasked(Blackhole bh) {
1336         byte[] as = fa.apply(size);
1337         byte[] bs = fb.apply(size);
1338         byte[] cs = fc.apply(size);
1339         byte[] rs = fr.apply(size);
1340         boolean[] ms = fm.apply(size);
1341 
1342         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1343             for (int i = 0; i < as.length; i++) {
1344                 byte a = as[i];
1345                 byte b = bs[i];
1346                 byte c = cs[i];
1347                 if (ms[i % ms.length]) {
1348                     rs[i] = (byte)((a&~(c))|(b&c));
1349                 } else {
1350                     rs[i] = a;
1351                 }
1352             }
1353         }
1354         bh.consume(rs);
1355     }
1356 
1357 
1358     @Benchmark
1359     public void NEG(Blackhole bh) {
1360         byte[] as = fa.apply(size);
1361         byte[] rs = fr.apply(size);
1362 
1363         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1364             for (int i = 0; i < as.length; i++) {
1365                 byte a = as[i];
1366                 rs[i] = (byte)(-((byte)a));
1367             }
1368         }
1369 
1370         bh.consume(rs);
1371     }
1372 
1373     @Benchmark
1374     public void NEGMasked(Blackhole bh) {
1375         byte[] as = fa.apply(size);
1376         byte[] rs = fr.apply(size);
1377         boolean[] ms = fm.apply(size);
1378 
1379         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1380             for (int i = 0; i < as.length; i++) {
1381                 byte a = as[i];
1382                 boolean m = ms[i % ms.length];
1383                 rs[i] = (m ? (byte)(-((byte)a)) : a);
1384             }
1385         }
1386 
1387         bh.consume(rs);
1388     }
1389 
1390     @Benchmark
1391     public void ABS(Blackhole bh) {
1392         byte[] as = fa.apply(size);
1393         byte[] rs = fr.apply(size);
1394 
1395         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1396             for (int i = 0; i < as.length; i++) {
1397                 byte a = as[i];
1398                 rs[i] = (byte)(Math.abs((byte)a));
1399             }
1400         }
1401 
1402         bh.consume(rs);
1403     }
1404 
1405     @Benchmark
1406     public void ABSMasked(Blackhole bh) {
1407         byte[] as = fa.apply(size);
1408         byte[] rs = fr.apply(size);
1409         boolean[] ms = fm.apply(size);
1410 
1411         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1412             for (int i = 0; i < as.length; i++) {
1413                 byte a = as[i];
1414                 boolean m = ms[i % ms.length];
1415                 rs[i] = (m ? (byte)(Math.abs((byte)a)) : a);
1416             }
1417         }
1418 
1419         bh.consume(rs);
1420     }
1421 
1422 
1423     @Benchmark
1424     public void NOT(Blackhole bh) {
1425         byte[] as = fa.apply(size);
1426         byte[] rs = fr.apply(size);
1427 
1428         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1429             for (int i = 0; i < as.length; i++) {
1430                 byte a = as[i];
1431                 rs[i] = (byte)(~((byte)a));
1432             }
1433         }
1434 
1435         bh.consume(rs);
1436     }
1437 
1438 
1439 
1440     @Benchmark
1441     public void NOTMasked(Blackhole bh) {
1442         byte[] as = fa.apply(size);
1443         byte[] rs = fr.apply(size);
1444         boolean[] ms = fm.apply(size);
1445 
1446         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1447             for (int i = 0; i < as.length; i++) {
1448                 byte a = as[i];
1449                 boolean m = ms[i % ms.length];
1450                 rs[i] = (m ? (byte)(~((byte)a)) : a);
1451             }
1452         }
1453 
1454         bh.consume(rs);
1455     }
1456 
1457 
1458 
1459     @Benchmark
1460     public void ZOMO(Blackhole bh) {
1461         byte[] as = fa.apply(size);
1462         byte[] rs = fr.apply(size);
1463 
1464         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1465             for (int i = 0; i < as.length; i++) {
1466                 byte a = as[i];
1467                 rs[i] = (byte)((a==0?0:-1));
1468             }
1469         }
1470 
1471         bh.consume(rs);
1472     }
1473 
1474 
1475 
1476     @Benchmark
1477     public void ZOMOMasked(Blackhole bh) {
1478         byte[] as = fa.apply(size);
1479         byte[] rs = fr.apply(size);
1480         boolean[] ms = fm.apply(size);
1481 
1482         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1483             for (int i = 0; i < as.length; i++) {
1484                 byte a = as[i];
1485                 boolean m = ms[i % ms.length];
1486                 rs[i] = (m ? (byte)((a==0?0:-1)) : a);
1487             }
1488         }
1489 
1490         bh.consume(rs);
1491     }
1492 
1493 
1494 
1495     @Benchmark
1496     public void gatherBase0(Blackhole bh) {
1497         byte[] as = fa.apply(size);
1498         int[] is    = fs.apply(size);
1499         byte[] rs = fr.apply(size);
1500 
1501         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1502             for (int i = 0; i < as.length; i++) {
1503                 int ix = 0 + is[i];
1504                 rs[i] = as[ix];
1505             }
1506         }
1507 
1508         bh.consume(rs);
1509     }
1510 
1511 
1512     void gather(int window, Blackhole bh) {
1513         byte[] as = fa.apply(size);
1514         int[] is    = fs.apply(size);
1515         byte[] rs = fr.apply(size);
1516 
1517         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1518             for (int i = 0; i < as.length; i += window) {
1519                 for (int j = 0; j < window; j++) {
1520                     int ix = is[i + j];
1521                     rs[i + j] = as[ix];
1522                 }
1523             }
1524         }
1525 
1526         bh.consume(rs);
1527     }
1528 
1529     @Benchmark
1530     public void gather064(Blackhole bh) {
1531         int window = 64 / Byte.SIZE;
1532         gather(window, bh);
1533     }
1534 
1535     @Benchmark
1536     public void gather128(Blackhole bh) {
1537         int window = 128 / Byte.SIZE;
1538         gather(window, bh);
1539     }
1540 
1541     @Benchmark
1542     public void gather256(Blackhole bh) {
1543         int window = 256 / Byte.SIZE;
1544         gather(window, bh);
1545     }
1546 
1547     @Benchmark
1548     public void gather512(Blackhole bh) {
1549         int window = 512 / Byte.SIZE;
1550         gather(window, bh);
1551     }
1552 
1553     @Benchmark
1554     public void scatterBase0(Blackhole bh) {
1555         byte[] as = fa.apply(size);
1556         int[] is    = fs.apply(size);
1557         byte[] rs = fr.apply(size);
1558 
1559         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1560             for (int i = 0; i < as.length; i++) {
1561                 int ix = 0 + is[i];
1562                 rs[ix] = as[i];
1563             }
1564         }
1565 
1566         bh.consume(rs);
1567     }
1568 
1569     void scatter(int window, Blackhole bh) {
1570         byte[] as = fa.apply(size);
1571         int[] is    = fs.apply(size);
1572         byte[] rs = fr.apply(size);
1573 
1574         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1575             for (int i = 0; i < as.length; i += window) {
1576                 for (int j = 0; j < window; j++) {
1577                     int ix = is[i + j];
1578                     rs[ix] = as[i + j];
1579                 }
1580             }
1581         }
1582 
1583         bh.consume(rs);
1584     }
1585 
1586     @Benchmark
1587     public void scatter064(Blackhole bh) {
1588         int window = 64 / Byte.SIZE;
1589         scatter(window, bh);
1590     }
1591 
1592     @Benchmark
1593     public void scatter128(Blackhole bh) {
1594         int window = 128 / Byte.SIZE;
1595         scatter(window, bh);
1596     }
1597 
1598     @Benchmark
1599     public void scatter256(Blackhole bh) {
1600         int window = 256 / Byte.SIZE;
1601         scatter(window, bh);
1602     }
1603 
1604     @Benchmark
1605     public void scatter512(Blackhole bh) {
1606         int window = 512 / Byte.SIZE;
1607         scatter(window, bh);
1608     }
1609 }
1610