1 /* 2 * Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 */ 23 24 package org.openjdk.bench.jdk.incubator.vector.operation; 25 26 // -- This file was mechanically generated: Do not edit! -- // 27 28 import java.util.concurrent.TimeUnit; 29 import java.util.function.IntFunction; 30 import jdk.incubator.vector.VectorMath; 31 32 import org.openjdk.jmh.annotations.*; 33 import org.openjdk.jmh.infra.Blackhole; 34 35 @BenchmarkMode(Mode.Throughput) 36 @OutputTimeUnit(TimeUnit.MILLISECONDS) 37 @State(Scope.Benchmark) 38 @Warmup(iterations = 3, time = 1) 39 @Measurement(iterations = 5, time = 1) 40 @Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"}) 41 public class ByteScalar extends AbstractVectorBenchmark { 42 static final int INVOC_COUNT = 1; // To align with vector benchmarks. 43 44 private static final byte CONST_SHIFT = Byte.SIZE / 2; 45 46 @Param("1024") 47 int size; 48 49 byte[] fill(IntFunction<Byte> f) { 50 byte[] array = new byte[size]; 51 for (int i = 0; i < array.length; i++) { 52 array[i] = f.apply(i); 53 } 54 return array; 55 } 56 57 static byte bits(byte e) { 58 return e; 59 } 60 61 byte[] as, bs, cs, rs; 62 boolean[] ms, mt, rms; 63 int[] ss; 64 65 @Setup 66 public void init() { 67 as = fill(i -> (byte)(2*i)); 68 bs = fill(i -> (byte)(i+1)); 69 cs = fill(i -> (byte)(i+5)); 70 rs = fill(i -> (byte)0); 71 ms = fillMask(size, i -> (i % 2) == 0); 72 mt = fillMask(size, i -> true); 73 rms = fillMask(size, i -> false); 74 75 ss = fillInt(size, i -> RAND.nextInt(Math.max(i,1))); 76 } 77 78 final IntFunction<byte[]> fa = vl -> as; 79 final IntFunction<byte[]> fb = vl -> bs; 80 final IntFunction<byte[]> fc = vl -> cs; 81 final IntFunction<byte[]> fr = vl -> rs; 82 final IntFunction<boolean[]> fm = vl -> ms; 83 final IntFunction<boolean[]> fmt = vl -> mt; 84 final IntFunction<boolean[]> fmr = vl -> rms; 85 final IntFunction<int[]> fs = vl -> ss; 86 87 static boolean eq(byte a, byte b) { 88 return a == b; 89 } 90 91 static boolean neq(byte a, byte b) { 92 return a != b; 93 } 94 95 static boolean lt(byte a, byte b) { 96 return a < b; 97 } 98 99 static boolean le(byte a, byte b) { 100 return a <= b; 101 } 102 103 static boolean gt(byte a, byte b) { 104 return a > b; 105 } 106 107 static boolean ge(byte a, byte b) { 108 return a >= b; 109 } 110 111 static boolean ult(byte a, byte b) { 112 return Byte.compareUnsigned(a, b) < 0; 113 } 114 115 static boolean ule(byte a, byte b) { 116 return Byte.compareUnsigned(a, b) <= 0; 117 } 118 119 static boolean ugt(byte a, byte b) { 120 return Byte.compareUnsigned(a, b) > 0; 121 } 122 123 static boolean uge(byte a, byte b) { 124 return Byte.compareUnsigned(a, b) >= 0; 125 } 126 127 static byte ROL_scalar(byte a, byte b) { 128 return (byte)(((((byte)a) & 0xFF) << (b & 7)) | ((((byte)a) & 0xFF) >>> (8 - (b & 7)))); 129 } 130 131 static byte ROR_scalar(byte a, byte b) { 132 return (byte)(((((byte)a) & 0xFF) >>> (b & 7)) | ((((byte)a) & 0xFF) << (8 - (b & 7)))); 133 } 134 135 static byte TRAILING_ZEROS_COUNT_scalar(byte a) { 136 return (byte) (a != 0 ? Integer.numberOfTrailingZeros(a) : 8); 137 } 138 139 static byte LEADING_ZEROS_COUNT_scalar(byte a) { 140 return (byte) (a >= 0 ? Integer.numberOfLeadingZeros(a) - 24 : 0); 141 } 142 143 static byte REVERSE_scalar(byte a) { 144 byte b = ROL_scalar(a, (byte) 4); 145 b = (byte)(((b & 0x55) << 1) | ((b & 0xAA) >>> 1)); 146 b = (byte)(((b & 0x33) << 2) | ((b & 0xCC) >>> 2)); 147 return b; 148 } 149 150 @Benchmark 151 public void ADD(Blackhole bh) { 152 byte[] as = fa.apply(size); 153 byte[] bs = fb.apply(size); 154 byte[] rs = fr.apply(size); 155 156 for (int ic = 0; ic < INVOC_COUNT; ic++) { 157 for (int i = 0; i < as.length; i++) { 158 byte a = as[i]; 159 byte b = bs[i]; 160 rs[i] = (byte)(a + b); 161 } 162 } 163 164 bh.consume(rs); 165 } 166 167 @Benchmark 168 public void ADDMasked(Blackhole bh) { 169 byte[] as = fa.apply(size); 170 byte[] bs = fb.apply(size); 171 byte[] rs = fr.apply(size); 172 boolean[] ms = fm.apply(size); 173 174 for (int ic = 0; ic < INVOC_COUNT; ic++) { 175 for (int i = 0; i < as.length; i++) { 176 byte a = as[i]; 177 byte b = bs[i]; 178 if (ms[i % ms.length]) { 179 rs[i] = (byte)(a + b); 180 } else { 181 rs[i] = a; 182 } 183 } 184 } 185 bh.consume(rs); 186 } 187 188 @Benchmark 189 public void SUB(Blackhole bh) { 190 byte[] as = fa.apply(size); 191 byte[] bs = fb.apply(size); 192 byte[] rs = fr.apply(size); 193 194 for (int ic = 0; ic < INVOC_COUNT; ic++) { 195 for (int i = 0; i < as.length; i++) { 196 byte a = as[i]; 197 byte b = bs[i]; 198 rs[i] = (byte)(a - b); 199 } 200 } 201 202 bh.consume(rs); 203 } 204 205 @Benchmark 206 public void SUBMasked(Blackhole bh) { 207 byte[] as = fa.apply(size); 208 byte[] bs = fb.apply(size); 209 byte[] rs = fr.apply(size); 210 boolean[] ms = fm.apply(size); 211 212 for (int ic = 0; ic < INVOC_COUNT; ic++) { 213 for (int i = 0; i < as.length; i++) { 214 byte a = as[i]; 215 byte b = bs[i]; 216 if (ms[i % ms.length]) { 217 rs[i] = (byte)(a - b); 218 } else { 219 rs[i] = a; 220 } 221 } 222 } 223 bh.consume(rs); 224 } 225 226 @Benchmark 227 public void MUL(Blackhole bh) { 228 byte[] as = fa.apply(size); 229 byte[] bs = fb.apply(size); 230 byte[] rs = fr.apply(size); 231 232 for (int ic = 0; ic < INVOC_COUNT; ic++) { 233 for (int i = 0; i < as.length; i++) { 234 byte a = as[i]; 235 byte b = bs[i]; 236 rs[i] = (byte)(a * b); 237 } 238 } 239 240 bh.consume(rs); 241 } 242 243 @Benchmark 244 public void MULMasked(Blackhole bh) { 245 byte[] as = fa.apply(size); 246 byte[] bs = fb.apply(size); 247 byte[] rs = fr.apply(size); 248 boolean[] ms = fm.apply(size); 249 250 for (int ic = 0; ic < INVOC_COUNT; ic++) { 251 for (int i = 0; i < as.length; i++) { 252 byte a = as[i]; 253 byte b = bs[i]; 254 if (ms[i % ms.length]) { 255 rs[i] = (byte)(a * b); 256 } else { 257 rs[i] = a; 258 } 259 } 260 } 261 bh.consume(rs); 262 } 263 264 @Benchmark 265 public void FIRST_NONZERO(Blackhole bh) { 266 byte[] as = fa.apply(size); 267 byte[] bs = fb.apply(size); 268 byte[] rs = fr.apply(size); 269 270 for (int ic = 0; ic < INVOC_COUNT; ic++) { 271 for (int i = 0; i < as.length; i++) { 272 byte a = as[i]; 273 byte b = bs[i]; 274 rs[i] = (byte)((a)!=0?a:b); 275 } 276 } 277 278 bh.consume(rs); 279 } 280 281 @Benchmark 282 public void FIRST_NONZEROMasked(Blackhole bh) { 283 byte[] as = fa.apply(size); 284 byte[] bs = fb.apply(size); 285 byte[] rs = fr.apply(size); 286 boolean[] ms = fm.apply(size); 287 288 for (int ic = 0; ic < INVOC_COUNT; ic++) { 289 for (int i = 0; i < as.length; i++) { 290 byte a = as[i]; 291 byte b = bs[i]; 292 if (ms[i % ms.length]) { 293 rs[i] = (byte)((a)!=0?a:b); 294 } else { 295 rs[i] = a; 296 } 297 } 298 } 299 bh.consume(rs); 300 } 301 302 @Benchmark 303 public void AND(Blackhole bh) { 304 byte[] as = fa.apply(size); 305 byte[] bs = fb.apply(size); 306 byte[] rs = fr.apply(size); 307 308 for (int ic = 0; ic < INVOC_COUNT; ic++) { 309 for (int i = 0; i < as.length; i++) { 310 byte a = as[i]; 311 byte b = bs[i]; 312 rs[i] = (byte)(a & b); 313 } 314 } 315 316 bh.consume(rs); 317 } 318 319 @Benchmark 320 public void ANDMasked(Blackhole bh) { 321 byte[] as = fa.apply(size); 322 byte[] bs = fb.apply(size); 323 byte[] rs = fr.apply(size); 324 boolean[] ms = fm.apply(size); 325 326 for (int ic = 0; ic < INVOC_COUNT; ic++) { 327 for (int i = 0; i < as.length; i++) { 328 byte a = as[i]; 329 byte b = bs[i]; 330 if (ms[i % ms.length]) { 331 rs[i] = (byte)(a & b); 332 } else { 333 rs[i] = a; 334 } 335 } 336 } 337 bh.consume(rs); 338 } 339 340 @Benchmark 341 public void AND_NOT(Blackhole bh) { 342 byte[] as = fa.apply(size); 343 byte[] bs = fb.apply(size); 344 byte[] rs = fr.apply(size); 345 346 for (int ic = 0; ic < INVOC_COUNT; ic++) { 347 for (int i = 0; i < as.length; i++) { 348 byte a = as[i]; 349 byte b = bs[i]; 350 rs[i] = (byte)(a & ~b); 351 } 352 } 353 354 bh.consume(rs); 355 } 356 357 @Benchmark 358 public void AND_NOTMasked(Blackhole bh) { 359 byte[] as = fa.apply(size); 360 byte[] bs = fb.apply(size); 361 byte[] rs = fr.apply(size); 362 boolean[] ms = fm.apply(size); 363 364 for (int ic = 0; ic < INVOC_COUNT; ic++) { 365 for (int i = 0; i < as.length; i++) { 366 byte a = as[i]; 367 byte b = bs[i]; 368 if (ms[i % ms.length]) { 369 rs[i] = (byte)(a & ~b); 370 } else { 371 rs[i] = a; 372 } 373 } 374 } 375 bh.consume(rs); 376 } 377 378 @Benchmark 379 public void OR(Blackhole bh) { 380 byte[] as = fa.apply(size); 381 byte[] bs = fb.apply(size); 382 byte[] rs = fr.apply(size); 383 384 for (int ic = 0; ic < INVOC_COUNT; ic++) { 385 for (int i = 0; i < as.length; i++) { 386 byte a = as[i]; 387 byte b = bs[i]; 388 rs[i] = (byte)(a | b); 389 } 390 } 391 392 bh.consume(rs); 393 } 394 395 @Benchmark 396 public void ORMasked(Blackhole bh) { 397 byte[] as = fa.apply(size); 398 byte[] bs = fb.apply(size); 399 byte[] rs = fr.apply(size); 400 boolean[] ms = fm.apply(size); 401 402 for (int ic = 0; ic < INVOC_COUNT; ic++) { 403 for (int i = 0; i < as.length; i++) { 404 byte a = as[i]; 405 byte b = bs[i]; 406 if (ms[i % ms.length]) { 407 rs[i] = (byte)(a | b); 408 } else { 409 rs[i] = a; 410 } 411 } 412 } 413 bh.consume(rs); 414 } 415 416 @Benchmark 417 public void XOR(Blackhole bh) { 418 byte[] as = fa.apply(size); 419 byte[] bs = fb.apply(size); 420 byte[] rs = fr.apply(size); 421 422 for (int ic = 0; ic < INVOC_COUNT; ic++) { 423 for (int i = 0; i < as.length; i++) { 424 byte a = as[i]; 425 byte b = bs[i]; 426 rs[i] = (byte)(a ^ b); 427 } 428 } 429 430 bh.consume(rs); 431 } 432 433 @Benchmark 434 public void XORMasked(Blackhole bh) { 435 byte[] as = fa.apply(size); 436 byte[] bs = fb.apply(size); 437 byte[] rs = fr.apply(size); 438 boolean[] ms = fm.apply(size); 439 440 for (int ic = 0; ic < INVOC_COUNT; ic++) { 441 for (int i = 0; i < as.length; i++) { 442 byte a = as[i]; 443 byte b = bs[i]; 444 if (ms[i % ms.length]) { 445 rs[i] = (byte)(a ^ b); 446 } else { 447 rs[i] = a; 448 } 449 } 450 } 451 bh.consume(rs); 452 } 453 454 @Benchmark 455 public void LSHL(Blackhole bh) { 456 byte[] as = fa.apply(size); 457 byte[] bs = fb.apply(size); 458 byte[] rs = fr.apply(size); 459 460 for (int ic = 0; ic < INVOC_COUNT; ic++) { 461 for (int i = 0; i < as.length; i++) { 462 byte a = as[i]; 463 byte b = bs[i]; 464 rs[i] = (byte)((a << (b & 0x7))); 465 } 466 } 467 468 bh.consume(rs); 469 } 470 471 @Benchmark 472 public void LSHLMasked(Blackhole bh) { 473 byte[] as = fa.apply(size); 474 byte[] bs = fb.apply(size); 475 byte[] rs = fr.apply(size); 476 boolean[] ms = fm.apply(size); 477 478 for (int ic = 0; ic < INVOC_COUNT; ic++) { 479 for (int i = 0; i < as.length; i++) { 480 byte a = as[i]; 481 byte b = bs[i]; 482 if (ms[i % ms.length]) { 483 rs[i] = (byte)((a << (b & 0x7))); 484 } else { 485 rs[i] = a; 486 } 487 } 488 } 489 bh.consume(rs); 490 } 491 492 @Benchmark 493 public void ASHR(Blackhole bh) { 494 byte[] as = fa.apply(size); 495 byte[] bs = fb.apply(size); 496 byte[] rs = fr.apply(size); 497 498 for (int ic = 0; ic < INVOC_COUNT; ic++) { 499 for (int i = 0; i < as.length; i++) { 500 byte a = as[i]; 501 byte b = bs[i]; 502 rs[i] = (byte)((a >> (b & 0x7))); 503 } 504 } 505 506 bh.consume(rs); 507 } 508 509 @Benchmark 510 public void ASHRMasked(Blackhole bh) { 511 byte[] as = fa.apply(size); 512 byte[] bs = fb.apply(size); 513 byte[] rs = fr.apply(size); 514 boolean[] ms = fm.apply(size); 515 516 for (int ic = 0; ic < INVOC_COUNT; ic++) { 517 for (int i = 0; i < as.length; i++) { 518 byte a = as[i]; 519 byte b = bs[i]; 520 if (ms[i % ms.length]) { 521 rs[i] = (byte)((a >> (b & 0x7))); 522 } else { 523 rs[i] = a; 524 } 525 } 526 } 527 bh.consume(rs); 528 } 529 530 @Benchmark 531 public void LSHR(Blackhole bh) { 532 byte[] as = fa.apply(size); 533 byte[] bs = fb.apply(size); 534 byte[] rs = fr.apply(size); 535 536 for (int ic = 0; ic < INVOC_COUNT; ic++) { 537 for (int i = 0; i < as.length; i++) { 538 byte a = as[i]; 539 byte b = bs[i]; 540 rs[i] = (byte)(((a & 0xFF) >>> (b & 0x7))); 541 } 542 } 543 544 bh.consume(rs); 545 } 546 547 @Benchmark 548 public void LSHRMasked(Blackhole bh) { 549 byte[] as = fa.apply(size); 550 byte[] bs = fb.apply(size); 551 byte[] rs = fr.apply(size); 552 boolean[] ms = fm.apply(size); 553 554 for (int ic = 0; ic < INVOC_COUNT; ic++) { 555 for (int i = 0; i < as.length; i++) { 556 byte a = as[i]; 557 byte b = bs[i]; 558 if (ms[i % ms.length]) { 559 rs[i] = (byte)(((a & 0xFF) >>> (b & 0x7))); 560 } else { 561 rs[i] = a; 562 } 563 } 564 } 565 bh.consume(rs); 566 } 567 568 @Benchmark 569 public void LSHLShift(Blackhole bh) { 570 byte[] as = fa.apply(size); 571 byte[] bs = fb.apply(size); 572 byte[] rs = fr.apply(size); 573 574 for (int ic = 0; ic < INVOC_COUNT; ic++) { 575 for (int i = 0; i < as.length; i++) { 576 byte a = as[i]; 577 byte b = bs[i]; 578 rs[i] = (byte)((a << (b & 7))); 579 } 580 } 581 582 bh.consume(rs); 583 } 584 585 @Benchmark 586 public void LSHLMaskedShift(Blackhole bh) { 587 byte[] as = fa.apply(size); 588 byte[] bs = fb.apply(size); 589 byte[] rs = fr.apply(size); 590 boolean[] ms = fm.apply(size); 591 592 for (int ic = 0; ic < INVOC_COUNT; ic++) { 593 for (int i = 0; i < as.length; i++) { 594 byte a = as[i]; 595 byte b = bs[i]; 596 boolean m = ms[i % ms.length]; 597 rs[i] = (m ? (byte)((a << (b & 7))) : a); 598 } 599 } 600 601 bh.consume(rs); 602 } 603 604 @Benchmark 605 public void LSHRShift(Blackhole bh) { 606 byte[] as = fa.apply(size); 607 byte[] bs = fb.apply(size); 608 byte[] rs = fr.apply(size); 609 610 for (int ic = 0; ic < INVOC_COUNT; ic++) { 611 for (int i = 0; i < as.length; i++) { 612 byte a = as[i]; 613 byte b = bs[i]; 614 rs[i] = (byte)(((a & 0xFF) >>> (b & 7))); 615 } 616 } 617 618 bh.consume(rs); 619 } 620 621 @Benchmark 622 public void LSHRMaskedShift(Blackhole bh) { 623 byte[] as = fa.apply(size); 624 byte[] bs = fb.apply(size); 625 byte[] rs = fr.apply(size); 626 boolean[] ms = fm.apply(size); 627 628 for (int ic = 0; ic < INVOC_COUNT; ic++) { 629 for (int i = 0; i < as.length; i++) { 630 byte a = as[i]; 631 byte b = bs[i]; 632 boolean m = ms[i % ms.length]; 633 rs[i] = (m ? (byte)(((a & 0xFF) >>> (b & 7))) : a); 634 } 635 } 636 637 bh.consume(rs); 638 } 639 640 @Benchmark 641 public void ASHRShift(Blackhole bh) { 642 byte[] as = fa.apply(size); 643 byte[] bs = fb.apply(size); 644 byte[] rs = fr.apply(size); 645 646 for (int ic = 0; ic < INVOC_COUNT; ic++) { 647 for (int i = 0; i < as.length; i++) { 648 byte a = as[i]; 649 byte b = bs[i]; 650 rs[i] = (byte)((a >> (b & 7))); 651 } 652 } 653 654 bh.consume(rs); 655 } 656 657 @Benchmark 658 public void ASHRMaskedShift(Blackhole bh) { 659 byte[] as = fa.apply(size); 660 byte[] bs = fb.apply(size); 661 byte[] rs = fr.apply(size); 662 boolean[] ms = fm.apply(size); 663 664 for (int ic = 0; ic < INVOC_COUNT; ic++) { 665 for (int i = 0; i < as.length; i++) { 666 byte a = as[i]; 667 byte b = bs[i]; 668 boolean m = ms[i % ms.length]; 669 rs[i] = (m ? (byte)((a >> (b & 7))) : a); 670 } 671 } 672 673 bh.consume(rs); 674 } 675 676 @Benchmark 677 public void ROR(Blackhole bh) { 678 byte[] as = fa.apply(size); 679 byte[] bs = fb.apply(size); 680 byte[] rs = fr.apply(size); 681 682 for (int ic = 0; ic < INVOC_COUNT; ic++) { 683 for (int i = 0; i < as.length; i++) { 684 byte a = as[i]; 685 byte b = bs[i]; 686 rs[i] = (byte)(ROR_scalar(a,b)); 687 } 688 } 689 690 bh.consume(rs); 691 } 692 693 @Benchmark 694 public void RORMasked(Blackhole bh) { 695 byte[] as = fa.apply(size); 696 byte[] bs = fb.apply(size); 697 byte[] rs = fr.apply(size); 698 boolean[] ms = fm.apply(size); 699 700 for (int ic = 0; ic < INVOC_COUNT; ic++) { 701 for (int i = 0; i < as.length; i++) { 702 byte a = as[i]; 703 byte b = bs[i]; 704 if (ms[i % ms.length]) { 705 rs[i] = (byte)(ROR_scalar(a,b)); 706 } else { 707 rs[i] = a; 708 } 709 } 710 } 711 bh.consume(rs); 712 } 713 714 @Benchmark 715 public void ROL(Blackhole bh) { 716 byte[] as = fa.apply(size); 717 byte[] bs = fb.apply(size); 718 byte[] rs = fr.apply(size); 719 720 for (int ic = 0; ic < INVOC_COUNT; ic++) { 721 for (int i = 0; i < as.length; i++) { 722 byte a = as[i]; 723 byte b = bs[i]; 724 rs[i] = (byte)(ROL_scalar(a,b)); 725 } 726 } 727 728 bh.consume(rs); 729 } 730 731 @Benchmark 732 public void ROLMasked(Blackhole bh) { 733 byte[] as = fa.apply(size); 734 byte[] bs = fb.apply(size); 735 byte[] rs = fr.apply(size); 736 boolean[] ms = fm.apply(size); 737 738 for (int ic = 0; ic < INVOC_COUNT; ic++) { 739 for (int i = 0; i < as.length; i++) { 740 byte a = as[i]; 741 byte b = bs[i]; 742 if (ms[i % ms.length]) { 743 rs[i] = (byte)(ROL_scalar(a,b)); 744 } else { 745 rs[i] = a; 746 } 747 } 748 } 749 bh.consume(rs); 750 } 751 752 @Benchmark 753 public void RORShift(Blackhole bh) { 754 byte[] as = fa.apply(size); 755 byte[] bs = fb.apply(size); 756 byte[] rs = fr.apply(size); 757 758 for (int ic = 0; ic < INVOC_COUNT; ic++) { 759 for (int i = 0; i < as.length; i++) { 760 byte a = as[i]; 761 byte b = bs[i]; 762 rs[i] = (byte)(ROR_scalar(a, b)); 763 } 764 } 765 766 bh.consume(rs); 767 } 768 769 @Benchmark 770 public void RORMaskedShift(Blackhole bh) { 771 byte[] as = fa.apply(size); 772 byte[] bs = fb.apply(size); 773 byte[] rs = fr.apply(size); 774 boolean[] ms = fm.apply(size); 775 776 for (int ic = 0; ic < INVOC_COUNT; ic++) { 777 for (int i = 0; i < as.length; i++) { 778 byte a = as[i]; 779 byte b = bs[i]; 780 boolean m = ms[i % ms.length]; 781 rs[i] = (m ? (byte)(ROR_scalar(a, b)) : a); 782 } 783 } 784 785 bh.consume(rs); 786 } 787 788 @Benchmark 789 public void ROLShift(Blackhole bh) { 790 byte[] as = fa.apply(size); 791 byte[] bs = fb.apply(size); 792 byte[] rs = fr.apply(size); 793 794 for (int ic = 0; ic < INVOC_COUNT; ic++) { 795 for (int i = 0; i < as.length; i++) { 796 byte a = as[i]; 797 byte b = bs[i]; 798 rs[i] = (byte)(ROL_scalar(a, b)); 799 } 800 } 801 802 bh.consume(rs); 803 } 804 805 @Benchmark 806 public void ROLMaskedShift(Blackhole bh) { 807 byte[] as = fa.apply(size); 808 byte[] bs = fb.apply(size); 809 byte[] rs = fr.apply(size); 810 boolean[] ms = fm.apply(size); 811 812 for (int ic = 0; ic < INVOC_COUNT; ic++) { 813 for (int i = 0; i < as.length; i++) { 814 byte a = as[i]; 815 byte b = bs[i]; 816 boolean m = ms[i % ms.length]; 817 rs[i] = (m ? (byte)(ROL_scalar(a, b)) : a); 818 } 819 } 820 821 bh.consume(rs); 822 } 823 824 @Benchmark 825 public void LSHRShiftConst(Blackhole bh) { 826 byte[] as = fa.apply(size); 827 byte[] bs = fb.apply(size); 828 byte[] rs = fr.apply(size); 829 830 for (int ic = 0; ic < INVOC_COUNT; ic++) { 831 for (int i = 0; i < as.length; i++) { 832 byte a = as[i]; 833 byte b = bs[i]; 834 rs[i] = (byte)(((a & 0xFF) >>> CONST_SHIFT)); 835 } 836 } 837 838 bh.consume(rs); 839 } 840 841 @Benchmark 842 public void LSHRMaskedShiftConst(Blackhole bh) { 843 byte[] as = fa.apply(size); 844 byte[] bs = fb.apply(size); 845 byte[] rs = fr.apply(size); 846 boolean[] ms = fm.apply(size); 847 848 for (int ic = 0; ic < INVOC_COUNT; ic++) { 849 for (int i = 0; i < as.length; i++) { 850 byte a = as[i]; 851 byte b = bs[i]; 852 boolean m = ms[i % ms.length]; 853 rs[i] = (m ? (byte)(((a & 0xFF) >>> CONST_SHIFT)) : a); 854 } 855 } 856 857 bh.consume(rs); 858 } 859 860 @Benchmark 861 public void LSHLShiftConst(Blackhole bh) { 862 byte[] as = fa.apply(size); 863 byte[] bs = fb.apply(size); 864 byte[] rs = fr.apply(size); 865 866 for (int ic = 0; ic < INVOC_COUNT; ic++) { 867 for (int i = 0; i < as.length; i++) { 868 byte a = as[i]; 869 byte b = bs[i]; 870 rs[i] = (byte)((a << CONST_SHIFT)); 871 } 872 } 873 874 bh.consume(rs); 875 } 876 877 @Benchmark 878 public void LSHLMaskedShiftConst(Blackhole bh) { 879 byte[] as = fa.apply(size); 880 byte[] bs = fb.apply(size); 881 byte[] rs = fr.apply(size); 882 boolean[] ms = fm.apply(size); 883 884 for (int ic = 0; ic < INVOC_COUNT; ic++) { 885 for (int i = 0; i < as.length; i++) { 886 byte a = as[i]; 887 byte b = bs[i]; 888 boolean m = ms[i % ms.length]; 889 rs[i] = (m ? (byte)((a << CONST_SHIFT)) : a); 890 } 891 } 892 893 bh.consume(rs); 894 } 895 896 @Benchmark 897 public void ASHRShiftConst(Blackhole bh) { 898 byte[] as = fa.apply(size); 899 byte[] bs = fb.apply(size); 900 byte[] rs = fr.apply(size); 901 902 for (int ic = 0; ic < INVOC_COUNT; ic++) { 903 for (int i = 0; i < as.length; i++) { 904 byte a = as[i]; 905 byte b = bs[i]; 906 rs[i] = (byte)((a >> CONST_SHIFT)); 907 } 908 } 909 910 bh.consume(rs); 911 } 912 913 @Benchmark 914 public void ASHRMaskedShiftConst(Blackhole bh) { 915 byte[] as = fa.apply(size); 916 byte[] bs = fb.apply(size); 917 byte[] rs = fr.apply(size); 918 boolean[] ms = fm.apply(size); 919 920 for (int ic = 0; ic < INVOC_COUNT; ic++) { 921 for (int i = 0; i < as.length; i++) { 922 byte a = as[i]; 923 byte b = bs[i]; 924 boolean m = ms[i % ms.length]; 925 rs[i] = (m ? (byte)((a >> CONST_SHIFT)) : a); 926 } 927 } 928 929 bh.consume(rs); 930 } 931 932 @Benchmark 933 public void RORShiftConst(Blackhole bh) { 934 byte[] as = fa.apply(size); 935 byte[] bs = fb.apply(size); 936 byte[] rs = fr.apply(size); 937 938 for (int ic = 0; ic < INVOC_COUNT; ic++) { 939 for (int i = 0; i < as.length; i++) { 940 byte a = as[i]; 941 byte b = bs[i]; 942 rs[i] = (byte)(ROR_scalar(a, CONST_SHIFT)); 943 } 944 } 945 946 bh.consume(rs); 947 } 948 949 @Benchmark 950 public void RORMaskedShiftConst(Blackhole bh) { 951 byte[] as = fa.apply(size); 952 byte[] bs = fb.apply(size); 953 byte[] rs = fr.apply(size); 954 boolean[] ms = fm.apply(size); 955 956 for (int ic = 0; ic < INVOC_COUNT; ic++) { 957 for (int i = 0; i < as.length; i++) { 958 byte a = as[i]; 959 byte b = bs[i]; 960 boolean m = ms[i % ms.length]; 961 rs[i] = (m ? (byte)(ROR_scalar(a, CONST_SHIFT)) : a); 962 } 963 } 964 965 bh.consume(rs); 966 } 967 968 @Benchmark 969 public void ROLShiftConst(Blackhole bh) { 970 byte[] as = fa.apply(size); 971 byte[] bs = fb.apply(size); 972 byte[] rs = fr.apply(size); 973 974 for (int ic = 0; ic < INVOC_COUNT; ic++) { 975 for (int i = 0; i < as.length; i++) { 976 byte a = as[i]; 977 byte b = bs[i]; 978 rs[i] = (byte)(ROL_scalar(a, CONST_SHIFT)); 979 } 980 } 981 982 bh.consume(rs); 983 } 984 985 @Benchmark 986 public void ROLMaskedShiftConst(Blackhole bh) { 987 byte[] as = fa.apply(size); 988 byte[] bs = fb.apply(size); 989 byte[] rs = fr.apply(size); 990 boolean[] ms = fm.apply(size); 991 992 for (int ic = 0; ic < INVOC_COUNT; ic++) { 993 for (int i = 0; i < as.length; i++) { 994 byte a = as[i]; 995 byte b = bs[i]; 996 boolean m = ms[i % ms.length]; 997 rs[i] = (m ? (byte)(ROL_scalar(a, CONST_SHIFT)) : a); 998 } 999 } 1000 1001 bh.consume(rs); 1002 } 1003 1004 @Benchmark 1005 public void MIN(Blackhole bh) { 1006 byte[] as = fa.apply(size); 1007 byte[] bs = fb.apply(size); 1008 byte[] rs = fr.apply(size); 1009 1010 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1011 for (int i = 0; i < as.length; i++) { 1012 byte a = as[i]; 1013 byte b = bs[i]; 1014 rs[i] = (byte)(Math.min(a, b)); 1015 } 1016 } 1017 1018 bh.consume(rs); 1019 } 1020 1021 @Benchmark 1022 public void MAX(Blackhole bh) { 1023 byte[] as = fa.apply(size); 1024 byte[] bs = fb.apply(size); 1025 byte[] rs = fr.apply(size); 1026 1027 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1028 for (int i = 0; i < as.length; i++) { 1029 byte a = as[i]; 1030 byte b = bs[i]; 1031 rs[i] = (byte)(Math.max(a, b)); 1032 } 1033 } 1034 1035 bh.consume(rs); 1036 } 1037 1038 @Benchmark 1039 public void UMIN(Blackhole bh) { 1040 byte[] as = fa.apply(size); 1041 byte[] bs = fb.apply(size); 1042 byte[] rs = fr.apply(size); 1043 1044 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1045 for (int i = 0; i < as.length; i++) { 1046 byte a = as[i]; 1047 byte b = bs[i]; 1048 rs[i] = (byte)(VectorMath.minUnsigned(a, b)); 1049 } 1050 } 1051 1052 bh.consume(rs); 1053 } 1054 1055 @Benchmark 1056 public void UMINMasked(Blackhole bh) { 1057 byte[] as = fa.apply(size); 1058 byte[] bs = fb.apply(size); 1059 byte[] rs = fr.apply(size); 1060 boolean[] ms = fm.apply(size); 1061 1062 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1063 for (int i = 0; i < as.length; i++) { 1064 byte a = as[i]; 1065 byte b = bs[i]; 1066 if (ms[i % ms.length]) { 1067 rs[i] = (byte)(VectorMath.minUnsigned(a, b)); 1068 } else { 1069 rs[i] = a; 1070 } 1071 } 1072 } 1073 bh.consume(rs); 1074 } 1075 1076 @Benchmark 1077 public void UMAX(Blackhole bh) { 1078 byte[] as = fa.apply(size); 1079 byte[] bs = fb.apply(size); 1080 byte[] rs = fr.apply(size); 1081 1082 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1083 for (int i = 0; i < as.length; i++) { 1084 byte a = as[i]; 1085 byte b = bs[i]; 1086 rs[i] = (byte)(VectorMath.maxUnsigned(a, b)); 1087 } 1088 } 1089 1090 bh.consume(rs); 1091 } 1092 1093 @Benchmark 1094 public void UMAXMasked(Blackhole bh) { 1095 byte[] as = fa.apply(size); 1096 byte[] bs = fb.apply(size); 1097 byte[] rs = fr.apply(size); 1098 boolean[] ms = fm.apply(size); 1099 1100 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1101 for (int i = 0; i < as.length; i++) { 1102 byte a = as[i]; 1103 byte b = bs[i]; 1104 if (ms[i % ms.length]) { 1105 rs[i] = (byte)(VectorMath.maxUnsigned(a, b)); 1106 } else { 1107 rs[i] = a; 1108 } 1109 } 1110 } 1111 bh.consume(rs); 1112 } 1113 1114 @Benchmark 1115 public void ANDLanes(Blackhole bh) { 1116 byte[] as = fa.apply(size); 1117 byte r = -1; 1118 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1119 r = -1; 1120 for (int i = 0; i < as.length; i++) { 1121 r &= as[i]; 1122 } 1123 } 1124 bh.consume(r); 1125 } 1126 1127 @Benchmark 1128 public void ANDMaskedLanes(Blackhole bh) { 1129 byte[] as = fa.apply(size); 1130 boolean[] ms = fm.apply(size); 1131 byte r = -1; 1132 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1133 r = -1; 1134 for (int i = 0; i < as.length; i++) { 1135 if (ms[i % ms.length]) 1136 r &= as[i]; 1137 } 1138 } 1139 bh.consume(r); 1140 } 1141 1142 @Benchmark 1143 public void ORLanes(Blackhole bh) { 1144 byte[] as = fa.apply(size); 1145 byte r = 0; 1146 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1147 r = 0; 1148 for (int i = 0; i < as.length; i++) { 1149 r |= as[i]; 1150 } 1151 } 1152 bh.consume(r); 1153 } 1154 1155 @Benchmark 1156 public void ORMaskedLanes(Blackhole bh) { 1157 byte[] as = fa.apply(size); 1158 boolean[] ms = fm.apply(size); 1159 byte r = 0; 1160 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1161 r = 0; 1162 for (int i = 0; i < as.length; i++) { 1163 if (ms[i % ms.length]) 1164 r |= as[i]; 1165 } 1166 } 1167 bh.consume(r); 1168 } 1169 1170 @Benchmark 1171 public void XORLanes(Blackhole bh) { 1172 byte[] as = fa.apply(size); 1173 byte r = 0; 1174 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1175 r = 0; 1176 for (int i = 0; i < as.length; i++) { 1177 r ^= as[i]; 1178 } 1179 } 1180 bh.consume(r); 1181 } 1182 1183 @Benchmark 1184 public void XORMaskedLanes(Blackhole bh) { 1185 byte[] as = fa.apply(size); 1186 boolean[] ms = fm.apply(size); 1187 byte r = 0; 1188 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1189 r = 0; 1190 for (int i = 0; i < as.length; i++) { 1191 if (ms[i % ms.length]) 1192 r ^= as[i]; 1193 } 1194 } 1195 bh.consume(r); 1196 } 1197 1198 @Benchmark 1199 public void ADDLanes(Blackhole bh) { 1200 byte[] as = fa.apply(size); 1201 byte r = 0; 1202 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1203 r = 0; 1204 for (int i = 0; i < as.length; i++) { 1205 r += as[i]; 1206 } 1207 } 1208 bh.consume(r); 1209 } 1210 1211 @Benchmark 1212 public void ADDMaskedLanes(Blackhole bh) { 1213 byte[] as = fa.apply(size); 1214 boolean[] ms = fm.apply(size); 1215 byte r = 0; 1216 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1217 r = 0; 1218 for (int i = 0; i < as.length; i++) { 1219 if (ms[i % ms.length]) 1220 r += as[i]; 1221 } 1222 } 1223 bh.consume(r); 1224 } 1225 1226 @Benchmark 1227 public void MULLanes(Blackhole bh) { 1228 byte[] as = fa.apply(size); 1229 byte r = 1; 1230 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1231 r = 1; 1232 for (int i = 0; i < as.length; i++) { 1233 r *= as[i]; 1234 } 1235 } 1236 bh.consume(r); 1237 } 1238 1239 @Benchmark 1240 public void MULMaskedLanes(Blackhole bh) { 1241 byte[] as = fa.apply(size); 1242 boolean[] ms = fm.apply(size); 1243 byte r = 1; 1244 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1245 r = 1; 1246 for (int i = 0; i < as.length; i++) { 1247 if (ms[i % ms.length]) 1248 r *= as[i]; 1249 } 1250 } 1251 bh.consume(r); 1252 } 1253 1254 @Benchmark 1255 public void anyTrue(Blackhole bh) { 1256 boolean[] ms = fm.apply(size); 1257 boolean r = false; 1258 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1259 r = false; 1260 for (int i = 0; i < ms.length; i++) { 1261 r |= ms[i]; 1262 } 1263 } 1264 bh.consume(r); 1265 } 1266 1267 @Benchmark 1268 public void allTrue(Blackhole bh) { 1269 boolean[] ms = fm.apply(size); 1270 boolean r = true; 1271 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1272 r = true; 1273 for (int i = 0; i < ms.length; i++) { 1274 r &= ms[i]; 1275 } 1276 } 1277 bh.consume(r); 1278 } 1279 1280 @Benchmark 1281 public void IS_DEFAULT(Blackhole bh) { 1282 byte[] as = fa.apply(size); 1283 boolean r = true; 1284 1285 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1286 for (int i = 0; i < as.length; i++) { 1287 byte a = as[i]; 1288 r &= (bits(a)==0); // accumulate so JIT can't eliminate the computation 1289 } 1290 } 1291 1292 bh.consume(r); 1293 } 1294 1295 @Benchmark 1296 public void IS_NEGATIVE(Blackhole bh) { 1297 byte[] as = fa.apply(size); 1298 boolean r = true; 1299 1300 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1301 for (int i = 0; i < as.length; i++) { 1302 byte a = as[i]; 1303 r &= (bits(a)<0); // accumulate so JIT can't eliminate the computation 1304 } 1305 } 1306 1307 bh.consume(r); 1308 } 1309 1310 @Benchmark 1311 public void LT(Blackhole bh) { 1312 byte[] as = fa.apply(size); 1313 byte[] bs = fb.apply(size); 1314 boolean r = true; 1315 1316 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1317 for (int i = 0; i < as.length; i++) { 1318 r &= lt(as[i], bs[i]); // accumulate so JIT can't eliminate the computation 1319 } 1320 } 1321 1322 bh.consume(r); 1323 } 1324 1325 @Benchmark 1326 public void GT(Blackhole bh) { 1327 byte[] as = fa.apply(size); 1328 byte[] bs = fb.apply(size); 1329 boolean r = true; 1330 1331 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1332 for (int i = 0; i < as.length; i++) { 1333 r &= gt(as[i], bs[i]); // accumulate so JIT can't eliminate the computation 1334 } 1335 } 1336 1337 bh.consume(r); 1338 } 1339 1340 @Benchmark 1341 public void EQ(Blackhole bh) { 1342 byte[] as = fa.apply(size); 1343 byte[] bs = fb.apply(size); 1344 boolean r = true; 1345 1346 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1347 for (int i = 0; i < as.length; i++) { 1348 r &= eq(as[i], bs[i]); // accumulate so JIT can't eliminate the computation 1349 } 1350 } 1351 1352 bh.consume(r); 1353 } 1354 1355 @Benchmark 1356 public void NE(Blackhole bh) { 1357 byte[] as = fa.apply(size); 1358 byte[] bs = fb.apply(size); 1359 boolean r = true; 1360 1361 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1362 for (int i = 0; i < as.length; i++) { 1363 r &= neq(as[i], bs[i]); // accumulate so JIT can't eliminate the computation 1364 } 1365 } 1366 1367 bh.consume(r); 1368 } 1369 1370 @Benchmark 1371 public void LE(Blackhole bh) { 1372 byte[] as = fa.apply(size); 1373 byte[] bs = fb.apply(size); 1374 boolean r = true; 1375 1376 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1377 for (int i = 0; i < as.length; i++) { 1378 r &= le(as[i], bs[i]); // accumulate so JIT can't eliminate the computation 1379 } 1380 } 1381 1382 bh.consume(r); 1383 } 1384 1385 @Benchmark 1386 public void GE(Blackhole bh) { 1387 byte[] as = fa.apply(size); 1388 byte[] bs = fb.apply(size); 1389 boolean r = true; 1390 1391 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1392 for (int i = 0; i < as.length; i++) { 1393 r &= ge(as[i], bs[i]); // accumulate so JIT can't eliminate the computation 1394 } 1395 } 1396 1397 bh.consume(r); 1398 } 1399 1400 @Benchmark 1401 public void ULT(Blackhole bh) { 1402 byte[] as = fa.apply(size); 1403 byte[] bs = fb.apply(size); 1404 boolean r = true; 1405 1406 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1407 for (int i = 0; i < as.length; i++) { 1408 r &= ult(as[i], bs[i]); // accumulate so JIT can't eliminate the computation 1409 } 1410 } 1411 1412 bh.consume(r); 1413 } 1414 1415 @Benchmark 1416 public void UGT(Blackhole bh) { 1417 byte[] as = fa.apply(size); 1418 byte[] bs = fb.apply(size); 1419 boolean r = true; 1420 1421 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1422 for (int i = 0; i < as.length; i++) { 1423 r &= ugt(as[i], bs[i]); // accumulate so JIT can't eliminate the computation 1424 } 1425 } 1426 1427 bh.consume(r); 1428 } 1429 1430 @Benchmark 1431 public void ULE(Blackhole bh) { 1432 byte[] as = fa.apply(size); 1433 byte[] bs = fb.apply(size); 1434 boolean r = true; 1435 1436 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1437 for (int i = 0; i < as.length; i++) { 1438 r &= ule(as[i], bs[i]); // accumulate so JIT can't eliminate the computation 1439 } 1440 } 1441 1442 bh.consume(r); 1443 } 1444 1445 @Benchmark 1446 public void UGE(Blackhole bh) { 1447 byte[] as = fa.apply(size); 1448 byte[] bs = fb.apply(size); 1449 boolean r = true; 1450 1451 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1452 for (int i = 0; i < as.length; i++) { 1453 r &= uge(as[i], bs[i]); // accumulate so JIT can't eliminate the computation 1454 } 1455 } 1456 1457 bh.consume(r); 1458 } 1459 1460 @Benchmark 1461 public void blend(Blackhole bh) { 1462 byte[] as = fa.apply(size); 1463 byte[] bs = fb.apply(size); 1464 byte[] rs = fr.apply(size); 1465 boolean[] ms = fm.apply(size); 1466 1467 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1468 for (int i = 0; i < as.length; i++) { 1469 byte a = as[i]; 1470 byte b = bs[i]; 1471 boolean m = ms[i % ms.length]; 1472 rs[i] = (m ? b : a); 1473 } 1474 } 1475 1476 bh.consume(rs); 1477 } 1478 1479 void rearrangeShared(int window, Blackhole bh) { 1480 byte[] as = fa.apply(size); 1481 int[] order = fs.apply(size); 1482 byte[] rs = fr.apply(size); 1483 1484 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1485 for (int i = 0; i < as.length; i += window) { 1486 for (int j = 0; j < window; j++) { 1487 byte a = as[i+j]; 1488 int pos = order[j]; 1489 rs[i + pos] = a; 1490 } 1491 } 1492 } 1493 1494 bh.consume(rs); 1495 } 1496 1497 @Benchmark 1498 public void rearrange064(Blackhole bh) { 1499 int window = 64 / Byte.SIZE; 1500 rearrangeShared(window, bh); 1501 } 1502 1503 @Benchmark 1504 public void rearrange128(Blackhole bh) { 1505 int window = 128 / Byte.SIZE; 1506 rearrangeShared(window, bh); 1507 } 1508 1509 @Benchmark 1510 public void rearrange256(Blackhole bh) { 1511 int window = 256 / Byte.SIZE; 1512 rearrangeShared(window, bh); 1513 } 1514 1515 @Benchmark 1516 public void rearrange512(Blackhole bh) { 1517 int window = 512 / Byte.SIZE; 1518 rearrangeShared(window, bh); 1519 } 1520 1521 @Benchmark 1522 public void compressScalar(Blackhole bh) { 1523 byte[] as = fa.apply(size); 1524 byte[] rs = new byte[size]; 1525 boolean[] im = fmt.apply(size); 1526 1527 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1528 for (int i = 0, j = 0; i < as.length; i++) { 1529 if (im[i]) { 1530 rs[j++] = as[i]; 1531 } 1532 } 1533 } 1534 1535 bh.consume(rs); 1536 } 1537 1538 @Benchmark 1539 public void expandScalar(Blackhole bh) { 1540 byte[] as = fa.apply(size); 1541 byte[] rs = new byte[size]; 1542 boolean[] im = fmt.apply(size); 1543 1544 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1545 for (int i = 0, j = 0; i < as.length; i++) { 1546 if (im[i]) { 1547 rs[i++] = as[j++]; 1548 } 1549 } 1550 } 1551 1552 bh.consume(rs); 1553 } 1554 1555 @Benchmark 1556 public void maskCompressScalar(Blackhole bh) { 1557 boolean[] im = fmt.apply(size); 1558 boolean[] rm = new boolean[size]; 1559 1560 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1561 for (int i = 0, j = 0; i < im.length; i++) { 1562 if (im[i]) { 1563 rm[j++] = im[i]; 1564 } 1565 } 1566 } 1567 1568 bh.consume(rm); 1569 } 1570 1571 void broadcastShared(int window, Blackhole bh) { 1572 byte[] as = fa.apply(size); 1573 byte[] rs = fr.apply(size); 1574 1575 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1576 for (int i = 0; i < as.length; i += window) { 1577 int idx = i; 1578 for (int j = 0; j < window; j++) { 1579 rs[j] = as[idx]; 1580 } 1581 } 1582 } 1583 1584 bh.consume(rs); 1585 } 1586 1587 @Benchmark 1588 public void broadcast064(Blackhole bh) { 1589 int window = 64 / Byte.SIZE; 1590 broadcastShared(window, bh); 1591 } 1592 1593 @Benchmark 1594 public void broadcast128(Blackhole bh) { 1595 int window = 128 / Byte.SIZE; 1596 broadcastShared(window, bh); 1597 } 1598 1599 @Benchmark 1600 public void broadcast256(Blackhole bh) { 1601 int window = 256 / Byte.SIZE; 1602 broadcastShared(window, bh); 1603 } 1604 1605 @Benchmark 1606 public void broadcast512(Blackhole bh) { 1607 int window = 512 / Byte.SIZE; 1608 broadcastShared(window, bh); 1609 } 1610 1611 @Benchmark 1612 public void zero(Blackhole bh) { 1613 byte[] as = fa.apply(size); 1614 1615 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1616 for (int i = 0; i < as.length; i++) { 1617 as[i] = (byte)0; 1618 } 1619 } 1620 1621 bh.consume(as); 1622 } 1623 1624 @Benchmark 1625 public void BITWISE_BLEND(Blackhole bh) { 1626 byte[] as = fa.apply(size); 1627 byte[] bs = fb.apply(size); 1628 byte[] cs = fc.apply(size); 1629 byte[] rs = fr.apply(size); 1630 1631 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1632 for (int i = 0; i < as.length; i++) { 1633 byte a = as[i]; 1634 byte b = bs[i]; 1635 byte c = cs[i]; 1636 rs[i] = (byte)((a&~(c))|(b&c)); 1637 } 1638 } 1639 1640 bh.consume(rs); 1641 } 1642 1643 @Benchmark 1644 public void BITWISE_BLENDMasked(Blackhole bh) { 1645 byte[] as = fa.apply(size); 1646 byte[] bs = fb.apply(size); 1647 byte[] cs = fc.apply(size); 1648 byte[] rs = fr.apply(size); 1649 boolean[] ms = fm.apply(size); 1650 1651 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1652 for (int i = 0; i < as.length; i++) { 1653 byte a = as[i]; 1654 byte b = bs[i]; 1655 byte c = cs[i]; 1656 if (ms[i % ms.length]) { 1657 rs[i] = (byte)((a&~(c))|(b&c)); 1658 } else { 1659 rs[i] = a; 1660 } 1661 } 1662 } 1663 bh.consume(rs); 1664 } 1665 @Benchmark 1666 public void NEG(Blackhole bh) { 1667 byte[] as = fa.apply(size); 1668 byte[] rs = fr.apply(size); 1669 1670 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1671 for (int i = 0; i < as.length; i++) { 1672 byte a = as[i]; 1673 rs[i] = (byte)(-((byte)a)); 1674 } 1675 } 1676 1677 bh.consume(rs); 1678 } 1679 1680 @Benchmark 1681 public void NEGMasked(Blackhole bh) { 1682 byte[] as = fa.apply(size); 1683 byte[] rs = fr.apply(size); 1684 boolean[] ms = fm.apply(size); 1685 1686 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1687 for (int i = 0; i < as.length; i++) { 1688 byte a = as[i]; 1689 boolean m = ms[i % ms.length]; 1690 rs[i] = (m ? (byte)(-((byte)a)) : a); 1691 } 1692 } 1693 1694 bh.consume(rs); 1695 } 1696 @Benchmark 1697 public void ABS(Blackhole bh) { 1698 byte[] as = fa.apply(size); 1699 byte[] rs = fr.apply(size); 1700 1701 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1702 for (int i = 0; i < as.length; i++) { 1703 byte a = as[i]; 1704 rs[i] = (byte)(Math.abs((byte)a)); 1705 } 1706 } 1707 1708 bh.consume(rs); 1709 } 1710 1711 @Benchmark 1712 public void ABSMasked(Blackhole bh) { 1713 byte[] as = fa.apply(size); 1714 byte[] rs = fr.apply(size); 1715 boolean[] ms = fm.apply(size); 1716 1717 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1718 for (int i = 0; i < as.length; i++) { 1719 byte a = as[i]; 1720 boolean m = ms[i % ms.length]; 1721 rs[i] = (m ? (byte)(Math.abs((byte)a)) : a); 1722 } 1723 } 1724 1725 bh.consume(rs); 1726 } 1727 @Benchmark 1728 public void NOT(Blackhole bh) { 1729 byte[] as = fa.apply(size); 1730 byte[] rs = fr.apply(size); 1731 1732 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1733 for (int i = 0; i < as.length; i++) { 1734 byte a = as[i]; 1735 rs[i] = (byte)(~((byte)a)); 1736 } 1737 } 1738 1739 bh.consume(rs); 1740 } 1741 1742 @Benchmark 1743 public void NOTMasked(Blackhole bh) { 1744 byte[] as = fa.apply(size); 1745 byte[] rs = fr.apply(size); 1746 boolean[] ms = fm.apply(size); 1747 1748 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1749 for (int i = 0; i < as.length; i++) { 1750 byte a = as[i]; 1751 boolean m = ms[i % ms.length]; 1752 rs[i] = (m ? (byte)(~((byte)a)) : a); 1753 } 1754 } 1755 1756 bh.consume(rs); 1757 } 1758 @Benchmark 1759 public void ZOMO(Blackhole bh) { 1760 byte[] as = fa.apply(size); 1761 byte[] rs = fr.apply(size); 1762 1763 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1764 for (int i = 0; i < as.length; i++) { 1765 byte a = as[i]; 1766 rs[i] = (byte)((a==0?0:-1)); 1767 } 1768 } 1769 1770 bh.consume(rs); 1771 } 1772 1773 @Benchmark 1774 public void ZOMOMasked(Blackhole bh) { 1775 byte[] as = fa.apply(size); 1776 byte[] rs = fr.apply(size); 1777 boolean[] ms = fm.apply(size); 1778 1779 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1780 for (int i = 0; i < as.length; i++) { 1781 byte a = as[i]; 1782 boolean m = ms[i % ms.length]; 1783 rs[i] = (m ? (byte)((a==0?0:-1)) : a); 1784 } 1785 } 1786 1787 bh.consume(rs); 1788 } 1789 @Benchmark 1790 public void BIT_COUNT(Blackhole bh) { 1791 byte[] as = fa.apply(size); 1792 byte[] rs = fr.apply(size); 1793 1794 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1795 for (int i = 0; i < as.length; i++) { 1796 byte a = as[i]; 1797 rs[i] = (byte)(Integer.bitCount((int)a & 0xFF)); 1798 } 1799 } 1800 1801 bh.consume(rs); 1802 } 1803 1804 @Benchmark 1805 public void BIT_COUNTMasked(Blackhole bh) { 1806 byte[] as = fa.apply(size); 1807 byte[] rs = fr.apply(size); 1808 boolean[] ms = fm.apply(size); 1809 1810 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1811 for (int i = 0; i < as.length; i++) { 1812 byte a = as[i]; 1813 boolean m = ms[i % ms.length]; 1814 rs[i] = (m ? (byte)(Integer.bitCount((int)a & 0xFF)) : a); 1815 } 1816 } 1817 1818 bh.consume(rs); 1819 } 1820 @Benchmark 1821 public void TRAILING_ZEROS_COUNT(Blackhole bh) { 1822 byte[] as = fa.apply(size); 1823 byte[] rs = fr.apply(size); 1824 1825 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1826 for (int i = 0; i < as.length; i++) { 1827 byte a = as[i]; 1828 rs[i] = (byte)(TRAILING_ZEROS_COUNT_scalar(a)); 1829 } 1830 } 1831 1832 bh.consume(rs); 1833 } 1834 1835 @Benchmark 1836 public void TRAILING_ZEROS_COUNTMasked(Blackhole bh) { 1837 byte[] as = fa.apply(size); 1838 byte[] rs = fr.apply(size); 1839 boolean[] ms = fm.apply(size); 1840 1841 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1842 for (int i = 0; i < as.length; i++) { 1843 byte a = as[i]; 1844 boolean m = ms[i % ms.length]; 1845 rs[i] = (m ? (byte)(TRAILING_ZEROS_COUNT_scalar(a)) : a); 1846 } 1847 } 1848 1849 bh.consume(rs); 1850 } 1851 @Benchmark 1852 public void LEADING_ZEROS_COUNT(Blackhole bh) { 1853 byte[] as = fa.apply(size); 1854 byte[] rs = fr.apply(size); 1855 1856 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1857 for (int i = 0; i < as.length; i++) { 1858 byte a = as[i]; 1859 rs[i] = (byte)(LEADING_ZEROS_COUNT_scalar(a)); 1860 } 1861 } 1862 1863 bh.consume(rs); 1864 } 1865 1866 @Benchmark 1867 public void LEADING_ZEROS_COUNTMasked(Blackhole bh) { 1868 byte[] as = fa.apply(size); 1869 byte[] rs = fr.apply(size); 1870 boolean[] ms = fm.apply(size); 1871 1872 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1873 for (int i = 0; i < as.length; i++) { 1874 byte a = as[i]; 1875 boolean m = ms[i % ms.length]; 1876 rs[i] = (m ? (byte)(LEADING_ZEROS_COUNT_scalar(a)) : a); 1877 } 1878 } 1879 1880 bh.consume(rs); 1881 } 1882 @Benchmark 1883 public void REVERSE(Blackhole bh) { 1884 byte[] as = fa.apply(size); 1885 byte[] rs = fr.apply(size); 1886 1887 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1888 for (int i = 0; i < as.length; i++) { 1889 byte a = as[i]; 1890 rs[i] = (byte)(REVERSE_scalar(a)); 1891 } 1892 } 1893 1894 bh.consume(rs); 1895 } 1896 1897 @Benchmark 1898 public void REVERSEMasked(Blackhole bh) { 1899 byte[] as = fa.apply(size); 1900 byte[] rs = fr.apply(size); 1901 boolean[] ms = fm.apply(size); 1902 1903 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1904 for (int i = 0; i < as.length; i++) { 1905 byte a = as[i]; 1906 boolean m = ms[i % ms.length]; 1907 rs[i] = (m ? (byte)(REVERSE_scalar(a)) : a); 1908 } 1909 } 1910 1911 bh.consume(rs); 1912 } 1913 @Benchmark 1914 public void REVERSE_BYTES(Blackhole bh) { 1915 byte[] as = fa.apply(size); 1916 byte[] rs = fr.apply(size); 1917 1918 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1919 for (int i = 0; i < as.length; i++) { 1920 byte a = as[i]; 1921 rs[i] = (byte)(a); 1922 } 1923 } 1924 1925 bh.consume(rs); 1926 } 1927 1928 @Benchmark 1929 public void REVERSE_BYTESMasked(Blackhole bh) { 1930 byte[] as = fa.apply(size); 1931 byte[] rs = fr.apply(size); 1932 boolean[] ms = fm.apply(size); 1933 1934 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1935 for (int i = 0; i < as.length; i++) { 1936 byte a = as[i]; 1937 boolean m = ms[i % ms.length]; 1938 rs[i] = (m ? (byte)(a) : a); 1939 } 1940 } 1941 1942 bh.consume(rs); 1943 } 1944 }