/*
 * Copyright (c) 2018, 2022, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package org.openjdk.bench.jdk.incubator.vector.operation;

// -- This file was mechanically generated: Do not edit! -- //

import java.util.concurrent.TimeUnit;
import java.util.function.IntFunction;

import org.openjdk.jmh.annotations.*;
import org.openjdk.jmh.infra.Blackhole;

/**
 * JMH benchmarks that apply byte operations one element at a time with plain
 * Java loops.
 *
 * <p>Every benchmark obtains its arrays through the {@code IntFunction}
 * suppliers declared below, runs its element loop {@link #INVOC_COUNT} times
 * and passes the result to the {@link Blackhole}. The exact shape of each loop
 * body is what is being measured, so the code must not be restructured.
 *
 * <p>NOTE(review): {@code fillMask}, {@code fillInt} and {@code RANDOM} are not
 * declared in this file; presumably they are inherited from
 * {@code AbstractVectorBenchmark} - confirm.
 */
@BenchmarkMode(Mode.Throughput)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
@State(Scope.Benchmark)
@Warmup(iterations = 3, time = 1)
@Measurement(iterations = 5, time = 1)
@Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
public class ByteScalar extends AbstractVectorBenchmark {
    /** Number of times each benchmark repeats its element loop per invocation. */
    static final int INVOC_COUNT = 1; // To align with vector benchmarks.

    /** Shift/rotate distance used by the {@code *ShiftConst} benchmarks: half the bit width of a byte. */
    private static final byte CONST_SHIFT = Byte.SIZE / 2;

    /** Length of every array created by {@link #fill} and {@link #init()}. */
    @Param("1024")
    int size;

    /**
     * Creates a byte array of length {@link #size} whose element {@code i} is
     * {@code f.apply(i)} (the boxed result is auto-unboxed).
     *
     * @param f maps an index to the value stored at that index
     * @return the newly filled array
     */
    byte[] fill(IntFunction<Byte> f) {
        byte[] array = new byte[size];
        for (int i = 0; i < array.length; i++) {
            array[i] = f.apply(i);
        }
        return array;
    }

    /** Returns {@code e} unchanged. */
    static byte bits(byte e) {
        return e;
    }

    // Operand arrays (as, bs, cs), result array (rs), mask arrays and the int array ss;
    // all are assigned in init().
    byte[] as, bs, cs, rs;
    boolean[] ms, mt, rms;
    int[] ss;

    /**
     * Populates the state arrays: {@code as[i] = (byte)(2*i)},
     * {@code bs[i] = (byte)(i+1)}, {@code cs[i] = (byte)(i+5)}, {@code rs} all
     * zero; {@code ms} is built from the predicate "index is even", {@code mt}
     * from "always true" and {@code rms} from "always false"; {@code ss} is
     * built from {@code RANDOM.nextInt(Math.max(i,1))}.
     */
    @Setup
    public void init() {
        as = fill(i -> (byte)(2*i));
        bs = fill(i -> (byte)(i+1));
        cs = fill(i -> (byte)(i+5));
        rs = fill(i -> (byte)0);
        ms = fillMask(size, i -> (i % 2) == 0);
        mt = fillMask(size, i -> true);
        rms = fillMask(size, i -> false);

        ss = fillInt(size, i -> RANDOM.nextInt(Math.max(i,1)));
    }

    // Suppliers that ignore their int argument and return the corresponding state array.
    final IntFunction<byte[]> fa = vl -> as;
    final IntFunction<byte[]> fb = vl -> bs;
    final IntFunction<byte[]> fc = vl -> cs;
    final IntFunction<byte[]> fr = vl -> rs;
    final IntFunction<boolean[]> fm = vl -> ms;
    final IntFunction<boolean[]> fmt = vl -> mt;
    final IntFunction<boolean[]> fmr = vl -> rms;
    final IntFunction<int[]> fs = vl -> ss;

    /** Returns {@code a == b}. */
    static boolean eq(byte a, byte b) {
        return a == b;
    }

    /** Returns {@code a != b}. */
    static boolean neq(byte a, byte b) {
        return a != b;
    }

    /** Returns {@code a < b} (signed comparison). */
    static boolean lt(byte a, byte b) {
        return a < b;
    }

    /** Returns {@code a <= b} (signed comparison). */
    static boolean le(byte a, byte b) {
        return a <= b;
    }

    /** Returns {@code a > b} (signed comparison). */
    static boolean gt(byte a, byte b) {
        return a > b;
    }

    /** Returns {@code a >= b} (signed comparison). */
    static boolean ge(byte a, byte b) {
        return a >= b;
    }

    /** Returns whether {@code a < b} under {@link Byte#compareUnsigned}. */
    static boolean ult(byte a, byte b) {
        return Byte.compareUnsigned(a, b) < 0;
    }

    /** Returns whether {@code a <= b} under {@link Byte#compareUnsigned}. */
    static boolean ule(byte a, byte b) {
        return Byte.compareUnsigned(a, b) <= 0;
    }

    /** Returns whether {@code a > b} under {@link Byte#compareUnsigned}. */
    static boolean ugt(byte a, byte b) {
        return Byte.compareUnsigned(a, b) > 0;
    }

    /** Returns whether {@code a >= b} under {@link Byte#compareUnsigned}. */
    static boolean uge(byte a, byte b) {
        return Byte.compareUnsigned(a, b) >= 0;
    }

    /**
     * Rotates the 8 bits of {@code a} left by {@code b & 7} positions.
     * {@code a} is zero-extended with {@code & 0xFF} first so that no sign bits
     * are shifted in; the result is truncated back to a byte.
     */
    static byte ROL_scalar(byte a, byte b) {
        return (byte)(((((byte)a) & 0xFF) << (b & 7)) | ((((byte)a) & 0xFF) >>> (8 - (b & 7))));
    }

    /**
     * Rotates the 8 bits of {@code a} right by {@code b & 7} positions.
     * {@code a} is zero-extended with {@code & 0xFF} first so that no sign bits
     * are shifted in; the result is truncated back to a byte.
     */
    static byte ROR_scalar(byte a, byte b) {
        return (byte)(((((byte)a) & 0xFF) >>> (b & 7)) | ((((byte)a) & 0xFF) << (8 - (b & 7))));
    }

    /** Returns the number of trailing zero bits of {@code a}, or 8 when {@code a} is zero. */
    static byte TRAILING_ZEROS_COUNT_scalar(byte a) {
        return (byte) (a != 0 ? Integer.numberOfTrailingZeros(a) : 8);
    }

    /**
     * Returns the number of leading zero bits of {@code a} within 8 bits.
     * For non-negative {@code a} the 32-bit count is reduced by the 24 extra
     * high bits of the widened int; a negative {@code a} yields 0.
     */
    static byte LEADING_ZEROS_COUNT_scalar(byte a) {
        return (byte) (a >= 0 ? Integer.numberOfLeadingZeros(a) - 24 : 0);
    }

    /** Reverses the bit order of {@code a}. */
    static byte REVERSE_scalar(byte a) {
        // Rotating by 4 swaps the two nibbles.
        byte b = ROL_scalar(a, (byte) 4);
        // Swap adjacent single bits, then adjacent bit pairs.
        b = (byte)(((b & 0x55) << 1) | ((b & 0xAA) >>> 1));
        b = (byte)(((b & 0x33) << 2) | ((b & 0xCC) >>> 2));
        return b;
    }

    /** Stores {@code (byte)(a + b)} for each element pair of {@code as}/{@code bs} into {@code rs}. */
    @Benchmark
    public void ADD(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                rs[i] = (byte)(a + b);
            }
        }

        bh.consume(rs);
    }

    /** Like {@link #ADD}, but only where {@code ms[i % ms.length]} is true; otherwise {@code rs[i] = a}. */
    @Benchmark
    public void ADDMasked(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);
        boolean[] ms = fm.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                if (ms[i % ms.length]) {
                    rs[i] = (byte)(a + b);
                } else {
                    rs[i] = a;
                }
            }
        }
        bh.consume(rs);
    }

    /** Stores {@code (byte)(a - b)} for each element pair into {@code rs}. */
    @Benchmark
    public void SUB(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                rs[i] = (byte)(a - b);
            }
        }

        bh.consume(rs);
    }

    /** Like {@link #SUB}, but only where {@code ms[i % ms.length]} is true; otherwise {@code rs[i] = a}. */
    @Benchmark
    public void SUBMasked(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);
        boolean[] ms = fm.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                if (ms[i % ms.length]) {
                    rs[i] = (byte)(a - b);
                } else {
                    rs[i] = a;
                }
            }
        }
        bh.consume(rs);
    }

    /** Stores {@code (byte)(a * b)} for each element pair into {@code rs}. */
    @Benchmark
    public void MUL(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                rs[i] = (byte)(a * b);
            }
        }

        bh.consume(rs);
    }

    /** Like {@link #MUL}, but only where {@code ms[i % ms.length]} is true; otherwise {@code rs[i] = a}. */
    @Benchmark
    public void MULMasked(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);
        boolean[] ms = fm.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                if (ms[i % ms.length]) {
                    rs[i] = (byte)(a * b);
                } else {
                    rs[i] = a;
                }
            }
        }
        bh.consume(rs);
    }

    /** Stores {@code a} when it is non-zero, otherwise {@code b}, for each element pair. */
    @Benchmark
    public void FIRST_NONZERO(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                rs[i] = (byte)((a)!=0?a:b);
            }
        }

        bh.consume(rs);
    }

    /** Like {@link #FIRST_NONZERO}, but only where {@code ms[i % ms.length]} is true; otherwise {@code rs[i] = a}. */
    @Benchmark
    public void FIRST_NONZEROMasked(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);
        boolean[] ms = fm.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                if (ms[i % ms.length]) {
                    rs[i] = (byte)((a)!=0?a:b);
                } else {
                    rs[i] = a;
                }
            }
        }
        bh.consume(rs);
    }

    /** Stores {@code (byte)(a & b)} for each element pair into {@code rs}. */
    @Benchmark
    public void AND(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                rs[i] = (byte)(a & b);
            }
        }

        bh.consume(rs);
    }

    /** Like {@link #AND}, but only where {@code ms[i % ms.length]} is true; otherwise {@code rs[i] = a}. */
    @Benchmark
    public void ANDMasked(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);
        boolean[] ms = fm.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                if (ms[i % ms.length]) {
                    rs[i] = (byte)(a & b);
                } else {
                    rs[i] = a;
                }
            }
        }
        bh.consume(rs);
    }

    /** Stores {@code (byte)(a & ~b)} for each element pair into {@code rs}. */
    @Benchmark
    public void AND_NOT(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                rs[i] = (byte)(a & ~b);
            }
        }

        bh.consume(rs);
    }

    /** Like {@link #AND_NOT}, but only where {@code ms[i % ms.length]} is true; otherwise {@code rs[i] = a}. */
    @Benchmark
    public void AND_NOTMasked(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);
        boolean[] ms = fm.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                if (ms[i % ms.length]) {
                    rs[i] = (byte)(a & ~b);
                } else {
                    rs[i] = a;
                }
            }
        }
        bh.consume(rs);
    }

    /** Stores {@code (byte)(a | b)} for each element pair into {@code rs}. */
    @Benchmark
    public void OR(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                rs[i] = (byte)(a | b);
            }
        }

        bh.consume(rs);
    }

    /** Like {@link #OR}, but only where {@code ms[i % ms.length]} is true; otherwise {@code rs[i] = a}. */
    @Benchmark
    public void ORMasked(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);
        boolean[] ms = fm.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                if (ms[i % ms.length]) {
                    rs[i] = (byte)(a | b);
                } else {
                    rs[i] = a;
                }
            }
        }
        bh.consume(rs);
    }

    /** Stores {@code (byte)(a ^ b)} for each element pair into {@code rs}. */
    @Benchmark
    public void XOR(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                rs[i] = (byte)(a ^ b);
            }
        }

        bh.consume(rs);
    }

    /** Like {@link #XOR}, but only where {@code ms[i % ms.length]} is true; otherwise {@code rs[i] = a}. */
    @Benchmark
    public void XORMasked(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);
        boolean[] ms = fm.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                if (ms[i % ms.length]) {
                    rs[i] = (byte)(a ^ b);
                } else {
                    rs[i] = a;
                }
            }
        }
        bh.consume(rs);
    }

    /** Stores {@code a} shifted left by {@code b & 0x7} bits, truncated to a byte, for each element pair. */
    @Benchmark
    public void LSHL(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                rs[i] = (byte)((a << (b & 0x7)));
            }
        }

        bh.consume(rs);
    }

    /** Like {@link #LSHL}, but only where {@code ms[i % ms.length]} is true; otherwise {@code rs[i] = a}. */
    @Benchmark
    public void LSHLMasked(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);
        boolean[] ms = fm.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                if (ms[i % ms.length]) {
                    rs[i] = (byte)((a << (b & 0x7)));
                } else {
                    rs[i] = a;
                }
            }
        }
        bh.consume(rs);
    }

    /** Stores {@code a} shifted right with sign propagation ({@code >>}) by {@code b & 0x7} bits for each element pair. */
    @Benchmark
    public void ASHR(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                rs[i] = (byte)((a >> (b & 0x7)));
            }
        }

        bh.consume(rs);
    }

    /** Like {@link #ASHR}, but only where {@code ms[i % ms.length]} is true; otherwise {@code rs[i] = a}. */
    @Benchmark
    public void ASHRMasked(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);
        boolean[] ms = fm.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                if (ms[i % ms.length]) {
                    rs[i] = (byte)((a >> (b & 0x7)));
                } else {
                    rs[i] = a;
                }
            }
        }
        bh.consume(rs);
    }

    /**
     * Stores the zero-filling right shift of {@code a} by {@code b & 0x7} bits
     * for each element pair; {@code a} is zero-extended with {@code & 0xFF}
     * before the {@code >>>}.
     */
    @Benchmark
    public void LSHR(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                rs[i] = (byte)(((a & 0xFF) >>> (b & 0x7)));
            }
        }

        bh.consume(rs);
    }

    /** Like {@link #LSHR}, but only where {@code ms[i % ms.length]} is true; otherwise {@code rs[i] = a}. */
    @Benchmark
    public void LSHRMasked(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);
        boolean[] ms = fm.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                if (ms[i % ms.length]) {
                    rs[i] = (byte)(((a & 0xFF) >>> (b & 0x7)));
                } else {
                    rs[i] = a;
                }
            }
        }
        bh.consume(rs);
    }

    /** Stores {@code (byte)(a << (b & 7))} for each element pair into {@code rs}. */
    @Benchmark
    public void LSHLShift(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                rs[i] = (byte)((a << (b & 7)));
            }
        }

        bh.consume(rs);
    }

    /** Like {@link #LSHLShift}, selecting the shifted value where {@code ms[i % ms.length]} is true and {@code a} otherwise. */
    @Benchmark
    public void LSHLMaskedShift(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);
        boolean[] ms = fm.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                boolean m = ms[i % ms.length];
                rs[i] = (m ? (byte)((a << (b & 7))) : a);
            }
        }

        bh.consume(rs);
    }

    /** Stores {@code (byte)((a & 0xFF) >>> (b & 7))} for each element pair into {@code rs}. */
    @Benchmark
    public void LSHRShift(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                rs[i] = (byte)(((a & 0xFF) >>> (b & 7)));
            }
        }

        bh.consume(rs);
    }

    /** Like {@link #LSHRShift}, selecting the shifted value where {@code ms[i % ms.length]} is true and {@code a} otherwise. */
    @Benchmark
    public void LSHRMaskedShift(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);
        boolean[] ms = fm.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                boolean m = ms[i % ms.length];
                rs[i] = (m ? (byte)(((a & 0xFF) >>> (b & 7))) : a);
            }
        }

        bh.consume(rs);
    }

    /** Stores {@code (byte)(a >> (b & 7))} for each element pair into {@code rs}. */
    @Benchmark
    public void ASHRShift(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                rs[i] = (byte)((a >> (b & 7)));
            }
        }

        bh.consume(rs);
    }

    /** Like {@link #ASHRShift}, selecting the shifted value where {@code ms[i % ms.length]} is true and {@code a} otherwise. */
    @Benchmark
    public void ASHRMaskedShift(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);
        boolean[] ms = fm.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                boolean m = ms[i % ms.length];
                rs[i] = (m ? (byte)((a >> (b & 7))) : a);
            }
        }

        bh.consume(rs);
    }

    /** Stores {@link #ROR_scalar ROR_scalar(a, b)} for each element pair into {@code rs}. */
    @Benchmark
    public void ROR(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                rs[i] = (byte)(ROR_scalar(a,b));
            }
        }

        bh.consume(rs);
    }

    /** Like {@link #ROR}, but only where {@code ms[i % ms.length]} is true; otherwise {@code rs[i] = a}. */
    @Benchmark
    public void RORMasked(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);
        boolean[] ms = fm.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                if (ms[i % ms.length]) {
                    rs[i] = (byte)(ROR_scalar(a,b));
                } else {
                    rs[i] = a;
                }
            }
        }
        bh.consume(rs);
    }

    /** Stores {@link #ROL_scalar ROL_scalar(a, b)} for each element pair into {@code rs}. */
    @Benchmark
    public void ROL(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                rs[i] = (byte)(ROL_scalar(a,b));
            }
        }

        bh.consume(rs);
    }

    /** Like {@link #ROL}, but only where {@code ms[i % ms.length]} is true; otherwise {@code rs[i] = a}. */
    @Benchmark
    public void ROLMasked(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);
        boolean[] ms = fm.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                if (ms[i % ms.length]) {
                    rs[i] = (byte)(ROL_scalar(a,b));
                } else {
                    rs[i] = a;
                }
            }
        }
        bh.consume(rs);
    }

    /** Stores {@link #ROR_scalar ROR_scalar(a, b)} for each element pair into {@code rs}. */
    @Benchmark
    public void RORShift(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                rs[i] = (byte)(ROR_scalar(a, b));
            }
        }

        bh.consume(rs);
    }

    /** Like {@link #RORShift}, selecting the rotated value where {@code ms[i % ms.length]} is true and {@code a} otherwise. */
    @Benchmark
    public void RORMaskedShift(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);
        boolean[] ms = fm.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                boolean m = ms[i % ms.length];
                rs[i] = (m ? (byte)(ROR_scalar(a, b)) : a);
            }
        }

        bh.consume(rs);
    }

    /** Stores {@link #ROL_scalar ROL_scalar(a, b)} for each element pair into {@code rs}. */
    @Benchmark
    public void ROLShift(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                rs[i] = (byte)(ROL_scalar(a, b));
            }
        }

        bh.consume(rs);
    }

    /** Like {@link #ROLShift}, selecting the rotated value where {@code ms[i % ms.length]} is true and {@code a} otherwise. */
    @Benchmark
    public void ROLMaskedShift(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);
        boolean[] ms = fm.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                boolean m = ms[i % ms.length];
                rs[i] = (m ? (byte)(ROL_scalar(a, b)) : a);
            }
        }

        bh.consume(rs);
    }

    /**
     * Stores {@code (byte)((a & 0xFF) >>> CONST_SHIFT)} for each element of
     * {@code as}. {@code bs[i]} is loaded into {@code b} but does not take
     * part in the expression.
     */
    @Benchmark
    public void LSHRShiftConst(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                rs[i] = (byte)(((a & 0xFF) >>> CONST_SHIFT));
            }
        }

        bh.consume(rs);
    }

    /** Like {@link #LSHRShiftConst}, selecting the shifted value where {@code ms[i % ms.length]} is true and {@code a} otherwise. */
    @Benchmark
    public void LSHRMaskedShiftConst(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);
        boolean[] ms = fm.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                boolean m = ms[i % ms.length];
                rs[i] = (m ? (byte)(((a & 0xFF) >>> CONST_SHIFT)) : a);
            }
        }

        bh.consume(rs);
    }

    /**
     * Stores {@code (byte)(a << CONST_SHIFT)} for each element of {@code as}.
     * {@code bs[i]} is loaded into {@code b} but does not take part in the
     * expression.
     */
    @Benchmark
    public void LSHLShiftConst(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                rs[i] = (byte)((a << CONST_SHIFT));
            }
        }

        bh.consume(rs);
    }

    /** Like {@link #LSHLShiftConst}, selecting the shifted value where {@code ms[i % ms.length]} is true and {@code a} otherwise. */
    @Benchmark
    public void LSHLMaskedShiftConst(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);
        boolean[] ms = fm.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                boolean m = ms[i % ms.length];
                rs[i] = (m ? (byte)((a << CONST_SHIFT)) : a);
            }
        }

        bh.consume(rs);
    }

    /**
     * Stores {@code (byte)(a >> CONST_SHIFT)} for each element of {@code as}.
     * {@code bs[i]} is loaded into {@code b} but does not take part in the
     * expression.
     */
    @Benchmark
    public void ASHRShiftConst(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                rs[i] = (byte)((a >> CONST_SHIFT));
            }
        }

        bh.consume(rs);
    }

    /** Like {@link #ASHRShiftConst}, selecting the shifted value where {@code ms[i % ms.length]} is true and {@code a} otherwise. */
    @Benchmark
    public void ASHRMaskedShiftConst(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);
        boolean[] ms = fm.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                boolean m = ms[i % ms.length];
                rs[i] = (m ? (byte)((a >> CONST_SHIFT)) : a);
            }
        }

        bh.consume(rs);
    }

    /**
     * Stores {@code ROR_scalar(a, CONST_SHIFT)} for each element of {@code as}.
     * {@code bs[i]} is loaded into {@code b} but does not take part in the
     * expression.
     */
    @Benchmark
    public void RORShiftConst(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                rs[i] = (byte)(ROR_scalar(a, CONST_SHIFT));
            }
        }

        bh.consume(rs);
    }

    /** Like {@link #RORShiftConst}, selecting the rotated value where {@code ms[i % ms.length]} is true and {@code a} otherwise. */
    @Benchmark
    public void RORMaskedShiftConst(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);
        boolean[] ms = fm.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                boolean m = ms[i % ms.length];
                rs[i] = (m ? (byte)(ROR_scalar(a, CONST_SHIFT)) : a);
            }
        }

        bh.consume(rs);
    }

    /**
     * Stores {@code ROL_scalar(a, CONST_SHIFT)} for each element of {@code as}.
     * {@code bs[i]} is loaded into {@code b} but does not take part in the
     * expression.
     */
    @Benchmark
    public void ROLShiftConst(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                rs[i] = (byte)(ROL_scalar(a, CONST_SHIFT));
            }
        }

        bh.consume(rs);
    }

    /** Like {@link #ROLShiftConst}, selecting the rotated value where {@code ms[i % ms.length]} is true and {@code a} otherwise. */
    @Benchmark
    public void ROLMaskedShiftConst(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);
        boolean[] ms = fm.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                boolean m = ms[i % ms.length];
                rs[i] = (m ? (byte)(ROL_scalar(a, CONST_SHIFT)) : a);
            }
        }

        bh.consume(rs);
    }

    /** Stores {@code (byte) Math.min(a, b)} for each element pair into {@code rs}. */
    @Benchmark
    public void MIN(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                rs[i] = (byte)(Math.min(a, b));
            }
        }

        bh.consume(rs);
    }

    /** Stores {@code (byte) Math.max(a, b)} for each element pair into {@code rs}. */
    @Benchmark
    public void MAX(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                rs[i] = (byte)(Math.max(a, b));
            }
        }

        bh.consume(rs);
    }

    /** Reduces {@code as} with bitwise AND, starting from -1 (all bits set) on every repetition. */
    @Benchmark
    public void ANDLanes(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte r = -1;
        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            r = -1;
            for (int i = 0; i < as.length; i++) {
                r &= as[i];
            }
        }
        bh.consume(r);
    }

    /** Like {@link #ANDLanes}, but only elements whose {@code ms[i % ms.length]} is true take part. */
    @Benchmark
    public void ANDMaskedLanes(Blackhole bh) {
        byte[] as = fa.apply(size);
        boolean[] ms = fm.apply(size);
        byte r = -1;
        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            r = -1;
            for (int i = 0; i < as.length; i++) {
                if (ms[i % ms.length])
                    r &= as[i];
            }
        }
        bh.consume(r);
    }

    /** Reduces {@code as} with bitwise OR, starting from 0 on every repetition. */
    @Benchmark
    public void ORLanes(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte r = 0;
        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            r = 0;
            for (int i = 0; i < as.length; i++) {
                r |= as[i];
            }
        }
        bh.consume(r);
    }

    /** Like {@link #ORLanes}, but only elements whose {@code ms[i % ms.length]} is true take part. */
    @Benchmark
    public void ORMaskedLanes(Blackhole bh) {
        byte[] as = fa.apply(size);
        boolean[] ms = fm.apply(size);
        byte r = 0;
        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            r = 0;
            for (int i = 0; i < as.length; i++) {
                if (ms[i % ms.length])
                    r |= as[i];
            }
        }
        bh.consume(r);
    }

    /** Reduces {@code as} with bitwise XOR, starting from 0 on every repetition. */
    @Benchmark
    public void XORLanes(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte r = 0;
        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            r = 0;
            for (int i = 0; i < as.length; i++) {
                r ^= as[i];
            }
        }
        bh.consume(r);
    }

    /** Like {@link #XORLanes}, but only elements whose {@code ms[i % ms.length]} is true take part. */
    @Benchmark
    public void XORMaskedLanes(Blackhole bh) {
        byte[] as = fa.apply(size);
        boolean[] ms = fm.apply(size);
        byte r = 0;
        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            r = 0;
            for (int i = 0; i < as.length; i++) {
                if (ms[i % ms.length])
                    r ^= as[i];
            }
        }
        bh.consume(r);
    }

    /** Sums {@code as} into a byte accumulator (wrapping), starting from 0 on every repetition. */
    @Benchmark
    public void ADDLanes(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte r = 0;
        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            r = 0;
            for (int i = 0; i < as.length; i++) {
                r += as[i];
            }
        }
        bh.consume(r);
    }

    /** Like {@link #ADDLanes}, but only elements whose {@code ms[i % ms.length]} is true take part. */
    @Benchmark
    public void ADDMaskedLanes(Blackhole bh) {
        byte[] as = fa.apply(size);
        boolean[] ms = fm.apply(size);
        byte r = 0;
        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            r = 0;
            for (int i = 0; i < as.length; i++) {
                if (ms[i % ms.length])
                    r += as[i];
            }
        }
        bh.consume(r);
    }

    /** Multiplies the elements of {@code as} into a byte accumulator (wrapping), starting from 1 on every repetition. */
    @Benchmark
    public void MULLanes(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte r = 1;
        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            r = 1;
            for (int i = 0; i < as.length; i++) {
                r *= as[i];
            }
        }
        bh.consume(r);
    }

    /** Like {@link #MULLanes}, but only elements whose {@code ms[i % ms.length]} is true take part. */
    @Benchmark
    public void MULMaskedLanes(Blackhole bh) {
        byte[] as = fa.apply(size);
        boolean[] ms = fm.apply(size);
        byte r = 1;
        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            r = 1;
            for (int i = 0; i < as.length; i++) {
                if (ms[i % ms.length])
                    r *= as[i];
            }
        }
        bh.consume(r);
    }

    /** ORs together every entry of the mask {@code ms}, starting from false on every repetition. */
    @Benchmark
    public void anyTrue(Blackhole bh) {
        boolean[] ms = fm.apply(size);
        boolean r = false;
        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            r = false;
            for (int i = 0; i < ms.length; i++) {
                r |= ms[i];
            }
        }
        bh.consume(r);
    }

    /** ANDs together every entry of the mask {@code ms}, starting from true on every repetition. */
    @Benchmark
    public void allTrue(Blackhole bh) {
        boolean[] ms = fm.apply(size);
        boolean r = true;
        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            r = true;
            for (int i = 0; i < ms.length; i++) {
                r &= ms[i];
            }
        }
        bh.consume(r);
    }

    /**
     * ANDs together {@code bits(a) == 0} over all elements of {@code as}.
     * The accumulator is initialised once, before the repetition loop.
     */
    @Benchmark
    public void IS_DEFAULT(Blackhole bh) {
        byte[] as = fa.apply(size);
        boolean r = true;

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                r &= (bits(a)==0); // accumulate so JIT can't eliminate the computation
            }
        }

        bh.consume(r);
    }

    /**
     * ANDs together {@code bits(a) < 0} over all elements of {@code as}.
     * The accumulator is initialised once, before the repetition loop.
     */
    @Benchmark
    public void IS_NEGATIVE(Blackhole bh) {
        byte[] as = fa.apply(size);
        boolean r = true;

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                r &= (bits(a)<0); // accumulate so JIT can't eliminate the computation
            }
        }

        bh.consume(r);
    }

    /** ANDs together {@link #lt lt(as[i], bs[i])} over all element pairs. */
    @Benchmark
    public void LT(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        boolean r = true;

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                r &= lt(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
            }
        }

        bh.consume(r);
    }

    /** ANDs together {@link #gt gt(as[i], bs[i])} over all element pairs. */
    @Benchmark
    public void GT(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        boolean r = true;

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                r &= gt(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
            }
        }

        bh.consume(r);
    }

    /** ANDs together {@link #eq eq(as[i], bs[i])} over all element pairs. */
    @Benchmark
    public void EQ(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        boolean r = true;

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                r &= eq(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
            }
        }

        bh.consume(r);
    }

    /** ANDs together {@link #neq neq(as[i], bs[i])} over all element pairs. */
    @Benchmark
    public void NE(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        boolean r = true;

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                r &= neq(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
            }
        }

        bh.consume(r);
    }

    /** ANDs together {@link #le le(as[i], bs[i])} over all element pairs. */
    @Benchmark
    public void LE(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        boolean r = true;

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                r &= le(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
            }
        }

        bh.consume(r);
    }

    /** ANDs together {@link #ge ge(as[i], bs[i])} over all element pairs. */
    @Benchmark
    public void GE(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        boolean r = true;

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                r &= ge(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
            }
        }

        bh.consume(r);
    }

    /** ANDs together {@link #ult ult(as[i], bs[i])} (unsigned less-than) over all element pairs. */
    @Benchmark
    public void UNSIGNED_LT(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        boolean r = true;

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                r &= ult(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
            }
        }

        bh.consume(r);
    }

    @Benchmark
    public void UNSIGNED_GT(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        boolean r = true;

        for
(int ic = 0; ic < INVOC_COUNT; ic++) { 1345 for (int i = 0; i < as.length; i++) { 1346 r &= ugt(as[i], bs[i]); // accumulate so JIT can't eliminate the computation 1347 } 1348 } 1349 1350 bh.consume(r); 1351 } 1352 1353 @Benchmark 1354 public void UNSIGNED_LE(Blackhole bh) { 1355 byte[] as = fa.apply(size); 1356 byte[] bs = fb.apply(size); 1357 boolean r = true; 1358 1359 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1360 for (int i = 0; i < as.length; i++) { 1361 r &= ule(as[i], bs[i]); // accumulate so JIT can't eliminate the computation 1362 } 1363 } 1364 1365 bh.consume(r); 1366 } 1367 1368 @Benchmark 1369 public void UNSIGNED_GE(Blackhole bh) { 1370 byte[] as = fa.apply(size); 1371 byte[] bs = fb.apply(size); 1372 boolean r = true; 1373 1374 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1375 for (int i = 0; i < as.length; i++) { 1376 r &= uge(as[i], bs[i]); // accumulate so JIT can't eliminate the computation 1377 } 1378 } 1379 1380 bh.consume(r); 1381 } 1382 1383 @Benchmark 1384 public void blend(Blackhole bh) { 1385 byte[] as = fa.apply(size); 1386 byte[] bs = fb.apply(size); 1387 byte[] rs = fr.apply(size); 1388 boolean[] ms = fm.apply(size); 1389 1390 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1391 for (int i = 0; i < as.length; i++) { 1392 byte a = as[i]; 1393 byte b = bs[i]; 1394 boolean m = ms[i % ms.length]; 1395 rs[i] = (m ? 
b : a); 1396 } 1397 } 1398 1399 bh.consume(rs); 1400 } 1401 1402 void rearrangeShared(int window, Blackhole bh) { 1403 byte[] as = fa.apply(size); 1404 int[] order = fs.apply(size); 1405 byte[] rs = fr.apply(size); 1406 1407 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1408 for (int i = 0; i < as.length; i += window) { 1409 for (int j = 0; j < window; j++) { 1410 byte a = as[i+j]; 1411 int pos = order[j]; 1412 rs[i + pos] = a; 1413 } 1414 } 1415 } 1416 1417 bh.consume(rs); 1418 } 1419 1420 @Benchmark 1421 public void rearrange064(Blackhole bh) { 1422 int window = 64 / Byte.SIZE; 1423 rearrangeShared(window, bh); 1424 } 1425 1426 @Benchmark 1427 public void rearrange128(Blackhole bh) { 1428 int window = 128 / Byte.SIZE; 1429 rearrangeShared(window, bh); 1430 } 1431 1432 @Benchmark 1433 public void rearrange256(Blackhole bh) { 1434 int window = 256 / Byte.SIZE; 1435 rearrangeShared(window, bh); 1436 } 1437 1438 @Benchmark 1439 public void rearrange512(Blackhole bh) { 1440 int window = 512 / Byte.SIZE; 1441 rearrangeShared(window, bh); 1442 } 1443 1444 @Benchmark 1445 public void compressScalar(Blackhole bh) { 1446 byte[] as = fa.apply(size); 1447 byte[] rs = new byte[size]; 1448 boolean[] im = fmt.apply(size); 1449 1450 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1451 for (int i = 0, j = 0; i < as.length; i++) { 1452 if (im[i]) { 1453 rs[j++] = as[i]; 1454 } 1455 } 1456 } 1457 1458 bh.consume(rs); 1459 } 1460 1461 @Benchmark 1462 public void expandScalar(Blackhole bh) { 1463 byte[] as = fa.apply(size); 1464 byte[] rs = new byte[size]; 1465 boolean[] im = fmt.apply(size); 1466 1467 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1468 for (int i = 0, j = 0; i < as.length; i++) { 1469 if (im[i]) { 1470 rs[i++] = as[j++]; 1471 } 1472 } 1473 } 1474 1475 bh.consume(rs); 1476 } 1477 1478 @Benchmark 1479 public void maskCompressScalar(Blackhole bh) { 1480 boolean[] im = fmt.apply(size); 1481 boolean[] rm = new boolean[size]; 1482 1483 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1484 
for (int i = 0, j = 0; i < im.length; i++) { 1485 if (im[i]) { 1486 rm[j++] = im[i]; 1487 } 1488 } 1489 } 1490 1491 bh.consume(rm); 1492 } 1493 1494 void broadcastShared(int window, Blackhole bh) { 1495 byte[] as = fa.apply(size); 1496 byte[] rs = fr.apply(size); 1497 1498 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1499 for (int i = 0; i < as.length; i += window) { 1500 int idx = i; 1501 for (int j = 0; j < window; j++) { 1502 rs[j] = as[idx]; 1503 } 1504 } 1505 } 1506 1507 bh.consume(rs); 1508 } 1509 1510 @Benchmark 1511 public void broadcast064(Blackhole bh) { 1512 int window = 64 / Byte.SIZE; 1513 broadcastShared(window, bh); 1514 } 1515 1516 @Benchmark 1517 public void broadcast128(Blackhole bh) { 1518 int window = 128 / Byte.SIZE; 1519 broadcastShared(window, bh); 1520 } 1521 1522 @Benchmark 1523 public void broadcast256(Blackhole bh) { 1524 int window = 256 / Byte.SIZE; 1525 broadcastShared(window, bh); 1526 } 1527 1528 @Benchmark 1529 public void broadcast512(Blackhole bh) { 1530 int window = 512 / Byte.SIZE; 1531 broadcastShared(window, bh); 1532 } 1533 1534 @Benchmark 1535 public void zero(Blackhole bh) { 1536 byte[] as = fa.apply(size); 1537 1538 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1539 for (int i = 0; i < as.length; i++) { 1540 as[i] = (byte)0; 1541 } 1542 } 1543 1544 bh.consume(as); 1545 } 1546 1547 @Benchmark 1548 public void BITWISE_BLEND(Blackhole bh) { 1549 byte[] as = fa.apply(size); 1550 byte[] bs = fb.apply(size); 1551 byte[] cs = fc.apply(size); 1552 byte[] rs = fr.apply(size); 1553 1554 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1555 for (int i = 0; i < as.length; i++) { 1556 byte a = as[i]; 1557 byte b = bs[i]; 1558 byte c = cs[i]; 1559 rs[i] = (byte)((a&~(c))|(b&c)); 1560 } 1561 } 1562 1563 bh.consume(rs); 1564 } 1565 1566 @Benchmark 1567 public void BITWISE_BLENDMasked(Blackhole bh) { 1568 byte[] as = fa.apply(size); 1569 byte[] bs = fb.apply(size); 1570 byte[] cs = fc.apply(size); 1571 byte[] rs = fr.apply(size); 1572 boolean[] ms = 
fm.apply(size); 1573 1574 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1575 for (int i = 0; i < as.length; i++) { 1576 byte a = as[i]; 1577 byte b = bs[i]; 1578 byte c = cs[i]; 1579 if (ms[i % ms.length]) { 1580 rs[i] = (byte)((a&~(c))|(b&c)); 1581 } else { 1582 rs[i] = a; 1583 } 1584 } 1585 } 1586 bh.consume(rs); 1587 } 1588 @Benchmark 1589 public void NEG(Blackhole bh) { 1590 byte[] as = fa.apply(size); 1591 byte[] rs = fr.apply(size); 1592 1593 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1594 for (int i = 0; i < as.length; i++) { 1595 byte a = as[i]; 1596 rs[i] = (byte)(-((byte)a)); 1597 } 1598 } 1599 1600 bh.consume(rs); 1601 } 1602 1603 @Benchmark 1604 public void NEGMasked(Blackhole bh) { 1605 byte[] as = fa.apply(size); 1606 byte[] rs = fr.apply(size); 1607 boolean[] ms = fm.apply(size); 1608 1609 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1610 for (int i = 0; i < as.length; i++) { 1611 byte a = as[i]; 1612 boolean m = ms[i % ms.length]; 1613 rs[i] = (m ? (byte)(-((byte)a)) : a); 1614 } 1615 } 1616 1617 bh.consume(rs); 1618 } 1619 @Benchmark 1620 public void ABS(Blackhole bh) { 1621 byte[] as = fa.apply(size); 1622 byte[] rs = fr.apply(size); 1623 1624 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1625 for (int i = 0; i < as.length; i++) { 1626 byte a = as[i]; 1627 rs[i] = (byte)(Math.abs((byte)a)); 1628 } 1629 } 1630 1631 bh.consume(rs); 1632 } 1633 1634 @Benchmark 1635 public void ABSMasked(Blackhole bh) { 1636 byte[] as = fa.apply(size); 1637 byte[] rs = fr.apply(size); 1638 boolean[] ms = fm.apply(size); 1639 1640 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1641 for (int i = 0; i < as.length; i++) { 1642 byte a = as[i]; 1643 boolean m = ms[i % ms.length]; 1644 rs[i] = (m ? 
(byte)(Math.abs((byte)a)) : a); 1645 } 1646 } 1647 1648 bh.consume(rs); 1649 } 1650 @Benchmark 1651 public void NOT(Blackhole bh) { 1652 byte[] as = fa.apply(size); 1653 byte[] rs = fr.apply(size); 1654 1655 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1656 for (int i = 0; i < as.length; i++) { 1657 byte a = as[i]; 1658 rs[i] = (byte)(~((byte)a)); 1659 } 1660 } 1661 1662 bh.consume(rs); 1663 } 1664 1665 @Benchmark 1666 public void NOTMasked(Blackhole bh) { 1667 byte[] as = fa.apply(size); 1668 byte[] rs = fr.apply(size); 1669 boolean[] ms = fm.apply(size); 1670 1671 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1672 for (int i = 0; i < as.length; i++) { 1673 byte a = as[i]; 1674 boolean m = ms[i % ms.length]; 1675 rs[i] = (m ? (byte)(~((byte)a)) : a); 1676 } 1677 } 1678 1679 bh.consume(rs); 1680 } 1681 @Benchmark 1682 public void ZOMO(Blackhole bh) { 1683 byte[] as = fa.apply(size); 1684 byte[] rs = fr.apply(size); 1685 1686 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1687 for (int i = 0; i < as.length; i++) { 1688 byte a = as[i]; 1689 rs[i] = (byte)((a==0?0:-1)); 1690 } 1691 } 1692 1693 bh.consume(rs); 1694 } 1695 1696 @Benchmark 1697 public void ZOMOMasked(Blackhole bh) { 1698 byte[] as = fa.apply(size); 1699 byte[] rs = fr.apply(size); 1700 boolean[] ms = fm.apply(size); 1701 1702 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1703 for (int i = 0; i < as.length; i++) { 1704 byte a = as[i]; 1705 boolean m = ms[i % ms.length]; 1706 rs[i] = (m ? 
(byte)((a==0?0:-1)) : a); 1707 } 1708 } 1709 1710 bh.consume(rs); 1711 } 1712 @Benchmark 1713 public void BIT_COUNT(Blackhole bh) { 1714 byte[] as = fa.apply(size); 1715 byte[] rs = fr.apply(size); 1716 1717 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1718 for (int i = 0; i < as.length; i++) { 1719 byte a = as[i]; 1720 rs[i] = (byte)(Integer.bitCount((int)a & 0xFF)); 1721 } 1722 } 1723 1724 bh.consume(rs); 1725 } 1726 1727 @Benchmark 1728 public void BIT_COUNTMasked(Blackhole bh) { 1729 byte[] as = fa.apply(size); 1730 byte[] rs = fr.apply(size); 1731 boolean[] ms = fm.apply(size); 1732 1733 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1734 for (int i = 0; i < as.length; i++) { 1735 byte a = as[i]; 1736 boolean m = ms[i % ms.length]; 1737 rs[i] = (m ? (byte)(Integer.bitCount((int)a & 0xFF)) : a); 1738 } 1739 } 1740 1741 bh.consume(rs); 1742 } 1743 @Benchmark 1744 public void TRAILING_ZEROS_COUNT(Blackhole bh) { 1745 byte[] as = fa.apply(size); 1746 byte[] rs = fr.apply(size); 1747 1748 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1749 for (int i = 0; i < as.length; i++) { 1750 byte a = as[i]; 1751 rs[i] = (byte)(TRAILING_ZEROS_COUNT_scalar(a)); 1752 } 1753 } 1754 1755 bh.consume(rs); 1756 } 1757 1758 @Benchmark 1759 public void TRAILING_ZEROS_COUNTMasked(Blackhole bh) { 1760 byte[] as = fa.apply(size); 1761 byte[] rs = fr.apply(size); 1762 boolean[] ms = fm.apply(size); 1763 1764 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1765 for (int i = 0; i < as.length; i++) { 1766 byte a = as[i]; 1767 boolean m = ms[i % ms.length]; 1768 rs[i] = (m ? 
(byte)(TRAILING_ZEROS_COUNT_scalar(a)) : a); 1769 } 1770 } 1771 1772 bh.consume(rs); 1773 } 1774 @Benchmark 1775 public void LEADING_ZEROS_COUNT(Blackhole bh) { 1776 byte[] as = fa.apply(size); 1777 byte[] rs = fr.apply(size); 1778 1779 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1780 for (int i = 0; i < as.length; i++) { 1781 byte a = as[i]; 1782 rs[i] = (byte)(LEADING_ZEROS_COUNT_scalar(a)); 1783 } 1784 } 1785 1786 bh.consume(rs); 1787 } 1788 1789 @Benchmark 1790 public void LEADING_ZEROS_COUNTMasked(Blackhole bh) { 1791 byte[] as = fa.apply(size); 1792 byte[] rs = fr.apply(size); 1793 boolean[] ms = fm.apply(size); 1794 1795 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1796 for (int i = 0; i < as.length; i++) { 1797 byte a = as[i]; 1798 boolean m = ms[i % ms.length]; 1799 rs[i] = (m ? (byte)(LEADING_ZEROS_COUNT_scalar(a)) : a); 1800 } 1801 } 1802 1803 bh.consume(rs); 1804 } 1805 @Benchmark 1806 public void REVERSE(Blackhole bh) { 1807 byte[] as = fa.apply(size); 1808 byte[] rs = fr.apply(size); 1809 1810 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1811 for (int i = 0; i < as.length; i++) { 1812 byte a = as[i]; 1813 rs[i] = (byte)(REVERSE_scalar(a)); 1814 } 1815 } 1816 1817 bh.consume(rs); 1818 } 1819 1820 @Benchmark 1821 public void REVERSEMasked(Blackhole bh) { 1822 byte[] as = fa.apply(size); 1823 byte[] rs = fr.apply(size); 1824 boolean[] ms = fm.apply(size); 1825 1826 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1827 for (int i = 0; i < as.length; i++) { 1828 byte a = as[i]; 1829 boolean m = ms[i % ms.length]; 1830 rs[i] = (m ? 
(byte)(REVERSE_scalar(a)) : a); 1831 } 1832 } 1833 1834 bh.consume(rs); 1835 } 1836 @Benchmark 1837 public void REVERSE_BYTES(Blackhole bh) { 1838 byte[] as = fa.apply(size); 1839 byte[] rs = fr.apply(size); 1840 1841 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1842 for (int i = 0; i < as.length; i++) { 1843 byte a = as[i]; 1844 rs[i] = (byte)(a); 1845 } 1846 } 1847 1848 bh.consume(rs); 1849 } 1850 1851 @Benchmark 1852 public void REVERSE_BYTESMasked(Blackhole bh) { 1853 byte[] as = fa.apply(size); 1854 byte[] rs = fr.apply(size); 1855 boolean[] ms = fm.apply(size); 1856 1857 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1858 for (int i = 0; i < as.length; i++) { 1859 byte a = as[i]; 1860 boolean m = ms[i % ms.length]; 1861 rs[i] = (m ? (byte)(a) : a); 1862 } 1863 } 1864 1865 bh.consume(rs); 1866 } 1867 }