1 /*
2 * Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 */
23
24 package org.openjdk.bench.jdk.incubator.vector.operation;
25
26 // -- This file was mechanically generated: Do not edit! -- //
27
28 import java.util.concurrent.TimeUnit;
29 import java.util.function.IntFunction;
30 import jdk.incubator.vector.VectorMath;
31
32 import org.openjdk.jmh.annotations.*;
33 import org.openjdk.jmh.infra.Blackhole;
34
35 @BenchmarkMode(Mode.Throughput)
36 @OutputTimeUnit(TimeUnit.MILLISECONDS)
37 @State(Scope.Benchmark)
38 @Warmup(iterations = 3, time = 1)
39 @Measurement(iterations = 5, time = 1)
40 @Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
41 public class ByteScalar extends AbstractVectorBenchmark {
// Number of times each benchmark repeats its pass over the input arrays per invocation.
static final int INVOC_COUNT = 1; // To align with vector benchmarks.

// Fixed shift/rotate distance used by the *ShiftConst benchmarks: half the bit width of a byte.
private static final byte CONST_SHIFT = Byte.SIZE / 2;

// Length of the arrays created by fill() and init(); configured through @Param.
@Param("1024")
int size;
48
49 byte[] fill(IntFunction<Byte> f) {
50 byte[] array = new byte[size];
51 for (int i = 0; i < array.length; i++) {
52 array[i] = f.apply(i);
53 }
54 return array;
55 }
56
57 static byte bits(byte e) {
58 return e;
59 }
60
// Byte arrays assigned in init(); the benchmarks read as/bs as operands and write results into rs.
byte[] as, bs, cs, rs;
// Mask arrays assigned in init() via fillMask: ms from (i % 2) == 0, mt from constant true, rms from constant false.
boolean[] ms, mt, rms;
// Int array assigned in init() via fillInt with values drawn from RAND.
int[] ss;
64
65 @Setup
66 public void init() {
67 as = fill(i -> (byte)(2*i));
68 bs = fill(i -> (byte)(i+1));
69 cs = fill(i -> (byte)(i+5));
70 rs = fill(i -> (byte)0);
71 ms = fillMask(size, i -> (i % 2) == 0);
72 mt = fillMask(size, i -> true);
73 rms = fillMask(size, i -> false);
74
75 ss = fillInt(size, i -> RAND.nextInt(Math.max(i,1)));
76 }
77
// Array suppliers used by the benchmark methods. Each lambda ignores its argument and returns
// the current value of the corresponding field, i.e. whatever init() last assigned.
final IntFunction<byte[]> fa = vl -> as;
final IntFunction<byte[]> fb = vl -> bs;
final IntFunction<byte[]> fc = vl -> cs;
final IntFunction<byte[]> fr = vl -> rs;
final IntFunction<boolean[]> fm = vl -> ms;
final IntFunction<boolean[]> fmt = vl -> mt;
final IntFunction<boolean[]> fmr = vl -> rms;
final IntFunction<int[]> fs = vl -> ss;
86
87 static boolean eq(byte a, byte b) {
88 return a == b;
89 }
90
91 static boolean neq(byte a, byte b) {
92 return a != b;
93 }
94
95 static boolean lt(byte a, byte b) {
96 return a < b;
97 }
98
99 static boolean le(byte a, byte b) {
100 return a <= b;
101 }
102
103 static boolean gt(byte a, byte b) {
104 return a > b;
105 }
106
107 static boolean ge(byte a, byte b) {
108 return a >= b;
109 }
110
111 static boolean ult(byte a, byte b) {
112 return Byte.compareUnsigned(a, b) < 0;
113 }
114
115 static boolean ule(byte a, byte b) {
116 return Byte.compareUnsigned(a, b) <= 0;
117 }
118
119 static boolean ugt(byte a, byte b) {
120 return Byte.compareUnsigned(a, b) > 0;
121 }
122
123 static boolean uge(byte a, byte b) {
124 return Byte.compareUnsigned(a, b) >= 0;
125 }
126
127 static byte ROL_scalar(byte a, byte b) {
128 return (byte)(((((byte)a) & 0xFF) << (b & 7)) | ((((byte)a) & 0xFF) >>> (8 - (b & 7))));
129 }
130
131 static byte ROR_scalar(byte a, byte b) {
132 return (byte)(((((byte)a) & 0xFF) >>> (b & 7)) | ((((byte)a) & 0xFF) << (8 - (b & 7))));
133 }
134
135 static byte TRAILING_ZEROS_COUNT_scalar(byte a) {
136 return (byte) (a != 0 ? Integer.numberOfTrailingZeros(a) : 8);
137 }
138
139 static byte LEADING_ZEROS_COUNT_scalar(byte a) {
140 return (byte) (a >= 0 ? Integer.numberOfLeadingZeros(a) - 24 : 0);
141 }
142
143 static byte REVERSE_scalar(byte a) {
144 byte b = ROL_scalar(a, (byte) 4);
145 b = (byte)(((b & 0x55) << 1) | ((b & 0xAA) >>> 1));
146 b = (byte)(((b & 0x33) << 2) | ((b & 0xCC) >>> 2));
147 return b;
148 }
149
150 @Benchmark
151 public void ADD(Blackhole bh) {
152 byte[] as = fa.apply(size);
153 byte[] bs = fb.apply(size);
154 byte[] rs = fr.apply(size);
155
156 for (int ic = 0; ic < INVOC_COUNT; ic++) {
157 for (int i = 0; i < as.length; i++) {
158 byte a = as[i];
159 byte b = bs[i];
160 rs[i] = (byte)(a + b);
161 }
162 }
163
164 bh.consume(rs);
165 }
166
167 @Benchmark
168 public void ADDMasked(Blackhole bh) {
169 byte[] as = fa.apply(size);
170 byte[] bs = fb.apply(size);
171 byte[] rs = fr.apply(size);
172 boolean[] ms = fm.apply(size);
173
174 for (int ic = 0; ic < INVOC_COUNT; ic++) {
175 for (int i = 0; i < as.length; i++) {
176 byte a = as[i];
177 byte b = bs[i];
178 if (ms[i % ms.length]) {
179 rs[i] = (byte)(a + b);
180 } else {
181 rs[i] = a;
182 }
183 }
184 }
185 bh.consume(rs);
186 }
187
188 @Benchmark
189 public void SUB(Blackhole bh) {
190 byte[] as = fa.apply(size);
191 byte[] bs = fb.apply(size);
192 byte[] rs = fr.apply(size);
193
194 for (int ic = 0; ic < INVOC_COUNT; ic++) {
195 for (int i = 0; i < as.length; i++) {
196 byte a = as[i];
197 byte b = bs[i];
198 rs[i] = (byte)(a - b);
199 }
200 }
201
202 bh.consume(rs);
203 }
204
205 @Benchmark
206 public void SUBMasked(Blackhole bh) {
207 byte[] as = fa.apply(size);
208 byte[] bs = fb.apply(size);
209 byte[] rs = fr.apply(size);
210 boolean[] ms = fm.apply(size);
211
212 for (int ic = 0; ic < INVOC_COUNT; ic++) {
213 for (int i = 0; i < as.length; i++) {
214 byte a = as[i];
215 byte b = bs[i];
216 if (ms[i % ms.length]) {
217 rs[i] = (byte)(a - b);
218 } else {
219 rs[i] = a;
220 }
221 }
222 }
223 bh.consume(rs);
224 }
225
226 @Benchmark
227 public void MUL(Blackhole bh) {
228 byte[] as = fa.apply(size);
229 byte[] bs = fb.apply(size);
230 byte[] rs = fr.apply(size);
231
232 for (int ic = 0; ic < INVOC_COUNT; ic++) {
233 for (int i = 0; i < as.length; i++) {
234 byte a = as[i];
235 byte b = bs[i];
236 rs[i] = (byte)(a * b);
237 }
238 }
239
240 bh.consume(rs);
241 }
242
243 @Benchmark
244 public void MULMasked(Blackhole bh) {
245 byte[] as = fa.apply(size);
246 byte[] bs = fb.apply(size);
247 byte[] rs = fr.apply(size);
248 boolean[] ms = fm.apply(size);
249
250 for (int ic = 0; ic < INVOC_COUNT; ic++) {
251 for (int i = 0; i < as.length; i++) {
252 byte a = as[i];
253 byte b = bs[i];
254 if (ms[i % ms.length]) {
255 rs[i] = (byte)(a * b);
256 } else {
257 rs[i] = a;
258 }
259 }
260 }
261 bh.consume(rs);
262 }
263
264 @Benchmark
265 public void FIRST_NONZERO(Blackhole bh) {
266 byte[] as = fa.apply(size);
267 byte[] bs = fb.apply(size);
268 byte[] rs = fr.apply(size);
269
270 for (int ic = 0; ic < INVOC_COUNT; ic++) {
271 for (int i = 0; i < as.length; i++) {
272 byte a = as[i];
273 byte b = bs[i];
274 rs[i] = (byte)((a)!=0?a:b);
275 }
276 }
277
278 bh.consume(rs);
279 }
280
281 @Benchmark
282 public void FIRST_NONZEROMasked(Blackhole bh) {
283 byte[] as = fa.apply(size);
284 byte[] bs = fb.apply(size);
285 byte[] rs = fr.apply(size);
286 boolean[] ms = fm.apply(size);
287
288 for (int ic = 0; ic < INVOC_COUNT; ic++) {
289 for (int i = 0; i < as.length; i++) {
290 byte a = as[i];
291 byte b = bs[i];
292 if (ms[i % ms.length]) {
293 rs[i] = (byte)((a)!=0?a:b);
294 } else {
295 rs[i] = a;
296 }
297 }
298 }
299 bh.consume(rs);
300 }
301
302 @Benchmark
303 public void AND(Blackhole bh) {
304 byte[] as = fa.apply(size);
305 byte[] bs = fb.apply(size);
306 byte[] rs = fr.apply(size);
307
308 for (int ic = 0; ic < INVOC_COUNT; ic++) {
309 for (int i = 0; i < as.length; i++) {
310 byte a = as[i];
311 byte b = bs[i];
312 rs[i] = (byte)(a & b);
313 }
314 }
315
316 bh.consume(rs);
317 }
318
319 @Benchmark
320 public void ANDMasked(Blackhole bh) {
321 byte[] as = fa.apply(size);
322 byte[] bs = fb.apply(size);
323 byte[] rs = fr.apply(size);
324 boolean[] ms = fm.apply(size);
325
326 for (int ic = 0; ic < INVOC_COUNT; ic++) {
327 for (int i = 0; i < as.length; i++) {
328 byte a = as[i];
329 byte b = bs[i];
330 if (ms[i % ms.length]) {
331 rs[i] = (byte)(a & b);
332 } else {
333 rs[i] = a;
334 }
335 }
336 }
337 bh.consume(rs);
338 }
339
340 @Benchmark
341 public void AND_NOT(Blackhole bh) {
342 byte[] as = fa.apply(size);
343 byte[] bs = fb.apply(size);
344 byte[] rs = fr.apply(size);
345
346 for (int ic = 0; ic < INVOC_COUNT; ic++) {
347 for (int i = 0; i < as.length; i++) {
348 byte a = as[i];
349 byte b = bs[i];
350 rs[i] = (byte)(a & ~b);
351 }
352 }
353
354 bh.consume(rs);
355 }
356
357 @Benchmark
358 public void AND_NOTMasked(Blackhole bh) {
359 byte[] as = fa.apply(size);
360 byte[] bs = fb.apply(size);
361 byte[] rs = fr.apply(size);
362 boolean[] ms = fm.apply(size);
363
364 for (int ic = 0; ic < INVOC_COUNT; ic++) {
365 for (int i = 0; i < as.length; i++) {
366 byte a = as[i];
367 byte b = bs[i];
368 if (ms[i % ms.length]) {
369 rs[i] = (byte)(a & ~b);
370 } else {
371 rs[i] = a;
372 }
373 }
374 }
375 bh.consume(rs);
376 }
377
378 @Benchmark
379 public void OR(Blackhole bh) {
380 byte[] as = fa.apply(size);
381 byte[] bs = fb.apply(size);
382 byte[] rs = fr.apply(size);
383
384 for (int ic = 0; ic < INVOC_COUNT; ic++) {
385 for (int i = 0; i < as.length; i++) {
386 byte a = as[i];
387 byte b = bs[i];
388 rs[i] = (byte)(a | b);
389 }
390 }
391
392 bh.consume(rs);
393 }
394
395 @Benchmark
396 public void ORMasked(Blackhole bh) {
397 byte[] as = fa.apply(size);
398 byte[] bs = fb.apply(size);
399 byte[] rs = fr.apply(size);
400 boolean[] ms = fm.apply(size);
401
402 for (int ic = 0; ic < INVOC_COUNT; ic++) {
403 for (int i = 0; i < as.length; i++) {
404 byte a = as[i];
405 byte b = bs[i];
406 if (ms[i % ms.length]) {
407 rs[i] = (byte)(a | b);
408 } else {
409 rs[i] = a;
410 }
411 }
412 }
413 bh.consume(rs);
414 }
415
416 @Benchmark
417 public void XOR(Blackhole bh) {
418 byte[] as = fa.apply(size);
419 byte[] bs = fb.apply(size);
420 byte[] rs = fr.apply(size);
421
422 for (int ic = 0; ic < INVOC_COUNT; ic++) {
423 for (int i = 0; i < as.length; i++) {
424 byte a = as[i];
425 byte b = bs[i];
426 rs[i] = (byte)(a ^ b);
427 }
428 }
429
430 bh.consume(rs);
431 }
432
433 @Benchmark
434 public void XORMasked(Blackhole bh) {
435 byte[] as = fa.apply(size);
436 byte[] bs = fb.apply(size);
437 byte[] rs = fr.apply(size);
438 boolean[] ms = fm.apply(size);
439
440 for (int ic = 0; ic < INVOC_COUNT; ic++) {
441 for (int i = 0; i < as.length; i++) {
442 byte a = as[i];
443 byte b = bs[i];
444 if (ms[i % ms.length]) {
445 rs[i] = (byte)(a ^ b);
446 } else {
447 rs[i] = a;
448 }
449 }
450 }
451 bh.consume(rs);
452 }
453
454 @Benchmark
455 public void LSHL(Blackhole bh) {
456 byte[] as = fa.apply(size);
457 byte[] bs = fb.apply(size);
458 byte[] rs = fr.apply(size);
459
460 for (int ic = 0; ic < INVOC_COUNT; ic++) {
461 for (int i = 0; i < as.length; i++) {
462 byte a = as[i];
463 byte b = bs[i];
464 rs[i] = (byte)((a << (b & 0x7)));
465 }
466 }
467
468 bh.consume(rs);
469 }
470
471 @Benchmark
472 public void LSHLMasked(Blackhole bh) {
473 byte[] as = fa.apply(size);
474 byte[] bs = fb.apply(size);
475 byte[] rs = fr.apply(size);
476 boolean[] ms = fm.apply(size);
477
478 for (int ic = 0; ic < INVOC_COUNT; ic++) {
479 for (int i = 0; i < as.length; i++) {
480 byte a = as[i];
481 byte b = bs[i];
482 if (ms[i % ms.length]) {
483 rs[i] = (byte)((a << (b & 0x7)));
484 } else {
485 rs[i] = a;
486 }
487 }
488 }
489 bh.consume(rs);
490 }
491
492 @Benchmark
493 public void ASHR(Blackhole bh) {
494 byte[] as = fa.apply(size);
495 byte[] bs = fb.apply(size);
496 byte[] rs = fr.apply(size);
497
498 for (int ic = 0; ic < INVOC_COUNT; ic++) {
499 for (int i = 0; i < as.length; i++) {
500 byte a = as[i];
501 byte b = bs[i];
502 rs[i] = (byte)((a >> (b & 0x7)));
503 }
504 }
505
506 bh.consume(rs);
507 }
508
509 @Benchmark
510 public void ASHRMasked(Blackhole bh) {
511 byte[] as = fa.apply(size);
512 byte[] bs = fb.apply(size);
513 byte[] rs = fr.apply(size);
514 boolean[] ms = fm.apply(size);
515
516 for (int ic = 0; ic < INVOC_COUNT; ic++) {
517 for (int i = 0; i < as.length; i++) {
518 byte a = as[i];
519 byte b = bs[i];
520 if (ms[i % ms.length]) {
521 rs[i] = (byte)((a >> (b & 0x7)));
522 } else {
523 rs[i] = a;
524 }
525 }
526 }
527 bh.consume(rs);
528 }
529
530 @Benchmark
531 public void LSHR(Blackhole bh) {
532 byte[] as = fa.apply(size);
533 byte[] bs = fb.apply(size);
534 byte[] rs = fr.apply(size);
535
536 for (int ic = 0; ic < INVOC_COUNT; ic++) {
537 for (int i = 0; i < as.length; i++) {
538 byte a = as[i];
539 byte b = bs[i];
540 rs[i] = (byte)(((a & 0xFF) >>> (b & 0x7)));
541 }
542 }
543
544 bh.consume(rs);
545 }
546
547 @Benchmark
548 public void LSHRMasked(Blackhole bh) {
549 byte[] as = fa.apply(size);
550 byte[] bs = fb.apply(size);
551 byte[] rs = fr.apply(size);
552 boolean[] ms = fm.apply(size);
553
554 for (int ic = 0; ic < INVOC_COUNT; ic++) {
555 for (int i = 0; i < as.length; i++) {
556 byte a = as[i];
557 byte b = bs[i];
558 if (ms[i % ms.length]) {
559 rs[i] = (byte)(((a & 0xFF) >>> (b & 0x7)));
560 } else {
561 rs[i] = a;
562 }
563 }
564 }
565 bh.consume(rs);
566 }
567
568 @Benchmark
569 public void LSHLShift(Blackhole bh) {
570 byte[] as = fa.apply(size);
571 byte[] bs = fb.apply(size);
572 byte[] rs = fr.apply(size);
573
574 for (int ic = 0; ic < INVOC_COUNT; ic++) {
575 for (int i = 0; i < as.length; i++) {
576 byte a = as[i];
577 byte b = bs[i];
578 rs[i] = (byte)((a << (b & 7)));
579 }
580 }
581
582 bh.consume(rs);
583 }
584
585 @Benchmark
586 public void LSHLMaskedShift(Blackhole bh) {
587 byte[] as = fa.apply(size);
588 byte[] bs = fb.apply(size);
589 byte[] rs = fr.apply(size);
590 boolean[] ms = fm.apply(size);
591
592 for (int ic = 0; ic < INVOC_COUNT; ic++) {
593 for (int i = 0; i < as.length; i++) {
594 byte a = as[i];
595 byte b = bs[i];
596 boolean m = ms[i % ms.length];
597 rs[i] = (m ? (byte)((a << (b & 7))) : a);
598 }
599 }
600
601 bh.consume(rs);
602 }
603
604 @Benchmark
605 public void LSHRShift(Blackhole bh) {
606 byte[] as = fa.apply(size);
607 byte[] bs = fb.apply(size);
608 byte[] rs = fr.apply(size);
609
610 for (int ic = 0; ic < INVOC_COUNT; ic++) {
611 for (int i = 0; i < as.length; i++) {
612 byte a = as[i];
613 byte b = bs[i];
614 rs[i] = (byte)(((a & 0xFF) >>> (b & 7)));
615 }
616 }
617
618 bh.consume(rs);
619 }
620
621 @Benchmark
622 public void LSHRMaskedShift(Blackhole bh) {
623 byte[] as = fa.apply(size);
624 byte[] bs = fb.apply(size);
625 byte[] rs = fr.apply(size);
626 boolean[] ms = fm.apply(size);
627
628 for (int ic = 0; ic < INVOC_COUNT; ic++) {
629 for (int i = 0; i < as.length; i++) {
630 byte a = as[i];
631 byte b = bs[i];
632 boolean m = ms[i % ms.length];
633 rs[i] = (m ? (byte)(((a & 0xFF) >>> (b & 7))) : a);
634 }
635 }
636
637 bh.consume(rs);
638 }
639
640 @Benchmark
641 public void ASHRShift(Blackhole bh) {
642 byte[] as = fa.apply(size);
643 byte[] bs = fb.apply(size);
644 byte[] rs = fr.apply(size);
645
646 for (int ic = 0; ic < INVOC_COUNT; ic++) {
647 for (int i = 0; i < as.length; i++) {
648 byte a = as[i];
649 byte b = bs[i];
650 rs[i] = (byte)((a >> (b & 7)));
651 }
652 }
653
654 bh.consume(rs);
655 }
656
657 @Benchmark
658 public void ASHRMaskedShift(Blackhole bh) {
659 byte[] as = fa.apply(size);
660 byte[] bs = fb.apply(size);
661 byte[] rs = fr.apply(size);
662 boolean[] ms = fm.apply(size);
663
664 for (int ic = 0; ic < INVOC_COUNT; ic++) {
665 for (int i = 0; i < as.length; i++) {
666 byte a = as[i];
667 byte b = bs[i];
668 boolean m = ms[i % ms.length];
669 rs[i] = (m ? (byte)((a >> (b & 7))) : a);
670 }
671 }
672
673 bh.consume(rs);
674 }
675
676 @Benchmark
677 public void ROR(Blackhole bh) {
678 byte[] as = fa.apply(size);
679 byte[] bs = fb.apply(size);
680 byte[] rs = fr.apply(size);
681
682 for (int ic = 0; ic < INVOC_COUNT; ic++) {
683 for (int i = 0; i < as.length; i++) {
684 byte a = as[i];
685 byte b = bs[i];
686 rs[i] = (byte)(ROR_scalar(a,b));
687 }
688 }
689
690 bh.consume(rs);
691 }
692
693 @Benchmark
694 public void RORMasked(Blackhole bh) {
695 byte[] as = fa.apply(size);
696 byte[] bs = fb.apply(size);
697 byte[] rs = fr.apply(size);
698 boolean[] ms = fm.apply(size);
699
700 for (int ic = 0; ic < INVOC_COUNT; ic++) {
701 for (int i = 0; i < as.length; i++) {
702 byte a = as[i];
703 byte b = bs[i];
704 if (ms[i % ms.length]) {
705 rs[i] = (byte)(ROR_scalar(a,b));
706 } else {
707 rs[i] = a;
708 }
709 }
710 }
711 bh.consume(rs);
712 }
713
714 @Benchmark
715 public void ROL(Blackhole bh) {
716 byte[] as = fa.apply(size);
717 byte[] bs = fb.apply(size);
718 byte[] rs = fr.apply(size);
719
720 for (int ic = 0; ic < INVOC_COUNT; ic++) {
721 for (int i = 0; i < as.length; i++) {
722 byte a = as[i];
723 byte b = bs[i];
724 rs[i] = (byte)(ROL_scalar(a,b));
725 }
726 }
727
728 bh.consume(rs);
729 }
730
731 @Benchmark
732 public void ROLMasked(Blackhole bh) {
733 byte[] as = fa.apply(size);
734 byte[] bs = fb.apply(size);
735 byte[] rs = fr.apply(size);
736 boolean[] ms = fm.apply(size);
737
738 for (int ic = 0; ic < INVOC_COUNT; ic++) {
739 for (int i = 0; i < as.length; i++) {
740 byte a = as[i];
741 byte b = bs[i];
742 if (ms[i % ms.length]) {
743 rs[i] = (byte)(ROL_scalar(a,b));
744 } else {
745 rs[i] = a;
746 }
747 }
748 }
749 bh.consume(rs);
750 }
751
752 @Benchmark
753 public void RORShift(Blackhole bh) {
754 byte[] as = fa.apply(size);
755 byte[] bs = fb.apply(size);
756 byte[] rs = fr.apply(size);
757
758 for (int ic = 0; ic < INVOC_COUNT; ic++) {
759 for (int i = 0; i < as.length; i++) {
760 byte a = as[i];
761 byte b = bs[i];
762 rs[i] = (byte)(ROR_scalar(a, b));
763 }
764 }
765
766 bh.consume(rs);
767 }
768
769 @Benchmark
770 public void RORMaskedShift(Blackhole bh) {
771 byte[] as = fa.apply(size);
772 byte[] bs = fb.apply(size);
773 byte[] rs = fr.apply(size);
774 boolean[] ms = fm.apply(size);
775
776 for (int ic = 0; ic < INVOC_COUNT; ic++) {
777 for (int i = 0; i < as.length; i++) {
778 byte a = as[i];
779 byte b = bs[i];
780 boolean m = ms[i % ms.length];
781 rs[i] = (m ? (byte)(ROR_scalar(a, b)) : a);
782 }
783 }
784
785 bh.consume(rs);
786 }
787
788 @Benchmark
789 public void ROLShift(Blackhole bh) {
790 byte[] as = fa.apply(size);
791 byte[] bs = fb.apply(size);
792 byte[] rs = fr.apply(size);
793
794 for (int ic = 0; ic < INVOC_COUNT; ic++) {
795 for (int i = 0; i < as.length; i++) {
796 byte a = as[i];
797 byte b = bs[i];
798 rs[i] = (byte)(ROL_scalar(a, b));
799 }
800 }
801
802 bh.consume(rs);
803 }
804
805 @Benchmark
806 public void ROLMaskedShift(Blackhole bh) {
807 byte[] as = fa.apply(size);
808 byte[] bs = fb.apply(size);
809 byte[] rs = fr.apply(size);
810 boolean[] ms = fm.apply(size);
811
812 for (int ic = 0; ic < INVOC_COUNT; ic++) {
813 for (int i = 0; i < as.length; i++) {
814 byte a = as[i];
815 byte b = bs[i];
816 boolean m = ms[i % ms.length];
817 rs[i] = (m ? (byte)(ROL_scalar(a, b)) : a);
818 }
819 }
820
821 bh.consume(rs);
822 }
823
824 @Benchmark
825 public void LSHRShiftConst(Blackhole bh) {
826 byte[] as = fa.apply(size);
827 byte[] bs = fb.apply(size);
828 byte[] rs = fr.apply(size);
829
830 for (int ic = 0; ic < INVOC_COUNT; ic++) {
831 for (int i = 0; i < as.length; i++) {
832 byte a = as[i];
833 byte b = bs[i];
834 rs[i] = (byte)(((a & 0xFF) >>> CONST_SHIFT));
835 }
836 }
837
838 bh.consume(rs);
839 }
840
841 @Benchmark
842 public void LSHRMaskedShiftConst(Blackhole bh) {
843 byte[] as = fa.apply(size);
844 byte[] bs = fb.apply(size);
845 byte[] rs = fr.apply(size);
846 boolean[] ms = fm.apply(size);
847
848 for (int ic = 0; ic < INVOC_COUNT; ic++) {
849 for (int i = 0; i < as.length; i++) {
850 byte a = as[i];
851 byte b = bs[i];
852 boolean m = ms[i % ms.length];
853 rs[i] = (m ? (byte)(((a & 0xFF) >>> CONST_SHIFT)) : a);
854 }
855 }
856
857 bh.consume(rs);
858 }
859
860 @Benchmark
861 public void LSHLShiftConst(Blackhole bh) {
862 byte[] as = fa.apply(size);
863 byte[] bs = fb.apply(size);
864 byte[] rs = fr.apply(size);
865
866 for (int ic = 0; ic < INVOC_COUNT; ic++) {
867 for (int i = 0; i < as.length; i++) {
868 byte a = as[i];
869 byte b = bs[i];
870 rs[i] = (byte)((a << CONST_SHIFT));
871 }
872 }
873
874 bh.consume(rs);
875 }
876
877 @Benchmark
878 public void LSHLMaskedShiftConst(Blackhole bh) {
879 byte[] as = fa.apply(size);
880 byte[] bs = fb.apply(size);
881 byte[] rs = fr.apply(size);
882 boolean[] ms = fm.apply(size);
883
884 for (int ic = 0; ic < INVOC_COUNT; ic++) {
885 for (int i = 0; i < as.length; i++) {
886 byte a = as[i];
887 byte b = bs[i];
888 boolean m = ms[i % ms.length];
889 rs[i] = (m ? (byte)((a << CONST_SHIFT)) : a);
890 }
891 }
892
893 bh.consume(rs);
894 }
895
896 @Benchmark
897 public void ASHRShiftConst(Blackhole bh) {
898 byte[] as = fa.apply(size);
899 byte[] bs = fb.apply(size);
900 byte[] rs = fr.apply(size);
901
902 for (int ic = 0; ic < INVOC_COUNT; ic++) {
903 for (int i = 0; i < as.length; i++) {
904 byte a = as[i];
905 byte b = bs[i];
906 rs[i] = (byte)((a >> CONST_SHIFT));
907 }
908 }
909
910 bh.consume(rs);
911 }
912
913 @Benchmark
914 public void ASHRMaskedShiftConst(Blackhole bh) {
915 byte[] as = fa.apply(size);
916 byte[] bs = fb.apply(size);
917 byte[] rs = fr.apply(size);
918 boolean[] ms = fm.apply(size);
919
920 for (int ic = 0; ic < INVOC_COUNT; ic++) {
921 for (int i = 0; i < as.length; i++) {
922 byte a = as[i];
923 byte b = bs[i];
924 boolean m = ms[i % ms.length];
925 rs[i] = (m ? (byte)((a >> CONST_SHIFT)) : a);
926 }
927 }
928
929 bh.consume(rs);
930 }
931
932 @Benchmark
933 public void RORShiftConst(Blackhole bh) {
934 byte[] as = fa.apply(size);
935 byte[] bs = fb.apply(size);
936 byte[] rs = fr.apply(size);
937
938 for (int ic = 0; ic < INVOC_COUNT; ic++) {
939 for (int i = 0; i < as.length; i++) {
940 byte a = as[i];
941 byte b = bs[i];
942 rs[i] = (byte)(ROR_scalar(a, CONST_SHIFT));
943 }
944 }
945
946 bh.consume(rs);
947 }
948
949 @Benchmark
950 public void RORMaskedShiftConst(Blackhole bh) {
951 byte[] as = fa.apply(size);
952 byte[] bs = fb.apply(size);
953 byte[] rs = fr.apply(size);
954 boolean[] ms = fm.apply(size);
955
956 for (int ic = 0; ic < INVOC_COUNT; ic++) {
957 for (int i = 0; i < as.length; i++) {
958 byte a = as[i];
959 byte b = bs[i];
960 boolean m = ms[i % ms.length];
961 rs[i] = (m ? (byte)(ROR_scalar(a, CONST_SHIFT)) : a);
962 }
963 }
964
965 bh.consume(rs);
966 }
967
968 @Benchmark
969 public void ROLShiftConst(Blackhole bh) {
970 byte[] as = fa.apply(size);
971 byte[] bs = fb.apply(size);
972 byte[] rs = fr.apply(size);
973
974 for (int ic = 0; ic < INVOC_COUNT; ic++) {
975 for (int i = 0; i < as.length; i++) {
976 byte a = as[i];
977 byte b = bs[i];
978 rs[i] = (byte)(ROL_scalar(a, CONST_SHIFT));
979 }
980 }
981
982 bh.consume(rs);
983 }
984
985 @Benchmark
986 public void ROLMaskedShiftConst(Blackhole bh) {
987 byte[] as = fa.apply(size);
988 byte[] bs = fb.apply(size);
989 byte[] rs = fr.apply(size);
990 boolean[] ms = fm.apply(size);
991
992 for (int ic = 0; ic < INVOC_COUNT; ic++) {
993 for (int i = 0; i < as.length; i++) {
994 byte a = as[i];
995 byte b = bs[i];
996 boolean m = ms[i % ms.length];
997 rs[i] = (m ? (byte)(ROL_scalar(a, CONST_SHIFT)) : a);
998 }
999 }
1000
1001 bh.consume(rs);
1002 }
1003
1004 @Benchmark
1005 public void MIN(Blackhole bh) {
1006 byte[] as = fa.apply(size);
1007 byte[] bs = fb.apply(size);
1008 byte[] rs = fr.apply(size);
1009
1010 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1011 for (int i = 0; i < as.length; i++) {
1012 byte a = as[i];
1013 byte b = bs[i];
1014 rs[i] = (byte)(Math.min(a, b));
1015 }
1016 }
1017
1018 bh.consume(rs);
1019 }
1020
1021 @Benchmark
1022 public void MAX(Blackhole bh) {
1023 byte[] as = fa.apply(size);
1024 byte[] bs = fb.apply(size);
1025 byte[] rs = fr.apply(size);
1026
1027 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1028 for (int i = 0; i < as.length; i++) {
1029 byte a = as[i];
1030 byte b = bs[i];
1031 rs[i] = (byte)(Math.max(a, b));
1032 }
1033 }
1034
1035 bh.consume(rs);
1036 }
1037
1038 @Benchmark
1039 public void UMIN(Blackhole bh) {
1040 byte[] as = fa.apply(size);
1041 byte[] bs = fb.apply(size);
1042 byte[] rs = fr.apply(size);
1043
1044 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1045 for (int i = 0; i < as.length; i++) {
1046 byte a = as[i];
1047 byte b = bs[i];
1048 rs[i] = (byte)(VectorMath.minUnsigned(a, b));
1049 }
1050 }
1051
1052 bh.consume(rs);
1053 }
1054
1055 @Benchmark
1056 public void UMINMasked(Blackhole bh) {
1057 byte[] as = fa.apply(size);
1058 byte[] bs = fb.apply(size);
1059 byte[] rs = fr.apply(size);
1060 boolean[] ms = fm.apply(size);
1061
1062 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1063 for (int i = 0; i < as.length; i++) {
1064 byte a = as[i];
1065 byte b = bs[i];
1066 if (ms[i % ms.length]) {
1067 rs[i] = (byte)(VectorMath.minUnsigned(a, b));
1068 } else {
1069 rs[i] = a;
1070 }
1071 }
1072 }
1073 bh.consume(rs);
1074 }
1075
1076 @Benchmark
1077 public void UMAX(Blackhole bh) {
1078 byte[] as = fa.apply(size);
1079 byte[] bs = fb.apply(size);
1080 byte[] rs = fr.apply(size);
1081
1082 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1083 for (int i = 0; i < as.length; i++) {
1084 byte a = as[i];
1085 byte b = bs[i];
1086 rs[i] = (byte)(VectorMath.maxUnsigned(a, b));
1087 }
1088 }
1089
1090 bh.consume(rs);
1091 }
1092
1093 @Benchmark
1094 public void UMAXMasked(Blackhole bh) {
1095 byte[] as = fa.apply(size);
1096 byte[] bs = fb.apply(size);
1097 byte[] rs = fr.apply(size);
1098 boolean[] ms = fm.apply(size);
1099
1100 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1101 for (int i = 0; i < as.length; i++) {
1102 byte a = as[i];
1103 byte b = bs[i];
1104 if (ms[i % ms.length]) {
1105 rs[i] = (byte)(VectorMath.maxUnsigned(a, b));
1106 } else {
1107 rs[i] = a;
1108 }
1109 }
1110 }
1111 bh.consume(rs);
1112 }
1113
1114 @Benchmark
1115 public void ANDLanes(Blackhole bh) {
1116 byte[] as = fa.apply(size);
1117 byte r = -1;
1118 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1119 r = -1;
1120 for (int i = 0; i < as.length; i++) {
1121 r &= as[i];
1122 }
1123 }
1124 bh.consume(r);
1125 }
1126
1127 @Benchmark
1128 public void ANDMaskedLanes(Blackhole bh) {
1129 byte[] as = fa.apply(size);
1130 boolean[] ms = fm.apply(size);
1131 byte r = -1;
1132 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1133 r = -1;
1134 for (int i = 0; i < as.length; i++) {
1135 if (ms[i % ms.length])
1136 r &= as[i];
1137 }
1138 }
1139 bh.consume(r);
1140 }
1141
1142 @Benchmark
1143 public void ORLanes(Blackhole bh) {
1144 byte[] as = fa.apply(size);
1145 byte r = 0;
1146 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1147 r = 0;
1148 for (int i = 0; i < as.length; i++) {
1149 r |= as[i];
1150 }
1151 }
1152 bh.consume(r);
1153 }
1154
1155 @Benchmark
1156 public void ORMaskedLanes(Blackhole bh) {
1157 byte[] as = fa.apply(size);
1158 boolean[] ms = fm.apply(size);
1159 byte r = 0;
1160 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1161 r = 0;
1162 for (int i = 0; i < as.length; i++) {
1163 if (ms[i % ms.length])
1164 r |= as[i];
1165 }
1166 }
1167 bh.consume(r);
1168 }
1169
1170 @Benchmark
1171 public void XORLanes(Blackhole bh) {
1172 byte[] as = fa.apply(size);
1173 byte r = 0;
1174 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1175 r = 0;
1176 for (int i = 0; i < as.length; i++) {
1177 r ^= as[i];
1178 }
1179 }
1180 bh.consume(r);
1181 }
1182
1183 @Benchmark
1184 public void XORMaskedLanes(Blackhole bh) {
1185 byte[] as = fa.apply(size);
1186 boolean[] ms = fm.apply(size);
1187 byte r = 0;
1188 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1189 r = 0;
1190 for (int i = 0; i < as.length; i++) {
1191 if (ms[i % ms.length])
1192 r ^= as[i];
1193 }
1194 }
1195 bh.consume(r);
1196 }
1197
1198 @Benchmark
1199 public void ADDLanes(Blackhole bh) {
1200 byte[] as = fa.apply(size);
1201 byte r = 0;
1202 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1203 r = 0;
1204 for (int i = 0; i < as.length; i++) {
1205 r += as[i];
1206 }
1207 }
1208 bh.consume(r);
1209 }
1210
1211 @Benchmark
1212 public void ADDMaskedLanes(Blackhole bh) {
1213 byte[] as = fa.apply(size);
1214 boolean[] ms = fm.apply(size);
1215 byte r = 0;
1216 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1217 r = 0;
1218 for (int i = 0; i < as.length; i++) {
1219 if (ms[i % ms.length])
1220 r += as[i];
1221 }
1222 }
1223 bh.consume(r);
1224 }
1225
1226 @Benchmark
1227 public void MULLanes(Blackhole bh) {
1228 byte[] as = fa.apply(size);
1229 byte r = 1;
1230 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1231 r = 1;
1232 for (int i = 0; i < as.length; i++) {
1233 r *= as[i];
1234 }
1235 }
1236 bh.consume(r);
1237 }
1238
1239 @Benchmark
1240 public void MULMaskedLanes(Blackhole bh) {
1241 byte[] as = fa.apply(size);
1242 boolean[] ms = fm.apply(size);
1243 byte r = 1;
1244 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1245 r = 1;
1246 for (int i = 0; i < as.length; i++) {
1247 if (ms[i % ms.length])
1248 r *= as[i];
1249 }
1250 }
1251 bh.consume(r);
1252 }
1253
1254 @Benchmark
1255 public void anyTrue(Blackhole bh) {
1256 boolean[] ms = fm.apply(size);
1257 boolean r = false;
1258 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1259 r = false;
1260 for (int i = 0; i < ms.length; i++) {
1261 r |= ms[i];
1262 }
1263 }
1264 bh.consume(r);
1265 }
1266
1267 @Benchmark
1268 public void allTrue(Blackhole bh) {
1269 boolean[] ms = fm.apply(size);
1270 boolean r = true;
1271 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1272 r = true;
1273 for (int i = 0; i < ms.length; i++) {
1274 r &= ms[i];
1275 }
1276 }
1277 bh.consume(r);
1278 }
1279
1280 @Benchmark
1281 public void IS_DEFAULT(Blackhole bh) {
1282 byte[] as = fa.apply(size);
1283 boolean r = true;
1284
1285 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1286 for (int i = 0; i < as.length; i++) {
1287 byte a = as[i];
1288 r &= (bits(a)==0); // accumulate so JIT can't eliminate the computation
1289 }
1290 }
1291
1292 bh.consume(r);
1293 }
1294
1295 @Benchmark
1296 public void IS_NEGATIVE(Blackhole bh) {
1297 byte[] as = fa.apply(size);
1298 boolean r = true;
1299
1300 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1301 for (int i = 0; i < as.length; i++) {
1302 byte a = as[i];
1303 r &= (bits(a)<0); // accumulate so JIT can't eliminate the computation
1304 }
1305 }
1306
1307 bh.consume(r);
1308 }
1309
1310 @Benchmark
1311 public void LT(Blackhole bh) {
1312 byte[] as = fa.apply(size);
1313 byte[] bs = fb.apply(size);
1314 boolean r = true;
1315
1316 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1317 for (int i = 0; i < as.length; i++) {
1318 r &= lt(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1319 }
1320 }
1321
1322 bh.consume(r);
1323 }
1324
1325 @Benchmark
1326 public void GT(Blackhole bh) {
1327 byte[] as = fa.apply(size);
1328 byte[] bs = fb.apply(size);
1329 boolean r = true;
1330
1331 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1332 for (int i = 0; i < as.length; i++) {
1333 r &= gt(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1334 }
1335 }
1336
1337 bh.consume(r);
1338 }
1339
1340 @Benchmark
1341 public void EQ(Blackhole bh) {
1342 byte[] as = fa.apply(size);
1343 byte[] bs = fb.apply(size);
1344 boolean r = true;
1345
1346 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1347 for (int i = 0; i < as.length; i++) {
1348 r &= eq(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1349 }
1350 }
1351
1352 bh.consume(r);
1353 }
1354
1355 @Benchmark
1356 public void NE(Blackhole bh) {
1357 byte[] as = fa.apply(size);
1358 byte[] bs = fb.apply(size);
1359 boolean r = true;
1360
1361 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1362 for (int i = 0; i < as.length; i++) {
1363 r &= neq(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1364 }
1365 }
1366
1367 bh.consume(r);
1368 }
1369
1370 @Benchmark
1371 public void LE(Blackhole bh) {
1372 byte[] as = fa.apply(size);
1373 byte[] bs = fb.apply(size);
1374 boolean r = true;
1375
1376 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1377 for (int i = 0; i < as.length; i++) {
1378 r &= le(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1379 }
1380 }
1381
1382 bh.consume(r);
1383 }
1384
1385 @Benchmark
1386 public void GE(Blackhole bh) {
1387 byte[] as = fa.apply(size);
1388 byte[] bs = fb.apply(size);
1389 boolean r = true;
1390
1391 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1392 for (int i = 0; i < as.length; i++) {
1393 r &= ge(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1394 }
1395 }
1396
1397 bh.consume(r);
1398 }
1399
1400 @Benchmark
1401 public void ULT(Blackhole bh) {
1402 byte[] as = fa.apply(size);
1403 byte[] bs = fb.apply(size);
1404 boolean r = true;
1405
1406 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1407 for (int i = 0; i < as.length; i++) {
1408 r &= ult(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1409 }
1410 }
1411
1412 bh.consume(r);
1413 }
1414
1415 @Benchmark
1416 public void UGT(Blackhole bh) {
1417 byte[] as = fa.apply(size);
1418 byte[] bs = fb.apply(size);
1419 boolean r = true;
1420
1421 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1422 for (int i = 0; i < as.length; i++) {
1423 r &= ugt(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1424 }
1425 }
1426
1427 bh.consume(r);
1428 }
1429
1430 @Benchmark
1431 public void ULE(Blackhole bh) {
1432 byte[] as = fa.apply(size);
1433 byte[] bs = fb.apply(size);
1434 boolean r = true;
1435
1436 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1437 for (int i = 0; i < as.length; i++) {
1438 r &= ule(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1439 }
1440 }
1441
1442 bh.consume(r);
1443 }
1444
1445 @Benchmark
1446 public void UGE(Blackhole bh) {
1447 byte[] as = fa.apply(size);
1448 byte[] bs = fb.apply(size);
1449 boolean r = true;
1450
1451 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1452 for (int i = 0; i < as.length; i++) {
1453 r &= uge(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1454 }
1455 }
1456
1457 bh.consume(r);
1458 }
1459
1460 @Benchmark
1461 public void blend(Blackhole bh) {
1462 byte[] as = fa.apply(size);
1463 byte[] bs = fb.apply(size);
1464 byte[] rs = fr.apply(size);
1465 boolean[] ms = fm.apply(size);
1466
1467 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1468 for (int i = 0; i < as.length; i++) {
1469 byte a = as[i];
1470 byte b = bs[i];
1471 boolean m = ms[i % ms.length];
1472 rs[i] = (m ? b : a);
1473 }
1474 }
1475
1476 bh.consume(rs);
1477 }
1478
1479 void rearrangeShared(int window, Blackhole bh) {
1480 byte[] as = fa.apply(size);
1481 int[] order = fs.apply(size);
1482 byte[] rs = fr.apply(size);
1483
1484 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1485 for (int i = 0; i < as.length; i += window) {
1486 for (int j = 0; j < window; j++) {
1487 byte a = as[i+j];
1488 int pos = order[j];
1489 rs[i + pos] = a;
1490 }
1491 }
1492 }
1493
1494 bh.consume(rs);
1495 }
1496
1497 @Benchmark
1498 public void rearrange064(Blackhole bh) {
1499 int window = 64 / Byte.SIZE;
1500 rearrangeShared(window, bh);
1501 }
1502
1503 @Benchmark
1504 public void rearrange128(Blackhole bh) {
1505 int window = 128 / Byte.SIZE;
1506 rearrangeShared(window, bh);
1507 }
1508
1509 @Benchmark
1510 public void rearrange256(Blackhole bh) {
1511 int window = 256 / Byte.SIZE;
1512 rearrangeShared(window, bh);
1513 }
1514
1515 @Benchmark
1516 public void rearrange512(Blackhole bh) {
1517 int window = 512 / Byte.SIZE;
1518 rearrangeShared(window, bh);
1519 }
1520
1521 @Benchmark
1522 public void compressScalar(Blackhole bh) {
1523 byte[] as = fa.apply(size);
1524 byte[] rs = new byte[size];
1525 boolean[] im = fmt.apply(size);
1526
1527 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1528 for (int i = 0, j = 0; i < as.length; i++) {
1529 if (im[i]) {
1530 rs[j++] = as[i];
1531 }
1532 }
1533 }
1534
1535 bh.consume(rs);
1536 }
1537
1538 @Benchmark
1539 public void expandScalar(Blackhole bh) {
1540 byte[] as = fa.apply(size);
1541 byte[] rs = new byte[size];
1542 boolean[] im = fmt.apply(size);
1543
1544 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1545 for (int i = 0, j = 0; i < as.length; i++) {
1546 if (im[i]) {
1547 rs[i++] = as[j++];
1548 }
1549 }
1550 }
1551
1552 bh.consume(rs);
1553 }
1554
1555 @Benchmark
1556 public void maskCompressScalar(Blackhole bh) {
1557 boolean[] im = fmt.apply(size);
1558 boolean[] rm = new boolean[size];
1559
1560 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1561 for (int i = 0, j = 0; i < im.length; i++) {
1562 if (im[i]) {
1563 rm[j++] = im[i];
1564 }
1565 }
1566 }
1567
1568 bh.consume(rm);
1569 }
1570
1571 void broadcastShared(int window, Blackhole bh) {
1572 byte[] as = fa.apply(size);
1573 byte[] rs = fr.apply(size);
1574
1575 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1576 for (int i = 0; i < as.length; i += window) {
1577 int idx = i;
1578 for (int j = 0; j < window; j++) {
1579 rs[j] = as[idx];
1580 }
1581 }
1582 }
1583
1584 bh.consume(rs);
1585 }
1586
1587 @Benchmark
1588 public void broadcast064(Blackhole bh) {
1589 int window = 64 / Byte.SIZE;
1590 broadcastShared(window, bh);
1591 }
1592
1593 @Benchmark
1594 public void broadcast128(Blackhole bh) {
1595 int window = 128 / Byte.SIZE;
1596 broadcastShared(window, bh);
1597 }
1598
1599 @Benchmark
1600 public void broadcast256(Blackhole bh) {
1601 int window = 256 / Byte.SIZE;
1602 broadcastShared(window, bh);
1603 }
1604
1605 @Benchmark
1606 public void broadcast512(Blackhole bh) {
1607 int window = 512 / Byte.SIZE;
1608 broadcastShared(window, bh);
1609 }
1610
1611 @Benchmark
1612 public void zero(Blackhole bh) {
1613 byte[] as = fa.apply(size);
1614
1615 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1616 for (int i = 0; i < as.length; i++) {
1617 as[i] = (byte)0;
1618 }
1619 }
1620
1621 bh.consume(as);
1622 }
1623
1624 @Benchmark
1625 public void BITWISE_BLEND(Blackhole bh) {
1626 byte[] as = fa.apply(size);
1627 byte[] bs = fb.apply(size);
1628 byte[] cs = fc.apply(size);
1629 byte[] rs = fr.apply(size);
1630
1631 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1632 for (int i = 0; i < as.length; i++) {
1633 byte a = as[i];
1634 byte b = bs[i];
1635 byte c = cs[i];
1636 rs[i] = (byte)((a&~(c))|(b&c));
1637 }
1638 }
1639
1640 bh.consume(rs);
1641 }
1642
1643 @Benchmark
1644 public void BITWISE_BLENDMasked(Blackhole bh) {
1645 byte[] as = fa.apply(size);
1646 byte[] bs = fb.apply(size);
1647 byte[] cs = fc.apply(size);
1648 byte[] rs = fr.apply(size);
1649 boolean[] ms = fm.apply(size);
1650
1651 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1652 for (int i = 0; i < as.length; i++) {
1653 byte a = as[i];
1654 byte b = bs[i];
1655 byte c = cs[i];
1656 if (ms[i % ms.length]) {
1657 rs[i] = (byte)((a&~(c))|(b&c));
1658 } else {
1659 rs[i] = a;
1660 }
1661 }
1662 }
1663 bh.consume(rs);
1664 }
1665 @Benchmark
1666 public void NEG(Blackhole bh) {
1667 byte[] as = fa.apply(size);
1668 byte[] rs = fr.apply(size);
1669
1670 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1671 for (int i = 0; i < as.length; i++) {
1672 byte a = as[i];
1673 rs[i] = (byte)(-((byte)a));
1674 }
1675 }
1676
1677 bh.consume(rs);
1678 }
1679
1680 @Benchmark
1681 public void NEGMasked(Blackhole bh) {
1682 byte[] as = fa.apply(size);
1683 byte[] rs = fr.apply(size);
1684 boolean[] ms = fm.apply(size);
1685
1686 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1687 for (int i = 0; i < as.length; i++) {
1688 byte a = as[i];
1689 boolean m = ms[i % ms.length];
1690 rs[i] = (m ? (byte)(-((byte)a)) : a);
1691 }
1692 }
1693
1694 bh.consume(rs);
1695 }
1696 @Benchmark
1697 public void ABS(Blackhole bh) {
1698 byte[] as = fa.apply(size);
1699 byte[] rs = fr.apply(size);
1700
1701 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1702 for (int i = 0; i < as.length; i++) {
1703 byte a = as[i];
1704 rs[i] = (byte)(Math.abs((byte)a));
1705 }
1706 }
1707
1708 bh.consume(rs);
1709 }
1710
1711 @Benchmark
1712 public void ABSMasked(Blackhole bh) {
1713 byte[] as = fa.apply(size);
1714 byte[] rs = fr.apply(size);
1715 boolean[] ms = fm.apply(size);
1716
1717 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1718 for (int i = 0; i < as.length; i++) {
1719 byte a = as[i];
1720 boolean m = ms[i % ms.length];
1721 rs[i] = (m ? (byte)(Math.abs((byte)a)) : a);
1722 }
1723 }
1724
1725 bh.consume(rs);
1726 }
1727 @Benchmark
1728 public void NOT(Blackhole bh) {
1729 byte[] as = fa.apply(size);
1730 byte[] rs = fr.apply(size);
1731
1732 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1733 for (int i = 0; i < as.length; i++) {
1734 byte a = as[i];
1735 rs[i] = (byte)(~((byte)a));
1736 }
1737 }
1738
1739 bh.consume(rs);
1740 }
1741
1742 @Benchmark
1743 public void NOTMasked(Blackhole bh) {
1744 byte[] as = fa.apply(size);
1745 byte[] rs = fr.apply(size);
1746 boolean[] ms = fm.apply(size);
1747
1748 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1749 for (int i = 0; i < as.length; i++) {
1750 byte a = as[i];
1751 boolean m = ms[i % ms.length];
1752 rs[i] = (m ? (byte)(~((byte)a)) : a);
1753 }
1754 }
1755
1756 bh.consume(rs);
1757 }
1758 @Benchmark
1759 public void ZOMO(Blackhole bh) {
1760 byte[] as = fa.apply(size);
1761 byte[] rs = fr.apply(size);
1762
1763 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1764 for (int i = 0; i < as.length; i++) {
1765 byte a = as[i];
1766 rs[i] = (byte)((a==0?0:-1));
1767 }
1768 }
1769
1770 bh.consume(rs);
1771 }
1772
1773 @Benchmark
1774 public void ZOMOMasked(Blackhole bh) {
1775 byte[] as = fa.apply(size);
1776 byte[] rs = fr.apply(size);
1777 boolean[] ms = fm.apply(size);
1778
1779 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1780 for (int i = 0; i < as.length; i++) {
1781 byte a = as[i];
1782 boolean m = ms[i % ms.length];
1783 rs[i] = (m ? (byte)((a==0?0:-1)) : a);
1784 }
1785 }
1786
1787 bh.consume(rs);
1788 }
1789 @Benchmark
1790 public void BIT_COUNT(Blackhole bh) {
1791 byte[] as = fa.apply(size);
1792 byte[] rs = fr.apply(size);
1793
1794 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1795 for (int i = 0; i < as.length; i++) {
1796 byte a = as[i];
1797 rs[i] = (byte)(Integer.bitCount((int)a & 0xFF));
1798 }
1799 }
1800
1801 bh.consume(rs);
1802 }
1803
1804 @Benchmark
1805 public void BIT_COUNTMasked(Blackhole bh) {
1806 byte[] as = fa.apply(size);
1807 byte[] rs = fr.apply(size);
1808 boolean[] ms = fm.apply(size);
1809
1810 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1811 for (int i = 0; i < as.length; i++) {
1812 byte a = as[i];
1813 boolean m = ms[i % ms.length];
1814 rs[i] = (m ? (byte)(Integer.bitCount((int)a & 0xFF)) : a);
1815 }
1816 }
1817
1818 bh.consume(rs);
1819 }
1820 @Benchmark
1821 public void TRAILING_ZEROS_COUNT(Blackhole bh) {
1822 byte[] as = fa.apply(size);
1823 byte[] rs = fr.apply(size);
1824
1825 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1826 for (int i = 0; i < as.length; i++) {
1827 byte a = as[i];
1828 rs[i] = (byte)(TRAILING_ZEROS_COUNT_scalar(a));
1829 }
1830 }
1831
1832 bh.consume(rs);
1833 }
1834
1835 @Benchmark
1836 public void TRAILING_ZEROS_COUNTMasked(Blackhole bh) {
1837 byte[] as = fa.apply(size);
1838 byte[] rs = fr.apply(size);
1839 boolean[] ms = fm.apply(size);
1840
1841 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1842 for (int i = 0; i < as.length; i++) {
1843 byte a = as[i];
1844 boolean m = ms[i % ms.length];
1845 rs[i] = (m ? (byte)(TRAILING_ZEROS_COUNT_scalar(a)) : a);
1846 }
1847 }
1848
1849 bh.consume(rs);
1850 }
1851 @Benchmark
1852 public void LEADING_ZEROS_COUNT(Blackhole bh) {
1853 byte[] as = fa.apply(size);
1854 byte[] rs = fr.apply(size);
1855
1856 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1857 for (int i = 0; i < as.length; i++) {
1858 byte a = as[i];
1859 rs[i] = (byte)(LEADING_ZEROS_COUNT_scalar(a));
1860 }
1861 }
1862
1863 bh.consume(rs);
1864 }
1865
1866 @Benchmark
1867 public void LEADING_ZEROS_COUNTMasked(Blackhole bh) {
1868 byte[] as = fa.apply(size);
1869 byte[] rs = fr.apply(size);
1870 boolean[] ms = fm.apply(size);
1871
1872 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1873 for (int i = 0; i < as.length; i++) {
1874 byte a = as[i];
1875 boolean m = ms[i % ms.length];
1876 rs[i] = (m ? (byte)(LEADING_ZEROS_COUNT_scalar(a)) : a);
1877 }
1878 }
1879
1880 bh.consume(rs);
1881 }
1882 @Benchmark
1883 public void REVERSE(Blackhole bh) {
1884 byte[] as = fa.apply(size);
1885 byte[] rs = fr.apply(size);
1886
1887 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1888 for (int i = 0; i < as.length; i++) {
1889 byte a = as[i];
1890 rs[i] = (byte)(REVERSE_scalar(a));
1891 }
1892 }
1893
1894 bh.consume(rs);
1895 }
1896
1897 @Benchmark
1898 public void REVERSEMasked(Blackhole bh) {
1899 byte[] as = fa.apply(size);
1900 byte[] rs = fr.apply(size);
1901 boolean[] ms = fm.apply(size);
1902
1903 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1904 for (int i = 0; i < as.length; i++) {
1905 byte a = as[i];
1906 boolean m = ms[i % ms.length];
1907 rs[i] = (m ? (byte)(REVERSE_scalar(a)) : a);
1908 }
1909 }
1910
1911 bh.consume(rs);
1912 }
1913 @Benchmark
1914 public void REVERSE_BYTES(Blackhole bh) {
1915 byte[] as = fa.apply(size);
1916 byte[] rs = fr.apply(size);
1917
1918 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1919 for (int i = 0; i < as.length; i++) {
1920 byte a = as[i];
1921 rs[i] = (byte)(a);
1922 }
1923 }
1924
1925 bh.consume(rs);
1926 }
1927
1928 @Benchmark
1929 public void REVERSE_BYTESMasked(Blackhole bh) {
1930 byte[] as = fa.apply(size);
1931 byte[] rs = fr.apply(size);
1932 boolean[] ms = fm.apply(size);
1933
1934 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1935 for (int i = 0; i < as.length; i++) {
1936 byte a = as[i];
1937 boolean m = ms[i % ms.length];
1938 rs[i] = (m ? (byte)(a) : a);
1939 }
1940 }
1941
1942 bh.consume(rs);
1943 }
1944 }