1 /*
2 * Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 */
23
24 package org.openjdk.bench.jdk.incubator.vector.operation;
25
26 // -- This file was mechanically generated: Do not edit! -- //
27
28 import java.util.concurrent.TimeUnit;
29 import java.util.function.IntFunction;
30 import jdk.incubator.vector.VectorMath;
31
32 import org.openjdk.jmh.annotations.*;
33 import org.openjdk.jmh.infra.Blackhole;
34
35 @BenchmarkMode(Mode.Throughput)
36 @OutputTimeUnit(TimeUnit.MILLISECONDS)
37 @State(Scope.Benchmark)
38 @Warmup(iterations = 3, time = 1)
39 @Measurement(iterations = 5, time = 1)
40 @Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
41 public class ShortScalar extends AbstractVectorBenchmark {
// Number of times each benchmark method repeats its inner loop per invocation.
static final int INVOC_COUNT = 1; // To align with vector benchmarks.

// Fixed shift/rotate distance used by the *ShiftConst benchmarks: half the bit width of a short.
private static final short CONST_SHIFT = Short.SIZE / 2;

// Length of the arrays built by fill() and init(); a JMH parameter whose listed value is 1024.
@Param("1024")
int size;
48
49 short[] fill(IntFunction<Short> f) {
50 short[] array = new short[size];
51 for (int i = 0; i < array.length; i++) {
52 array[i] = f.apply(i);
53 }
54 return array;
55 }
56
57 static short bits(short e) {
58 return e;
59 }
60
// Operand arrays (as, bs, cs) and the result array (rs); all assigned in init().
short[] as, bs, cs, rs;
// Mask arrays assigned in init() via fillMask (defined outside this view):
// ms from i -> (i % 2) == 0, mt from i -> true, rms from i -> false.
boolean[] ms, mt, rms;
// Integer array assigned in init() via fillInt, with values drawn from RAND.
int[] ss;
64
65 @Setup
66 public void init() {
67 as = fill(i -> (short)(2*i));
68 bs = fill(i -> (short)(i+1));
69 cs = fill(i -> (short)(i+5));
70 rs = fill(i -> (short)0);
71 ms = fillMask(size, i -> (i % 2) == 0);
72 mt = fillMask(size, i -> true);
73 rms = fillMask(size, i -> false);
74
75 ss = fillInt(size, i -> RAND.nextInt(Math.max(i,1)));
76 }
77
// Array suppliers used by the benchmark methods. Each one ignores its argument (vl)
// and returns the current value of the corresponding field assigned in init().
final IntFunction<short[]> fa = vl -> as;
final IntFunction<short[]> fb = vl -> bs;
final IntFunction<short[]> fc = vl -> cs;
final IntFunction<short[]> fr = vl -> rs;
final IntFunction<boolean[]> fm = vl -> ms;
final IntFunction<boolean[]> fmt = vl -> mt;
final IntFunction<boolean[]> fmr = vl -> rms;
final IntFunction<int[]> fs = vl -> ss;
86
87 static boolean eq(short a, short b) {
88 return a == b;
89 }
90
91 static boolean neq(short a, short b) {
92 return a != b;
93 }
94
95 static boolean lt(short a, short b) {
96 return a < b;
97 }
98
99 static boolean le(short a, short b) {
100 return a <= b;
101 }
102
103 static boolean gt(short a, short b) {
104 return a > b;
105 }
106
107 static boolean ge(short a, short b) {
108 return a >= b;
109 }
110
111 static boolean ult(short a, short b) {
112 return Short.compareUnsigned(a, b) < 0;
113 }
114
115 static boolean ule(short a, short b) {
116 return Short.compareUnsigned(a, b) <= 0;
117 }
118
119 static boolean ugt(short a, short b) {
120 return Short.compareUnsigned(a, b) > 0;
121 }
122
123 static boolean uge(short a, short b) {
124 return Short.compareUnsigned(a, b) >= 0;
125 }
126
127 static short ROL_scalar(short a, short b) {
128 return (short)(((((short)a) & 0xFFFF) << (b & 15)) | ((((short)a) & 0xFFFF) >>> (16 - (b & 15))));
129 }
130
131 static short ROR_scalar(short a, short b) {
132 return (short)(((((short)a) & 0xFFFF) >>> (b & 15)) | ((((short)a) & 0xFFFF) << (16 - (b & 15))));
133 }
134
135 static short TRAILING_ZEROS_COUNT_scalar(short a) {
136 return (short) (a != 0 ? Integer.numberOfTrailingZeros(a) : 16);
137 }
138
139 static short LEADING_ZEROS_COUNT_scalar(short a) {
140 return (short) (a >= 0 ? Integer.numberOfLeadingZeros(a) - 16 : 0);
141 }
142
143 static short REVERSE_scalar(short a) {
144 short b = ROL_scalar(a, (short) 8);
145 b = (short)(((b & 0x5555) << 1) | ((b & 0xAAAA) >>> 1));
146 b = (short)(((b & 0x3333) << 2) | ((b & 0xCCCC) >>> 2));
147 b = (short)(((b & 0x0F0F) << 4) | ((b & 0xF0F0) >>> 4));
148 return b;
149 }
150
151 @Benchmark
152 public void ADD(Blackhole bh) {
153 short[] as = fa.apply(size);
154 short[] bs = fb.apply(size);
155 short[] rs = fr.apply(size);
156
157 for (int ic = 0; ic < INVOC_COUNT; ic++) {
158 for (int i = 0; i < as.length; i++) {
159 short a = as[i];
160 short b = bs[i];
161 rs[i] = (short)(a + b);
162 }
163 }
164
165 bh.consume(rs);
166 }
167
168 @Benchmark
169 public void ADDMasked(Blackhole bh) {
170 short[] as = fa.apply(size);
171 short[] bs = fb.apply(size);
172 short[] rs = fr.apply(size);
173 boolean[] ms = fm.apply(size);
174
175 for (int ic = 0; ic < INVOC_COUNT; ic++) {
176 for (int i = 0; i < as.length; i++) {
177 short a = as[i];
178 short b = bs[i];
179 if (ms[i % ms.length]) {
180 rs[i] = (short)(a + b);
181 } else {
182 rs[i] = a;
183 }
184 }
185 }
186 bh.consume(rs);
187 }
188
189 @Benchmark
190 public void SUB(Blackhole bh) {
191 short[] as = fa.apply(size);
192 short[] bs = fb.apply(size);
193 short[] rs = fr.apply(size);
194
195 for (int ic = 0; ic < INVOC_COUNT; ic++) {
196 for (int i = 0; i < as.length; i++) {
197 short a = as[i];
198 short b = bs[i];
199 rs[i] = (short)(a - b);
200 }
201 }
202
203 bh.consume(rs);
204 }
205
206 @Benchmark
207 public void SUBMasked(Blackhole bh) {
208 short[] as = fa.apply(size);
209 short[] bs = fb.apply(size);
210 short[] rs = fr.apply(size);
211 boolean[] ms = fm.apply(size);
212
213 for (int ic = 0; ic < INVOC_COUNT; ic++) {
214 for (int i = 0; i < as.length; i++) {
215 short a = as[i];
216 short b = bs[i];
217 if (ms[i % ms.length]) {
218 rs[i] = (short)(a - b);
219 } else {
220 rs[i] = a;
221 }
222 }
223 }
224 bh.consume(rs);
225 }
226
227 @Benchmark
228 public void MUL(Blackhole bh) {
229 short[] as = fa.apply(size);
230 short[] bs = fb.apply(size);
231 short[] rs = fr.apply(size);
232
233 for (int ic = 0; ic < INVOC_COUNT; ic++) {
234 for (int i = 0; i < as.length; i++) {
235 short a = as[i];
236 short b = bs[i];
237 rs[i] = (short)(a * b);
238 }
239 }
240
241 bh.consume(rs);
242 }
243
244 @Benchmark
245 public void MULMasked(Blackhole bh) {
246 short[] as = fa.apply(size);
247 short[] bs = fb.apply(size);
248 short[] rs = fr.apply(size);
249 boolean[] ms = fm.apply(size);
250
251 for (int ic = 0; ic < INVOC_COUNT; ic++) {
252 for (int i = 0; i < as.length; i++) {
253 short a = as[i];
254 short b = bs[i];
255 if (ms[i % ms.length]) {
256 rs[i] = (short)(a * b);
257 } else {
258 rs[i] = a;
259 }
260 }
261 }
262 bh.consume(rs);
263 }
264
265 @Benchmark
266 public void FIRST_NONZERO(Blackhole bh) {
267 short[] as = fa.apply(size);
268 short[] bs = fb.apply(size);
269 short[] rs = fr.apply(size);
270
271 for (int ic = 0; ic < INVOC_COUNT; ic++) {
272 for (int i = 0; i < as.length; i++) {
273 short a = as[i];
274 short b = bs[i];
275 rs[i] = (short)((a)!=0?a:b);
276 }
277 }
278
279 bh.consume(rs);
280 }
281
282 @Benchmark
283 public void FIRST_NONZEROMasked(Blackhole bh) {
284 short[] as = fa.apply(size);
285 short[] bs = fb.apply(size);
286 short[] rs = fr.apply(size);
287 boolean[] ms = fm.apply(size);
288
289 for (int ic = 0; ic < INVOC_COUNT; ic++) {
290 for (int i = 0; i < as.length; i++) {
291 short a = as[i];
292 short b = bs[i];
293 if (ms[i % ms.length]) {
294 rs[i] = (short)((a)!=0?a:b);
295 } else {
296 rs[i] = a;
297 }
298 }
299 }
300 bh.consume(rs);
301 }
302
303 @Benchmark
304 public void AND(Blackhole bh) {
305 short[] as = fa.apply(size);
306 short[] bs = fb.apply(size);
307 short[] rs = fr.apply(size);
308
309 for (int ic = 0; ic < INVOC_COUNT; ic++) {
310 for (int i = 0; i < as.length; i++) {
311 short a = as[i];
312 short b = bs[i];
313 rs[i] = (short)(a & b);
314 }
315 }
316
317 bh.consume(rs);
318 }
319
320 @Benchmark
321 public void ANDMasked(Blackhole bh) {
322 short[] as = fa.apply(size);
323 short[] bs = fb.apply(size);
324 short[] rs = fr.apply(size);
325 boolean[] ms = fm.apply(size);
326
327 for (int ic = 0; ic < INVOC_COUNT; ic++) {
328 for (int i = 0; i < as.length; i++) {
329 short a = as[i];
330 short b = bs[i];
331 if (ms[i % ms.length]) {
332 rs[i] = (short)(a & b);
333 } else {
334 rs[i] = a;
335 }
336 }
337 }
338 bh.consume(rs);
339 }
340
341 @Benchmark
342 public void AND_NOT(Blackhole bh) {
343 short[] as = fa.apply(size);
344 short[] bs = fb.apply(size);
345 short[] rs = fr.apply(size);
346
347 for (int ic = 0; ic < INVOC_COUNT; ic++) {
348 for (int i = 0; i < as.length; i++) {
349 short a = as[i];
350 short b = bs[i];
351 rs[i] = (short)(a & ~b);
352 }
353 }
354
355 bh.consume(rs);
356 }
357
358 @Benchmark
359 public void AND_NOTMasked(Blackhole bh) {
360 short[] as = fa.apply(size);
361 short[] bs = fb.apply(size);
362 short[] rs = fr.apply(size);
363 boolean[] ms = fm.apply(size);
364
365 for (int ic = 0; ic < INVOC_COUNT; ic++) {
366 for (int i = 0; i < as.length; i++) {
367 short a = as[i];
368 short b = bs[i];
369 if (ms[i % ms.length]) {
370 rs[i] = (short)(a & ~b);
371 } else {
372 rs[i] = a;
373 }
374 }
375 }
376 bh.consume(rs);
377 }
378
379 @Benchmark
380 public void OR(Blackhole bh) {
381 short[] as = fa.apply(size);
382 short[] bs = fb.apply(size);
383 short[] rs = fr.apply(size);
384
385 for (int ic = 0; ic < INVOC_COUNT; ic++) {
386 for (int i = 0; i < as.length; i++) {
387 short a = as[i];
388 short b = bs[i];
389 rs[i] = (short)(a | b);
390 }
391 }
392
393 bh.consume(rs);
394 }
395
396 @Benchmark
397 public void ORMasked(Blackhole bh) {
398 short[] as = fa.apply(size);
399 short[] bs = fb.apply(size);
400 short[] rs = fr.apply(size);
401 boolean[] ms = fm.apply(size);
402
403 for (int ic = 0; ic < INVOC_COUNT; ic++) {
404 for (int i = 0; i < as.length; i++) {
405 short a = as[i];
406 short b = bs[i];
407 if (ms[i % ms.length]) {
408 rs[i] = (short)(a | b);
409 } else {
410 rs[i] = a;
411 }
412 }
413 }
414 bh.consume(rs);
415 }
416
417 @Benchmark
418 public void XOR(Blackhole bh) {
419 short[] as = fa.apply(size);
420 short[] bs = fb.apply(size);
421 short[] rs = fr.apply(size);
422
423 for (int ic = 0; ic < INVOC_COUNT; ic++) {
424 for (int i = 0; i < as.length; i++) {
425 short a = as[i];
426 short b = bs[i];
427 rs[i] = (short)(a ^ b);
428 }
429 }
430
431 bh.consume(rs);
432 }
433
434 @Benchmark
435 public void XORMasked(Blackhole bh) {
436 short[] as = fa.apply(size);
437 short[] bs = fb.apply(size);
438 short[] rs = fr.apply(size);
439 boolean[] ms = fm.apply(size);
440
441 for (int ic = 0; ic < INVOC_COUNT; ic++) {
442 for (int i = 0; i < as.length; i++) {
443 short a = as[i];
444 short b = bs[i];
445 if (ms[i % ms.length]) {
446 rs[i] = (short)(a ^ b);
447 } else {
448 rs[i] = a;
449 }
450 }
451 }
452 bh.consume(rs);
453 }
454
455 @Benchmark
456 public void LSHL(Blackhole bh) {
457 short[] as = fa.apply(size);
458 short[] bs = fb.apply(size);
459 short[] rs = fr.apply(size);
460
461 for (int ic = 0; ic < INVOC_COUNT; ic++) {
462 for (int i = 0; i < as.length; i++) {
463 short a = as[i];
464 short b = bs[i];
465 rs[i] = (short)((a << (b & 0xF)));
466 }
467 }
468
469 bh.consume(rs);
470 }
471
472 @Benchmark
473 public void LSHLMasked(Blackhole bh) {
474 short[] as = fa.apply(size);
475 short[] bs = fb.apply(size);
476 short[] rs = fr.apply(size);
477 boolean[] ms = fm.apply(size);
478
479 for (int ic = 0; ic < INVOC_COUNT; ic++) {
480 for (int i = 0; i < as.length; i++) {
481 short a = as[i];
482 short b = bs[i];
483 if (ms[i % ms.length]) {
484 rs[i] = (short)((a << (b & 0xF)));
485 } else {
486 rs[i] = a;
487 }
488 }
489 }
490 bh.consume(rs);
491 }
492
493 @Benchmark
494 public void ASHR(Blackhole bh) {
495 short[] as = fa.apply(size);
496 short[] bs = fb.apply(size);
497 short[] rs = fr.apply(size);
498
499 for (int ic = 0; ic < INVOC_COUNT; ic++) {
500 for (int i = 0; i < as.length; i++) {
501 short a = as[i];
502 short b = bs[i];
503 rs[i] = (short)((a >> (b & 0xF)));
504 }
505 }
506
507 bh.consume(rs);
508 }
509
510 @Benchmark
511 public void ASHRMasked(Blackhole bh) {
512 short[] as = fa.apply(size);
513 short[] bs = fb.apply(size);
514 short[] rs = fr.apply(size);
515 boolean[] ms = fm.apply(size);
516
517 for (int ic = 0; ic < INVOC_COUNT; ic++) {
518 for (int i = 0; i < as.length; i++) {
519 short a = as[i];
520 short b = bs[i];
521 if (ms[i % ms.length]) {
522 rs[i] = (short)((a >> (b & 0xF)));
523 } else {
524 rs[i] = a;
525 }
526 }
527 }
528 bh.consume(rs);
529 }
530
531 @Benchmark
532 public void LSHR(Blackhole bh) {
533 short[] as = fa.apply(size);
534 short[] bs = fb.apply(size);
535 short[] rs = fr.apply(size);
536
537 for (int ic = 0; ic < INVOC_COUNT; ic++) {
538 for (int i = 0; i < as.length; i++) {
539 short a = as[i];
540 short b = bs[i];
541 rs[i] = (short)(((a & 0xFFFF) >>> (b & 0xF)));
542 }
543 }
544
545 bh.consume(rs);
546 }
547
548 @Benchmark
549 public void LSHRMasked(Blackhole bh) {
550 short[] as = fa.apply(size);
551 short[] bs = fb.apply(size);
552 short[] rs = fr.apply(size);
553 boolean[] ms = fm.apply(size);
554
555 for (int ic = 0; ic < INVOC_COUNT; ic++) {
556 for (int i = 0; i < as.length; i++) {
557 short a = as[i];
558 short b = bs[i];
559 if (ms[i % ms.length]) {
560 rs[i] = (short)(((a & 0xFFFF) >>> (b & 0xF)));
561 } else {
562 rs[i] = a;
563 }
564 }
565 }
566 bh.consume(rs);
567 }
568
569 @Benchmark
570 public void LSHLShift(Blackhole bh) {
571 short[] as = fa.apply(size);
572 short[] bs = fb.apply(size);
573 short[] rs = fr.apply(size);
574
575 for (int ic = 0; ic < INVOC_COUNT; ic++) {
576 for (int i = 0; i < as.length; i++) {
577 short a = as[i];
578 short b = bs[i];
579 rs[i] = (short)((a << (b & 15)));
580 }
581 }
582
583 bh.consume(rs);
584 }
585
586 @Benchmark
587 public void LSHLMaskedShift(Blackhole bh) {
588 short[] as = fa.apply(size);
589 short[] bs = fb.apply(size);
590 short[] rs = fr.apply(size);
591 boolean[] ms = fm.apply(size);
592
593 for (int ic = 0; ic < INVOC_COUNT; ic++) {
594 for (int i = 0; i < as.length; i++) {
595 short a = as[i];
596 short b = bs[i];
597 boolean m = ms[i % ms.length];
598 rs[i] = (m ? (short)((a << (b & 15))) : a);
599 }
600 }
601
602 bh.consume(rs);
603 }
604
605 @Benchmark
606 public void LSHRShift(Blackhole bh) {
607 short[] as = fa.apply(size);
608 short[] bs = fb.apply(size);
609 short[] rs = fr.apply(size);
610
611 for (int ic = 0; ic < INVOC_COUNT; ic++) {
612 for (int i = 0; i < as.length; i++) {
613 short a = as[i];
614 short b = bs[i];
615 rs[i] = (short)(((a & 0xFFFF) >>> (b & 15)));
616 }
617 }
618
619 bh.consume(rs);
620 }
621
622 @Benchmark
623 public void LSHRMaskedShift(Blackhole bh) {
624 short[] as = fa.apply(size);
625 short[] bs = fb.apply(size);
626 short[] rs = fr.apply(size);
627 boolean[] ms = fm.apply(size);
628
629 for (int ic = 0; ic < INVOC_COUNT; ic++) {
630 for (int i = 0; i < as.length; i++) {
631 short a = as[i];
632 short b = bs[i];
633 boolean m = ms[i % ms.length];
634 rs[i] = (m ? (short)(((a & 0xFFFF) >>> (b & 15))) : a);
635 }
636 }
637
638 bh.consume(rs);
639 }
640
641 @Benchmark
642 public void ASHRShift(Blackhole bh) {
643 short[] as = fa.apply(size);
644 short[] bs = fb.apply(size);
645 short[] rs = fr.apply(size);
646
647 for (int ic = 0; ic < INVOC_COUNT; ic++) {
648 for (int i = 0; i < as.length; i++) {
649 short a = as[i];
650 short b = bs[i];
651 rs[i] = (short)((a >> (b & 15)));
652 }
653 }
654
655 bh.consume(rs);
656 }
657
658 @Benchmark
659 public void ASHRMaskedShift(Blackhole bh) {
660 short[] as = fa.apply(size);
661 short[] bs = fb.apply(size);
662 short[] rs = fr.apply(size);
663 boolean[] ms = fm.apply(size);
664
665 for (int ic = 0; ic < INVOC_COUNT; ic++) {
666 for (int i = 0; i < as.length; i++) {
667 short a = as[i];
668 short b = bs[i];
669 boolean m = ms[i % ms.length];
670 rs[i] = (m ? (short)((a >> (b & 15))) : a);
671 }
672 }
673
674 bh.consume(rs);
675 }
676
677 @Benchmark
678 public void ROR(Blackhole bh) {
679 short[] as = fa.apply(size);
680 short[] bs = fb.apply(size);
681 short[] rs = fr.apply(size);
682
683 for (int ic = 0; ic < INVOC_COUNT; ic++) {
684 for (int i = 0; i < as.length; i++) {
685 short a = as[i];
686 short b = bs[i];
687 rs[i] = (short)(ROR_scalar(a,b));
688 }
689 }
690
691 bh.consume(rs);
692 }
693
694 @Benchmark
695 public void RORMasked(Blackhole bh) {
696 short[] as = fa.apply(size);
697 short[] bs = fb.apply(size);
698 short[] rs = fr.apply(size);
699 boolean[] ms = fm.apply(size);
700
701 for (int ic = 0; ic < INVOC_COUNT; ic++) {
702 for (int i = 0; i < as.length; i++) {
703 short a = as[i];
704 short b = bs[i];
705 if (ms[i % ms.length]) {
706 rs[i] = (short)(ROR_scalar(a,b));
707 } else {
708 rs[i] = a;
709 }
710 }
711 }
712 bh.consume(rs);
713 }
714
715 @Benchmark
716 public void ROL(Blackhole bh) {
717 short[] as = fa.apply(size);
718 short[] bs = fb.apply(size);
719 short[] rs = fr.apply(size);
720
721 for (int ic = 0; ic < INVOC_COUNT; ic++) {
722 for (int i = 0; i < as.length; i++) {
723 short a = as[i];
724 short b = bs[i];
725 rs[i] = (short)(ROL_scalar(a,b));
726 }
727 }
728
729 bh.consume(rs);
730 }
731
732 @Benchmark
733 public void ROLMasked(Blackhole bh) {
734 short[] as = fa.apply(size);
735 short[] bs = fb.apply(size);
736 short[] rs = fr.apply(size);
737 boolean[] ms = fm.apply(size);
738
739 for (int ic = 0; ic < INVOC_COUNT; ic++) {
740 for (int i = 0; i < as.length; i++) {
741 short a = as[i];
742 short b = bs[i];
743 if (ms[i % ms.length]) {
744 rs[i] = (short)(ROL_scalar(a,b));
745 } else {
746 rs[i] = a;
747 }
748 }
749 }
750 bh.consume(rs);
751 }
752
753 @Benchmark
754 public void RORShift(Blackhole bh) {
755 short[] as = fa.apply(size);
756 short[] bs = fb.apply(size);
757 short[] rs = fr.apply(size);
758
759 for (int ic = 0; ic < INVOC_COUNT; ic++) {
760 for (int i = 0; i < as.length; i++) {
761 short a = as[i];
762 short b = bs[i];
763 rs[i] = (short)(ROR_scalar(a, b));
764 }
765 }
766
767 bh.consume(rs);
768 }
769
770 @Benchmark
771 public void RORMaskedShift(Blackhole bh) {
772 short[] as = fa.apply(size);
773 short[] bs = fb.apply(size);
774 short[] rs = fr.apply(size);
775 boolean[] ms = fm.apply(size);
776
777 for (int ic = 0; ic < INVOC_COUNT; ic++) {
778 for (int i = 0; i < as.length; i++) {
779 short a = as[i];
780 short b = bs[i];
781 boolean m = ms[i % ms.length];
782 rs[i] = (m ? (short)(ROR_scalar(a, b)) : a);
783 }
784 }
785
786 bh.consume(rs);
787 }
788
789 @Benchmark
790 public void ROLShift(Blackhole bh) {
791 short[] as = fa.apply(size);
792 short[] bs = fb.apply(size);
793 short[] rs = fr.apply(size);
794
795 for (int ic = 0; ic < INVOC_COUNT; ic++) {
796 for (int i = 0; i < as.length; i++) {
797 short a = as[i];
798 short b = bs[i];
799 rs[i] = (short)(ROL_scalar(a, b));
800 }
801 }
802
803 bh.consume(rs);
804 }
805
806 @Benchmark
807 public void ROLMaskedShift(Blackhole bh) {
808 short[] as = fa.apply(size);
809 short[] bs = fb.apply(size);
810 short[] rs = fr.apply(size);
811 boolean[] ms = fm.apply(size);
812
813 for (int ic = 0; ic < INVOC_COUNT; ic++) {
814 for (int i = 0; i < as.length; i++) {
815 short a = as[i];
816 short b = bs[i];
817 boolean m = ms[i % ms.length];
818 rs[i] = (m ? (short)(ROL_scalar(a, b)) : a);
819 }
820 }
821
822 bh.consume(rs);
823 }
824
825 @Benchmark
826 public void LSHRShiftConst(Blackhole bh) {
827 short[] as = fa.apply(size);
828 short[] bs = fb.apply(size);
829 short[] rs = fr.apply(size);
830
831 for (int ic = 0; ic < INVOC_COUNT; ic++) {
832 for (int i = 0; i < as.length; i++) {
833 short a = as[i];
834 short b = bs[i];
835 rs[i] = (short)(((a & 0xFFFF) >>> CONST_SHIFT));
836 }
837 }
838
839 bh.consume(rs);
840 }
841
842 @Benchmark
843 public void LSHRMaskedShiftConst(Blackhole bh) {
844 short[] as = fa.apply(size);
845 short[] bs = fb.apply(size);
846 short[] rs = fr.apply(size);
847 boolean[] ms = fm.apply(size);
848
849 for (int ic = 0; ic < INVOC_COUNT; ic++) {
850 for (int i = 0; i < as.length; i++) {
851 short a = as[i];
852 short b = bs[i];
853 boolean m = ms[i % ms.length];
854 rs[i] = (m ? (short)(((a & 0xFFFF) >>> CONST_SHIFT)) : a);
855 }
856 }
857
858 bh.consume(rs);
859 }
860
861 @Benchmark
862 public void LSHLShiftConst(Blackhole bh) {
863 short[] as = fa.apply(size);
864 short[] bs = fb.apply(size);
865 short[] rs = fr.apply(size);
866
867 for (int ic = 0; ic < INVOC_COUNT; ic++) {
868 for (int i = 0; i < as.length; i++) {
869 short a = as[i];
870 short b = bs[i];
871 rs[i] = (short)((a << CONST_SHIFT));
872 }
873 }
874
875 bh.consume(rs);
876 }
877
878 @Benchmark
879 public void LSHLMaskedShiftConst(Blackhole bh) {
880 short[] as = fa.apply(size);
881 short[] bs = fb.apply(size);
882 short[] rs = fr.apply(size);
883 boolean[] ms = fm.apply(size);
884
885 for (int ic = 0; ic < INVOC_COUNT; ic++) {
886 for (int i = 0; i < as.length; i++) {
887 short a = as[i];
888 short b = bs[i];
889 boolean m = ms[i % ms.length];
890 rs[i] = (m ? (short)((a << CONST_SHIFT)) : a);
891 }
892 }
893
894 bh.consume(rs);
895 }
896
897 @Benchmark
898 public void ASHRShiftConst(Blackhole bh) {
899 short[] as = fa.apply(size);
900 short[] bs = fb.apply(size);
901 short[] rs = fr.apply(size);
902
903 for (int ic = 0; ic < INVOC_COUNT; ic++) {
904 for (int i = 0; i < as.length; i++) {
905 short a = as[i];
906 short b = bs[i];
907 rs[i] = (short)((a >> CONST_SHIFT));
908 }
909 }
910
911 bh.consume(rs);
912 }
913
914 @Benchmark
915 public void ASHRMaskedShiftConst(Blackhole bh) {
916 short[] as = fa.apply(size);
917 short[] bs = fb.apply(size);
918 short[] rs = fr.apply(size);
919 boolean[] ms = fm.apply(size);
920
921 for (int ic = 0; ic < INVOC_COUNT; ic++) {
922 for (int i = 0; i < as.length; i++) {
923 short a = as[i];
924 short b = bs[i];
925 boolean m = ms[i % ms.length];
926 rs[i] = (m ? (short)((a >> CONST_SHIFT)) : a);
927 }
928 }
929
930 bh.consume(rs);
931 }
932
933 @Benchmark
934 public void RORShiftConst(Blackhole bh) {
935 short[] as = fa.apply(size);
936 short[] bs = fb.apply(size);
937 short[] rs = fr.apply(size);
938
939 for (int ic = 0; ic < INVOC_COUNT; ic++) {
940 for (int i = 0; i < as.length; i++) {
941 short a = as[i];
942 short b = bs[i];
943 rs[i] = (short)(ROR_scalar(a, CONST_SHIFT));
944 }
945 }
946
947 bh.consume(rs);
948 }
949
950 @Benchmark
951 public void RORMaskedShiftConst(Blackhole bh) {
952 short[] as = fa.apply(size);
953 short[] bs = fb.apply(size);
954 short[] rs = fr.apply(size);
955 boolean[] ms = fm.apply(size);
956
957 for (int ic = 0; ic < INVOC_COUNT; ic++) {
958 for (int i = 0; i < as.length; i++) {
959 short a = as[i];
960 short b = bs[i];
961 boolean m = ms[i % ms.length];
962 rs[i] = (m ? (short)(ROR_scalar(a, CONST_SHIFT)) : a);
963 }
964 }
965
966 bh.consume(rs);
967 }
968
969 @Benchmark
970 public void ROLShiftConst(Blackhole bh) {
971 short[] as = fa.apply(size);
972 short[] bs = fb.apply(size);
973 short[] rs = fr.apply(size);
974
975 for (int ic = 0; ic < INVOC_COUNT; ic++) {
976 for (int i = 0; i < as.length; i++) {
977 short a = as[i];
978 short b = bs[i];
979 rs[i] = (short)(ROL_scalar(a, CONST_SHIFT));
980 }
981 }
982
983 bh.consume(rs);
984 }
985
986 @Benchmark
987 public void ROLMaskedShiftConst(Blackhole bh) {
988 short[] as = fa.apply(size);
989 short[] bs = fb.apply(size);
990 short[] rs = fr.apply(size);
991 boolean[] ms = fm.apply(size);
992
993 for (int ic = 0; ic < INVOC_COUNT; ic++) {
994 for (int i = 0; i < as.length; i++) {
995 short a = as[i];
996 short b = bs[i];
997 boolean m = ms[i % ms.length];
998 rs[i] = (m ? (short)(ROL_scalar(a, CONST_SHIFT)) : a);
999 }
1000 }
1001
1002 bh.consume(rs);
1003 }
1004
1005 @Benchmark
1006 public void MIN(Blackhole bh) {
1007 short[] as = fa.apply(size);
1008 short[] bs = fb.apply(size);
1009 short[] rs = fr.apply(size);
1010
1011 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1012 for (int i = 0; i < as.length; i++) {
1013 short a = as[i];
1014 short b = bs[i];
1015 rs[i] = (short)(Math.min(a, b));
1016 }
1017 }
1018
1019 bh.consume(rs);
1020 }
1021
1022 @Benchmark
1023 public void MAX(Blackhole bh) {
1024 short[] as = fa.apply(size);
1025 short[] bs = fb.apply(size);
1026 short[] rs = fr.apply(size);
1027
1028 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1029 for (int i = 0; i < as.length; i++) {
1030 short a = as[i];
1031 short b = bs[i];
1032 rs[i] = (short)(Math.max(a, b));
1033 }
1034 }
1035
1036 bh.consume(rs);
1037 }
1038
1039 @Benchmark
1040 public void UMIN(Blackhole bh) {
1041 short[] as = fa.apply(size);
1042 short[] bs = fb.apply(size);
1043 short[] rs = fr.apply(size);
1044
1045 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1046 for (int i = 0; i < as.length; i++) {
1047 short a = as[i];
1048 short b = bs[i];
1049 rs[i] = (short)(VectorMath.minUnsigned(a, b));
1050 }
1051 }
1052
1053 bh.consume(rs);
1054 }
1055
1056 @Benchmark
1057 public void UMINMasked(Blackhole bh) {
1058 short[] as = fa.apply(size);
1059 short[] bs = fb.apply(size);
1060 short[] rs = fr.apply(size);
1061 boolean[] ms = fm.apply(size);
1062
1063 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1064 for (int i = 0; i < as.length; i++) {
1065 short a = as[i];
1066 short b = bs[i];
1067 if (ms[i % ms.length]) {
1068 rs[i] = (short)(VectorMath.minUnsigned(a, b));
1069 } else {
1070 rs[i] = a;
1071 }
1072 }
1073 }
1074 bh.consume(rs);
1075 }
1076
1077 @Benchmark
1078 public void UMAX(Blackhole bh) {
1079 short[] as = fa.apply(size);
1080 short[] bs = fb.apply(size);
1081 short[] rs = fr.apply(size);
1082
1083 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1084 for (int i = 0; i < as.length; i++) {
1085 short a = as[i];
1086 short b = bs[i];
1087 rs[i] = (short)(VectorMath.maxUnsigned(a, b));
1088 }
1089 }
1090
1091 bh.consume(rs);
1092 }
1093
1094 @Benchmark
1095 public void UMAXMasked(Blackhole bh) {
1096 short[] as = fa.apply(size);
1097 short[] bs = fb.apply(size);
1098 short[] rs = fr.apply(size);
1099 boolean[] ms = fm.apply(size);
1100
1101 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1102 for (int i = 0; i < as.length; i++) {
1103 short a = as[i];
1104 short b = bs[i];
1105 if (ms[i % ms.length]) {
1106 rs[i] = (short)(VectorMath.maxUnsigned(a, b));
1107 } else {
1108 rs[i] = a;
1109 }
1110 }
1111 }
1112 bh.consume(rs);
1113 }
1114
1115 @Benchmark
1116 public void ANDLanes(Blackhole bh) {
1117 short[] as = fa.apply(size);
1118 short r = -1;
1119 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1120 r = -1;
1121 for (int i = 0; i < as.length; i++) {
1122 r &= as[i];
1123 }
1124 }
1125 bh.consume(r);
1126 }
1127
1128 @Benchmark
1129 public void ANDMaskedLanes(Blackhole bh) {
1130 short[] as = fa.apply(size);
1131 boolean[] ms = fm.apply(size);
1132 short r = -1;
1133 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1134 r = -1;
1135 for (int i = 0; i < as.length; i++) {
1136 if (ms[i % ms.length])
1137 r &= as[i];
1138 }
1139 }
1140 bh.consume(r);
1141 }
1142
1143 @Benchmark
1144 public void ORLanes(Blackhole bh) {
1145 short[] as = fa.apply(size);
1146 short r = 0;
1147 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1148 r = 0;
1149 for (int i = 0; i < as.length; i++) {
1150 r |= as[i];
1151 }
1152 }
1153 bh.consume(r);
1154 }
1155
1156 @Benchmark
1157 public void ORMaskedLanes(Blackhole bh) {
1158 short[] as = fa.apply(size);
1159 boolean[] ms = fm.apply(size);
1160 short r = 0;
1161 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1162 r = 0;
1163 for (int i = 0; i < as.length; i++) {
1164 if (ms[i % ms.length])
1165 r |= as[i];
1166 }
1167 }
1168 bh.consume(r);
1169 }
1170
1171 @Benchmark
1172 public void XORLanes(Blackhole bh) {
1173 short[] as = fa.apply(size);
1174 short r = 0;
1175 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1176 r = 0;
1177 for (int i = 0; i < as.length; i++) {
1178 r ^= as[i];
1179 }
1180 }
1181 bh.consume(r);
1182 }
1183
1184 @Benchmark
1185 public void XORMaskedLanes(Blackhole bh) {
1186 short[] as = fa.apply(size);
1187 boolean[] ms = fm.apply(size);
1188 short r = 0;
1189 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1190 r = 0;
1191 for (int i = 0; i < as.length; i++) {
1192 if (ms[i % ms.length])
1193 r ^= as[i];
1194 }
1195 }
1196 bh.consume(r);
1197 }
1198
1199 @Benchmark
1200 public void ADDLanes(Blackhole bh) {
1201 short[] as = fa.apply(size);
1202 short r = 0;
1203 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1204 r = 0;
1205 for (int i = 0; i < as.length; i++) {
1206 r += as[i];
1207 }
1208 }
1209 bh.consume(r);
1210 }
1211
1212 @Benchmark
1213 public void ADDMaskedLanes(Blackhole bh) {
1214 short[] as = fa.apply(size);
1215 boolean[] ms = fm.apply(size);
1216 short r = 0;
1217 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1218 r = 0;
1219 for (int i = 0; i < as.length; i++) {
1220 if (ms[i % ms.length])
1221 r += as[i];
1222 }
1223 }
1224 bh.consume(r);
1225 }
1226
1227 @Benchmark
1228 public void MULLanes(Blackhole bh) {
1229 short[] as = fa.apply(size);
1230 short r = 1;
1231 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1232 r = 1;
1233 for (int i = 0; i < as.length; i++) {
1234 r *= as[i];
1235 }
1236 }
1237 bh.consume(r);
1238 }
1239
1240 @Benchmark
1241 public void MULMaskedLanes(Blackhole bh) {
1242 short[] as = fa.apply(size);
1243 boolean[] ms = fm.apply(size);
1244 short r = 1;
1245 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1246 r = 1;
1247 for (int i = 0; i < as.length; i++) {
1248 if (ms[i % ms.length])
1249 r *= as[i];
1250 }
1251 }
1252 bh.consume(r);
1253 }
1254
1255 @Benchmark
1256 public void anyTrue(Blackhole bh) {
1257 boolean[] ms = fm.apply(size);
1258 boolean r = false;
1259 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1260 r = false;
1261 for (int i = 0; i < ms.length; i++) {
1262 r |= ms[i];
1263 }
1264 }
1265 bh.consume(r);
1266 }
1267
1268 @Benchmark
1269 public void allTrue(Blackhole bh) {
1270 boolean[] ms = fm.apply(size);
1271 boolean r = true;
1272 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1273 r = true;
1274 for (int i = 0; i < ms.length; i++) {
1275 r &= ms[i];
1276 }
1277 }
1278 bh.consume(r);
1279 }
1280
1281 @Benchmark
1282 public void IS_DEFAULT(Blackhole bh) {
1283 short[] as = fa.apply(size);
1284 boolean r = true;
1285
1286 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1287 for (int i = 0; i < as.length; i++) {
1288 short a = as[i];
1289 r &= (bits(a)==0); // accumulate so JIT can't eliminate the computation
1290 }
1291 }
1292
1293 bh.consume(r);
1294 }
1295
1296 @Benchmark
1297 public void IS_NEGATIVE(Blackhole bh) {
1298 short[] as = fa.apply(size);
1299 boolean r = true;
1300
1301 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1302 for (int i = 0; i < as.length; i++) {
1303 short a = as[i];
1304 r &= (bits(a)<0); // accumulate so JIT can't eliminate the computation
1305 }
1306 }
1307
1308 bh.consume(r);
1309 }
1310
1311 @Benchmark
1312 public void LT(Blackhole bh) {
1313 short[] as = fa.apply(size);
1314 short[] bs = fb.apply(size);
1315 boolean r = true;
1316
1317 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1318 for (int i = 0; i < as.length; i++) {
1319 r &= lt(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1320 }
1321 }
1322
1323 bh.consume(r);
1324 }
1325
1326 @Benchmark
1327 public void GT(Blackhole bh) {
1328 short[] as = fa.apply(size);
1329 short[] bs = fb.apply(size);
1330 boolean r = true;
1331
1332 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1333 for (int i = 0; i < as.length; i++) {
1334 r &= gt(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1335 }
1336 }
1337
1338 bh.consume(r);
1339 }
1340
1341 @Benchmark
1342 public void EQ(Blackhole bh) {
1343 short[] as = fa.apply(size);
1344 short[] bs = fb.apply(size);
1345 boolean r = true;
1346
1347 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1348 for (int i = 0; i < as.length; i++) {
1349 r &= eq(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1350 }
1351 }
1352
1353 bh.consume(r);
1354 }
1355
1356 @Benchmark
1357 public void NE(Blackhole bh) {
1358 short[] as = fa.apply(size);
1359 short[] bs = fb.apply(size);
1360 boolean r = true;
1361
1362 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1363 for (int i = 0; i < as.length; i++) {
1364 r &= neq(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1365 }
1366 }
1367
1368 bh.consume(r);
1369 }
1370
1371 @Benchmark
1372 public void LE(Blackhole bh) {
1373 short[] as = fa.apply(size);
1374 short[] bs = fb.apply(size);
1375 boolean r = true;
1376
1377 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1378 for (int i = 0; i < as.length; i++) {
1379 r &= le(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1380 }
1381 }
1382
1383 bh.consume(r);
1384 }
1385
1386 @Benchmark
1387 public void GE(Blackhole bh) {
1388 short[] as = fa.apply(size);
1389 short[] bs = fb.apply(size);
1390 boolean r = true;
1391
1392 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1393 for (int i = 0; i < as.length; i++) {
1394 r &= ge(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1395 }
1396 }
1397
1398 bh.consume(r);
1399 }
1400
1401 @Benchmark
1402 public void ULT(Blackhole bh) {
1403 short[] as = fa.apply(size);
1404 short[] bs = fb.apply(size);
1405 boolean r = true;
1406
1407 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1408 for (int i = 0; i < as.length; i++) {
1409 r &= ult(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1410 }
1411 }
1412
1413 bh.consume(r);
1414 }
1415
1416 @Benchmark
1417 public void UGT(Blackhole bh) {
1418 short[] as = fa.apply(size);
1419 short[] bs = fb.apply(size);
1420 boolean r = true;
1421
1422 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1423 for (int i = 0; i < as.length; i++) {
1424 r &= ugt(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1425 }
1426 }
1427
1428 bh.consume(r);
1429 }
1430
1431 @Benchmark
1432 public void ULE(Blackhole bh) {
1433 short[] as = fa.apply(size);
1434 short[] bs = fb.apply(size);
1435 boolean r = true;
1436
1437 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1438 for (int i = 0; i < as.length; i++) {
1439 r &= ule(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1440 }
1441 }
1442
1443 bh.consume(r);
1444 }
1445
1446 @Benchmark
1447 public void UGE(Blackhole bh) {
1448 short[] as = fa.apply(size);
1449 short[] bs = fb.apply(size);
1450 boolean r = true;
1451
1452 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1453 for (int i = 0; i < as.length; i++) {
1454 r &= uge(as[i], bs[i]); // accumulate so JIT can't eliminate the computation
1455 }
1456 }
1457
1458 bh.consume(r);
1459 }
1460
1461 @Benchmark
1462 public void blend(Blackhole bh) {
1463 short[] as = fa.apply(size);
1464 short[] bs = fb.apply(size);
1465 short[] rs = fr.apply(size);
1466 boolean[] ms = fm.apply(size);
1467
1468 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1469 for (int i = 0; i < as.length; i++) {
1470 short a = as[i];
1471 short b = bs[i];
1472 boolean m = ms[i % ms.length];
1473 rs[i] = (m ? b : a);
1474 }
1475 }
1476
1477 bh.consume(rs);
1478 }
1479
1480 void rearrangeShared(int window, Blackhole bh) {
1481 short[] as = fa.apply(size);
1482 int[] order = fs.apply(size);
1483 short[] rs = fr.apply(size);
1484
1485 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1486 for (int i = 0; i < as.length; i += window) {
1487 for (int j = 0; j < window; j++) {
1488 short a = as[i+j];
1489 int pos = order[j];
1490 rs[i + pos] = a;
1491 }
1492 }
1493 }
1494
1495 bh.consume(rs);
1496 }
1497
1498 @Benchmark
1499 public void rearrange064(Blackhole bh) {
1500 int window = 64 / Short.SIZE;
1501 rearrangeShared(window, bh);
1502 }
1503
1504 @Benchmark
1505 public void rearrange128(Blackhole bh) {
1506 int window = 128 / Short.SIZE;
1507 rearrangeShared(window, bh);
1508 }
1509
1510 @Benchmark
1511 public void rearrange256(Blackhole bh) {
1512 int window = 256 / Short.SIZE;
1513 rearrangeShared(window, bh);
1514 }
1515
1516 @Benchmark
1517 public void rearrange512(Blackhole bh) {
1518 int window = 512 / Short.SIZE;
1519 rearrangeShared(window, bh);
1520 }
1521
1522 @Benchmark
1523 public void compressScalar(Blackhole bh) {
1524 short[] as = fa.apply(size);
1525 short[] rs = new short[size];
1526 boolean[] im = fmt.apply(size);
1527
1528 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1529 for (int i = 0, j = 0; i < as.length; i++) {
1530 if (im[i]) {
1531 rs[j++] = as[i];
1532 }
1533 }
1534 }
1535
1536 bh.consume(rs);
1537 }
1538
1539 @Benchmark
1540 public void expandScalar(Blackhole bh) {
1541 short[] as = fa.apply(size);
1542 short[] rs = new short[size];
1543 boolean[] im = fmt.apply(size);
1544
1545 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1546 for (int i = 0, j = 0; i < as.length; i++) {
1547 if (im[i]) {
1548 rs[i++] = as[j++];
1549 }
1550 }
1551 }
1552
1553 bh.consume(rs);
1554 }
1555
1556 @Benchmark
1557 public void maskCompressScalar(Blackhole bh) {
1558 boolean[] im = fmt.apply(size);
1559 boolean[] rm = new boolean[size];
1560
1561 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1562 for (int i = 0, j = 0; i < im.length; i++) {
1563 if (im[i]) {
1564 rm[j++] = im[i];
1565 }
1566 }
1567 }
1568
1569 bh.consume(rm);
1570 }
1571
1572 void broadcastShared(int window, Blackhole bh) {
1573 short[] as = fa.apply(size);
1574 short[] rs = fr.apply(size);
1575
1576 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1577 for (int i = 0; i < as.length; i += window) {
1578 int idx = i;
1579 for (int j = 0; j < window; j++) {
1580 rs[j] = as[idx];
1581 }
1582 }
1583 }
1584
1585 bh.consume(rs);
1586 }
1587
1588 @Benchmark
1589 public void broadcast064(Blackhole bh) {
1590 int window = 64 / Short.SIZE;
1591 broadcastShared(window, bh);
1592 }
1593
1594 @Benchmark
1595 public void broadcast128(Blackhole bh) {
1596 int window = 128 / Short.SIZE;
1597 broadcastShared(window, bh);
1598 }
1599
1600 @Benchmark
1601 public void broadcast256(Blackhole bh) {
1602 int window = 256 / Short.SIZE;
1603 broadcastShared(window, bh);
1604 }
1605
1606 @Benchmark
1607 public void broadcast512(Blackhole bh) {
1608 int window = 512 / Short.SIZE;
1609 broadcastShared(window, bh);
1610 }
1611
1612 @Benchmark
1613 public void zero(Blackhole bh) {
1614 short[] as = fa.apply(size);
1615
1616 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1617 for (int i = 0; i < as.length; i++) {
1618 as[i] = (short)0;
1619 }
1620 }
1621
1622 bh.consume(as);
1623 }
1624
1625 @Benchmark
1626 public void BITWISE_BLEND(Blackhole bh) {
1627 short[] as = fa.apply(size);
1628 short[] bs = fb.apply(size);
1629 short[] cs = fc.apply(size);
1630 short[] rs = fr.apply(size);
1631
1632 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1633 for (int i = 0; i < as.length; i++) {
1634 short a = as[i];
1635 short b = bs[i];
1636 short c = cs[i];
1637 rs[i] = (short)((a&~(c))|(b&c));
1638 }
1639 }
1640
1641 bh.consume(rs);
1642 }
1643
1644 @Benchmark
1645 public void BITWISE_BLENDMasked(Blackhole bh) {
1646 short[] as = fa.apply(size);
1647 short[] bs = fb.apply(size);
1648 short[] cs = fc.apply(size);
1649 short[] rs = fr.apply(size);
1650 boolean[] ms = fm.apply(size);
1651
1652 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1653 for (int i = 0; i < as.length; i++) {
1654 short a = as[i];
1655 short b = bs[i];
1656 short c = cs[i];
1657 if (ms[i % ms.length]) {
1658 rs[i] = (short)((a&~(c))|(b&c));
1659 } else {
1660 rs[i] = a;
1661 }
1662 }
1663 }
1664 bh.consume(rs);
1665 }
1666 @Benchmark
1667 public void NEG(Blackhole bh) {
1668 short[] as = fa.apply(size);
1669 short[] rs = fr.apply(size);
1670
1671 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1672 for (int i = 0; i < as.length; i++) {
1673 short a = as[i];
1674 rs[i] = (short)(-((short)a));
1675 }
1676 }
1677
1678 bh.consume(rs);
1679 }
1680
1681 @Benchmark
1682 public void NEGMasked(Blackhole bh) {
1683 short[] as = fa.apply(size);
1684 short[] rs = fr.apply(size);
1685 boolean[] ms = fm.apply(size);
1686
1687 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1688 for (int i = 0; i < as.length; i++) {
1689 short a = as[i];
1690 boolean m = ms[i % ms.length];
1691 rs[i] = (m ? (short)(-((short)a)) : a);
1692 }
1693 }
1694
1695 bh.consume(rs);
1696 }
1697 @Benchmark
1698 public void ABS(Blackhole bh) {
1699 short[] as = fa.apply(size);
1700 short[] rs = fr.apply(size);
1701
1702 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1703 for (int i = 0; i < as.length; i++) {
1704 short a = as[i];
1705 rs[i] = (short)(Math.abs((short)a));
1706 }
1707 }
1708
1709 bh.consume(rs);
1710 }
1711
1712 @Benchmark
1713 public void ABSMasked(Blackhole bh) {
1714 short[] as = fa.apply(size);
1715 short[] rs = fr.apply(size);
1716 boolean[] ms = fm.apply(size);
1717
1718 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1719 for (int i = 0; i < as.length; i++) {
1720 short a = as[i];
1721 boolean m = ms[i % ms.length];
1722 rs[i] = (m ? (short)(Math.abs((short)a)) : a);
1723 }
1724 }
1725
1726 bh.consume(rs);
1727 }
1728 @Benchmark
1729 public void NOT(Blackhole bh) {
1730 short[] as = fa.apply(size);
1731 short[] rs = fr.apply(size);
1732
1733 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1734 for (int i = 0; i < as.length; i++) {
1735 short a = as[i];
1736 rs[i] = (short)(~((short)a));
1737 }
1738 }
1739
1740 bh.consume(rs);
1741 }
1742
1743 @Benchmark
1744 public void NOTMasked(Blackhole bh) {
1745 short[] as = fa.apply(size);
1746 short[] rs = fr.apply(size);
1747 boolean[] ms = fm.apply(size);
1748
1749 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1750 for (int i = 0; i < as.length; i++) {
1751 short a = as[i];
1752 boolean m = ms[i % ms.length];
1753 rs[i] = (m ? (short)(~((short)a)) : a);
1754 }
1755 }
1756
1757 bh.consume(rs);
1758 }
1759 @Benchmark
1760 public void ZOMO(Blackhole bh) {
1761 short[] as = fa.apply(size);
1762 short[] rs = fr.apply(size);
1763
1764 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1765 for (int i = 0; i < as.length; i++) {
1766 short a = as[i];
1767 rs[i] = (short)((a==0?0:-1));
1768 }
1769 }
1770
1771 bh.consume(rs);
1772 }
1773
1774 @Benchmark
1775 public void ZOMOMasked(Blackhole bh) {
1776 short[] as = fa.apply(size);
1777 short[] rs = fr.apply(size);
1778 boolean[] ms = fm.apply(size);
1779
1780 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1781 for (int i = 0; i < as.length; i++) {
1782 short a = as[i];
1783 boolean m = ms[i % ms.length];
1784 rs[i] = (m ? (short)((a==0?0:-1)) : a);
1785 }
1786 }
1787
1788 bh.consume(rs);
1789 }
1790 @Benchmark
1791 public void BIT_COUNT(Blackhole bh) {
1792 short[] as = fa.apply(size);
1793 short[] rs = fr.apply(size);
1794
1795 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1796 for (int i = 0; i < as.length; i++) {
1797 short a = as[i];
1798 rs[i] = (short)(Integer.bitCount((int)a & 0xFFFF));
1799 }
1800 }
1801
1802 bh.consume(rs);
1803 }
1804
1805 @Benchmark
1806 public void BIT_COUNTMasked(Blackhole bh) {
1807 short[] as = fa.apply(size);
1808 short[] rs = fr.apply(size);
1809 boolean[] ms = fm.apply(size);
1810
1811 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1812 for (int i = 0; i < as.length; i++) {
1813 short a = as[i];
1814 boolean m = ms[i % ms.length];
1815 rs[i] = (m ? (short)(Integer.bitCount((int)a & 0xFFFF)) : a);
1816 }
1817 }
1818
1819 bh.consume(rs);
1820 }
1821 @Benchmark
1822 public void TRAILING_ZEROS_COUNT(Blackhole bh) {
1823 short[] as = fa.apply(size);
1824 short[] rs = fr.apply(size);
1825
1826 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1827 for (int i = 0; i < as.length; i++) {
1828 short a = as[i];
1829 rs[i] = (short)(TRAILING_ZEROS_COUNT_scalar(a));
1830 }
1831 }
1832
1833 bh.consume(rs);
1834 }
1835
1836 @Benchmark
1837 public void TRAILING_ZEROS_COUNTMasked(Blackhole bh) {
1838 short[] as = fa.apply(size);
1839 short[] rs = fr.apply(size);
1840 boolean[] ms = fm.apply(size);
1841
1842 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1843 for (int i = 0; i < as.length; i++) {
1844 short a = as[i];
1845 boolean m = ms[i % ms.length];
1846 rs[i] = (m ? (short)(TRAILING_ZEROS_COUNT_scalar(a)) : a);
1847 }
1848 }
1849
1850 bh.consume(rs);
1851 }
1852 @Benchmark
1853 public void LEADING_ZEROS_COUNT(Blackhole bh) {
1854 short[] as = fa.apply(size);
1855 short[] rs = fr.apply(size);
1856
1857 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1858 for (int i = 0; i < as.length; i++) {
1859 short a = as[i];
1860 rs[i] = (short)(LEADING_ZEROS_COUNT_scalar(a));
1861 }
1862 }
1863
1864 bh.consume(rs);
1865 }
1866
1867 @Benchmark
1868 public void LEADING_ZEROS_COUNTMasked(Blackhole bh) {
1869 short[] as = fa.apply(size);
1870 short[] rs = fr.apply(size);
1871 boolean[] ms = fm.apply(size);
1872
1873 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1874 for (int i = 0; i < as.length; i++) {
1875 short a = as[i];
1876 boolean m = ms[i % ms.length];
1877 rs[i] = (m ? (short)(LEADING_ZEROS_COUNT_scalar(a)) : a);
1878 }
1879 }
1880
1881 bh.consume(rs);
1882 }
1883 @Benchmark
1884 public void REVERSE(Blackhole bh) {
1885 short[] as = fa.apply(size);
1886 short[] rs = fr.apply(size);
1887
1888 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1889 for (int i = 0; i < as.length; i++) {
1890 short a = as[i];
1891 rs[i] = (short)(REVERSE_scalar(a));
1892 }
1893 }
1894
1895 bh.consume(rs);
1896 }
1897
1898 @Benchmark
1899 public void REVERSEMasked(Blackhole bh) {
1900 short[] as = fa.apply(size);
1901 short[] rs = fr.apply(size);
1902 boolean[] ms = fm.apply(size);
1903
1904 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1905 for (int i = 0; i < as.length; i++) {
1906 short a = as[i];
1907 boolean m = ms[i % ms.length];
1908 rs[i] = (m ? (short)(REVERSE_scalar(a)) : a);
1909 }
1910 }
1911
1912 bh.consume(rs);
1913 }
1914 @Benchmark
1915 public void REVERSE_BYTES(Blackhole bh) {
1916 short[] as = fa.apply(size);
1917 short[] rs = fr.apply(size);
1918
1919 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1920 for (int i = 0; i < as.length; i++) {
1921 short a = as[i];
1922 rs[i] = (short)(Short.reverseBytes(a));
1923 }
1924 }
1925
1926 bh.consume(rs);
1927 }
1928
1929 @Benchmark
1930 public void REVERSE_BYTESMasked(Blackhole bh) {
1931 short[] as = fa.apply(size);
1932 short[] rs = fr.apply(size);
1933 boolean[] ms = fm.apply(size);
1934
1935 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1936 for (int i = 0; i < as.length; i++) {
1937 short a = as[i];
1938 boolean m = ms[i % ms.length];
1939 rs[i] = (m ? (short)(Short.reverseBytes(a)) : a);
1940 }
1941 }
1942
1943 bh.consume(rs);
1944 }
1945 }