1 /*
2 * Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 */
23
24 package org.openjdk.bench.jdk.incubator.vector.operation;
25
26 // -- This file was mechanically generated: Do not edit! -- //
27
28 import jdk.incubator.vector.Vector;
29 import jdk.incubator.vector.VectorMask;
30 import jdk.incubator.vector.VectorMath;
31 import jdk.incubator.vector.VectorOperators;
32 import jdk.incubator.vector.VectorShape;
33 import jdk.incubator.vector.VectorSpecies;
34 import jdk.incubator.vector.VectorShuffle;
35 import jdk.incubator.vector.ByteVector;
36
37 import java.util.concurrent.TimeUnit;
38 import java.util.function.BiFunction;
39 import java.util.function.IntFunction;
40
41 import org.openjdk.jmh.annotations.*;
42 import org.openjdk.jmh.infra.Blackhole;
43
44 @BenchmarkMode(Mode.Throughput)
45 @OutputTimeUnit(TimeUnit.MILLISECONDS)
46 @State(Scope.Benchmark)
47 @Warmup(iterations = 3, time = 1)
48 @Measurement(iterations = 5, time = 1)
49 @Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
50 public class Byte128Vector extends AbstractVectorBenchmark {
// Vector species used by every benchmark in this class for loads, stores and masks.
static final VectorSpecies<Byte> SPECIES = ByteVector.SPECIES_128;

// Trip count of the outer "ic" loop that wraps each benchmark kernel.
static final int INVOC_COUNT = 1; // get rid of outer loop

// Vector with the value 10 broadcast to its lanes; second operand of the *_MEM benchmarks.
static ByteVector bcast_vec = ByteVector.broadcast(SPECIES, (byte)10);
56
57 static void replaceZero(byte[] a, byte v) {
58 for (int i = 0; i < a.length; i++) {
59 if (a[i] == 0) {
60 a[i] = v;
61 }
62 }
63 }
64
65 static void replaceZero(byte[] a, boolean[] mask, byte v) {
66 for (int i = 0; i < a.length; i++) {
67 if (mask[i % mask.length] && a[i] == 0) {
68 a[i] = v;
69 }
70 }
71 }
72
73 static byte firstNonZero(byte a, byte b) {
74 return Byte.compare(a, (byte) 0) != 0 ? a : b;
75 }
76
// Shift/rotate count used by the *ShiftConst benchmarks: half the bit width of a byte.
private static final byte CONST_SHIFT = Byte.SIZE / 2;

// Requested element count of each benchmark array; init() may adjust it before allocating.
@Param("1024")
int size;
81
82 byte[] fill(IntFunction<Byte> f) {
83 byte[] array = new byte[size];
84 for (int i = 0; i < array.length; i++) {
85 array[i] = f.apply(i);
86 }
87 return array;
88 }
89
// Byte operand arrays (a, b, c) and the result buffer (r); all assigned in init().
byte[] a, b, c, r;
// Boolean mask arrays assigned in init() via fillMask: m from (i % 2) == 0, mt from true, rm from false.
boolean[] m, mt, rm;
// Int array assigned in init() via fillInt from RAND.nextInt(SPECIES.length()).
int[] s;
93
94 @Setup
95 public void init() {
96 size += size % SPECIES.length(); // FIXME: add post-loops
97
98 a = fill(i -> (byte)(2*i));
99 b = fill(i -> (byte)(i+1));
100 c = fill(i -> (byte)(i+5));
101 r = fill(i -> (byte)0);
102
103 m = fillMask(size, i -> (i % 2) == 0);
104 mt = fillMask(size, i -> true);
105 rm = fillMask(size, i -> false);
106
107 s = fillInt(size, i -> RAND.nextInt(SPECIES.length()));
108 }
109
// Accessors used by the benchmark bodies. Each one ignores its argument(s) and
// simply returns the current value of the corresponding array field.
final IntFunction<byte[]> fa = vl -> a;
final IntFunction<byte[]> fb = vl -> b;
final IntFunction<byte[]> fc = vl -> c;
final IntFunction<byte[]> fr = vl -> r;
final IntFunction<boolean[]> fm = vl -> m;
final IntFunction<boolean[]> fmt = vl -> mt;
final IntFunction<boolean[]> fmr = vl -> rm;
final BiFunction<Integer,Integer,int[]> fs = (i,j) -> s;
118
119
120 @Benchmark
121 public void ADD(Blackhole bh) {
122 byte[] a = fa.apply(SPECIES.length());
123 byte[] b = fb.apply(SPECIES.length());
124 byte[] r = fr.apply(SPECIES.length());
125
126 for (int ic = 0; ic < INVOC_COUNT; ic++) {
127 for (int i = 0; i < a.length; i += SPECIES.length()) {
128 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
129 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
130 av.lanewise(VectorOperators.ADD, bv).intoArray(r, i);
131 }
132 }
133
134 bh.consume(r);
135 }
136
137 @Benchmark
138 public void ADDMasked(Blackhole bh) {
139 byte[] a = fa.apply(SPECIES.length());
140 byte[] b = fb.apply(SPECIES.length());
141 byte[] r = fr.apply(SPECIES.length());
142 boolean[] mask = fm.apply(SPECIES.length());
143 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
144
145 for (int ic = 0; ic < INVOC_COUNT; ic++) {
146 for (int i = 0; i < a.length; i += SPECIES.length()) {
147 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
148 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
149 av.lanewise(VectorOperators.ADD, bv, vmask).intoArray(r, i);
150 }
151 }
152
153 bh.consume(r);
154 }
155
156 @Benchmark
157 public void SUB(Blackhole bh) {
158 byte[] a = fa.apply(SPECIES.length());
159 byte[] b = fb.apply(SPECIES.length());
160 byte[] r = fr.apply(SPECIES.length());
161
162 for (int ic = 0; ic < INVOC_COUNT; ic++) {
163 for (int i = 0; i < a.length; i += SPECIES.length()) {
164 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
165 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
166 av.lanewise(VectorOperators.SUB, bv).intoArray(r, i);
167 }
168 }
169
170 bh.consume(r);
171 }
172
173 @Benchmark
174 public void SUBMasked(Blackhole bh) {
175 byte[] a = fa.apply(SPECIES.length());
176 byte[] b = fb.apply(SPECIES.length());
177 byte[] r = fr.apply(SPECIES.length());
178 boolean[] mask = fm.apply(SPECIES.length());
179 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
180
181 for (int ic = 0; ic < INVOC_COUNT; ic++) {
182 for (int i = 0; i < a.length; i += SPECIES.length()) {
183 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
184 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
185 av.lanewise(VectorOperators.SUB, bv, vmask).intoArray(r, i);
186 }
187 }
188
189 bh.consume(r);
190 }
191
192 @Benchmark
193 public void MUL(Blackhole bh) {
194 byte[] a = fa.apply(SPECIES.length());
195 byte[] b = fb.apply(SPECIES.length());
196 byte[] r = fr.apply(SPECIES.length());
197
198 for (int ic = 0; ic < INVOC_COUNT; ic++) {
199 for (int i = 0; i < a.length; i += SPECIES.length()) {
200 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
201 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
202 av.lanewise(VectorOperators.MUL, bv).intoArray(r, i);
203 }
204 }
205
206 bh.consume(r);
207 }
208
209 @Benchmark
210 public void MULMasked(Blackhole bh) {
211 byte[] a = fa.apply(SPECIES.length());
212 byte[] b = fb.apply(SPECIES.length());
213 byte[] r = fr.apply(SPECIES.length());
214 boolean[] mask = fm.apply(SPECIES.length());
215 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
216
217 for (int ic = 0; ic < INVOC_COUNT; ic++) {
218 for (int i = 0; i < a.length; i += SPECIES.length()) {
219 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
220 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
221 av.lanewise(VectorOperators.MUL, bv, vmask).intoArray(r, i);
222 }
223 }
224
225 bh.consume(r);
226 }
227
228 @Benchmark
229 public void DIV(Blackhole bh) {
230 byte[] a = fa.apply(SPECIES.length());
231 byte[] b = fb.apply(SPECIES.length());
232 byte[] r = fr.apply(SPECIES.length());
233
234 replaceZero(b, (byte) 1);
235
236 for (int ic = 0; ic < INVOC_COUNT; ic++) {
237 for (int i = 0; i < a.length; i += SPECIES.length()) {
238 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
239 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
240 av.lanewise(VectorOperators.DIV, bv).intoArray(r, i);
241 }
242 }
243
244 bh.consume(r);
245 }
246
247 @Benchmark
248 public void DIVMasked(Blackhole bh) {
249 byte[] a = fa.apply(SPECIES.length());
250 byte[] b = fb.apply(SPECIES.length());
251 byte[] r = fr.apply(SPECIES.length());
252 boolean[] mask = fm.apply(SPECIES.length());
253 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
254
255 replaceZero(b, mask, (byte) 1);
256
257 for (int ic = 0; ic < INVOC_COUNT; ic++) {
258 for (int i = 0; i < a.length; i += SPECIES.length()) {
259 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
260 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
261 av.lanewise(VectorOperators.DIV, bv, vmask).intoArray(r, i);
262 }
263 }
264
265 bh.consume(r);
266 }
267
268 @Benchmark
269 public void FIRST_NONZERO(Blackhole bh) {
270 byte[] a = fa.apply(SPECIES.length());
271 byte[] b = fb.apply(SPECIES.length());
272 byte[] r = fr.apply(SPECIES.length());
273
274 for (int ic = 0; ic < INVOC_COUNT; ic++) {
275 for (int i = 0; i < a.length; i += SPECIES.length()) {
276 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
277 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
278 av.lanewise(VectorOperators.FIRST_NONZERO, bv).intoArray(r, i);
279 }
280 }
281
282 bh.consume(r);
283 }
284
285 @Benchmark
286 public void FIRST_NONZEROMasked(Blackhole bh) {
287 byte[] a = fa.apply(SPECIES.length());
288 byte[] b = fb.apply(SPECIES.length());
289 byte[] r = fr.apply(SPECIES.length());
290 boolean[] mask = fm.apply(SPECIES.length());
291 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
292
293 for (int ic = 0; ic < INVOC_COUNT; ic++) {
294 for (int i = 0; i < a.length; i += SPECIES.length()) {
295 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
296 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
297 av.lanewise(VectorOperators.FIRST_NONZERO, bv, vmask).intoArray(r, i);
298 }
299 }
300
301 bh.consume(r);
302 }
303
304 @Benchmark
305 public void AND(Blackhole bh) {
306 byte[] a = fa.apply(SPECIES.length());
307 byte[] b = fb.apply(SPECIES.length());
308 byte[] r = fr.apply(SPECIES.length());
309
310 for (int ic = 0; ic < INVOC_COUNT; ic++) {
311 for (int i = 0; i < a.length; i += SPECIES.length()) {
312 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
313 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
314 av.lanewise(VectorOperators.AND, bv).intoArray(r, i);
315 }
316 }
317
318 bh.consume(r);
319 }
320
321 @Benchmark
322 public void ANDMasked(Blackhole bh) {
323 byte[] a = fa.apply(SPECIES.length());
324 byte[] b = fb.apply(SPECIES.length());
325 byte[] r = fr.apply(SPECIES.length());
326 boolean[] mask = fm.apply(SPECIES.length());
327 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
328
329 for (int ic = 0; ic < INVOC_COUNT; ic++) {
330 for (int i = 0; i < a.length; i += SPECIES.length()) {
331 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
332 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
333 av.lanewise(VectorOperators.AND, bv, vmask).intoArray(r, i);
334 }
335 }
336
337 bh.consume(r);
338 }
339
340 @Benchmark
341 public void AND_NOT(Blackhole bh) {
342 byte[] a = fa.apply(SPECIES.length());
343 byte[] b = fb.apply(SPECIES.length());
344 byte[] r = fr.apply(SPECIES.length());
345
346 for (int ic = 0; ic < INVOC_COUNT; ic++) {
347 for (int i = 0; i < a.length; i += SPECIES.length()) {
348 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
349 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
350 av.lanewise(VectorOperators.AND_NOT, bv).intoArray(r, i);
351 }
352 }
353
354 bh.consume(r);
355 }
356
357 @Benchmark
358 public void AND_NOTMasked(Blackhole bh) {
359 byte[] a = fa.apply(SPECIES.length());
360 byte[] b = fb.apply(SPECIES.length());
361 byte[] r = fr.apply(SPECIES.length());
362 boolean[] mask = fm.apply(SPECIES.length());
363 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
364
365 for (int ic = 0; ic < INVOC_COUNT; ic++) {
366 for (int i = 0; i < a.length; i += SPECIES.length()) {
367 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
368 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
369 av.lanewise(VectorOperators.AND_NOT, bv, vmask).intoArray(r, i);
370 }
371 }
372
373 bh.consume(r);
374 }
375
376 @Benchmark
377 public void OR(Blackhole bh) {
378 byte[] a = fa.apply(SPECIES.length());
379 byte[] b = fb.apply(SPECIES.length());
380 byte[] r = fr.apply(SPECIES.length());
381
382 for (int ic = 0; ic < INVOC_COUNT; ic++) {
383 for (int i = 0; i < a.length; i += SPECIES.length()) {
384 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
385 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
386 av.lanewise(VectorOperators.OR, bv).intoArray(r, i);
387 }
388 }
389
390 bh.consume(r);
391 }
392
393 @Benchmark
394 public void ORMasked(Blackhole bh) {
395 byte[] a = fa.apply(SPECIES.length());
396 byte[] b = fb.apply(SPECIES.length());
397 byte[] r = fr.apply(SPECIES.length());
398 boolean[] mask = fm.apply(SPECIES.length());
399 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
400
401 for (int ic = 0; ic < INVOC_COUNT; ic++) {
402 for (int i = 0; i < a.length; i += SPECIES.length()) {
403 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
404 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
405 av.lanewise(VectorOperators.OR, bv, vmask).intoArray(r, i);
406 }
407 }
408
409 bh.consume(r);
410 }
411
412 @Benchmark
413 public void XOR(Blackhole bh) {
414 byte[] a = fa.apply(SPECIES.length());
415 byte[] b = fb.apply(SPECIES.length());
416 byte[] r = fr.apply(SPECIES.length());
417
418 for (int ic = 0; ic < INVOC_COUNT; ic++) {
419 for (int i = 0; i < a.length; i += SPECIES.length()) {
420 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
421 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
422 av.lanewise(VectorOperators.XOR, bv).intoArray(r, i);
423 }
424 }
425
426 bh.consume(r);
427 }
428
429 @Benchmark
430 public void XORMasked(Blackhole bh) {
431 byte[] a = fa.apply(SPECIES.length());
432 byte[] b = fb.apply(SPECIES.length());
433 byte[] r = fr.apply(SPECIES.length());
434 boolean[] mask = fm.apply(SPECIES.length());
435 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
436
437 for (int ic = 0; ic < INVOC_COUNT; ic++) {
438 for (int i = 0; i < a.length; i += SPECIES.length()) {
439 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
440 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
441 av.lanewise(VectorOperators.XOR, bv, vmask).intoArray(r, i);
442 }
443 }
444
445 bh.consume(r);
446 }
447
448 @Benchmark
449 public void LSHL(Blackhole bh) {
450 byte[] a = fa.apply(SPECIES.length());
451 byte[] b = fb.apply(SPECIES.length());
452 byte[] r = fr.apply(SPECIES.length());
453
454 for (int ic = 0; ic < INVOC_COUNT; ic++) {
455 for (int i = 0; i < a.length; i += SPECIES.length()) {
456 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
457 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
458 av.lanewise(VectorOperators.LSHL, bv).intoArray(r, i);
459 }
460 }
461
462 bh.consume(r);
463 }
464
465 @Benchmark
466 public void LSHLMasked(Blackhole bh) {
467 byte[] a = fa.apply(SPECIES.length());
468 byte[] b = fb.apply(SPECIES.length());
469 byte[] r = fr.apply(SPECIES.length());
470 boolean[] mask = fm.apply(SPECIES.length());
471 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
472
473 for (int ic = 0; ic < INVOC_COUNT; ic++) {
474 for (int i = 0; i < a.length; i += SPECIES.length()) {
475 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
476 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
477 av.lanewise(VectorOperators.LSHL, bv, vmask).intoArray(r, i);
478 }
479 }
480
481 bh.consume(r);
482 }
483
484 @Benchmark
485 public void ASHR(Blackhole bh) {
486 byte[] a = fa.apply(SPECIES.length());
487 byte[] b = fb.apply(SPECIES.length());
488 byte[] r = fr.apply(SPECIES.length());
489
490 for (int ic = 0; ic < INVOC_COUNT; ic++) {
491 for (int i = 0; i < a.length; i += SPECIES.length()) {
492 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
493 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
494 av.lanewise(VectorOperators.ASHR, bv).intoArray(r, i);
495 }
496 }
497
498 bh.consume(r);
499 }
500
501 @Benchmark
502 public void ASHRMasked(Blackhole bh) {
503 byte[] a = fa.apply(SPECIES.length());
504 byte[] b = fb.apply(SPECIES.length());
505 byte[] r = fr.apply(SPECIES.length());
506 boolean[] mask = fm.apply(SPECIES.length());
507 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
508
509 for (int ic = 0; ic < INVOC_COUNT; ic++) {
510 for (int i = 0; i < a.length; i += SPECIES.length()) {
511 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
512 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
513 av.lanewise(VectorOperators.ASHR, bv, vmask).intoArray(r, i);
514 }
515 }
516
517 bh.consume(r);
518 }
519
520 @Benchmark
521 public void LSHR(Blackhole bh) {
522 byte[] a = fa.apply(SPECIES.length());
523 byte[] b = fb.apply(SPECIES.length());
524 byte[] r = fr.apply(SPECIES.length());
525
526 for (int ic = 0; ic < INVOC_COUNT; ic++) {
527 for (int i = 0; i < a.length; i += SPECIES.length()) {
528 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
529 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
530 av.lanewise(VectorOperators.LSHR, bv).intoArray(r, i);
531 }
532 }
533
534 bh.consume(r);
535 }
536
537 @Benchmark
538 public void LSHRMasked(Blackhole bh) {
539 byte[] a = fa.apply(SPECIES.length());
540 byte[] b = fb.apply(SPECIES.length());
541 byte[] r = fr.apply(SPECIES.length());
542 boolean[] mask = fm.apply(SPECIES.length());
543 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
544
545 for (int ic = 0; ic < INVOC_COUNT; ic++) {
546 for (int i = 0; i < a.length; i += SPECIES.length()) {
547 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
548 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
549 av.lanewise(VectorOperators.LSHR, bv, vmask).intoArray(r, i);
550 }
551 }
552
553 bh.consume(r);
554 }
555
556 @Benchmark
557 public void LSHLShift(Blackhole bh) {
558 byte[] a = fa.apply(SPECIES.length());
559 byte[] b = fb.apply(SPECIES.length());
560 byte[] r = fr.apply(SPECIES.length());
561
562 for (int ic = 0; ic < INVOC_COUNT; ic++) {
563 for (int i = 0; i < a.length; i += SPECIES.length()) {
564 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
565 av.lanewise(VectorOperators.LSHL, (int)b[i]).intoArray(r, i);
566 }
567 }
568
569 bh.consume(r);
570 }
571
572 @Benchmark
573 public void LSHLMaskedShift(Blackhole bh) {
574 byte[] a = fa.apply(SPECIES.length());
575 byte[] b = fb.apply(SPECIES.length());
576 byte[] r = fr.apply(SPECIES.length());
577 boolean[] mask = fm.apply(SPECIES.length());
578 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
579
580 for (int ic = 0; ic < INVOC_COUNT; ic++) {
581 for (int i = 0; i < a.length; i += SPECIES.length()) {
582 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
583 av.lanewise(VectorOperators.LSHL, (int)b[i], vmask).intoArray(r, i);
584 }
585 }
586
587 bh.consume(r);
588 }
589
590 @Benchmark
591 public void LSHRShift(Blackhole bh) {
592 byte[] a = fa.apply(SPECIES.length());
593 byte[] b = fb.apply(SPECIES.length());
594 byte[] r = fr.apply(SPECIES.length());
595
596 for (int ic = 0; ic < INVOC_COUNT; ic++) {
597 for (int i = 0; i < a.length; i += SPECIES.length()) {
598 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
599 av.lanewise(VectorOperators.LSHR, (int)b[i]).intoArray(r, i);
600 }
601 }
602
603 bh.consume(r);
604 }
605
606 @Benchmark
607 public void LSHRMaskedShift(Blackhole bh) {
608 byte[] a = fa.apply(SPECIES.length());
609 byte[] b = fb.apply(SPECIES.length());
610 byte[] r = fr.apply(SPECIES.length());
611 boolean[] mask = fm.apply(SPECIES.length());
612 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
613
614 for (int ic = 0; ic < INVOC_COUNT; ic++) {
615 for (int i = 0; i < a.length; i += SPECIES.length()) {
616 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
617 av.lanewise(VectorOperators.LSHR, (int)b[i], vmask).intoArray(r, i);
618 }
619 }
620
621 bh.consume(r);
622 }
623
624 @Benchmark
625 public void ASHRShift(Blackhole bh) {
626 byte[] a = fa.apply(SPECIES.length());
627 byte[] b = fb.apply(SPECIES.length());
628 byte[] r = fr.apply(SPECIES.length());
629
630 for (int ic = 0; ic < INVOC_COUNT; ic++) {
631 for (int i = 0; i < a.length; i += SPECIES.length()) {
632 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
633 av.lanewise(VectorOperators.ASHR, (int)b[i]).intoArray(r, i);
634 }
635 }
636
637 bh.consume(r);
638 }
639
640 @Benchmark
641 public void ASHRMaskedShift(Blackhole bh) {
642 byte[] a = fa.apply(SPECIES.length());
643 byte[] b = fb.apply(SPECIES.length());
644 byte[] r = fr.apply(SPECIES.length());
645 boolean[] mask = fm.apply(SPECIES.length());
646 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
647
648 for (int ic = 0; ic < INVOC_COUNT; ic++) {
649 for (int i = 0; i < a.length; i += SPECIES.length()) {
650 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
651 av.lanewise(VectorOperators.ASHR, (int)b[i], vmask).intoArray(r, i);
652 }
653 }
654
655 bh.consume(r);
656 }
657
658 @Benchmark
659 public void ROR(Blackhole bh) {
660 byte[] a = fa.apply(SPECIES.length());
661 byte[] b = fb.apply(SPECIES.length());
662 byte[] r = fr.apply(SPECIES.length());
663
664 for (int ic = 0; ic < INVOC_COUNT; ic++) {
665 for (int i = 0; i < a.length; i += SPECIES.length()) {
666 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
667 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
668 av.lanewise(VectorOperators.ROR, bv).intoArray(r, i);
669 }
670 }
671
672 bh.consume(r);
673 }
674
675 @Benchmark
676 public void RORMasked(Blackhole bh) {
677 byte[] a = fa.apply(SPECIES.length());
678 byte[] b = fb.apply(SPECIES.length());
679 byte[] r = fr.apply(SPECIES.length());
680 boolean[] mask = fm.apply(SPECIES.length());
681 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
682
683 for (int ic = 0; ic < INVOC_COUNT; ic++) {
684 for (int i = 0; i < a.length; i += SPECIES.length()) {
685 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
686 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
687 av.lanewise(VectorOperators.ROR, bv, vmask).intoArray(r, i);
688 }
689 }
690
691 bh.consume(r);
692 }
693
694 @Benchmark
695 public void ROL(Blackhole bh) {
696 byte[] a = fa.apply(SPECIES.length());
697 byte[] b = fb.apply(SPECIES.length());
698 byte[] r = fr.apply(SPECIES.length());
699
700 for (int ic = 0; ic < INVOC_COUNT; ic++) {
701 for (int i = 0; i < a.length; i += SPECIES.length()) {
702 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
703 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
704 av.lanewise(VectorOperators.ROL, bv).intoArray(r, i);
705 }
706 }
707
708 bh.consume(r);
709 }
710
711 @Benchmark
712 public void ROLMasked(Blackhole bh) {
713 byte[] a = fa.apply(SPECIES.length());
714 byte[] b = fb.apply(SPECIES.length());
715 byte[] r = fr.apply(SPECIES.length());
716 boolean[] mask = fm.apply(SPECIES.length());
717 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
718
719 for (int ic = 0; ic < INVOC_COUNT; ic++) {
720 for (int i = 0; i < a.length; i += SPECIES.length()) {
721 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
722 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
723 av.lanewise(VectorOperators.ROL, bv, vmask).intoArray(r, i);
724 }
725 }
726
727 bh.consume(r);
728 }
729
730 @Benchmark
731 public void RORShift(Blackhole bh) {
732 byte[] a = fa.apply(SPECIES.length());
733 byte[] b = fb.apply(SPECIES.length());
734 byte[] r = fr.apply(SPECIES.length());
735
736 for (int ic = 0; ic < INVOC_COUNT; ic++) {
737 for (int i = 0; i < a.length; i += SPECIES.length()) {
738 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
739 av.lanewise(VectorOperators.ROR, (int)b[i]).intoArray(r, i);
740 }
741 }
742
743 bh.consume(r);
744 }
745
746 @Benchmark
747 public void RORMaskedShift(Blackhole bh) {
748 byte[] a = fa.apply(SPECIES.length());
749 byte[] b = fb.apply(SPECIES.length());
750 byte[] r = fr.apply(SPECIES.length());
751 boolean[] mask = fm.apply(SPECIES.length());
752 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
753
754 for (int ic = 0; ic < INVOC_COUNT; ic++) {
755 for (int i = 0; i < a.length; i += SPECIES.length()) {
756 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
757 av.lanewise(VectorOperators.ROR, (int)b[i], vmask).intoArray(r, i);
758 }
759 }
760
761 bh.consume(r);
762 }
763
764 @Benchmark
765 public void ROLShift(Blackhole bh) {
766 byte[] a = fa.apply(SPECIES.length());
767 byte[] b = fb.apply(SPECIES.length());
768 byte[] r = fr.apply(SPECIES.length());
769
770 for (int ic = 0; ic < INVOC_COUNT; ic++) {
771 for (int i = 0; i < a.length; i += SPECIES.length()) {
772 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
773 av.lanewise(VectorOperators.ROL, (int)b[i]).intoArray(r, i);
774 }
775 }
776
777 bh.consume(r);
778 }
779
780 @Benchmark
781 public void ROLMaskedShift(Blackhole bh) {
782 byte[] a = fa.apply(SPECIES.length());
783 byte[] b = fb.apply(SPECIES.length());
784 byte[] r = fr.apply(SPECIES.length());
785 boolean[] mask = fm.apply(SPECIES.length());
786 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
787
788 for (int ic = 0; ic < INVOC_COUNT; ic++) {
789 for (int i = 0; i < a.length; i += SPECIES.length()) {
790 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
791 av.lanewise(VectorOperators.ROL, (int)b[i], vmask).intoArray(r, i);
792 }
793 }
794
795 bh.consume(r);
796 }
797
798 @Benchmark
799 public void LSHRShiftConst(Blackhole bh) {
800 byte[] a = fa.apply(SPECIES.length());
801 byte[] r = fr.apply(SPECIES.length());
802
803 for (int ic = 0; ic < INVOC_COUNT; ic++) {
804 for (int i = 0; i < a.length; i += SPECIES.length()) {
805 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
806 av.lanewise(VectorOperators.LSHR, CONST_SHIFT).intoArray(r, i);
807 }
808 }
809
810 bh.consume(r);
811 }
812
813 @Benchmark
814 public void LSHRMaskedShiftConst(Blackhole bh) {
815 byte[] a = fa.apply(SPECIES.length());
816 byte[] r = fr.apply(SPECIES.length());
817 boolean[] mask = fm.apply(SPECIES.length());
818 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
819
820 for (int ic = 0; ic < INVOC_COUNT; ic++) {
821 for (int i = 0; i < a.length; i += SPECIES.length()) {
822 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
823 av.lanewise(VectorOperators.LSHR, CONST_SHIFT, vmask).intoArray(r, i);
824 }
825 }
826
827 bh.consume(r);
828 }
829
830 @Benchmark
831 public void LSHLShiftConst(Blackhole bh) {
832 byte[] a = fa.apply(SPECIES.length());
833 byte[] r = fr.apply(SPECIES.length());
834
835 for (int ic = 0; ic < INVOC_COUNT; ic++) {
836 for (int i = 0; i < a.length; i += SPECIES.length()) {
837 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
838 av.lanewise(VectorOperators.LSHL, CONST_SHIFT).intoArray(r, i);
839 }
840 }
841
842 bh.consume(r);
843 }
844
845 @Benchmark
846 public void LSHLMaskedShiftConst(Blackhole bh) {
847 byte[] a = fa.apply(SPECIES.length());
848 byte[] r = fr.apply(SPECIES.length());
849 boolean[] mask = fm.apply(SPECIES.length());
850 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
851
852 for (int ic = 0; ic < INVOC_COUNT; ic++) {
853 for (int i = 0; i < a.length; i += SPECIES.length()) {
854 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
855 av.lanewise(VectorOperators.LSHL, CONST_SHIFT, vmask).intoArray(r, i);
856 }
857 }
858
859 bh.consume(r);
860 }
861
862 @Benchmark
863 public void ASHRShiftConst(Blackhole bh) {
864 byte[] a = fa.apply(SPECIES.length());
865 byte[] r = fr.apply(SPECIES.length());
866
867 for (int ic = 0; ic < INVOC_COUNT; ic++) {
868 for (int i = 0; i < a.length; i += SPECIES.length()) {
869 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
870 av.lanewise(VectorOperators.ASHR, CONST_SHIFT).intoArray(r, i);
871 }
872 }
873
874 bh.consume(r);
875 }
876
877 @Benchmark
878 public void ASHRMaskedShiftConst(Blackhole bh) {
879 byte[] a = fa.apply(SPECIES.length());
880 byte[] r = fr.apply(SPECIES.length());
881 boolean[] mask = fm.apply(SPECIES.length());
882 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
883
884 for (int ic = 0; ic < INVOC_COUNT; ic++) {
885 for (int i = 0; i < a.length; i += SPECIES.length()) {
886 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
887 av.lanewise(VectorOperators.ASHR, CONST_SHIFT, vmask).intoArray(r, i);
888 }
889 }
890
891 bh.consume(r);
892 }
893
894 @Benchmark
895 public void RORShiftConst(Blackhole bh) {
896 byte[] a = fa.apply(SPECIES.length());
897 byte[] r = fr.apply(SPECIES.length());
898
899 for (int ic = 0; ic < INVOC_COUNT; ic++) {
900 for (int i = 0; i < a.length; i += SPECIES.length()) {
901 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
902 av.lanewise(VectorOperators.ROR, CONST_SHIFT).intoArray(r, i);
903 }
904 }
905
906 bh.consume(r);
907 }
908
909 @Benchmark
910 public void RORMaskedShiftConst(Blackhole bh) {
911 byte[] a = fa.apply(SPECIES.length());
912 byte[] r = fr.apply(SPECIES.length());
913 boolean[] mask = fm.apply(SPECIES.length());
914 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
915
916 for (int ic = 0; ic < INVOC_COUNT; ic++) {
917 for (int i = 0; i < a.length; i += SPECIES.length()) {
918 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
919 av.lanewise(VectorOperators.ROR, CONST_SHIFT, vmask).intoArray(r, i);
920 }
921 }
922
923 bh.consume(r);
924 }
925
926 @Benchmark
927 public void ROLShiftConst(Blackhole bh) {
928 byte[] a = fa.apply(SPECIES.length());
929 byte[] r = fr.apply(SPECIES.length());
930
931 for (int ic = 0; ic < INVOC_COUNT; ic++) {
932 for (int i = 0; i < a.length; i += SPECIES.length()) {
933 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
934 av.lanewise(VectorOperators.ROL, CONST_SHIFT).intoArray(r, i);
935 }
936 }
937
938 bh.consume(r);
939 }
940
941 @Benchmark
942 public void ROLMaskedShiftConst(Blackhole bh) {
943 byte[] a = fa.apply(SPECIES.length());
944 byte[] r = fr.apply(SPECIES.length());
945 boolean[] mask = fm.apply(SPECIES.length());
946 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
947
948 for (int ic = 0; ic < INVOC_COUNT; ic++) {
949 for (int i = 0; i < a.length; i += SPECIES.length()) {
950 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
951 av.lanewise(VectorOperators.ROL, CONST_SHIFT, vmask).intoArray(r, i);
952 }
953 }
954
955 bh.consume(r);
956 }
957
958 @Benchmark
959 public void MIN_MEM(Blackhole bh) {
960 byte[] a = fa.apply(SPECIES.length());
961 byte[] r = fr.apply(SPECIES.length());
962
963 for (int ic = 0; ic < INVOC_COUNT; ic++) {
964 for (int i = 0; i < a.length; i += SPECIES.length()) {
965 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
966 av.lanewise(VectorOperators.MIN, bcast_vec).intoArray(r, i);
967 }
968 }
969
970 bh.consume(r);
971 }
972
973 @Benchmark
974 public void MINMasked_MEM(Blackhole bh) {
975 byte[] a = fa.apply(SPECIES.length());
976 byte[] r = fr.apply(SPECIES.length());
977 boolean[] mask = fm.apply(SPECIES.length());
978 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
979
980 for (int ic = 0; ic < INVOC_COUNT; ic++) {
981 for (int i = 0; i < a.length; i += SPECIES.length()) {
982 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
983 av.lanewise(VectorOperators.MIN, bcast_vec, vmask).intoArray(r, i);
984 }
985 }
986
987 bh.consume(r);
988 }
989
990 @Benchmark
991 public void MAX_MEM(Blackhole bh) {
992 byte[] a = fa.apply(SPECIES.length());
993 byte[] r = fr.apply(SPECIES.length());
994
995 for (int ic = 0; ic < INVOC_COUNT; ic++) {
996 for (int i = 0; i < a.length; i += SPECIES.length()) {
997 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
998 av.lanewise(VectorOperators.MAX, bcast_vec).intoArray(r, i);
999 }
1000 }
1001
1002 bh.consume(r);
1003 }
1004
1005 @Benchmark
1006 public void MAXMasked_MEM(Blackhole bh) {
1007 byte[] a = fa.apply(SPECIES.length());
1008 byte[] r = fr.apply(SPECIES.length());
1009 boolean[] mask = fm.apply(SPECIES.length());
1010 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1011
1012 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1013 for (int i = 0; i < a.length; i += SPECIES.length()) {
1014 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1015 av.lanewise(VectorOperators.MAX, bcast_vec, vmask).intoArray(r, i);
1016 }
1017 }
1018
1019 bh.consume(r);
1020 }
1021
1022 @Benchmark
1023 public void MIN(Blackhole bh) {
1024 byte[] a = fa.apply(SPECIES.length());
1025 byte[] b = fb.apply(SPECIES.length());
1026 byte[] r = fr.apply(SPECIES.length());
1027
1028 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1029 for (int i = 0; i < a.length; i += SPECIES.length()) {
1030 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1031 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1032 av.lanewise(VectorOperators.MIN, bv).intoArray(r, i);
1033 }
1034 }
1035
1036 bh.consume(r);
1037 }
1038
1039 @Benchmark
1040 public void MAX(Blackhole bh) {
1041 byte[] a = fa.apply(SPECIES.length());
1042 byte[] b = fb.apply(SPECIES.length());
1043 byte[] r = fr.apply(SPECIES.length());
1044
1045 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1046 for (int i = 0; i < a.length; i += SPECIES.length()) {
1047 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1048 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1049 av.lanewise(VectorOperators.MAX, bv).intoArray(r, i);
1050 }
1051 }
1052
1053 bh.consume(r);
1054 }
1055
1056 @Benchmark
1057 public void UMIN(Blackhole bh) {
1058 byte[] a = fa.apply(SPECIES.length());
1059 byte[] b = fb.apply(SPECIES.length());
1060 byte[] r = fr.apply(SPECIES.length());
1061
1062 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1063 for (int i = 0; i < a.length; i += SPECIES.length()) {
1064 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1065 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1066 av.lanewise(VectorOperators.UMIN, bv).intoArray(r, i);
1067 }
1068 }
1069
1070 bh.consume(r);
1071 }
1072
1073 @Benchmark
1074 public void UMINMasked(Blackhole bh) {
1075 byte[] a = fa.apply(SPECIES.length());
1076 byte[] b = fb.apply(SPECIES.length());
1077 byte[] r = fr.apply(SPECIES.length());
1078 boolean[] mask = fm.apply(SPECIES.length());
1079 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1080
1081 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1082 for (int i = 0; i < a.length; i += SPECIES.length()) {
1083 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1084 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1085 av.lanewise(VectorOperators.UMIN, bv, vmask).intoArray(r, i);
1086 }
1087 }
1088
1089 bh.consume(r);
1090 }
1091
1092 @Benchmark
1093 public void UMAX(Blackhole bh) {
1094 byte[] a = fa.apply(SPECIES.length());
1095 byte[] b = fb.apply(SPECIES.length());
1096 byte[] r = fr.apply(SPECIES.length());
1097
1098 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1099 for (int i = 0; i < a.length; i += SPECIES.length()) {
1100 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1101 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1102 av.lanewise(VectorOperators.UMAX, bv).intoArray(r, i);
1103 }
1104 }
1105
1106 bh.consume(r);
1107 }
1108
1109 @Benchmark
1110 public void UMAXMasked(Blackhole bh) {
1111 byte[] a = fa.apply(SPECIES.length());
1112 byte[] b = fb.apply(SPECIES.length());
1113 byte[] r = fr.apply(SPECIES.length());
1114 boolean[] mask = fm.apply(SPECIES.length());
1115 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1116
1117 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1118 for (int i = 0; i < a.length; i += SPECIES.length()) {
1119 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1120 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1121 av.lanewise(VectorOperators.UMAX, bv, vmask).intoArray(r, i);
1122 }
1123 }
1124
1125 bh.consume(r);
1126 }
1127
1128 @Benchmark
1129 public void SADD(Blackhole bh) {
1130 byte[] a = fa.apply(SPECIES.length());
1131 byte[] b = fb.apply(SPECIES.length());
1132 byte[] r = fr.apply(SPECIES.length());
1133
1134 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1135 for (int i = 0; i < a.length; i += SPECIES.length()) {
1136 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1137 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1138 av.lanewise(VectorOperators.SADD, bv).intoArray(r, i);
1139 }
1140 }
1141
1142 bh.consume(r);
1143 }
1144
1145 @Benchmark
1146 public void SADDMasked(Blackhole bh) {
1147 byte[] a = fa.apply(SPECIES.length());
1148 byte[] b = fb.apply(SPECIES.length());
1149 byte[] r = fr.apply(SPECIES.length());
1150 boolean[] mask = fm.apply(SPECIES.length());
1151 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1152
1153 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1154 for (int i = 0; i < a.length; i += SPECIES.length()) {
1155 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1156 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1157 av.lanewise(VectorOperators.SADD, bv, vmask).intoArray(r, i);
1158 }
1159 }
1160
1161 bh.consume(r);
1162 }
1163
1164 @Benchmark
1165 public void SSUB(Blackhole bh) {
1166 byte[] a = fa.apply(SPECIES.length());
1167 byte[] b = fb.apply(SPECIES.length());
1168 byte[] r = fr.apply(SPECIES.length());
1169
1170 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1171 for (int i = 0; i < a.length; i += SPECIES.length()) {
1172 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1173 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1174 av.lanewise(VectorOperators.SSUB, bv).intoArray(r, i);
1175 }
1176 }
1177
1178 bh.consume(r);
1179 }
1180
1181 @Benchmark
1182 public void SSUBMasked(Blackhole bh) {
1183 byte[] a = fa.apply(SPECIES.length());
1184 byte[] b = fb.apply(SPECIES.length());
1185 byte[] r = fr.apply(SPECIES.length());
1186 boolean[] mask = fm.apply(SPECIES.length());
1187 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1188
1189 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1190 for (int i = 0; i < a.length; i += SPECIES.length()) {
1191 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1192 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1193 av.lanewise(VectorOperators.SSUB, bv, vmask).intoArray(r, i);
1194 }
1195 }
1196
1197 bh.consume(r);
1198 }
1199
1200 @Benchmark
1201 public void SUADD(Blackhole bh) {
1202 byte[] a = fa.apply(SPECIES.length());
1203 byte[] b = fb.apply(SPECIES.length());
1204 byte[] r = fr.apply(SPECIES.length());
1205
1206 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1207 for (int i = 0; i < a.length; i += SPECIES.length()) {
1208 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1209 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1210 av.lanewise(VectorOperators.SUADD, bv).intoArray(r, i);
1211 }
1212 }
1213
1214 bh.consume(r);
1215 }
1216
1217 @Benchmark
1218 public void SUADDMasked(Blackhole bh) {
1219 byte[] a = fa.apply(SPECIES.length());
1220 byte[] b = fb.apply(SPECIES.length());
1221 byte[] r = fr.apply(SPECIES.length());
1222 boolean[] mask = fm.apply(SPECIES.length());
1223 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1224
1225 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1226 for (int i = 0; i < a.length; i += SPECIES.length()) {
1227 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1228 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1229 av.lanewise(VectorOperators.SUADD, bv, vmask).intoArray(r, i);
1230 }
1231 }
1232
1233 bh.consume(r);
1234 }
1235
1236 @Benchmark
1237 public void SUSUB(Blackhole bh) {
1238 byte[] a = fa.apply(SPECIES.length());
1239 byte[] b = fb.apply(SPECIES.length());
1240 byte[] r = fr.apply(SPECIES.length());
1241
1242 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1243 for (int i = 0; i < a.length; i += SPECIES.length()) {
1244 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1245 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1246 av.lanewise(VectorOperators.SUSUB, bv).intoArray(r, i);
1247 }
1248 }
1249
1250 bh.consume(r);
1251 }
1252
1253 @Benchmark
1254 public void SUSUBMasked(Blackhole bh) {
1255 byte[] a = fa.apply(SPECIES.length());
1256 byte[] b = fb.apply(SPECIES.length());
1257 byte[] r = fr.apply(SPECIES.length());
1258 boolean[] mask = fm.apply(SPECIES.length());
1259 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1260
1261 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1262 for (int i = 0; i < a.length; i += SPECIES.length()) {
1263 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1264 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1265 av.lanewise(VectorOperators.SUSUB, bv, vmask).intoArray(r, i);
1266 }
1267 }
1268
1269 bh.consume(r);
1270 }
1271
1272 @Benchmark
1273 public void SUADD_ASSOC(Blackhole bh) {
1274 byte[] a = fa.apply(SPECIES.length());
1275 byte[] b = fb.apply(SPECIES.length());
1276 byte[] c = fc.apply(SPECIES.length());
1277 byte[] rl = fr.apply(SPECIES.length());
1278 byte[] rr = fr.apply(SPECIES.length());
1279
1280 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1281 for (int i = 0; i < a.length; i += SPECIES.length()) {
1282 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1283 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1284 ByteVector cv = ByteVector.fromArray(SPECIES, c, i);
1285 av.lanewise(VectorOperators.SUADD, bv).lanewise(VectorOperators.SUADD, cv).intoArray(rl, i);
1286 av.lanewise(VectorOperators.SUADD, bv.lanewise(VectorOperators.SUADD, cv)).intoArray(rr, i);
1287 }
1288 }
1289
1290 bh.consume(r);
1291 }
1292
1293 @Benchmark
1294 public void SUADDMasked_ASSOC(Blackhole bh) {
1295 byte[] a = fa.apply(SPECIES.length());
1296 byte[] b = fb.apply(SPECIES.length());
1297 byte[] c = fc.apply(SPECIES.length());
1298 boolean[] mask = fm.apply(SPECIES.length());
1299 byte[] rl = fr.apply(SPECIES.length());
1300 byte[] rr = fr.apply(SPECIES.length());
1301
1302 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1303
1304 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1305 for (int i = 0; i < a.length; i += SPECIES.length()) {
1306 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1307 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1308 ByteVector cv = ByteVector.fromArray(SPECIES, c, i);
1309 av.lanewise(VectorOperators.SUADD, bv, vmask).lanewise(VectorOperators.SUADD, cv, vmask).intoArray(rl, i);
1310 av.lanewise(VectorOperators.SUADD, bv.lanewise(VectorOperators.SUADD, cv, vmask), vmask).intoArray(rr, i);
1311 }
1312 }
1313
1314 bh.consume(r);
1315 }
1316
1317 @Benchmark
1318 public void ANDLanes(Blackhole bh) {
1319 byte[] a = fa.apply(SPECIES.length());
1320 byte ra = -1;
1321
1322 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1323 ra = -1;
1324 for (int i = 0; i < a.length; i += SPECIES.length()) {
1325 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1326 ra &= av.reduceLanes(VectorOperators.AND);
1327 }
1328 }
1329 bh.consume(ra);
1330 }
1331
1332 @Benchmark
1333 public void ANDMaskedLanes(Blackhole bh) {
1334 byte[] a = fa.apply(SPECIES.length());
1335 boolean[] mask = fm.apply(SPECIES.length());
1336 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1337 byte ra = -1;
1338
1339 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1340 ra = -1;
1341 for (int i = 0; i < a.length; i += SPECIES.length()) {
1342 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1343 ra &= av.reduceLanes(VectorOperators.AND, vmask);
1344 }
1345 }
1346 bh.consume(ra);
1347 }
1348
1349 @Benchmark
1350 public void ORLanes(Blackhole bh) {
1351 byte[] a = fa.apply(SPECIES.length());
1352 byte ra = 0;
1353
1354 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1355 ra = 0;
1356 for (int i = 0; i < a.length; i += SPECIES.length()) {
1357 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1358 ra |= av.reduceLanes(VectorOperators.OR);
1359 }
1360 }
1361 bh.consume(ra);
1362 }
1363
1364 @Benchmark
1365 public void ORMaskedLanes(Blackhole bh) {
1366 byte[] a = fa.apply(SPECIES.length());
1367 boolean[] mask = fm.apply(SPECIES.length());
1368 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1369 byte ra = 0;
1370
1371 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1372 ra = 0;
1373 for (int i = 0; i < a.length; i += SPECIES.length()) {
1374 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1375 ra |= av.reduceLanes(VectorOperators.OR, vmask);
1376 }
1377 }
1378 bh.consume(ra);
1379 }
1380
1381 @Benchmark
1382 public void XORLanes(Blackhole bh) {
1383 byte[] a = fa.apply(SPECIES.length());
1384 byte ra = 0;
1385
1386 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1387 ra = 0;
1388 for (int i = 0; i < a.length; i += SPECIES.length()) {
1389 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1390 ra ^= av.reduceLanes(VectorOperators.XOR);
1391 }
1392 }
1393 bh.consume(ra);
1394 }
1395
1396 @Benchmark
1397 public void XORMaskedLanes(Blackhole bh) {
1398 byte[] a = fa.apply(SPECIES.length());
1399 boolean[] mask = fm.apply(SPECIES.length());
1400 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1401 byte ra = 0;
1402
1403 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1404 ra = 0;
1405 for (int i = 0; i < a.length; i += SPECIES.length()) {
1406 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1407 ra ^= av.reduceLanes(VectorOperators.XOR, vmask);
1408 }
1409 }
1410 bh.consume(ra);
1411 }
1412
1413 @Benchmark
1414 public void ADDLanes(Blackhole bh) {
1415 byte[] a = fa.apply(SPECIES.length());
1416 byte ra = 0;
1417
1418 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1419 ra = 0;
1420 for (int i = 0; i < a.length; i += SPECIES.length()) {
1421 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1422 ra += av.reduceLanes(VectorOperators.ADD);
1423 }
1424 }
1425 bh.consume(ra);
1426 }
1427
1428 @Benchmark
1429 public void ADDMaskedLanes(Blackhole bh) {
1430 byte[] a = fa.apply(SPECIES.length());
1431 boolean[] mask = fm.apply(SPECIES.length());
1432 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1433 byte ra = 0;
1434
1435 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1436 ra = 0;
1437 for (int i = 0; i < a.length; i += SPECIES.length()) {
1438 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1439 ra += av.reduceLanes(VectorOperators.ADD, vmask);
1440 }
1441 }
1442 bh.consume(ra);
1443 }
1444
1445 @Benchmark
1446 public void MULLanes(Blackhole bh) {
1447 byte[] a = fa.apply(SPECIES.length());
1448 byte ra = 1;
1449
1450 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1451 ra = 1;
1452 for (int i = 0; i < a.length; i += SPECIES.length()) {
1453 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1454 ra *= av.reduceLanes(VectorOperators.MUL);
1455 }
1456 }
1457 bh.consume(ra);
1458 }
1459
1460 @Benchmark
1461 public void MULMaskedLanes(Blackhole bh) {
1462 byte[] a = fa.apply(SPECIES.length());
1463 boolean[] mask = fm.apply(SPECIES.length());
1464 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1465 byte ra = 1;
1466
1467 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1468 ra = 1;
1469 for (int i = 0; i < a.length; i += SPECIES.length()) {
1470 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1471 ra *= av.reduceLanes(VectorOperators.MUL, vmask);
1472 }
1473 }
1474 bh.consume(ra);
1475 }
1476
1477 @Benchmark
1478 public void MINLanes(Blackhole bh) {
1479 byte[] a = fa.apply(SPECIES.length());
1480 byte ra = Byte.MAX_VALUE;
1481
1482 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1483 ra = Byte.MAX_VALUE;
1484 for (int i = 0; i < a.length; i += SPECIES.length()) {
1485 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1486 ra = (byte) Math.min(ra, av.reduceLanes(VectorOperators.MIN));
1487 }
1488 }
1489 bh.consume(ra);
1490 }
1491
1492 @Benchmark
1493 public void MINMaskedLanes(Blackhole bh) {
1494 byte[] a = fa.apply(SPECIES.length());
1495 boolean[] mask = fm.apply(SPECIES.length());
1496 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1497 byte ra = Byte.MAX_VALUE;
1498
1499 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1500 ra = Byte.MAX_VALUE;
1501 for (int i = 0; i < a.length; i += SPECIES.length()) {
1502 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1503 ra = (byte) Math.min(ra, av.reduceLanes(VectorOperators.MIN, vmask));
1504 }
1505 }
1506 bh.consume(ra);
1507 }
1508
1509 @Benchmark
1510 public void MAXLanes(Blackhole bh) {
1511 byte[] a = fa.apply(SPECIES.length());
1512 byte ra = Byte.MIN_VALUE;
1513
1514 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1515 ra = Byte.MIN_VALUE;
1516 for (int i = 0; i < a.length; i += SPECIES.length()) {
1517 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1518 ra = (byte) Math.max(ra, av.reduceLanes(VectorOperators.MAX));
1519 }
1520 }
1521 bh.consume(ra);
1522 }
1523
1524 @Benchmark
1525 public void MAXMaskedLanes(Blackhole bh) {
1526 byte[] a = fa.apply(SPECIES.length());
1527 boolean[] mask = fm.apply(SPECIES.length());
1528 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1529 byte ra = Byte.MIN_VALUE;
1530
1531 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1532 ra = Byte.MIN_VALUE;
1533 for (int i = 0; i < a.length; i += SPECIES.length()) {
1534 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1535 ra = (byte) Math.max(ra, av.reduceLanes(VectorOperators.MAX, vmask));
1536 }
1537 }
1538 bh.consume(ra);
1539 }
1540
1541 @Benchmark
1542 public void UMINLanes(Blackhole bh) {
1543 byte[] a = fa.apply(SPECIES.length());
1544 byte ra = Byte.MAX_VALUE;
1545
1546 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1547 ra = Byte.MAX_VALUE;
1548 for (int i = 0; i < a.length; i += SPECIES.length()) {
1549 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1550 ra = (byte) VectorMath.minUnsigned(ra, av.reduceLanes(VectorOperators.UMIN));
1551 }
1552 }
1553 bh.consume(ra);
1554 }
1555
1556 @Benchmark
1557 public void UMINMaskedLanes(Blackhole bh) {
1558 byte[] a = fa.apply(SPECIES.length());
1559 boolean[] mask = fm.apply(SPECIES.length());
1560 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1561 byte ra = Byte.MAX_VALUE;
1562
1563 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1564 ra = Byte.MAX_VALUE;
1565 for (int i = 0; i < a.length; i += SPECIES.length()) {
1566 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1567 ra = (byte) VectorMath.minUnsigned(ra, av.reduceLanes(VectorOperators.UMIN, vmask));
1568 }
1569 }
1570 bh.consume(ra);
1571 }
1572
1573 @Benchmark
1574 public void UMAXLanes(Blackhole bh) {
1575 byte[] a = fa.apply(SPECIES.length());
1576 byte ra = Byte.MIN_VALUE;
1577
1578 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1579 ra = Byte.MIN_VALUE;
1580 for (int i = 0; i < a.length; i += SPECIES.length()) {
1581 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1582 ra = (byte) VectorMath.maxUnsigned(ra, av.reduceLanes(VectorOperators.UMAX));
1583 }
1584 }
1585 bh.consume(ra);
1586 }
1587
1588 @Benchmark
1589 public void UMAXMaskedLanes(Blackhole bh) {
1590 byte[] a = fa.apply(SPECIES.length());
1591 boolean[] mask = fm.apply(SPECIES.length());
1592 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1593 byte ra = Byte.MIN_VALUE;
1594
1595 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1596 ra = Byte.MIN_VALUE;
1597 for (int i = 0; i < a.length; i += SPECIES.length()) {
1598 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1599 ra = (byte) VectorMath.maxUnsigned(ra, av.reduceLanes(VectorOperators.UMAX, vmask));
1600 }
1601 }
1602 bh.consume(ra);
1603 }
1604
1605 @Benchmark
1606 public void FIRST_NONZEROLanes(Blackhole bh) {
1607 byte[] a = fa.apply(SPECIES.length());
1608 byte ra = (byte) 0;
1609
1610 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1611 ra = (byte) 0;
1612 for (int i = 0; i < a.length; i += SPECIES.length()) {
1613 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1614 ra = firstNonZero(ra, av.reduceLanes(VectorOperators.FIRST_NONZERO));
1615 }
1616 }
1617 bh.consume(ra);
1618 }
1619
1620 @Benchmark
1621 public void FIRST_NONZEROMaskedLanes(Blackhole bh) {
1622 byte[] a = fa.apply(SPECIES.length());
1623 boolean[] mask = fm.apply(SPECIES.length());
1624 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1625 byte ra = (byte) 0;
1626
1627 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1628 ra = (byte) 0;
1629 for (int i = 0; i < a.length; i += SPECIES.length()) {
1630 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1631 ra = firstNonZero(ra, av.reduceLanes(VectorOperators.FIRST_NONZERO, vmask));
1632 }
1633 }
1634 bh.consume(ra);
1635 }
1636
1637 @Benchmark
1638 public void anyTrue(Blackhole bh) {
1639 boolean[] mask = fm.apply(SPECIES.length());
1640 boolean[] r = fmr.apply(SPECIES.length());
1641
1642 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1643 for (int i = 0; i < mask.length; i += SPECIES.length()) {
1644 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, i);
1645 r[i] = vmask.anyTrue();
1646 }
1647 }
1648
1649 bh.consume(r);
1650 }
1651
1652 @Benchmark
1653 public void allTrue(Blackhole bh) {
1654 boolean[] mask = fm.apply(SPECIES.length());
1655 boolean[] r = fmr.apply(SPECIES.length());
1656
1657 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1658 for (int i = 0; i < mask.length; i += SPECIES.length()) {
1659 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, i);
1660 r[i] = vmask.allTrue();
1661 }
1662 }
1663
1664 bh.consume(r);
1665 }
1666
1667 @Benchmark
1668 public void SUADD_REDUCTION(Blackhole bh) {
1669 byte[] a = fa.apply(SPECIES.length());
1670 byte[] r = fr.apply(SPECIES.length());
1671 byte ra = 0;
1672
1673 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1674 for (int i = 0; i < a.length; i += SPECIES.length()) {
1675 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1676 r[i] = av.reduceLanes(VectorOperators.SUADD);
1677 }
1678 }
1679
1680 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1681 ra = 0;
1682 for (int i = 0; i < a.length; i += SPECIES.length()) {
1683 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1684 ra = (byte) VectorMath.addSaturatingUnsigned(ra, av.reduceLanes(VectorOperators.SUADD));
1685 }
1686 }
1687
1688 bh.consume(r);
1689 }
1690
1691 @Benchmark
1692 public void SUADDMasked_REDUCTION(Blackhole bh) {
1693 byte[] a = fa.apply(SPECIES.length());
1694 byte[] r = fr.apply(SPECIES.length());
1695 boolean[] mask = fm.apply(SPECIES.length());
1696 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1697 byte ra = 0;
1698
1699 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1700 for (int i = 0; i < a.length; i += SPECIES.length()) {
1701 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1702 r[i] = av.reduceLanes(VectorOperators.SUADD, vmask);
1703 }
1704 }
1705
1706 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1707 ra = 0;
1708 for (int i = 0; i < a.length; i += SPECIES.length()) {
1709 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1710 ra = (byte) VectorMath.addSaturatingUnsigned(ra, av.reduceLanes(VectorOperators.SUADD, vmask));
1711 }
1712 }
1713
1714 bh.consume(r);
1715 }
1716
1717 @Benchmark
1718 public void withLane(Blackhole bh) {
1719 byte[] a = fa.apply(SPECIES.length());
1720 byte[] b = fb.apply(SPECIES.length());
1721 byte[] r = fr.apply(SPECIES.length());
1722
1723 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1724 for (int i = 0, j = 0; i < a.length; i += SPECIES.length()) {
1725 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1726 av.withLane(j, b[i + j]).intoArray(r, i);
1727 a[i + j] = b[i + j];
1728 j = (j + 1) & (SPECIES.length() - 1);
1729 }
1730 }
1731
1732 bh.consume(r);
1733 }
1734
1735 @Benchmark
1736 public Object IS_DEFAULT() {
1737 byte[] a = fa.apply(size);
1738 boolean[] ms = fmt.apply(size);
1739 VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, 0);
1740
1741 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1742 for (int i = 0; i < a.length; i += SPECIES.length()) {
1743 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1744
1745 // accumulate results, so JIT can't eliminate relevant computations
1746 m = m.and(av.test(VectorOperators.IS_DEFAULT));
1747 }
1748 }
1749
1750 return m;
1751 }
1752
1753 @Benchmark
1754 public Object IS_NEGATIVE() {
1755 byte[] a = fa.apply(size);
1756 boolean[] ms = fmt.apply(size);
1757 VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, 0);
1758
1759 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1760 for (int i = 0; i < a.length; i += SPECIES.length()) {
1761 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1762
1763 // accumulate results, so JIT can't eliminate relevant computations
1764 m = m.and(av.test(VectorOperators.IS_NEGATIVE));
1765 }
1766 }
1767
1768 return m;
1769 }
1770 @Benchmark
1771 public Object LT() {
1772 byte[] a = fa.apply(size);
1773 byte[] b = fb.apply(size);
1774 boolean[] ms = fmt.apply(size);
1775 VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, 0);
1776
1777 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1778 for (int i = 0; i < a.length; i += SPECIES.length()) {
1779 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1780 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1781
1782 // accumulate results, so JIT can't eliminate relevant computations
1783 m = m.and(av.compare(VectorOperators.LT, bv));
1784 }
1785 }
1786
1787 return m;
1788 }
1789 @Benchmark
1790 public Object GT() {
1791 byte[] a = fa.apply(size);
1792 byte[] b = fb.apply(size);
1793 boolean[] ms = fmt.apply(size);
1794 VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, 0);
1795
1796 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1797 for (int i = 0; i < a.length; i += SPECIES.length()) {
1798 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1799 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1800
1801 // accumulate results, so JIT can't eliminate relevant computations
1802 m = m.and(av.compare(VectorOperators.GT, bv));
1803 }
1804 }
1805
1806 return m;
1807 }
1808 @Benchmark
1809 public Object EQ() {
1810 byte[] a = fa.apply(size);
1811 byte[] b = fb.apply(size);
1812 boolean[] ms = fmt.apply(size);
1813 VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, 0);
1814
1815 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1816 for (int i = 0; i < a.length; i += SPECIES.length()) {
1817 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1818 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1819
1820 // accumulate results, so JIT can't eliminate relevant computations
1821 m = m.and(av.compare(VectorOperators.EQ, bv));
1822 }
1823 }
1824
1825 return m;
1826 }
1827 @Benchmark
1828 public Object NE() {
1829 byte[] a = fa.apply(size);
1830 byte[] b = fb.apply(size);
1831 boolean[] ms = fmt.apply(size);
1832 VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, 0);
1833
1834 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1835 for (int i = 0; i < a.length; i += SPECIES.length()) {
1836 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1837 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1838
1839 // accumulate results, so JIT can't eliminate relevant computations
1840 m = m.and(av.compare(VectorOperators.NE, bv));
1841 }
1842 }
1843
1844 return m;
1845 }
1846 @Benchmark
1847 public Object LE() {
1848 byte[] a = fa.apply(size);
1849 byte[] b = fb.apply(size);
1850 boolean[] ms = fmt.apply(size);
1851 VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, 0);
1852
1853 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1854 for (int i = 0; i < a.length; i += SPECIES.length()) {
1855 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1856 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1857
1858 // accumulate results, so JIT can't eliminate relevant computations
1859 m = m.and(av.compare(VectorOperators.LE, bv));
1860 }
1861 }
1862
1863 return m;
1864 }
1865 @Benchmark
1866 public Object GE() {
1867 byte[] a = fa.apply(size);
1868 byte[] b = fb.apply(size);
1869 boolean[] ms = fmt.apply(size);
1870 VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, 0);
1871
1872 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1873 for (int i = 0; i < a.length; i += SPECIES.length()) {
1874 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1875 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1876
1877 // accumulate results, so JIT can't eliminate relevant computations
1878 m = m.and(av.compare(VectorOperators.GE, bv));
1879 }
1880 }
1881
1882 return m;
1883 }
1884 @Benchmark
1885 public Object ULT() {
1886 byte[] a = fa.apply(size);
1887 byte[] b = fb.apply(size);
1888 boolean[] ms = fmt.apply(size);
1889 VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, 0);
1890
1891 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1892 for (int i = 0; i < a.length; i += SPECIES.length()) {
1893 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1894 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1895
1896 // accumulate results, so JIT can't eliminate relevant computations
1897 m = m.and(av.compare(VectorOperators.ULT, bv));
1898 }
1899 }
1900
1901 return m;
1902 }
1903 @Benchmark
1904 public Object UGT() {
1905 byte[] a = fa.apply(size);
1906 byte[] b = fb.apply(size);
1907 boolean[] ms = fmt.apply(size);
1908 VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, 0);
1909
1910 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1911 for (int i = 0; i < a.length; i += SPECIES.length()) {
1912 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1913 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1914
1915 // accumulate results, so JIT can't eliminate relevant computations
1916 m = m.and(av.compare(VectorOperators.UGT, bv));
1917 }
1918 }
1919
1920 return m;
1921 }
1922 @Benchmark
1923 public Object ULE() {
1924 byte[] a = fa.apply(size);
1925 byte[] b = fb.apply(size);
1926 boolean[] ms = fmt.apply(size);
1927 VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, 0);
1928
1929 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1930 for (int i = 0; i < a.length; i += SPECIES.length()) {
1931 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1932 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1933
1934 // accumulate results, so JIT can't eliminate relevant computations
1935 m = m.and(av.compare(VectorOperators.ULE, bv));
1936 }
1937 }
1938
1939 return m;
1940 }
1941 @Benchmark
1942 public Object UGE() {
1943 byte[] a = fa.apply(size);
1944 byte[] b = fb.apply(size);
1945 boolean[] ms = fmt.apply(size);
1946 VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, 0);
1947
1948 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1949 for (int i = 0; i < a.length; i += SPECIES.length()) {
1950 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1951 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1952
1953 // accumulate results, so JIT can't eliminate relevant computations
1954 m = m.and(av.compare(VectorOperators.UGE, bv));
1955 }
1956 }
1957
1958 return m;
1959 }
1960
1961 @Benchmark
1962 public void blend(Blackhole bh) {
1963 byte[] a = fa.apply(SPECIES.length());
1964 byte[] b = fb.apply(SPECIES.length());
1965 byte[] r = fr.apply(SPECIES.length());
1966 boolean[] mask = fm.apply(SPECIES.length());
1967 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
1968
1969 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1970 for (int i = 0; i < a.length; i += SPECIES.length()) {
1971 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1972 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
1973 av.blend(bv, vmask).intoArray(r, i);
1974 }
1975 }
1976
1977 bh.consume(r);
1978 }
1979
1980 @Benchmark
1981 public void rearrange(Blackhole bh) {
1982 byte[] a = fa.apply(SPECIES.length());
1983 int[] order = fs.apply(a.length, SPECIES.length());
1984 byte[] r = fr.apply(SPECIES.length());
1985
1986 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1987 for (int i = 0; i < a.length; i += SPECIES.length()) {
1988 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1989 av.rearrange(VectorShuffle.fromArray(SPECIES, order, i)).intoArray(r, i);
1990 }
1991 }
1992
1993 bh.consume(r);
1994 }
1995 @Benchmark
1996 public Object compress() {
1997 byte[] a = fa.apply(size);
1998 byte[] r = fb.apply(size);
1999 boolean[] ms = fmt.apply(size);
2000 VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, 0);
2001
2002 for (int ic = 0; ic < INVOC_COUNT; ic++) {
2003 for (int i = 0; i < a.length; i += SPECIES.length()) {
2004 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
2005 av.compress(m).intoArray(r, i);
2006 }
2007 }
2008
2009 return r;
2010 }
2011
2012 @Benchmark
2013 public Object expand() {
2014 byte[] a = fa.apply(size);
2015 byte[] r = fb.apply(size);
2016 boolean[] ms = fmt.apply(size);
2017 VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, 0);
2018
2019 for (int ic = 0; ic < INVOC_COUNT; ic++) {
2020 for (int i = 0; i < a.length; i += SPECIES.length()) {
2021 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
2022 av.expand(m).intoArray(r, i);
2023 }
2024 }
2025
2026 return r;
2027 }
2028
2029 @Benchmark
2030 public Object maskCompress() {
2031 boolean[] ms = fmt.apply(size);
2032 boolean[] rs = fmt.apply(size);
2033
2034 for (int ic = 0; ic < INVOC_COUNT; ic++) {
2035 for (int i = 0, j = 0; i < ms.length; i += SPECIES.length()) {
2036 VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, i);
2037 m.compress().intoArray(rs, j);
2038 j += m.trueCount();
2039 }
2040 }
2041
2042 return rs;
2043 }
2044
/**
 * Benchmarks {@code ByteVector.lane(int)}. For every {@code SPECIES.length()}-sized chunk
 * of the {@code fa} array, the vector is loaded and each of its lanes is copied, one lane
 * at a time, into the matching position of the {@code fr} array.
 *
 * <p>The branches for lane counts 1, 2, 4, 8, 16, 32 and 64 spell out every lane index as
 * a literal; any other lane count falls through to a plain loop. The hand-unrolled form is
 * intentional (see the inline comment) and should not be collapsed into a loop.
 *
 * @param bh sink that consumes the output array
 */
@Benchmark
public void laneextract(Blackhole bh) {
    byte[] a = fa.apply(SPECIES.length());
    byte[] r = fr.apply(SPECIES.length());

    for (int ic = 0; ic < INVOC_COUNT; ic++) {
        for (int i = 0; i < a.length; i += SPECIES.length()) {
            ByteVector av = ByteVector.fromArray(SPECIES, a, i);
            int num_lanes = SPECIES.length();
            // Manually unroll because full unroll happens after intrinsification.
            // Unroll is needed because the lane-get intrinsic requires the index to be a known constant.
            if (num_lanes == 1) {
                r[i]=av.lane(0);
            } else if (num_lanes == 2) {
                r[i]=av.lane(0);
                r[i+1]=av.lane(1);
            } else if (num_lanes == 4) {
                r[i]=av.lane(0);
                r[i+1]=av.lane(1);
                r[i+2]=av.lane(2);
                r[i+3]=av.lane(3);
            } else if (num_lanes == 8) {
                r[i]=av.lane(0);
                r[i+1]=av.lane(1);
                r[i+2]=av.lane(2);
                r[i+3]=av.lane(3);
                r[i+4]=av.lane(4);
                r[i+5]=av.lane(5);
                r[i+6]=av.lane(6);
                r[i+7]=av.lane(7);
            } else if (num_lanes == 16) {
                r[i]=av.lane(0);
                r[i+1]=av.lane(1);
                r[i+2]=av.lane(2);
                r[i+3]=av.lane(3);
                r[i+4]=av.lane(4);
                r[i+5]=av.lane(5);
                r[i+6]=av.lane(6);
                r[i+7]=av.lane(7);
                r[i+8]=av.lane(8);
                r[i+9]=av.lane(9);
                r[i+10]=av.lane(10);
                r[i+11]=av.lane(11);
                r[i+12]=av.lane(12);
                r[i+13]=av.lane(13);
                r[i+14]=av.lane(14);
                r[i+15]=av.lane(15);
            } else if (num_lanes == 32) {
                r[i]=av.lane(0);
                r[i+1]=av.lane(1);
                r[i+2]=av.lane(2);
                r[i+3]=av.lane(3);
                r[i+4]=av.lane(4);
                r[i+5]=av.lane(5);
                r[i+6]=av.lane(6);
                r[i+7]=av.lane(7);
                r[i+8]=av.lane(8);
                r[i+9]=av.lane(9);
                r[i+10]=av.lane(10);
                r[i+11]=av.lane(11);
                r[i+12]=av.lane(12);
                r[i+13]=av.lane(13);
                r[i+14]=av.lane(14);
                r[i+15]=av.lane(15);
                r[i+16]=av.lane(16);
                r[i+17]=av.lane(17);
                r[i+18]=av.lane(18);
                r[i+19]=av.lane(19);
                r[i+20]=av.lane(20);
                r[i+21]=av.lane(21);
                r[i+22]=av.lane(22);
                r[i+23]=av.lane(23);
                r[i+24]=av.lane(24);
                r[i+25]=av.lane(25);
                r[i+26]=av.lane(26);
                r[i+27]=av.lane(27);
                r[i+28]=av.lane(28);
                r[i+29]=av.lane(29);
                r[i+30]=av.lane(30);
                r[i+31]=av.lane(31);
            } else if (num_lanes == 64) {
                r[i]=av.lane(0);
                r[i+1]=av.lane(1);
                r[i+2]=av.lane(2);
                r[i+3]=av.lane(3);
                r[i+4]=av.lane(4);
                r[i+5]=av.lane(5);
                r[i+6]=av.lane(6);
                r[i+7]=av.lane(7);
                r[i+8]=av.lane(8);
                r[i+9]=av.lane(9);
                r[i+10]=av.lane(10);
                r[i+11]=av.lane(11);
                r[i+12]=av.lane(12);
                r[i+13]=av.lane(13);
                r[i+14]=av.lane(14);
                r[i+15]=av.lane(15);
                r[i+16]=av.lane(16);
                r[i+17]=av.lane(17);
                r[i+18]=av.lane(18);
                r[i+19]=av.lane(19);
                r[i+20]=av.lane(20);
                r[i+21]=av.lane(21);
                r[i+22]=av.lane(22);
                r[i+23]=av.lane(23);
                r[i+24]=av.lane(24);
                r[i+25]=av.lane(25);
                r[i+26]=av.lane(26);
                r[i+27]=av.lane(27);
                r[i+28]=av.lane(28);
                r[i+29]=av.lane(29);
                r[i+30]=av.lane(30);
                r[i+31]=av.lane(31);
                r[i+32]=av.lane(32);
                r[i+33]=av.lane(33);
                r[i+34]=av.lane(34);
                r[i+35]=av.lane(35);
                r[i+36]=av.lane(36);
                r[i+37]=av.lane(37);
                r[i+38]=av.lane(38);
                r[i+39]=av.lane(39);
                r[i+40]=av.lane(40);
                r[i+41]=av.lane(41);
                r[i+42]=av.lane(42);
                r[i+43]=av.lane(43);
                r[i+44]=av.lane(44);
                r[i+45]=av.lane(45);
                r[i+46]=av.lane(46);
                r[i+47]=av.lane(47);
                r[i+48]=av.lane(48);
                r[i+49]=av.lane(49);
                r[i+50]=av.lane(50);
                r[i+51]=av.lane(51);
                r[i+52]=av.lane(52);
                r[i+53]=av.lane(53);
                r[i+54]=av.lane(54);
                r[i+55]=av.lane(55);
                r[i+56]=av.lane(56);
                r[i+57]=av.lane(57);
                r[i+58]=av.lane(58);
                r[i+59]=av.lane(59);
                r[i+60]=av.lane(60);
                r[i+61]=av.lane(61);
                r[i+62]=av.lane(62);
                r[i+63]=av.lane(63);
            } else {
                // Fallback for lane counts without a hand-unrolled branch above.
                for (int j = 0; j < SPECIES.length(); j++) {
                    r[i+j]=av.lane(j);
                }
            }
        }
    }

    bh.consume(r);
}
2200
2201 @Benchmark
2202 public void broadcast(Blackhole bh) {
2203 byte[] a = fa.apply(SPECIES.length());
2204 byte[] r = new byte[a.length];
2205
2206 for (int ic = 0; ic < INVOC_COUNT; ic++) {
2207 for (int i = 0; i < a.length; i += SPECIES.length()) {
2208 ByteVector.broadcast(SPECIES, a[i]).intoArray(r, i);
2209 }
2210 }
2211
2212 bh.consume(r);
2213 }
2214
2215 @Benchmark
2216 public void zero(Blackhole bh) {
2217 byte[] a = fa.apply(SPECIES.length());
2218 byte[] r = new byte[a.length];
2219
2220 for (int ic = 0; ic < INVOC_COUNT; ic++) {
2221 for (int i = 0; i < a.length; i += SPECIES.length()) {
2222 ByteVector.zero(SPECIES).intoArray(a, i);
2223 }
2224 }
2225
2226 bh.consume(r);
2227 }
2228
2229 @Benchmark
2230 public void sliceUnary(Blackhole bh) {
2231 byte[] a = fa.apply(SPECIES.length());
2232 byte[] r = new byte[a.length];
2233 int origin = RAND.nextInt(SPECIES.length());
2234 for (int ic = 0; ic < INVOC_COUNT; ic++) {
2235 for (int i = 0; i < a.length; i += SPECIES.length()) {
2236 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
2237 av.slice(origin).intoArray(r, i);
2238 }
2239 }
2240
2241 bh.consume(r);
2242 }
2243
2244 @Benchmark
2245 public void sliceBinary(Blackhole bh) {
2246 byte[] a = fa.apply(SPECIES.length());
2247 byte[] b = fb.apply(SPECIES.length());
2248 byte[] r = new byte[a.length];
2249 int origin = RAND.nextInt(SPECIES.length());
2250 for (int ic = 0; ic < INVOC_COUNT; ic++) {
2251 for (int i = 0; i < a.length; i += SPECIES.length()) {
2252 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
2253 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
2254 av.slice(origin, bv).intoArray(r, i);
2255 }
2256 }
2257
2258 bh.consume(r);
2259 }
2260
2261 @Benchmark
2262 public void sliceMasked(Blackhole bh) {
2263 byte[] a = fa.apply(SPECIES.length());
2264 byte[] b = fb.apply(SPECIES.length());
2265 boolean[] mask = fm.apply(SPECIES.length());
2266 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
2267
2268 byte[] r = new byte[a.length];
2269 int origin = RAND.nextInt(SPECIES.length());
2270 for (int ic = 0; ic < INVOC_COUNT; ic++) {
2271 for (int i = 0; i < a.length; i += SPECIES.length()) {
2272 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
2273 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
2274 av.slice(origin, bv, vmask).intoArray(r, i);
2275 }
2276 }
2277
2278 bh.consume(r);
2279 }
2280
2281 @Benchmark
2282 public void unsliceUnary(Blackhole bh) {
2283 byte[] a = fa.apply(SPECIES.length());
2284 byte[] r = new byte[a.length];
2285 int origin = RAND.nextInt(SPECIES.length());
2286 for (int ic = 0; ic < INVOC_COUNT; ic++) {
2287 for (int i = 0; i < a.length; i += SPECIES.length()) {
2288 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
2289 av.unslice(origin).intoArray(r, i);
2290 }
2291 }
2292
2293 bh.consume(r);
2294 }
2295
2296 @Benchmark
2297 public void unsliceBinary(Blackhole bh) {
2298 byte[] a = fa.apply(SPECIES.length());
2299 byte[] b = fb.apply(SPECIES.length());
2300 byte[] r = new byte[a.length];
2301 int origin = RAND.nextInt(SPECIES.length());
2302 int part = RAND.nextInt(2);
2303 for (int ic = 0; ic < INVOC_COUNT; ic++) {
2304 for (int i = 0; i < a.length; i += SPECIES.length()) {
2305 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
2306 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
2307 av.unslice(origin, bv, part).intoArray(r, i);
2308 }
2309 }
2310
2311 bh.consume(r);
2312 }
2313
2314 @Benchmark
2315 public void unsliceMasked(Blackhole bh) {
2316 byte[] a = fa.apply(SPECIES.length());
2317 byte[] b = fb.apply(SPECIES.length());
2318 boolean[] mask = fm.apply(SPECIES.length());
2319 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
2320 byte[] r = new byte[a.length];
2321 int origin = RAND.nextInt(SPECIES.length());
2322 int part = RAND.nextInt(2);
2323 for (int ic = 0; ic < INVOC_COUNT; ic++) {
2324 for (int i = 0; i < a.length; i += SPECIES.length()) {
2325 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
2326 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
2327 av.unslice(origin, bv, part, vmask).intoArray(r, i);
2328 }
2329 }
2330
2331 bh.consume(r);
2332 }
2333
2334 @Benchmark
2335 public void BITWISE_BLEND(Blackhole bh) {
2336 byte[] a = fa.apply(SPECIES.length());
2337 byte[] b = fb.apply(SPECIES.length());
2338 byte[] c = fc.apply(SPECIES.length());
2339 byte[] r = fr.apply(SPECIES.length());
2340
2341 for (int ic = 0; ic < INVOC_COUNT; ic++) {
2342 for (int i = 0; i < a.length; i += SPECIES.length()) {
2343 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
2344 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
2345 ByteVector cv = ByteVector.fromArray(SPECIES, c, i);
2346 av.lanewise(VectorOperators.BITWISE_BLEND, bv, cv).intoArray(r, i);
2347 }
2348 }
2349
2350 bh.consume(r);
2351 }
2352
2353 @Benchmark
2354 public void BITWISE_BLENDMasked(Blackhole bh) {
2355 byte[] a = fa.apply(SPECIES.length());
2356 byte[] b = fb.apply(SPECIES.length());
2357 byte[] c = fc.apply(SPECIES.length());
2358 byte[] r = fr.apply(SPECIES.length());
2359 boolean[] mask = fm.apply(SPECIES.length());
2360 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
2361
2362 for (int ic = 0; ic < INVOC_COUNT; ic++) {
2363 for (int i = 0; i < a.length; i += SPECIES.length()) {
2364 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
2365 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
2366 ByteVector cv = ByteVector.fromArray(SPECIES, c, i);
2367 av.lanewise(VectorOperators.BITWISE_BLEND, bv, cv, vmask).intoArray(r, i);
2368 }
2369 }
2370
2371 bh.consume(r);
2372 }
2373
2374 @Benchmark
2375 public void NEG(Blackhole bh) {
2376 byte[] a = fa.apply(SPECIES.length());
2377 byte[] r = fr.apply(SPECIES.length());
2378
2379 for (int ic = 0; ic < INVOC_COUNT; ic++) {
2380 for (int i = 0; i < a.length; i += SPECIES.length()) {
2381 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
2382 av.lanewise(VectorOperators.NEG).intoArray(r, i);
2383 }
2384 }
2385
2386 bh.consume(r);
2387 }
2388
2389 @Benchmark
2390 public void NEGMasked(Blackhole bh) {
2391 byte[] a = fa.apply(SPECIES.length());
2392 byte[] r = fr.apply(SPECIES.length());
2393 boolean[] mask = fm.apply(SPECIES.length());
2394 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
2395
2396 for (int ic = 0; ic < INVOC_COUNT; ic++) {
2397 for (int i = 0; i < a.length; i += SPECIES.length()) {
2398 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
2399 av.lanewise(VectorOperators.NEG, vmask).intoArray(r, i);
2400 }
2401 }
2402
2403 bh.consume(r);
2404 }
2405
2406 @Benchmark
2407 public void ABS(Blackhole bh) {
2408 byte[] a = fa.apply(SPECIES.length());
2409 byte[] r = fr.apply(SPECIES.length());
2410
2411 for (int ic = 0; ic < INVOC_COUNT; ic++) {
2412 for (int i = 0; i < a.length; i += SPECIES.length()) {
2413 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
2414 av.lanewise(VectorOperators.ABS).intoArray(r, i);
2415 }
2416 }
2417
2418 bh.consume(r);
2419 }
2420
2421 @Benchmark
2422 public void ABSMasked(Blackhole bh) {
2423 byte[] a = fa.apply(SPECIES.length());
2424 byte[] r = fr.apply(SPECIES.length());
2425 boolean[] mask = fm.apply(SPECIES.length());
2426 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
2427
2428 for (int ic = 0; ic < INVOC_COUNT; ic++) {
2429 for (int i = 0; i < a.length; i += SPECIES.length()) {
2430 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
2431 av.lanewise(VectorOperators.ABS, vmask).intoArray(r, i);
2432 }
2433 }
2434
2435 bh.consume(r);
2436 }
2437
2438 @Benchmark
2439 public void NOT(Blackhole bh) {
2440 byte[] a = fa.apply(SPECIES.length());
2441 byte[] r = fr.apply(SPECIES.length());
2442
2443 for (int ic = 0; ic < INVOC_COUNT; ic++) {
2444 for (int i = 0; i < a.length; i += SPECIES.length()) {
2445 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
2446 av.lanewise(VectorOperators.NOT).intoArray(r, i);
2447 }
2448 }
2449
2450 bh.consume(r);
2451 }
2452
2453 @Benchmark
2454 public void NOTMasked(Blackhole bh) {
2455 byte[] a = fa.apply(SPECIES.length());
2456 byte[] r = fr.apply(SPECIES.length());
2457 boolean[] mask = fm.apply(SPECIES.length());
2458 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
2459
2460 for (int ic = 0; ic < INVOC_COUNT; ic++) {
2461 for (int i = 0; i < a.length; i += SPECIES.length()) {
2462 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
2463 av.lanewise(VectorOperators.NOT, vmask).intoArray(r, i);
2464 }
2465 }
2466
2467 bh.consume(r);
2468 }
2469
2470 @Benchmark
2471 public void ZOMO(Blackhole bh) {
2472 byte[] a = fa.apply(SPECIES.length());
2473 byte[] r = fr.apply(SPECIES.length());
2474
2475 for (int ic = 0; ic < INVOC_COUNT; ic++) {
2476 for (int i = 0; i < a.length; i += SPECIES.length()) {
2477 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
2478 av.lanewise(VectorOperators.ZOMO).intoArray(r, i);
2479 }
2480 }
2481
2482 bh.consume(r);
2483 }
2484
2485 @Benchmark
2486 public void ZOMOMasked(Blackhole bh) {
2487 byte[] a = fa.apply(SPECIES.length());
2488 byte[] r = fr.apply(SPECIES.length());
2489 boolean[] mask = fm.apply(SPECIES.length());
2490 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
2491
2492 for (int ic = 0; ic < INVOC_COUNT; ic++) {
2493 for (int i = 0; i < a.length; i += SPECIES.length()) {
2494 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
2495 av.lanewise(VectorOperators.ZOMO, vmask).intoArray(r, i);
2496 }
2497 }
2498
2499 bh.consume(r);
2500 }
2501
2502 @Benchmark
2503 public void BIT_COUNT(Blackhole bh) {
2504 byte[] a = fa.apply(SPECIES.length());
2505 byte[] r = fr.apply(SPECIES.length());
2506
2507 for (int ic = 0; ic < INVOC_COUNT; ic++) {
2508 for (int i = 0; i < a.length; i += SPECIES.length()) {
2509 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
2510 av.lanewise(VectorOperators.BIT_COUNT).intoArray(r, i);
2511 }
2512 }
2513
2514 bh.consume(r);
2515 }
2516
2517 @Benchmark
2518 public void BIT_COUNTMasked(Blackhole bh) {
2519 byte[] a = fa.apply(SPECIES.length());
2520 byte[] r = fr.apply(SPECIES.length());
2521 boolean[] mask = fm.apply(SPECIES.length());
2522 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
2523
2524 for (int ic = 0; ic < INVOC_COUNT; ic++) {
2525 for (int i = 0; i < a.length; i += SPECIES.length()) {
2526 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
2527 av.lanewise(VectorOperators.BIT_COUNT, vmask).intoArray(r, i);
2528 }
2529 }
2530
2531 bh.consume(r);
2532 }
2533
2534 @Benchmark
2535 public void TRAILING_ZEROS_COUNT(Blackhole bh) {
2536 byte[] a = fa.apply(SPECIES.length());
2537 byte[] r = fr.apply(SPECIES.length());
2538
2539 for (int ic = 0; ic < INVOC_COUNT; ic++) {
2540 for (int i = 0; i < a.length; i += SPECIES.length()) {
2541 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
2542 av.lanewise(VectorOperators.TRAILING_ZEROS_COUNT).intoArray(r, i);
2543 }
2544 }
2545
2546 bh.consume(r);
2547 }
2548
2549 @Benchmark
2550 public void TRAILING_ZEROS_COUNTMasked(Blackhole bh) {
2551 byte[] a = fa.apply(SPECIES.length());
2552 byte[] r = fr.apply(SPECIES.length());
2553 boolean[] mask = fm.apply(SPECIES.length());
2554 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
2555
2556 for (int ic = 0; ic < INVOC_COUNT; ic++) {
2557 for (int i = 0; i < a.length; i += SPECIES.length()) {
2558 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
2559 av.lanewise(VectorOperators.TRAILING_ZEROS_COUNT, vmask).intoArray(r, i);
2560 }
2561 }
2562
2563 bh.consume(r);
2564 }
2565
2566 @Benchmark
2567 public void LEADING_ZEROS_COUNT(Blackhole bh) {
2568 byte[] a = fa.apply(SPECIES.length());
2569 byte[] r = fr.apply(SPECIES.length());
2570
2571 for (int ic = 0; ic < INVOC_COUNT; ic++) {
2572 for (int i = 0; i < a.length; i += SPECIES.length()) {
2573 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
2574 av.lanewise(VectorOperators.LEADING_ZEROS_COUNT).intoArray(r, i);
2575 }
2576 }
2577
2578 bh.consume(r);
2579 }
2580
2581 @Benchmark
2582 public void LEADING_ZEROS_COUNTMasked(Blackhole bh) {
2583 byte[] a = fa.apply(SPECIES.length());
2584 byte[] r = fr.apply(SPECIES.length());
2585 boolean[] mask = fm.apply(SPECIES.length());
2586 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
2587
2588 for (int ic = 0; ic < INVOC_COUNT; ic++) {
2589 for (int i = 0; i < a.length; i += SPECIES.length()) {
2590 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
2591 av.lanewise(VectorOperators.LEADING_ZEROS_COUNT, vmask).intoArray(r, i);
2592 }
2593 }
2594
2595 bh.consume(r);
2596 }
2597
2598 @Benchmark
2599 public void REVERSE(Blackhole bh) {
2600 byte[] a = fa.apply(SPECIES.length());
2601 byte[] r = fr.apply(SPECIES.length());
2602
2603 for (int ic = 0; ic < INVOC_COUNT; ic++) {
2604 for (int i = 0; i < a.length; i += SPECIES.length()) {
2605 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
2606 av.lanewise(VectorOperators.REVERSE).intoArray(r, i);
2607 }
2608 }
2609
2610 bh.consume(r);
2611 }
2612
2613 @Benchmark
2614 public void REVERSEMasked(Blackhole bh) {
2615 byte[] a = fa.apply(SPECIES.length());
2616 byte[] r = fr.apply(SPECIES.length());
2617 boolean[] mask = fm.apply(SPECIES.length());
2618 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
2619
2620 for (int ic = 0; ic < INVOC_COUNT; ic++) {
2621 for (int i = 0; i < a.length; i += SPECIES.length()) {
2622 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
2623 av.lanewise(VectorOperators.REVERSE, vmask).intoArray(r, i);
2624 }
2625 }
2626
2627 bh.consume(r);
2628 }
2629
2630 @Benchmark
2631 public void REVERSE_BYTES(Blackhole bh) {
2632 byte[] a = fa.apply(SPECIES.length());
2633 byte[] r = fr.apply(SPECIES.length());
2634
2635 for (int ic = 0; ic < INVOC_COUNT; ic++) {
2636 for (int i = 0; i < a.length; i += SPECIES.length()) {
2637 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
2638 av.lanewise(VectorOperators.REVERSE_BYTES).intoArray(r, i);
2639 }
2640 }
2641
2642 bh.consume(r);
2643 }
2644
2645 @Benchmark
2646 public void REVERSE_BYTESMasked(Blackhole bh) {
2647 byte[] a = fa.apply(SPECIES.length());
2648 byte[] r = fr.apply(SPECIES.length());
2649 boolean[] mask = fm.apply(SPECIES.length());
2650 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0);
2651
2652 for (int ic = 0; ic < INVOC_COUNT; ic++) {
2653 for (int i = 0; i < a.length; i += SPECIES.length()) {
2654 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
2655 av.lanewise(VectorOperators.REVERSE_BYTES, vmask).intoArray(r, i);
2656 }
2657 }
2658
2659 bh.consume(r);
2660 }
2661 }