1 /*
2 * Copyright (c) 2024, 2025, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 */
23
24 package compiler.loopopts.superword;
25
26 import compiler.lib.ir_framework.*;
27 import jdk.test.lib.Utils;
28 import jdk.test.whitebox.WhiteBox;
29 import java.lang.reflect.Array;
30 import java.util.Map;
31 import java.util.HashMap;
32 import java.util.Random;
33 import java.nio.ByteOrder;
34
35 /*
36 * @test
37 * @bug 8326139 8348659
38 * @summary Test splitting packs in SuperWord
39 * @library /test/lib /
40 * @run driver compiler.loopopts.superword.TestSplitPacks nCOH_nAV
41 * @run driver compiler.loopopts.superword.TestSplitPacks nCOH_yAV
42 * @run driver compiler.loopopts.superword.TestSplitPacks yCOH_nAV
43 * @run driver compiler.loopopts.superword.TestSplitPacks yCOH_yAV
44 */
45
46 public class TestSplitPacks {
// Element count of every generated input array; also used by the test loops as their bound.
static int RANGE = 8 * 1024;
// Same value as RANGE; not referenced in the visible part of this file.
static int RANGE_FINAL = 8 * 1024;
// Random source obtained from the test library; fills all input arrays.
private static final Random RANDOM = Utils.getRandomInstance();

// Inputs: two arrays (a*, b*) and one scalar operand (m*) per primitive type
byte[] aB, bB;
byte mB = (byte) 31;
short[] aS, bS;
short mS = (short) 0xF0F0;
int[] aI, bI;
int mI = 0xF0F0F0F0;
long[] aL, bL;
long mL = 0xF0F0F0F0F0F0F0F0L;

// Registered tests, keyed by test method name
Map<String, TestFunction> tests = new HashMap<>();

// Gold results per test name: the results from the first run before compilation
Map<String, Object[]> golds = new HashMap<>();
70
/**
 * One registered test case. The implementations registered in the constructor
 * call a single test method and hand back the arrays it returns, which are
 * then compared element-wise against the gold results.
 */
@FunctionalInterface
interface TestFunction {
    /**
     * Runs the test once.
     *
     * @return the output arrays of the test, in a fixed order
     */
    Object[] run();
}
74
75 public static void main(String[] args) {
76 TestFramework framework = new TestFramework(TestSplitPacks.class);
77 framework.addFlags("-XX:+IgnoreUnrecognizedVMOptions", "-XX:LoopUnrollLimit=1000");
78 switch (args[0]) {
79 case "nCOH_nAV" -> { framework.addFlags("-XX:-UseCompactObjectHeaders", "-XX:-AlignVector"); }
80 case "nCOH_yAV" -> { framework.addFlags("-XX:-UseCompactObjectHeaders", "-XX:+AlignVector"); }
81 case "yCOH_nAV" -> { framework.addFlags("-XX:+UseCompactObjectHeaders", "-XX:-AlignVector"); }
82 case "yCOH_yAV" -> { framework.addFlags("-XX:+UseCompactObjectHeaders", "-XX:+AlignVector"); }
83 default -> { throw new RuntimeException("Test argument not recognized: " + args[0]); }
84 };
85 framework.start();
86 }
87
/**
 * Creates the inputs, registers every test under its method name and records
 * the gold result of each test by running it once right away.
 */
public TestSplitPacks() {
    // Inputs are generated a single time; each test run works on clones.
    aB = generateB();
    bB = generateB();
    aS = generateS();
    bS = generateS();
    aI = generateI();
    bI = generateI();
    aL = generateL();
    bL = generateL();

    // Register all tests
    tests.put("test0",  () -> test0(aI.clone(), bI.clone(), mI));
    tests.put("test1a", () -> test1a(aI.clone(), bI.clone(), mI));
    tests.put("test1b", () -> test1b(aI.clone(), bI.clone(), mI));
    tests.put("test1c", () -> test1c(aI.clone(), bI.clone(), mI));
    tests.put("test1d", () -> test1d(aI.clone(), bI.clone(), mI));
    tests.put("test2a", () -> test2a(aI.clone(), bI.clone(), mI));
    tests.put("test2b", () -> test2b(aI.clone(), bI.clone(), mI));
    tests.put("test2c", () -> test2c(aI.clone(), bI.clone(), mI));
    tests.put("test2d", () -> test2d(aI.clone(), bI.clone(), mI));
    tests.put("test3a", () -> test3a(aS.clone(), bS.clone(), mS));
    tests.put("test4a", () -> test4a(aS.clone(), bS.clone()));
    tests.put("test4b", () -> test4b(aS.clone(), bS.clone()));
    tests.put("test4c", () -> test4c(aS.clone(), bS.clone()));
    tests.put("test4d", () -> test4d(aS.clone(), bS.clone()));
    tests.put("test4e", () -> test4e(aS.clone(), bS.clone()));
    tests.put("test4f", () -> test4f(aS.clone(), bS.clone()));
    tests.put("test4g", () -> test4g(aS.clone(), bS.clone()));
    tests.put("test5a", () -> test5a(aS.clone(), bS.clone(), mS));
    tests.put("test6a", () -> test6a(aI.clone(), bI.clone()));
    tests.put("test7a", () -> test7a(aI.clone(), bI.clone()));

    // First run of every test, before compilation: its result is the gold value
    tests.forEach((name, test) -> golds.put(name, test.run()));
}
129
130 @Warmup(100)
131 @Run(test = {"test0",
132 "test1a",
133 "test1b",
134 "test1c",
135 "test1d",
136 "test2a",
137 "test2b",
138 "test2c",
139 "test2d",
140 "test3a",
141 "test4a",
142 "test4b",
143 "test4c",
144 "test4d",
145 "test4e",
146 "test4f",
147 "test4g",
148 "test5a",
149 "test6a",
150 "test7a"})
151 public void runTests() {
152 for (Map.Entry<String,TestFunction> entry : tests.entrySet()) {
153 String name = entry.getKey();
154 TestFunction test = entry.getValue();
155 // Recall gold value from before compilation
156 Object[] gold = golds.get(name);
157 // Compute new result
158 Object[] result = test.run();
159 // Compare gold and new result
160 verify(name, gold, result);
161 }
162 }
163
164 static byte[] generateB() {
165 byte[] a = new byte[RANGE];
166 for (int i = 0; i < a.length; i++) {
167 a[i] = (byte)RANDOM.nextInt();
168 }
169 return a;
170 }
171
172 static short[] generateS() {
173 short[] a = new short[RANGE];
174 for (int i = 0; i < a.length; i++) {
175 a[i] = (short)RANDOM.nextInt();
176 }
177 return a;
178 }
179
180 static int[] generateI() {
181 int[] a = new int[RANGE];
182 for (int i = 0; i < a.length; i++) {
183 a[i] = RANDOM.nextInt();
184 }
185 return a;
186 }
187
188 static long[] generateL() {
189 long[] a = new long[RANGE];
190 for (int i = 0; i < a.length; i++) {
191 a[i] = RANDOM.nextLong();
192 }
193 return a;
194 }
195
196 static void verify(String name, Object[] gold, Object[] result) {
197 if (gold.length != result.length) {
198 throw new RuntimeException("verify " + name + ": not the same number of outputs: gold.length = " +
199 gold.length + ", result.length = " + result.length);
200 }
201 for (int i = 0; i < gold.length; i++) {
202 Object g = gold[i];
203 Object r = result[i];
204 if (g.getClass() != r.getClass() || !g.getClass().isArray() || !r.getClass().isArray()) {
205 throw new RuntimeException("verify " + name + ": must both be array of same type:" +
206 " gold[" + i + "].getClass() = " + g.getClass().getSimpleName() +
207 " result[" + i + "].getClass() = " + r.getClass().getSimpleName());
208 }
209 if (g == r) {
210 throw new RuntimeException("verify " + name + ": should be two separate arrays (with identical content):" +
211 " gold[" + i + "] == result[" + i + "]");
212 }
213 if (Array.getLength(g) != Array.getLength(r)) {
214 throw new RuntimeException("verify " + name + ": arrays must have same length:" +
215 " gold[" + i + "].length = " + Array.getLength(g) +
216 " result[" + i + "].length = " + Array.getLength(r));
217 }
218 Class c = g.getClass().getComponentType();
219 if (c == byte.class) {
220 verifyB(name, i, (byte[])g, (byte[])r);
221 } else if (c == short.class) {
222 verifyS(name, i, (short[])g, (short[])r);
223 } else if (c == int.class) {
224 verifyI(name, i, (int[])g, (int[])r);
225 } else if (c == long.class) {
226 verifyL(name, i, (long[])g, (long[])r);
227 } else {
228 throw new RuntimeException("verify " + name + ": array type not supported for verify:" +
229 " gold[" + i + "].getClass() = " + g.getClass().getSimpleName() +
230 " result[" + i + "].getClass() = " + r.getClass().getSimpleName());
231 }
232 }
233 }
234
235 static void verifyB(String name, int i, byte[] g, byte[] r) {
236 for (int j = 0; j < g.length; j++) {
237 if (g[j] != r[j]) {
238 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
239 " gold[" + i + "][" + j + "] = " + g[j] +
240 " result[" + i + "][" + j + "] = " + r[j]);
241 }
242 }
243 }
244
245 static void verifyS(String name, int i, short[] g, short[] r) {
246 for (int j = 0; j < g.length; j++) {
247 if (g[j] != r[j]) {
248 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
249 " gold[" + i + "][" + j + "] = " + g[j] +
250 " result[" + i + "][" + j + "] = " + r[j]);
251 }
252 }
253 }
254
255 static void verifyI(String name, int i, int[] g, int[] r) {
256 for (int j = 0; j < g.length; j++) {
257 if (g[j] != r[j]) {
258 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
259 " gold[" + i + "][" + j + "] = " + g[j] +
260 " result[" + i + "][" + j + "] = " + r[j]);
261 }
262 }
263 }
264
265 static void verifyL(String name, int i, long[] g, long[] r) {
266 for (int j = 0; j < g.length; j++) {
267 if (g[j] != r[j]) {
268 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
269 " gold[" + i + "][" + j + "] = " + g[j] +
270 " result[" + i + "][" + j + "] = " + r[j]);
271 }
272 }
273 }
274
275 @Test
276 @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
277 IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
278 IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0",
279 IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
280 IRNode.STORE_VECTOR, "> 0"},
281 applyIf = {"MaxVectorSize", ">=32"},
282 applyIfPlatform = {"64-bit", "true"},
283 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
284 // Load and store are already split
285 //
286 // 0 1 - - 4 5 6 7
287 // | | | | | |
288 // 0 1 - - 4 5 6 7
289 static Object[] test0(int[] a, int[] b, int mask) {
290 for (int i = 0; i < RANGE; i+=8) {
291 int b0 = a[i+0] & mask;
292 int b1 = a[i+1] & mask;
293
294 int b4 = a[i+4] & mask;
295 int b5 = a[i+5] & mask;
296 int b6 = a[i+6] & mask;
297 int b7 = a[i+7] & mask;
298
299 b[i+0] = b0;
300 b[i+1] = b1;
301
302 b[i+4] = b4;
303 b[i+5] = b5;
304 b[i+6] = b6;
305 b[i+7] = b7;
306 }
307 return new Object[]{ a, b };
308 }
309
310 @Test
311 @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
312 IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
313 IRNode.ADD_VI, IRNode.VECTOR_SIZE_4, "> 0",
314 IRNode.MUL_VI, IRNode.VECTOR_SIZE_2, "> 0",
315 IRNode.STORE_VECTOR, "> 0"},
316 applyIf = {"MaxVectorSize", ">=32"},
317 applyIfPlatform = {"64-bit", "true"},
318 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
319 // Adjacent Load and Store, but split by Add/Mul
320 static Object[] test1a(int[] a, int[] b, int mask) {
321 for (int i = 0; i < RANGE; i+=8) {
322 b[i+0] = a[i+0] + mask; // Add
323 b[i+1] = a[i+1] + mask;
324 b[i+2] = a[i+2] + mask;
325 b[i+3] = a[i+3] + mask;
326
327 b[i+4] = a[i+4] * mask; // Mul
328 b[i+5] = a[i+5] * mask;
329 }
330 return new Object[]{ a, b };
331 }
332
333 @Test
334 @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
335 IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
336 IRNode.ADD_VI, IRNode.VECTOR_SIZE_2, "> 0",
337 IRNode.MUL_VI, IRNode.VECTOR_SIZE_4, "> 0",
338 IRNode.STORE_VECTOR, "> 0"},
339 applyIf = {"MaxVectorSize", ">=32"},
340 applyIfPlatform = {"64-bit", "true"},
341 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
342 // Adjacent Load and Store, but split by Add/Mul
343 static Object[] test1b(int[] a, int[] b, int mask) {
344 for (int i = 0; i < RANGE; i+=8) {
345 b[i+0] = a[i+0] * mask; // Mul
346 b[i+1] = a[i+1] * mask;
347 b[i+2] = a[i+2] * mask;
348 b[i+3] = a[i+3] * mask;
349
350 b[i+4] = a[i+4] + mask; // Add
351 b[i+5] = a[i+5] + mask;
352 }
353 return new Object[]{ a, b };
354 }
355
356 @Test
357 @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
358 IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
359 IRNode.ADD_VI, IRNode.VECTOR_SIZE_2, "> 0",
360 IRNode.MUL_VI, IRNode.VECTOR_SIZE_4, "> 0",
361 IRNode.STORE_VECTOR, "> 0"},
362 applyIf = {"MaxVectorSize", ">=32"},
363 applyIfPlatform = {"64-bit", "true"},
364 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
365 // Adjacent Load and Store, but split by Add/Mul
366 static Object[] test1c(int[] a, int[] b, int mask) {
367 for (int i = 0; i < RANGE; i+=8) {
368 b[i+0] = a[i+0] + mask; // Add
369 b[i+1] = a[i+1] + mask;
370
371 b[i+2] = a[i+2] * mask; // Mul
372 b[i+3] = a[i+3] * mask;
373 b[i+4] = a[i+4] * mask;
374 b[i+5] = a[i+5] * mask;
375 }
376 return new Object[]{ a, b };
377 }
378
379 @Test
380 @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
381 IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
382 IRNode.ADD_VI, IRNode.VECTOR_SIZE_4, "> 0",
383 IRNode.MUL_VI, IRNode.VECTOR_SIZE_2, "> 0",
384 IRNode.STORE_VECTOR, "> 0"},
385 applyIf = {"MaxVectorSize", ">=32"},
386 applyIfPlatform = {"64-bit", "true"},
387 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
388 // Adjacent Load and Store, but split by Add/Mul
389 static Object[] test1d(int[] a, int[] b, int mask) {
390 for (int i = 0; i < RANGE; i+=8) {
391 b[i+0] = a[i+0] * mask; // Mul
392 b[i+1] = a[i+1] * mask;
393
394 b[i+2] = a[i+2] + mask; // Add
395 b[i+3] = a[i+3] + mask;
396 b[i+4] = a[i+4] + mask;
397 b[i+5] = a[i+5] + mask;
398 }
399 return new Object[]{ a, b };
400 }
401
402 @Test
403 @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
404 IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
405 IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0",
406 IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
407 IRNode.STORE_VECTOR, "> 0"},
408 applyIf = {"MaxVectorSize", ">=32"},
409 applyIfPlatform = {"64-bit", "true"},
410 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
411 // Split the load
412 //
413 // 0 1 2 3 4 5 - -
414 // | | \ \ \ \
415 // | | \ \ \ \
416 // | | \ \ \ \
417 // 0 1 - - 4 5 6 7
418 //
419 static Object[] test2a(int[] a, int[] b, int mask) {
420 for (int i = 0; i < RANGE; i+=8) {
421 int b0 = a[i+0] & mask;
422 int b1 = a[i+1] & mask;
423 int b2 = a[i+2] & mask;
424 int b3 = a[i+3] & mask;
425 int b4 = a[i+4] & mask;
426 int b5 = a[i+5] & mask;
427
428 b[i+0] = b0;
429 b[i+1] = b1;
430
431 b[i+4] = b2;
432 b[i+5] = b3;
433 b[i+6] = b4;
434 b[i+7] = b5;
435 }
436 return new Object[]{ a, b };
437 }
438
439 @Test
440 @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
441 IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
442 IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0",
443 IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
444 IRNode.STORE_VECTOR, "> 0"},
445 applyIf = {"MaxVectorSize", ">=32"},
446 applyIfPlatform = {"64-bit", "true"},
447 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
448 // Split the load
449 //
450 // 0 1 2 3 4 5 - -
451 // | | | | \ \
452 // | | | | \ \
453 // | | | | \ \
454 // 0 1 2 3 -- 6 7
455 //
456 static Object[] test2b(int[] a, int[] b, int mask) {
457 for (int i = 0; i < RANGE; i+=8) {
458 int b0 = a[i+0] & mask;
459 int b1 = a[i+1] & mask;
460 int b2 = a[i+2] & mask;
461 int b3 = a[i+3] & mask;
462 int b4 = a[i+4] & mask;
463 int b5 = a[i+5] & mask;
464
465 b[i+0] = b0;
466 b[i+1] = b1;
467 b[i+2] = b2;
468 b[i+3] = b3;
469
470 b[i+6] = b4;
471 b[i+7] = b5;
472 }
473 return new Object[]{ a, b };
474 }
475
476 @Test
477 @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
478 IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
479 IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0",
480 IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
481 IRNode.STORE_VECTOR, "> 0"},
482 applyIf = {"MaxVectorSize", ">=32"},
483 applyIfPlatform = {"64-bit", "true"},
484 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
485 // Split the load
486 //
487 // 0 1 - - 4 5 6 7
488 // | | / / / /
489 // | | / / / /
490 // | | / / / /
491 // 0 1 2 3 4 5 - -
492 //
493 static Object[] test2c(int[] a, int[] b, int mask) {
494 for (int i = 0; i < RANGE; i+=8) {
495 int b0 = a[i+0] & mask;
496 int b1 = a[i+1] & mask;
497
498 int b4 = a[i+4] & mask;
499 int b5 = a[i+5] & mask;
500 int b6 = a[i+6] & mask;
501 int b7 = a[i+7] & mask;
502
503 b[i+0] = b0;
504 b[i+1] = b1;
505 b[i+2] = b4;
506 b[i+3] = b5;
507 b[i+4] = b6;
508 b[i+5] = b7;
509 }
510 return new Object[]{ a, b };
511 }
512
513 @Test
514 @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
515 IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
516 IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0",
517 IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
518 IRNode.STORE_VECTOR, "> 0"},
519 applyIf = {"MaxVectorSize", ">=32"},
520 applyIfPlatform = {"64-bit", "true"},
521 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
522 // Split the load
523 //
524 // 0 1 2 3 - - 6 7
525 // | | | | / /
526 // | | | | / /
527 // | | | | / /
528 // 0 1 2 3 4 5 - -
529 //
530 static Object[] test2d(int[] a, int[] b, int mask) {
531 for (int i = 0; i < RANGE; i+=8) {
532 int b0 = a[i+0] & mask;
533 int b1 = a[i+1] & mask;
534 int b2 = a[i+2] & mask;
535 int b3 = a[i+3] & mask;
536
537 int b6 = a[i+6] & mask;
538 int b7 = a[i+7] & mask;
539
540 b[i+0] = b0;
541 b[i+1] = b1;
542 b[i+2] = b2;
543 b[i+3] = b3;
544 b[i+4] = b6;
545 b[i+5] = b7;
546 }
547 return new Object[]{ a, b };
548 }
549
550 @Test
551 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
552 IRNode.STORE_VECTOR, "> 0"},
553 applyIf = {"MaxVectorSize", ">=32"},
554 applyIfPlatform = {"64-bit", "true"},
555 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
556 // 0 1 2 3 4 5 6 7 -
557 // | | | | | | | |
558 // | + + + | | | |
559 // | | | | |
560 // | v | | | | v
561 // | | | | | | |
562 // 1 - - 3 4 5 6 7 8
563 static Object[] test3a(short[] a, short[] b, short val) {
564 int sum = 0;
565 for (int i = 0; i < RANGE; i+=16) {
566 short a0 = a[i+0]; // required for alignment / offsets, technical limitation.
567
568 short a1 = a[i+1]; // adjacent to 4-pack, but need to be split off
569 short a2 = a[i+2];
570 short a3 = a[i+3];
571
572 short a4 = a[i+4]; // 4-pack
573 short a5 = a[i+5];
574 short a6 = a[i+6];
575 short a7 = a[i+7];
576
577
578 b[i+0] = a0; // required for alignment / offsets, technical limitation.
579
580 sum += a1 + a2 + a3; // not packed
581
582 b[i+3] = val; // adjacent to 4-pack but needs to be split off
583
584 b[i+4] = a4; // 4-pack
585 b[i+5] = a5;
586 b[i+6] = a6;
587 b[i+7] = a7;
588
589 b[i+8] = val; // adjacent to 4-pack but needs to be split off
590 }
591 return new Object[]{ a, b, new int[]{ sum } };
592 }
593
594 @Test
595 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_2, "> 0",
596 IRNode.STORE_VECTOR, "> 0"},
597 applyIfPlatform = {"64-bit", "true"},
598 applyIfCPUFeatureOr = {"sse4.1", "true"})
599 // Cyclic dependency with distance 2 -> split into 2-packs
600 static Object[] test4a(short[] a, short[] b) {
601 for (int i = 0; i < RANGE-64; i++) {
602 b[i+2] = a[i+0];
603 }
604 return new Object[]{ a, b };
605 }
606
607 @Test
608 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_2, "> 0",
609 IRNode.STORE_VECTOR, "> 0"},
610 applyIf = {"AlignVector", "false"},
611 applyIfPlatform = {"64-bit", "true"},
612 applyIfCPUFeatureOr = {"sse4.1", "true"})
613 // Cyclic dependency with distance 3 -> split into 2-packs
614 static Object[] test4b(short[] a, short[] b) {
615 for (int i = 0; i < RANGE-64; i++) {
616 b[i+3] = a[i+0];
617 }
618 return new Object[]{ a, b };
619 }
620
621 @Test
622 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
623 IRNode.STORE_VECTOR, "> 0"},
624 applyIf = {"MaxVectorSize", ">=8"},
625 applyIfPlatform = {"64-bit", "true"},
626 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
627 // Cyclic dependency with distance 4 -> split into 4-packs
628 static Object[] test4c(short[] a, short[] b) {
629 for (int i = 0; i < RANGE-64; i++) {
630 b[i+4] = a[i+0];
631 }
632 return new Object[]{ a, b };
633 }
634
635 @Test
636 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
637 IRNode.STORE_VECTOR, "> 0"},
638 applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false"},
639 applyIfPlatform = {"64-bit", "true"},
640 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
641 // Cyclic dependency with distance 5 -> split into 4-packs
642 static Object[] test4d(short[] a, short[] b) {
643 for (int i = 0; i < RANGE-64; i++) {
644 b[i+5] = a[i+0];
645 }
646 return new Object[]{ a, b };
647 }
648
649 @Test
650 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
651 IRNode.STORE_VECTOR, "> 0"},
652 applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false"},
653 applyIfPlatform = {"64-bit", "true"},
654 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
655 // Cyclic dependency with distance 6 -> split into 4-packs
656 static Object[] test4e(short[] a, short[] b) {
657 for (int i = 0; i < RANGE-64; i++) {
658 b[i+6] = a[i+0];
659 }
660 return new Object[]{ a, b };
661 }
662
663 @Test
664 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
665 IRNode.STORE_VECTOR, "> 0"},
666 applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false"},
667 applyIfPlatform = {"64-bit", "true"},
668 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
669 // Cyclic dependency with distance 7 -> split into 4-packs
670 static Object[] test4f(short[] a, short[] b) {
671 for (int i = 0; i < RANGE-64; i++) {
672 b[i+7] = a[i+0];
673 }
674 return new Object[]{ a, b };
675 }
676
677 @Test
678 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_8, "> 0",
679 IRNode.STORE_VECTOR, "> 0"},
680 applyIf = {"MaxVectorSize", ">=32"},
681 applyIfPlatform = {"64-bit", "true"},
682 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
683 // Cyclic dependency with distance 8 -> split into 8-packs
684 static Object[] test4g(short[] a, short[] b) {
685 for (int i = 0; i < RANGE-64; i++) {
686 b[i+8] = a[i+0];
687 }
688 return new Object[]{ a, b };
689 }
690
691 @Test
692 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_2, "> 0",
693 IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
694 IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_8, "> 0",
695 IRNode.ADD_VS, IRNode.VECTOR_SIZE_2, "> 0",
696 IRNode.ADD_VS, IRNode.VECTOR_SIZE_8, "> 0",
697 IRNode.ADD_VS, IRNode.VECTOR_SIZE_4, "> 0",
698 IRNode.STORE_VECTOR, "> 0"},
699 applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
700 applyIfPlatform = {"64-bit", "true"},
701 applyIfCPUFeature = {"sse4.1", "true"})
702 // aarch64 limits minimum vector size to 8B, thus a vector size of
703 // length 2 for type "short" will not be generated
704 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
705 IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_8, "> 0",
706 IRNode.ADD_VS, IRNode.VECTOR_SIZE_8, "> 0",
707 IRNode.ADD_VS, IRNode.VECTOR_SIZE_4, "> 0",
708 IRNode.STORE_VECTOR, "> 0"},
709 applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
710 applyIfPlatform = {"64-bit", "true"},
711 applyIfCPUFeature = {"sve", "true"})
712 // Split pack into power-of-2 sizes
713 static Object[] test5a(short[] a, short[] b, short val) {
714 for (int i = 0; i < RANGE; i+=16) {
715 b[i+ 0] = (short)(a[i+ 0] + val); // 8 pack
716 b[i+ 1] = (short)(a[i+ 1] + val);
717 b[i+ 2] = (short)(a[i+ 2] + val);
718 b[i+ 3] = (short)(a[i+ 3] + val);
719 b[i+ 4] = (short)(a[i+ 4] + val);
720 b[i+ 5] = (short)(a[i+ 5] + val);
721 b[i+ 6] = (short)(a[i+ 6] + val);
722 b[i+ 7] = (short)(a[i+ 7] + val);
723
724 b[i+ 8] = (short)(a[i+ 8] + val); // 4-pack
725 b[i+ 9] = (short)(a[i+ 9] + val);
726 b[i+10] = (short)(a[i+10] + val);
727 b[i+11] = (short)(a[i+11] + val);
728
729 b[i+12] = (short)(a[i+12] + val); // 2-pack
730 b[i+13] = (short)(a[i+13] + val);
731
732 b[i+14] = (short)(a[i+14] + val);
733 }
734 return new Object[]{ a, b };
735 }
736
737 @Test
738 @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
739 IRNode.MUL_VI, IRNode.VECTOR_SIZE_4, "> 0",
740 IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
741 IRNode.ADD_VI, IRNode.VECTOR_SIZE_4, "> 0", // reduction moved out of loop
742 IRNode.ADD_REDUCTION_V, "> 0"},
743 applyIf = {"MaxVectorSize", ">=32"},
744 applyIfPlatform = {"64-bit", "true"},
745 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
746 // Split packs including reductions
747 static Object[] test6a(int[] a, int[] b) {
748 int s = 0;
749 for (int i = 0; i < RANGE; i+=8) {
750 s += a[i+0] * b[i+0];
751 s += a[i+1] * b[i+1];
752 s += a[i+2] * b[i+2];
753 s += a[i+3] * b[i+3];
754
755 s += a[i+4] & b[i+4];
756 s += a[i+5] & b[i+5];
757 s += a[i+6] & b[i+6];
758 s += a[i+7] & b[i+7];
759 }
760 return new Object[]{ a, b, new int[]{ s } };
761 }
762
763 @Test
764 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
765 IRNode.MUL_VI, "> 0",
766 IRNode.POPULATE_INDEX, "> 0"},
767 applyIfPlatform = {"64-bit", "true"},
768 applyIfCPUFeatureOr = {"avx2", "true", "sve", "true", "rvv", "true"})
769 // Index Populate:
770 // There can be an issue when all the (iv + 1), (iv + 2), ...
771 // get packed, but not (iv). Then we have a pack that is one element
772 // too short, and we start splitting everything in a bad way.
773 static Object[] test7a(int[] a, int[] b) {
774 for (int i = 0; i < RANGE; i++) {
775 a[i] = b[i] * i;
776 }
777 return new Object[]{ a, b };
778 }
779 }