1 /*
2 * Copyright (c) 2024, 2025, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 */
23
24 package compiler.loopopts.superword;
25
26 import compiler.lib.ir_framework.*;
27 import jdk.test.lib.Utils;
28 import jdk.test.whitebox.WhiteBox;
29 import java.lang.reflect.Array;
30 import java.util.Map;
31 import java.util.HashMap;
32 import java.util.Random;
33 import java.nio.ByteOrder;
34
35 /*
36 * @test
37 * @bug 8326139 8348659
38 * @summary Test splitting packs in SuperWord
39 * @library /test/lib /
40 * @run driver compiler.loopopts.superword.TestSplitPacks nCOH_nAV_ySAC
41 * @run driver compiler.loopopts.superword.TestSplitPacks nCOH_yAV_ySAC
42 * @run driver compiler.loopopts.superword.TestSplitPacks yCOH_nAV_ySAC
43 * @run driver compiler.loopopts.superword.TestSplitPacks yCOH_yAV_ySAC
44 * @run driver compiler.loopopts.superword.TestSplitPacks nCOH_nAV_nSAC
45 * @run driver compiler.loopopts.superword.TestSplitPacks nCOH_yAV_nSAC
46 * @run driver compiler.loopopts.superword.TestSplitPacks yCOH_nAV_nSAC
47 * @run driver compiler.loopopts.superword.TestSplitPacks yCOH_yAV_nSAC
48 */
49
50 public class TestSplitPacks {
51 static int RANGE = 1024*8;
52 static int RANGE_FINAL = 1024*8;
53 private static final Random RANDOM = Utils.getRandomInstance();
54
55 // Inputs
56 byte[] aB;
57 byte[] bB;
58 byte mB = (byte)31;
59 short[] aS;
60 short[] bS;
61 short mS = (short)0xF0F0;
62 int[] aI;
63 int[] bI;
64 int mI = 0xF0F0F0F0;
65 long[] aL;
66 long[] bL;
67 long mL = 0xF0F0F0F0F0F0F0F0L;
68
69 // List of tests
70 Map<String,TestFunction> tests = new HashMap<String,TestFunction>();
71
72 // List of gold, the results from the first run before compilation
73 Map<String,Object[]> golds = new HashMap<String,Object[]>();
74
75 interface TestFunction {
76 Object[] run();
77 }
78
79 public static void main(String[] args) {
80 TestFramework framework = new TestFramework(TestSplitPacks.class);
81 framework.addFlags("-XX:+IgnoreUnrecognizedVMOptions", "-XX:LoopUnrollLimit=1000");
82 switch (args[0]) {
83 case "nCOH_nAV_ySAC" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:-AlignVector", "-XX:+UseAutoVectorizationSpeculativeAliasingChecks"); }
84 case "nCOH_yAV_ySAC" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:+AlignVector", "-XX:+UseAutoVectorizationSpeculativeAliasingChecks"); }
85 case "yCOH_nAV_ySAC" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:-AlignVector", "-XX:+UseAutoVectorizationSpeculativeAliasingChecks"); }
86 case "yCOH_yAV_ySAC" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:+AlignVector", "-XX:+UseAutoVectorizationSpeculativeAliasingChecks"); }
87 case "nCOH_nAV_nSAC" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:-AlignVector", "-XX:-UseAutoVectorizationSpeculativeAliasingChecks"); }
88 case "nCOH_yAV_nSAC" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:+AlignVector", "-XX:-UseAutoVectorizationSpeculativeAliasingChecks"); }
89 case "yCOH_nAV_nSAC" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:-AlignVector", "-XX:-UseAutoVectorizationSpeculativeAliasingChecks"); }
90 case "yCOH_yAV_nSAC" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:+AlignVector", "-XX:-UseAutoVectorizationSpeculativeAliasingChecks"); }
91 default -> { throw new RuntimeException("Test argument not recognized: " + args[0]); }
92 };
93 framework.start();
94 }
95
// Generates the random inputs, registers every test under the name of its test
// method, and records a gold result for each test by running it once right here.
public TestSplitPacks() {
    // Generate input once
    aB = generateB();
    bB = generateB();
    aS = generateS();
    bS = generateS();
    aI = generateI();
    bI = generateI();
    aL = generateL();
    bL = generateL();

    // Add all tests to list. Every invocation works on clones, so the shared
    // inputs are never modified. The "_alias" variants pass the same array twice.
    tests.put("test0",        () -> test0(aI.clone(), bI.clone(), mI));
    tests.put("test1a",       () -> test1a(aI.clone(), bI.clone(), mI));
    tests.put("test1b",       () -> test1b(aI.clone(), bI.clone(), mI));
    tests.put("test1c",       () -> test1c(aI.clone(), bI.clone(), mI));
    tests.put("test1d",       () -> test1d(aI.clone(), bI.clone(), mI));
    tests.put("test2a",       () -> test2a(aI.clone(), bI.clone(), mI));
    tests.put("test2b",       () -> test2b(aI.clone(), bI.clone(), mI));
    tests.put("test2c",       () -> test2c(aI.clone(), bI.clone(), mI));
    tests.put("test2d",       () -> test2d(aI.clone(), bI.clone(), mI));
    tests.put("test3a",       () -> test3a(aS.clone(), bS.clone(), mS));
    tests.put("test4a",       () -> test4a(aS.clone(), bS.clone()));
    tests.put("test4b",       () -> test4b(aS.clone(), bS.clone()));
    tests.put("test4c",       () -> test4c(aS.clone(), bS.clone()));
    tests.put("test4d",       () -> test4d(aS.clone(), bS.clone()));
    tests.put("test4e",       () -> test4e(aS.clone(), bS.clone()));
    tests.put("test4f",       () -> test4f(aS.clone(), bS.clone()));
    tests.put("test4g",       () -> test4g(aS.clone(), bS.clone()));
    tests.put("test4a_alias", () -> { short[] x = aS.clone(); return test4a_alias(x, x); });
    tests.put("test4b_alias", () -> { short[] x = aS.clone(); return test4b_alias(x, x); });
    tests.put("test4c_alias", () -> { short[] x = aS.clone(); return test4c_alias(x, x); });
    tests.put("test4d_alias", () -> { short[] x = aS.clone(); return test4d_alias(x, x); });
    tests.put("test4e_alias", () -> { short[] x = aS.clone(); return test4e_alias(x, x); });
    tests.put("test4f_alias", () -> { short[] x = aS.clone(); return test4f_alias(x, x); });
    tests.put("test4g_alias", () -> { short[] x = aS.clone(); return test4g_alias(x, x); });
    tests.put("test5a",       () -> test5a(aS.clone(), bS.clone(), mS));
    tests.put("test6a",       () -> test6a(aI.clone(), bI.clone()));
    tests.put("test7a",       () -> test7a(aI.clone(), bI.clone()));

    // Compute gold value for all test methods before compilation
    for (Map.Entry<String,TestFunction> entry : tests.entrySet()) {
        String name = entry.getKey();
        TestFunction test = entry.getValue();
        Object[] gold = test.run();
        golds.put(name, gold);
    }
}
144
145 @Warmup(100)
146 @Run(test = {"test0",
147 "test1a",
148 "test1b",
149 "test1c",
150 "test1d",
151 "test2a",
152 "test2b",
153 "test2c",
154 "test2d",
155 "test3a",
156 "test4a",
157 "test4b",
158 "test4c",
159 "test4d",
160 "test4e",
161 "test4f",
162 "test4g",
163 "test4a_alias",
164 "test4b_alias",
165 "test4c_alias",
166 "test4d_alias",
167 "test4e_alias",
168 "test4f_alias",
169 "test4g_alias",
170 "test5a",
171 "test6a",
172 "test7a"})
173 public void runTests() {
174 for (Map.Entry<String,TestFunction> entry : tests.entrySet()) {
175 String name = entry.getKey();
176 TestFunction test = entry.getValue();
177 // Recall gold value from before compilation
178 Object[] gold = golds.get(name);
179 // Compute new result
180 Object[] result = test.run();
181 // Compare gold and new result
182 verify(name, gold, result);
183 }
184 }
185
186 static byte[] generateB() {
187 byte[] a = new byte[RANGE];
188 for (int i = 0; i < a.length; i++) {
189 a[i] = (byte)RANDOM.nextInt();
190 }
191 return a;
192 }
193
194 static short[] generateS() {
195 short[] a = new short[RANGE];
196 for (int i = 0; i < a.length; i++) {
197 a[i] = (short)RANDOM.nextInt();
198 }
199 return a;
200 }
201
202 static int[] generateI() {
203 int[] a = new int[RANGE];
204 for (int i = 0; i < a.length; i++) {
205 a[i] = RANDOM.nextInt();
206 }
207 return a;
208 }
209
210 static long[] generateL() {
211 long[] a = new long[RANGE];
212 for (int i = 0; i < a.length; i++) {
213 a[i] = RANDOM.nextLong();
214 }
215 return a;
216 }
217
218 static void verify(String name, Object[] gold, Object[] result) {
219 if (gold.length != result.length) {
220 throw new RuntimeException("verify " + name + ": not the same number of outputs: gold.length = " +
221 gold.length + ", result.length = " + result.length);
222 }
223 for (int i = 0; i < gold.length; i++) {
224 Object g = gold[i];
225 Object r = result[i];
226 if (g.getClass() != r.getClass() || !g.getClass().isArray() || !r.getClass().isArray()) {
227 throw new RuntimeException("verify " + name + ": must both be array of same type:" +
228 " gold[" + i + "].getClass() = " + g.getClass().getSimpleName() +
229 " result[" + i + "].getClass() = " + r.getClass().getSimpleName());
230 }
231 if (g == r) {
232 throw new RuntimeException("verify " + name + ": should be two separate arrays (with identical content):" +
233 " gold[" + i + "] == result[" + i + "]");
234 }
235 if (Array.getLength(g) != Array.getLength(r)) {
236 throw new RuntimeException("verify " + name + ": arrays must have same length:" +
237 " gold[" + i + "].length = " + Array.getLength(g) +
238 " result[" + i + "].length = " + Array.getLength(r));
239 }
240 Class c = g.getClass().getComponentType();
241 if (c == byte.class) {
242 verifyB(name, i, (byte[])g, (byte[])r);
243 } else if (c == short.class) {
244 verifyS(name, i, (short[])g, (short[])r);
245 } else if (c == int.class) {
246 verifyI(name, i, (int[])g, (int[])r);
247 } else if (c == long.class) {
248 verifyL(name, i, (long[])g, (long[])r);
249 } else {
250 throw new RuntimeException("verify " + name + ": array type not supported for verify:" +
251 " gold[" + i + "].getClass() = " + g.getClass().getSimpleName() +
252 " result[" + i + "].getClass() = " + r.getClass().getSimpleName());
253 }
254 }
255 }
256
257 static void verifyB(String name, int i, byte[] g, byte[] r) {
258 for (int j = 0; j < g.length; j++) {
259 if (g[j] != r[j]) {
260 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
261 " gold[" + i + "][" + j + "] = " + g[j] +
262 " result[" + i + "][" + j + "] = " + r[j]);
263 }
264 }
265 }
266
267 static void verifyS(String name, int i, short[] g, short[] r) {
268 for (int j = 0; j < g.length; j++) {
269 if (g[j] != r[j]) {
270 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
271 " gold[" + i + "][" + j + "] = " + g[j] +
272 " result[" + i + "][" + j + "] = " + r[j]);
273 }
274 }
275 }
276
277 static void verifyI(String name, int i, int[] g, int[] r) {
278 for (int j = 0; j < g.length; j++) {
279 if (g[j] != r[j]) {
280 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
281 " gold[" + i + "][" + j + "] = " + g[j] +
282 " result[" + i + "][" + j + "] = " + r[j]);
283 }
284 }
285 }
286
287 static void verifyL(String name, int i, long[] g, long[] r) {
288 for (int j = 0; j < g.length; j++) {
289 if (g[j] != r[j]) {
290 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
291 " gold[" + i + "][" + j + "] = " + g[j] +
292 " result[" + i + "][" + j + "] = " + r[j]);
293 }
294 }
295 }
296
297 @Test
298 @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
299 IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
300 IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0",
301 IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
302 IRNode.STORE_VECTOR, "> 0"},
303 applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
304 applyIfPlatform = {"64-bit", "true"},
305 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
306 @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
307 IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
308 IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0",
309 IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
310 IRNode.STORE_VECTOR, "> 0"},
311 applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
312 applyIfPlatform = {"64-bit", "true"},
313 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
314 // Load and store are already split
315 //
316 // 0 1 - - 4 5 6 7
317 // | | | | | |
318 // 0 1 - - 4 5 6 7
319 static Object[] test0(int[] a, int[] b, int mask) {
320 for (int i = 0; i < RANGE; i+=8) {
321 int b0 = a[i+0] & mask;
322 int b1 = a[i+1] & mask;
323
324 int b4 = a[i+4] & mask;
325 int b5 = a[i+5] & mask;
326 int b6 = a[i+6] & mask;
327 int b7 = a[i+7] & mask;
328
329 b[i+0] = b0;
330 b[i+1] = b1;
331
332 b[i+4] = b4;
333 b[i+5] = b5;
334 b[i+6] = b6;
335 b[i+7] = b7;
336 // With AlignVector, we need 8-byte alignment of vector loads/stores.
337 // UseCompactObjectHeaders=false UseCompactObjectHeaders=true
338 // adr = base + 16 + 32*i -> always adr = base + 12 + 32*i -> never
339 // -> vectorize -> no vectorization
340 }
341 return new Object[]{ a, b };
342 }
343
344 @Test
345 @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
346 IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
347 IRNode.ADD_VI, IRNode.VECTOR_SIZE_4, "> 0",
348 IRNode.MUL_VI, IRNode.VECTOR_SIZE_2, "> 0",
349 IRNode.STORE_VECTOR, "> 0"},
350 applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
351 applyIfPlatform = {"64-bit", "true"},
352 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
353 @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
354 IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
355 IRNode.ADD_VI, IRNode.VECTOR_SIZE_4, "> 0",
356 IRNode.MUL_VI, IRNode.VECTOR_SIZE_2, "> 0",
357 IRNode.STORE_VECTOR, "> 0"},
358 applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
359 applyIfPlatform = {"64-bit", "true"},
360 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
361 // Adjacent Load and Store, but split by Add/Mul
362 static Object[] test1a(int[] a, int[] b, int mask) {
363 for (int i = 0; i < RANGE; i+=8) {
364 b[i+0] = a[i+0] + mask; // Add
365 b[i+1] = a[i+1] + mask;
366 b[i+2] = a[i+2] + mask;
367 b[i+3] = a[i+3] + mask;
368
369 b[i+4] = a[i+4] * mask; // Mul
370 b[i+5] = a[i+5] * mask;
371 // With AlignVector, we need 8-byte alignment of vector loads/stores.
372 // UseCompactObjectHeaders=false UseCompactObjectHeaders=true
373 // adr = base + 16 + 32*i -> always adr = base + 12 + 32*i -> never
374 // -> vectorize -> no vectorization
375 }
376 return new Object[]{ a, b };
377 }
378
379 @Test
380 @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
381 IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
382 IRNode.ADD_VI, IRNode.VECTOR_SIZE_2, "> 0",
383 IRNode.MUL_VI, IRNode.VECTOR_SIZE_4, "> 0",
384 IRNode.STORE_VECTOR, "> 0"},
385 applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
386 applyIfPlatform = {"64-bit", "true"},
387 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
388 @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
389 IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
390 IRNode.ADD_VI, IRNode.VECTOR_SIZE_2, "> 0",
391 IRNode.MUL_VI, IRNode.VECTOR_SIZE_4, "> 0",
392 IRNode.STORE_VECTOR, "> 0"},
393 applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
394 applyIfPlatform = {"64-bit", "true"},
395 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
396 // Adjacent Load and Store, but split by Add/Mul
397 static Object[] test1b(int[] a, int[] b, int mask) {
398 for (int i = 0; i < RANGE; i+=8) {
399 b[i+0] = a[i+0] * mask; // Mul
400 b[i+1] = a[i+1] * mask;
401 b[i+2] = a[i+2] * mask;
402 b[i+3] = a[i+3] * mask;
403
404 b[i+4] = a[i+4] + mask; // Add
405 b[i+5] = a[i+5] + mask;
406 // With AlignVector, we need 8-byte alignment of vector loads/stores.
407 // UseCompactObjectHeaders=false UseCompactObjectHeaders=true
408 // adr = base + 16 + 32*i -> always adr = base + 12 + 32*i -> never
409 // -> vectorize -> no vectorization
410 }
411 return new Object[]{ a, b };
412 }
413
414 @Test
415 @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
416 IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
417 IRNode.ADD_VI, IRNode.VECTOR_SIZE_2, "> 0",
418 IRNode.MUL_VI, IRNode.VECTOR_SIZE_4, "> 0",
419 IRNode.STORE_VECTOR, "> 0"},
420 applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
421 applyIfPlatform = {"64-bit", "true"},
422 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
423 @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
424 IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
425 IRNode.ADD_VI, IRNode.VECTOR_SIZE_2, "> 0",
426 IRNode.MUL_VI, IRNode.VECTOR_SIZE_4, "> 0",
427 IRNode.STORE_VECTOR, "> 0"},
428 applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
429 applyIfPlatform = {"64-bit", "true"},
430 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
431 // Adjacent Load and Store, but split by Add/Mul
432 static Object[] test1c(int[] a, int[] b, int mask) {
433 for (int i = 0; i < RANGE; i+=8) {
434 b[i+0] = a[i+0] + mask; // Add
435 b[i+1] = a[i+1] + mask;
436
437 b[i+2] = a[i+2] * mask; // Mul
438 b[i+3] = a[i+3] * mask;
439 b[i+4] = a[i+4] * mask;
440 b[i+5] = a[i+5] * mask;
441 // With AlignVector, we need 8-byte alignment of vector loads/stores.
442 // UseCompactObjectHeaders=false UseCompactObjectHeaders=true
443 // adr = base + 16 + 32*i -> always adr = base + 12 + 32*i -> never
444 // -> vectorize -> no vectorization
445 }
446 return new Object[]{ a, b };
447 }
448
449 @Test
450 @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
451 IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
452 IRNode.ADD_VI, IRNode.VECTOR_SIZE_4, "> 0",
453 IRNode.MUL_VI, IRNode.VECTOR_SIZE_2, "> 0",
454 IRNode.STORE_VECTOR, "> 0"},
455 applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
456 applyIfPlatform = {"64-bit", "true"},
457 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
458 @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
459 IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
460 IRNode.ADD_VI, IRNode.VECTOR_SIZE_4, "> 0",
461 IRNode.MUL_VI, IRNode.VECTOR_SIZE_2, "> 0",
462 IRNode.STORE_VECTOR, "> 0"},
463 applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
464 applyIfPlatform = {"64-bit", "true"},
465 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
466 // Adjacent Load and Store, but split by Add/Mul
467 static Object[] test1d(int[] a, int[] b, int mask) {
468 for (int i = 0; i < RANGE; i+=8) {
469 b[i+0] = a[i+0] * mask; // Mul
470 b[i+1] = a[i+1] * mask;
471
472 b[i+2] = a[i+2] + mask; // Add
473 b[i+3] = a[i+3] + mask;
474 b[i+4] = a[i+4] + mask;
475 b[i+5] = a[i+5] + mask;
476 // With AlignVector, we need 8-byte alignment of vector loads/stores.
477 // UseCompactObjectHeaders=false UseCompactObjectHeaders=true
478 // adr = base + 16 + 32*i -> always adr = base + 12 + 32*i -> never
479 // -> vectorize -> no vectorization
480 }
481 return new Object[]{ a, b };
482 }
483
484 @Test
485 @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
486 IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
487 IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0",
488 IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
489 IRNode.STORE_VECTOR, "> 0"},
490 applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
491 applyIfPlatform = {"64-bit", "true"},
492 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
493 @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
494 IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
495 IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0",
496 IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
497 IRNode.STORE_VECTOR, "> 0"},
498 applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
499 applyIfPlatform = {"64-bit", "true"},
500 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
501 // Split the load
502 //
503 // 0 1 2 3 4 5 - -
504 // | | \ \ \ \
505 // | | \ \ \ \
506 // | | \ \ \ \
507 // 0 1 - - 4 5 6 7
508 //
509 static Object[] test2a(int[] a, int[] b, int mask) {
510 for (int i = 0; i < RANGE; i+=8) {
511 int b0 = a[i+0] & mask;
512 int b1 = a[i+1] & mask;
513 int b2 = a[i+2] & mask;
514 int b3 = a[i+3] & mask;
515 int b4 = a[i+4] & mask;
516 int b5 = a[i+5] & mask;
517
518 b[i+0] = b0;
519 b[i+1] = b1;
520
521 b[i+4] = b2;
522 b[i+5] = b3;
523 b[i+6] = b4;
524 b[i+7] = b5;
525 // With AlignVector, we need 8-byte alignment of vector loads/stores.
526 // UseCompactObjectHeaders=false UseCompactObjectHeaders=true
527 // adr = base + 16 + 32*i -> always adr = base + 12 + 32*i -> never
528 // -> vectorize -> no vectorization
529 }
530 return new Object[]{ a, b };
531 }
532
533 @Test
534 @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
535 IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
536 IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0",
537 IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
538 IRNode.STORE_VECTOR, "> 0"},
539 applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
540 applyIfPlatform = {"64-bit", "true"},
541 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
542 @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
543 IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
544 IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0",
545 IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
546 IRNode.STORE_VECTOR, "> 0"},
547 applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
548 applyIfPlatform = {"64-bit", "true"},
549 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
550 // Split the load
551 //
552 // 0 1 2 3 4 5 - -
553 // | | | | \ \
554 // | | | | \ \
555 // | | | | \ \
556 // 0 1 2 3 -- 6 7
557 //
558 static Object[] test2b(int[] a, int[] b, int mask) {
559 for (int i = 0; i < RANGE; i+=8) {
560 int b0 = a[i+0] & mask;
561 int b1 = a[i+1] & mask;
562 int b2 = a[i+2] & mask;
563 int b3 = a[i+3] & mask;
564 int b4 = a[i+4] & mask;
565 int b5 = a[i+5] & mask;
566
567 b[i+0] = b0;
568 b[i+1] = b1;
569 b[i+2] = b2;
570 b[i+3] = b3;
571
572 b[i+6] = b4;
573 b[i+7] = b5;
574 // With AlignVector, we need 8-byte alignment of vector loads/stores.
575 // UseCompactObjectHeaders=false UseCompactObjectHeaders=true
576 // adr = base + 16 + 32*i -> always adr = base + 12 + 32*i -> never
577 // -> vectorize -> no vectorization
578 }
579 return new Object[]{ a, b };
580 }
581
582 @Test
583 @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
584 IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
585 IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0",
586 IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
587 IRNode.STORE_VECTOR, "> 0"},
588 applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
589 applyIfPlatform = {"64-bit", "true"},
590 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
591 @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
592 IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
593 IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0",
594 IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
595 IRNode.STORE_VECTOR, "> 0"},
596 applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
597 applyIfPlatform = {"64-bit", "true"},
598 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
599 // Split the load
600 //
601 // 0 1 - - 4 5 6 7
602 // | | / / / /
603 // | | / / / /
604 // | | / / / /
605 // 0 1 2 3 4 5 - -
606 //
607 static Object[] test2c(int[] a, int[] b, int mask) {
608 for (int i = 0; i < RANGE; i+=8) {
609 int b0 = a[i+0] & mask;
610 int b1 = a[i+1] & mask;
611
612 int b4 = a[i+4] & mask;
613 int b5 = a[i+5] & mask;
614 int b6 = a[i+6] & mask;
615 int b7 = a[i+7] & mask;
616
617 b[i+0] = b0;
618 b[i+1] = b1;
619 b[i+2] = b4;
620 b[i+3] = b5;
621 b[i+4] = b6;
622 b[i+5] = b7;
623 // With AlignVector, we need 8-byte alignment of vector loads/stores.
624 // UseCompactObjectHeaders=false UseCompactObjectHeaders=true
625 // adr = base + 16 + 32*i -> always adr = base + 12 + 32*i -> never
626 // -> vectorize -> no vectorization
627 }
628 return new Object[]{ a, b };
629 }
630
631 @Test
632 @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
633 IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
634 IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0",
635 IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
636 IRNode.STORE_VECTOR, "> 0"},
637 applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
638 applyIfPlatform = {"64-bit", "true"},
639 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
640 @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
641 IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
642 IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0",
643 IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
644 IRNode.STORE_VECTOR, "> 0"},
645 applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
646 applyIfPlatform = {"64-bit", "true"},
647 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
648 // Split the load
649 //
650 // 0 1 2 3 - - 6 7
651 // | | | | / /
652 // | | | | / /
653 // | | | | / /
654 // 0 1 2 3 4 5 - -
655 //
656 static Object[] test2d(int[] a, int[] b, int mask) {
657 for (int i = 0; i < RANGE; i+=8) {
658 int b0 = a[i+0] & mask;
659 int b1 = a[i+1] & mask;
660 int b2 = a[i+2] & mask;
661 int b3 = a[i+3] & mask;
662
663 int b6 = a[i+6] & mask;
664 int b7 = a[i+7] & mask;
665
666 b[i+0] = b0;
667 b[i+1] = b1;
668 b[i+2] = b2;
669 b[i+3] = b3;
670 b[i+4] = b6;
671 b[i+5] = b7;
672 // With AlignVector, we need 8-byte alignment of vector loads/stores.
673 // UseCompactObjectHeaders=false UseCompactObjectHeaders=true
674 // adr = base + 16 + 32*i -> always adr = base + 12 + 32*i -> never
675 // -> vectorize -> no vectorization
676 }
677 return new Object[]{ a, b };
678 }
679
680 @Test
681 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
682 IRNode.STORE_VECTOR, "> 0"},
683 applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
684 applyIfPlatform = {"64-bit", "true"},
685 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
686 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
687 IRNode.STORE_VECTOR, "> 0"},
688 applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
689 applyIfPlatform = {"64-bit", "true"},
690 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
691 // 0 1 2 3 4 5 6 7 -
692 // | | | | | | | |
693 // | + + + | | | |
694 // | | | | |
695 // | v | | | | v
696 // | | | | | | |
697 // 1 - - 3 4 5 6 7 8
698 static Object[] test3a(short[] a, short[] b, short val) {
699 int sum = 0;
700 for (int i = 0; i < RANGE; i+=16) {
701 short a0 = a[i+0]; // required for alignment / offsets, technical limitation.
702
703 short a1 = a[i+1]; // adjacent to 4-pack, but need to be split off
704 short a2 = a[i+2];
705 short a3 = a[i+3];
706
707 short a4 = a[i+4]; // 4-pack
708 short a5 = a[i+5];
709 short a6 = a[i+6];
710 short a7 = a[i+7];
711
712
713 b[i+0] = a0; // required for alignment / offsets, technical limitation.
714
715 sum += a1 + a2 + a3; // not packed
716
717 b[i+3] = val; // adjacent to 4-pack but needs to be split off
718
719 b[i+4] = a4; // 4-pack
720 b[i+5] = a5;
721 b[i+6] = a6;
722 b[i+7] = a7;
723
724 b[i+8] = val; // adjacent to 4-pack but needs to be split off
725
726 // With AlignVector, we need 8-byte alignment of vector loads/stores.
727 // UseCompactObjectHeaders=false UseCompactObjectHeaders=true
728 // adr = base + 16 + 8 + 32*i -> always adr = base + 12 + 8 + 32*i -> never
729 // -> vectorize -> no vectorization
730 }
731 return new Object[]{ a, b, new int[]{ sum } };
732 }
733
734 @Test
735 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_2, "> 0",
736 IRNode.STORE_VECTOR, "> 0",
737 ".*multiversion.*", "= 0"},
738 phase = CompilePhase.PRINT_IDEAL,
739 applyIf = {"UseAutoVectorizationSpeculativeAliasingChecks", "false"},
740 applyIfPlatform = {"64-bit", "true"},
741 applyIfCPUFeatureOr = {"sse4.1", "true"})
742 // Cyclic dependency with distance 2 -> split into 2-packs
743 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
744 IRNode.STORE_VECTOR, "> 0",
745 ".*multiversion.*", "= 0"},
746 phase = CompilePhase.PRINT_IDEAL,
747 applyIfAnd = {"UseAutoVectorizationSpeculativeAliasingChecks", "true", "AlignVector", "false"},
748 applyIfPlatform = {"64-bit", "true"},
749 applyIfCPUFeatureOr = {"sse4.1", "true"})
750 // Speculative aliasing check -> full vectorization.
751 static Object[] test4a(short[] a, short[] b) {
752 for (int i = 0; i < RANGE-64; i++) {
753 b[i+2] = a[i+0];
754 }
755 return new Object[]{ a, b };
756 }
757
758 @Test
759 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_2, "> 0",
760 IRNode.STORE_VECTOR, "> 0",
761 ".*multiversion.*", "= 0"},
762 phase = CompilePhase.PRINT_IDEAL,
763 applyIfAnd = {"AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
764 applyIfPlatform = {"64-bit", "true"},
765 applyIfCPUFeatureOr = {"sse4.1", "true"})
766 // Cyclic dependency with distance 3 -> split into 2-packs
767 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
768 IRNode.STORE_VECTOR, "> 0",
769 ".*multiversion.*", "= 0"},
770 phase = CompilePhase.PRINT_IDEAL,
771 applyIfAnd = {"AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
772 applyIfPlatform = {"64-bit", "true"},
773 applyIfCPUFeatureOr = {"sse4.1", "true"})
774 // Speculative aliasing check -> full vectorization.
775 static Object[] test4b(short[] a, short[] b) {
776 for (int i = 0; i < RANGE-64; i++) {
777 b[i+3] = a[i+0];
778 }
779 return new Object[]{ a, b };
780 }
781
782 @Test
783 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
784 IRNode.STORE_VECTOR, "> 0",
785 ".*multiversion.*", "= 0"},
786 phase = CompilePhase.PRINT_IDEAL,
787 applyIfAnd = {"MaxVectorSize", ">=8", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
788 applyIfPlatform = {"64-bit", "true"},
789 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
790 // Cyclic dependency with distance 4 -> split into 4-packs
791 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
792 IRNode.STORE_VECTOR, "> 0",
793 ".*multiversion.*", "= 0"},
794 phase = CompilePhase.PRINT_IDEAL,
795 applyIfAnd = {"MaxVectorSize", ">=8", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
796 applyIfPlatform = {"64-bit", "true"},
797 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
798 // Speculative aliasing check -> full vectorization.
799 static Object[] test4c(short[] a, short[] b) {
800 for (int i = 0; i < RANGE-64; i++) {
801 b[i+4] = a[i+0];
802 }
803 return new Object[]{ a, b };
804 }
805
// test4d: copies a[i+0] into b[i+5] for every i in [0, RANGE-64) and returns { a, b }.
// RANGE is declared outside this chunk. The store index runs 5 elements ahead of the
// load index, so if a and b refer to the same array (callers are not visible here) the
// loop carries a dependency at distance 5.
// Two @IR rules follow, selected by UseAutoVectorizationSpeculativeAliasingChecks. Both
// require MaxVectorSize >= 8, AlignVector=false and a 64-bit platform, are checked at
// PRINT_IDEAL, and expect zero matches of the ".*multiversion.*" pattern.
806 @Test
807 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
808 IRNode.STORE_VECTOR, "> 0",
809 ".*multiversion.*", "= 0"},
810 phase = CompilePhase.PRINT_IDEAL,
811 applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
812 applyIfPlatform = {"64-bit", "true"},
813 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
814 // Rule above (speculative aliasing checks off): cyclic dependency with distance 5 -> split into 4-packs.
815 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
816 IRNode.STORE_VECTOR, "> 0",
817 ".*multiversion.*", "= 0"},
818 phase = CompilePhase.PRINT_IDEAL,
819 applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
820 applyIfPlatform = {"64-bit", "true"},
821 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
822 // Rule above (speculative aliasing checks on): speculative aliasing check -> full vectorization,
// so the short load vector is matched without an explicit size constraint.
823 static Object[] test4d(short[] a, short[] b) {
824 for (int i = 0; i < RANGE-64; i++) {
825 b[i+5] = a[i+0];
826 }
// Both arrays are handed back to the caller; nothing else is returned.
827 return new Object[]{ a, b };
828 }
829
// test4e: copies a[i+0] into b[i+6] for every i in [0, RANGE-64) and returns { a, b }.
// RANGE is declared outside this chunk. The store index runs 6 elements ahead of the
// load index, so if a and b refer to the same array (callers are not visible here) the
// loop carries a dependency at distance 6.
// Two @IR rules follow, selected by UseAutoVectorizationSpeculativeAliasingChecks. Both
// require MaxVectorSize >= 8, AlignVector=false and a 64-bit platform, are checked at
// PRINT_IDEAL, and expect zero matches of the ".*multiversion.*" pattern.
830 @Test
831 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
832 IRNode.STORE_VECTOR, "> 0",
833 ".*multiversion.*", "= 0"},
834 phase = CompilePhase.PRINT_IDEAL,
835 applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
836 applyIfPlatform = {"64-bit", "true"},
837 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
838 // Rule above (speculative aliasing checks off): cyclic dependency with distance 6 -> split into 4-packs.
839 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
840 IRNode.STORE_VECTOR, "> 0",
841 ".*multiversion.*", "= 0"},
842 phase = CompilePhase.PRINT_IDEAL,
843 applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
844 applyIfPlatform = {"64-bit", "true"},
845 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
846 // Rule above (speculative aliasing checks on): speculative aliasing check -> full vectorization,
// so the short load vector is matched without an explicit size constraint.
847 static Object[] test4e(short[] a, short[] b) {
848 for (int i = 0; i < RANGE-64; i++) {
849 b[i+6] = a[i+0];
850 }
// Both arrays are handed back to the caller; nothing else is returned.
851 return new Object[]{ a, b };
852 }
853
// test4f: copies a[i+0] into b[i+7] for every i in [0, RANGE-64) and returns { a, b }.
// RANGE is declared outside this chunk. The store index runs 7 elements ahead of the
// load index, so if a and b refer to the same array (callers are not visible here) the
// loop carries a dependency at distance 7.
// Two @IR rules follow, selected by UseAutoVectorizationSpeculativeAliasingChecks. Both
// require MaxVectorSize >= 8, AlignVector=false and a 64-bit platform, are checked at
// PRINT_IDEAL, and expect zero matches of the ".*multiversion.*" pattern.
854 @Test
855 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
856 IRNode.STORE_VECTOR, "> 0",
857 ".*multiversion.*", "= 0"},
858 phase = CompilePhase.PRINT_IDEAL,
859 applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
860 applyIfPlatform = {"64-bit", "true"},
861 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
862 // Rule above (speculative aliasing checks off): cyclic dependency with distance 7 -> split into 4-packs.
863 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
864 IRNode.STORE_VECTOR, "> 0",
865 ".*multiversion.*", "= 0"},
866 phase = CompilePhase.PRINT_IDEAL,
867 applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
868 applyIfPlatform = {"64-bit", "true"},
869 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
870 // Rule above (speculative aliasing checks on): speculative aliasing check -> full vectorization,
// so the short load vector is matched without an explicit size constraint.
871 static Object[] test4f(short[] a, short[] b) {
872 for (int i = 0; i < RANGE-64; i++) {
873 b[i+7] = a[i+0];
874 }
// Both arrays are handed back to the caller; nothing else is returned.
875 return new Object[]{ a, b };
876 }
877
// test4g: copies a[i+0] into b[i+8] for every i in [0, RANGE-64) and returns { a, b }.
// RANGE is declared outside this chunk. The store index runs 8 elements ahead of the
// load index, so if a and b refer to the same array (callers are not visible here) the
// loop carries a dependency at distance 8.
// Two @IR rules follow, selected by UseAutoVectorizationSpeculativeAliasingChecks. Both
// require MaxVectorSize >= 32 and a 64-bit platform, are checked at PRINT_IDEAL, and
// expect zero matches of the ".*multiversion.*" pattern.
878 @Test
879 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_8, "> 0",
880 IRNode.STORE_VECTOR, "> 0",
881 ".*multiversion.*", "= 0"},
882 phase = CompilePhase.PRINT_IDEAL,
883 applyIfAnd = {"MaxVectorSize", ">=32", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
884 applyIfPlatform = {"64-bit", "true"},
885 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
886 // Rule above (speculative aliasing checks off): cyclic dependency with distance 8 -> split into 8-packs.
887 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
888 IRNode.STORE_VECTOR, "> 0",
889 ".*multiversion.*", "= 0"},
890 phase = CompilePhase.PRINT_IDEAL,
891 applyIfAnd = {"MaxVectorSize", ">=32", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
892 applyIfPlatform = {"64-bit", "true"},
893 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
894 // Rule above (speculative aliasing checks on): speculative aliasing check -> full vectorization,
// so the short load vector is matched without an explicit size constraint.
895 static Object[] test4g(short[] a, short[] b) {
896 for (int i = 0; i < RANGE-64; i++) {
897 b[i+8] = a[i+0];
898 }
// Both arrays are handed back to the caller; nothing else is returned.
899 return new Object[]{ a, b };
900 }
901
// test4a_alias: copies a[i+0] into b[i+2] for every i in [0, RANGE-64) and returns { a, b }.
// RANGE is declared outside this chunk. The store index runs 2 elements ahead of the load index.
// NOTE(review): presumably the driver (not visible here) calls this variant with arrays that
// alias each other, which would explain why multiversioning is expected below - confirm.
// Two @IR rules follow, selected by UseAutoVectorizationSpeculativeAliasingChecks. Both are
// limited to 64-bit platforms with sse4.1 and are checked at PRINT_IDEAL.
902 @Test
903 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_2, "> 0",
904 IRNode.STORE_VECTOR, "> 0",
905 ".*multiversion.*", "= 0"},
906 phase = CompilePhase.PRINT_IDEAL,
907 applyIf = {"UseAutoVectorizationSpeculativeAliasingChecks", "false"},
908 applyIfPlatform = {"64-bit", "true"},
909 applyIfCPUFeatureOr = {"sse4.1", "true"})
910 // Rule above (speculative aliasing checks off): cyclic dependency with distance 2 -> split into 2-packs,
// and no ".*multiversion.*" match is expected.
911 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
912 IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_2, "> 0",
913 IRNode.STORE_VECTOR, "> 0",
914 ".*multiversion.*", "> 0"},
915 phase = CompilePhase.PRINT_IDEAL,
916 applyIfAnd = {"UseAutoVectorizationSpeculativeAliasingChecks", "true", "AlignVector", "false"},
917 applyIfPlatform = {"64-bit", "true"},
918 applyIfCPUFeatureOr = {"sse4.1", "true"})
919 // Rule above (speculative aliasing checks on, AlignVector off): speculative aliasing check with
// multiversioning -> full vectorization & split packs. The rule therefore expects short load vectors
// both without a size constraint and with 2 elements, plus at least one ".*multiversion.*" match.
920 static Object[] test4a_alias(short[] a, short[] b) {
921 for (int i = 0; i < RANGE-64; i++) {
922 b[i+2] = a[i+0];
923 }
// Both arrays are handed back to the caller; nothing else is returned.
924 return new Object[]{ a, b };
925 }
926
// test4b_alias: copies a[i+0] into b[i+3] for every i in [0, RANGE-64) and returns { a, b }.
// RANGE is declared outside this chunk. The store index runs 3 elements ahead of the load index.
// NOTE(review): the loop body is the same as in test4b; presumably the driver (not visible here)
// calls this variant with arrays that alias each other, which would explain why multiversioning
// is expected below - confirm.
// Two @IR rules follow, selected by UseAutoVectorizationSpeculativeAliasingChecks. Both require
// AlignVector=false, are limited to 64-bit platforms with sse4.1, and are checked at PRINT_IDEAL.
927 @Test
928 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_2, "> 0",
929 IRNode.STORE_VECTOR, "> 0",
930 ".*multiversion.*", "= 0"},
931 phase = CompilePhase.PRINT_IDEAL,
932 applyIfAnd = {"AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
933 applyIfPlatform = {"64-bit", "true"},
934 applyIfCPUFeatureOr = {"sse4.1", "true"})
935 // Rule above (speculative aliasing checks off): cyclic dependency with distance 3 -> split into 2-packs,
// and no ".*multiversion.*" match is expected.
936 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
937 IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_2, "> 0",
938 IRNode.STORE_VECTOR, "> 0",
939 ".*multiversion.*", "> 0"},
940 phase = CompilePhase.PRINT_IDEAL,
941 applyIfAnd = {"AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
942 applyIfPlatform = {"64-bit", "true"},
943 applyIfCPUFeatureOr = {"sse4.1", "true"})
944 // Rule above (speculative aliasing checks on): speculative aliasing check with multiversioning
// -> full vectorization & split packs. The rule therefore expects short load vectors both without
// a size constraint and with 2 elements, plus at least one ".*multiversion.*" match.
945 static Object[] test4b_alias(short[] a, short[] b) {
946 for (int i = 0; i < RANGE-64; i++) {
947 b[i+3] = a[i+0];
948 }
// Both arrays are handed back to the caller; nothing else is returned.
949 return new Object[]{ a, b };
950 }
951
// test4c_alias: copies a[i+0] into b[i+4] for every i in [0, RANGE-64) and returns { a, b }.
// RANGE is declared outside this chunk. The store index runs 4 elements ahead of the load index.
// NOTE(review): the loop body is the same as in test4c; presumably the driver (not visible here)
// calls this variant with arrays that alias each other, which would explain why multiversioning
// is expected below - confirm.
// Two @IR rules follow, selected by UseAutoVectorizationSpeculativeAliasingChecks. Both require
// MaxVectorSize >= 8 and a 64-bit platform, and are checked at PRINT_IDEAL.
952 @Test
953 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
954 IRNode.STORE_VECTOR, "> 0",
955 ".*multiversion.*", "= 0"},
956 phase = CompilePhase.PRINT_IDEAL,
957 applyIfAnd = {"MaxVectorSize", ">=8", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
958 applyIfPlatform = {"64-bit", "true"},
959 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
960 // Rule above (speculative aliasing checks off): cyclic dependency with distance 4 -> split into 4-packs,
// and no ".*multiversion.*" match is expected.
961 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
962 IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
963 IRNode.STORE_VECTOR, "> 0",
964 ".*multiversion.*", "> 0"},
965 phase = CompilePhase.PRINT_IDEAL,
966 applyIfAnd = {"MaxVectorSize", ">=8", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
967 applyIfPlatform = {"64-bit", "true"},
968 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
969 // Rule above (speculative aliasing checks on): speculative aliasing check with multiversioning
// -> full vectorization & split packs. The rule therefore expects short load vectors both without
// a size constraint and with 4 elements, plus at least one ".*multiversion.*" match.
970 static Object[] test4c_alias(short[] a, short[] b) {
971 for (int i = 0; i < RANGE-64; i++) {
972 b[i+4] = a[i+0];
973 }
// Both arrays are handed back to the caller; nothing else is returned.
974 return new Object[]{ a, b };
975 }
976
// test4d_alias: copies a[i+0] into b[i+5] for every i in [0, RANGE-64) and returns { a, b }.
// RANGE is declared outside this chunk. The store index runs 5 elements ahead of the load index.
// NOTE(review): the loop body is the same as in test4d; presumably the driver (not visible here)
// calls this variant with arrays that alias each other, which would explain why multiversioning
// is expected below - confirm.
// Two @IR rules follow, selected by UseAutoVectorizationSpeculativeAliasingChecks. Both require
// MaxVectorSize >= 8, AlignVector=false and a 64-bit platform, and are checked at PRINT_IDEAL.
977 @Test
978 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
979 IRNode.STORE_VECTOR, "> 0",
980 ".*multiversion.*", "= 0"},
981 phase = CompilePhase.PRINT_IDEAL,
982 applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
983 applyIfPlatform = {"64-bit", "true"},
984 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
985 // Rule above (speculative aliasing checks off): cyclic dependency with distance 5 -> split into 4-packs,
// and no ".*multiversion.*" match is expected.
986 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
987 IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
988 IRNode.STORE_VECTOR, "> 0",
989 ".*multiversion.*", "> 0"},
990 phase = CompilePhase.PRINT_IDEAL,
991 applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
992 applyIfPlatform = {"64-bit", "true"},
993 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
994 // Rule above (speculative aliasing checks on): speculative aliasing check with multiversioning
// -> full vectorization & split packs. The rule therefore expects short load vectors both without
// a size constraint and with 4 elements, plus at least one ".*multiversion.*" match.
995 static Object[] test4d_alias(short[] a, short[] b) {
996 for (int i = 0; i < RANGE-64; i++) {
997 b[i+5] = a[i+0];
998 }
// Both arrays are handed back to the caller; nothing else is returned.
999 return new Object[]{ a, b };
1000 }
1001
// test4e_alias: copies a[i+0] into b[i+6] for every i in [0, RANGE-64) and returns { a, b }.
// RANGE is declared outside this chunk. The store index runs 6 elements ahead of the load index.
// NOTE(review): the loop body is the same as in test4e; presumably the driver (not visible here)
// calls this variant with arrays that alias each other, which would explain why multiversioning
// is expected below - confirm.
// Two @IR rules follow, selected by UseAutoVectorizationSpeculativeAliasingChecks. Both require
// MaxVectorSize >= 8, AlignVector=false and a 64-bit platform, and are checked at PRINT_IDEAL.
1002 @Test
1003 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
1004 IRNode.STORE_VECTOR, "> 0",
1005 ".*multiversion.*", "= 0"},
1006 phase = CompilePhase.PRINT_IDEAL,
1007 applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
1008 applyIfPlatform = {"64-bit", "true"},
1009 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
1010 // Rule above (speculative aliasing checks off): cyclic dependency with distance 6 -> split into 4-packs,
// and no ".*multiversion.*" match is expected.
1011 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
1012 IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
1013 IRNode.STORE_VECTOR, "> 0",
1014 ".*multiversion.*", "> 0"},
1015 phase = CompilePhase.PRINT_IDEAL,
1016 applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
1017 applyIfPlatform = {"64-bit", "true"},
1018 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
1019 // Rule above (speculative aliasing checks on): speculative aliasing check with multiversioning
// -> full vectorization & split packs. The rule therefore expects short load vectors both without
// a size constraint and with 4 elements, plus at least one ".*multiversion.*" match.
1020 static Object[] test4e_alias(short[] a, short[] b) {
1021 for (int i = 0; i < RANGE-64; i++) {
1022 b[i+6] = a[i+0];
1023 }
// Both arrays are handed back to the caller; nothing else is returned.
1024 return new Object[]{ a, b };
1025 }
1026
// test4f_alias: copies a[i+0] into b[i+7] for every i in [0, RANGE-64) and returns { a, b }.
// RANGE is declared outside this chunk. The store index runs 7 elements ahead of the load index.
// NOTE(review): the loop body is the same as in test4f; presumably the driver (not visible here)
// calls this variant with arrays that alias each other, which would explain why multiversioning
// is expected below - confirm.
// Two @IR rules follow, selected by UseAutoVectorizationSpeculativeAliasingChecks. Both require
// MaxVectorSize >= 8, AlignVector=false and a 64-bit platform, and are checked at PRINT_IDEAL.
1027 @Test
1028 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
1029 IRNode.STORE_VECTOR, "> 0",
1030 ".*multiversion.*", "= 0"},
1031 phase = CompilePhase.PRINT_IDEAL,
1032 applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
1033 applyIfPlatform = {"64-bit", "true"},
1034 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
1035 // Rule above (speculative aliasing checks off): cyclic dependency with distance 7 -> split into 4-packs,
// and no ".*multiversion.*" match is expected.
1036 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
1037 IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
1038 IRNode.STORE_VECTOR, "> 0",
1039 ".*multiversion.*", "> 0"},
1040 phase = CompilePhase.PRINT_IDEAL,
1041 applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
1042 applyIfPlatform = {"64-bit", "true"},
1043 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
1044 // Rule above (speculative aliasing checks on): speculative aliasing check with multiversioning
// -> full vectorization & split packs. The rule therefore expects short load vectors both without
// a size constraint and with 4 elements, plus at least one ".*multiversion.*" match.
1045 static Object[] test4f_alias(short[] a, short[] b) {
1046 for (int i = 0; i < RANGE-64; i++) {
1047 b[i+7] = a[i+0];
1048 }
// Both arrays are handed back to the caller; nothing else is returned.
1049 return new Object[]{ a, b };
1050 }
1051
// test4g_alias: copies a[i+0] into b[i+8] for every i in [0, RANGE-64) and returns { a, b }.
// RANGE is declared outside this chunk. The store index runs 8 elements ahead of the load index.
// NOTE(review): the loop body is the same as in test4g; presumably the driver (not visible here)
// calls this variant with arrays that alias each other, which would explain why multiversioning
// is expected below - confirm.
// Two @IR rules follow, selected by UseAutoVectorizationSpeculativeAliasingChecks. Both require
// MaxVectorSize >= 32 and a 64-bit platform, and are checked at PRINT_IDEAL.
1052 @Test
1053 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_8, "> 0",
1054 IRNode.STORE_VECTOR, "> 0",
1055 ".*multiversion.*", "= 0"},
1056 phase = CompilePhase.PRINT_IDEAL,
1057 applyIfAnd = {"MaxVectorSize", ">=32", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
1058 applyIfPlatform = {"64-bit", "true"},
1059 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
1060 // Rule above (speculative aliasing checks off): cyclic dependency with distance 8 -> split into 8-packs,
// and no ".*multiversion.*" match is expected.
1061 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
1062 IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_8, "> 0",
1063 IRNode.STORE_VECTOR, "> 0",
1064 ".*multiversion.*", "> 0"},
1065 phase = CompilePhase.PRINT_IDEAL,
1066 applyIfAnd = {"MaxVectorSize", ">=32", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
1067 applyIfPlatform = {"64-bit", "true"},
1068 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
1069 // Rule above (speculative aliasing checks on): speculative aliasing check with multiversioning
// -> full vectorization & split packs. The rule therefore expects short load vectors both without
// a size constraint and with 8 elements, plus at least one ".*multiversion.*" match.
// NOTE(review): this rule lists "avx2" where the first rule lists "sse4.1"; presumably intentional,
// so that the unconstrained vector is wider than the 8-element split pack on x86 - confirm.
1070 static Object[] test4g_alias(short[] a, short[] b) {
1071 for (int i = 0; i < RANGE-64; i++) {
1072 b[i+8] = a[i+0];
1073 }
// Both arrays are handed back to the caller; nothing else is returned.
1074 return new Object[]{ a, b };
1075 }
1076
// test5a: for each i stepping by 16 over [0, RANGE), stores (short)(a[i+k] + val) into b[i+k]
// for k = 0..14 and returns { a, b }. Index i+15 is never written, so each iteration has
// 15 adjacent stores. RANGE is declared outside this chunk.
// Both @IR rules require MaxVectorSize >= 32, AlignVector=false and a 64-bit platform:
//  - sse4.1: expects short load and add vectors with 2, 4 and 8 elements.
//  - sve:    expects short load and add vectors with 4 and 8 elements only.
1077 @Test
1078 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_2, "> 0",
1079 IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
1080 IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_8, "> 0",
1081 IRNode.ADD_VS, IRNode.VECTOR_SIZE_2, "> 0",
1082 IRNode.ADD_VS, IRNode.VECTOR_SIZE_8, "> 0",
1083 IRNode.ADD_VS, IRNode.VECTOR_SIZE_4, "> 0",
1084 IRNode.STORE_VECTOR, "> 0"},
1085 applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
1086 applyIfPlatform = {"64-bit", "true"},
1087 applyIfCPUFeature = {"sse4.1", "true"})
1088 // Rule below: aarch64 limits the minimum vector size to 8B, thus a vector of
1089 // length 2 for type "short" will not be generated there (no VECTOR_SIZE_2 checks).
1090 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
1091 IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_8, "> 0",
1092 IRNode.ADD_VS, IRNode.VECTOR_SIZE_8, "> 0",
1093 IRNode.ADD_VS, IRNode.VECTOR_SIZE_4, "> 0",
1094 IRNode.STORE_VECTOR, "> 0"},
1095 applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
1096 applyIfPlatform = {"64-bit", "true"},
1097 applyIfCPUFeature = {"sve", "true"})
1098 // Split pack into power-of-2 sizes (the 15 adjacent stores are grouped as 8 + 4 + 2 + 1).
1099 static Object[] test5a(short[] a, short[] b, short val) {
1100 for (int i = 0; i < RANGE; i+=16) {
1101 b[i+ 0] = (short)(a[i+ 0] + val); // 8-pack
1102 b[i+ 1] = (short)(a[i+ 1] + val);
1103 b[i+ 2] = (short)(a[i+ 2] + val);
1104 b[i+ 3] = (short)(a[i+ 3] + val);
1105 b[i+ 4] = (short)(a[i+ 4] + val);
1106 b[i+ 5] = (short)(a[i+ 5] + val);
1107 b[i+ 6] = (short)(a[i+ 6] + val);
1108 b[i+ 7] = (short)(a[i+ 7] + val);
1109
1110 b[i+ 8] = (short)(a[i+ 8] + val); // 4-pack
1111 b[i+ 9] = (short)(a[i+ 9] + val);
1112 b[i+10] = (short)(a[i+10] + val);
1113 b[i+11] = (short)(a[i+11] + val);
1114
1115 b[i+12] = (short)(a[i+12] + val); // 2-pack
1116 b[i+13] = (short)(a[i+13] + val);
1117
1118 b[i+14] = (short)(a[i+14] + val); // single remaining store
1119 }
// Both arrays are handed back to the caller; nothing else is returned.
1120 return new Object[]{ a, b };
1121 }
1122
// test6a: for each i stepping by 8 over [0, RANGE), adds a[i+k] * b[i+k] (k = 0..3) and
// a[i+k] & b[i+k] (k = 4..7) to the int accumulator s, then returns { a, b, new int[]{ s } }.
// RANGE is declared outside this chunk.
// The two @IR rules expect the same nodes (4-element int load, mul, and, add vectors plus an
// add reduction) and differ only in applyIfAnd: the first needs AlignVector=false, the second
// needs UseCompactObjectHeaders=false. Both need MaxVectorSize >= 32 and a 64-bit platform.
// No rule applies when AlignVector and UseCompactObjectHeaders are both true; the comment at
// the end of the loop body explains why.
1123 @Test
1124 @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
1125 IRNode.MUL_VI, IRNode.VECTOR_SIZE_4, "> 0",
1126 IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
1127 IRNode.ADD_VI, IRNode.VECTOR_SIZE_4, "> 0", // reduction moved out of loop
1128 IRNode.ADD_REDUCTION_V, "> 0"},
1129 applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
1130 applyIfPlatform = {"64-bit", "true"},
1131 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
1132 @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
1133 IRNode.MUL_VI, IRNode.VECTOR_SIZE_4, "> 0",
1134 IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
1135 IRNode.ADD_VI, IRNode.VECTOR_SIZE_4, "> 0", // reduction moved out of loop
1136 IRNode.ADD_REDUCTION_V, "> 0"},
1137 applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
1138 applyIfPlatform = {"64-bit", "true"},
1139 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
1140 // Split packs including reductions
1141 static Object[] test6a(int[] a, int[] b) {
1142 int s = 0;
1143 for (int i = 0; i < RANGE; i+=8) {
// First group of four: products feed the sum.
1144 s += a[i+0] * b[i+0];
1145 s += a[i+1] * b[i+1];
1146 s += a[i+2] * b[i+2];
1147 s += a[i+3] * b[i+3];
1148
// Second group of four: bitwise ANDs feed the same sum.
1149 s += a[i+4] & b[i+4];
1150 s += a[i+5] & b[i+5];
1151 s += a[i+6] & b[i+6];
1152 s += a[i+7] & b[i+7];
1153 // With AlignVector, we need 8-byte alignment of vector loads/stores.
1154 // UseCompactObjectHeaders=false: adr = base + 16 + 32*i -> always aligned -> vectorize
1155 // UseCompactObjectHeaders=true: adr = base + 12 + 32*i -> never aligned -> no vectorization
1156 // Hence the AlignVector=false / UseCompactObjectHeaders=false conditions on the two IR rules.
1157 }
// The scalar sum is wrapped in a one-element array so it can be returned alongside a and b.
1158 return new Object[]{ a, b, new int[]{ s } };
1159 }
1160
// test7a: stores b[i] * i into a[i] for every i in [0, RANGE) and returns { a, b }.
// RANGE is declared outside this chunk.
// The single @IR rule applies on 64-bit platforms with avx2, sve or rvv and expects at least
// one int load vector, one MUL_VI and one POPULATE_INDEX node.
1161 @Test
1162 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
1163 IRNode.MUL_VI, "> 0",
1164 IRNode.POPULATE_INDEX, "> 0"},
1165 applyIfPlatform = {"64-bit", "true"},
1166 applyIfCPUFeatureOr = {"avx2", "true", "sve", "true", "rvv", "true"})
1167 // Index Populate:
1168 // There can be an issue when all the (iv + 1), (iv + 2), ...
1169 // get packed, but not (iv). Then we have a pack that is one element
1170 // too short, and we start splitting everything in a bad way.
1171 static Object[] test7a(int[] a, int[] b) {
1172 for (int i = 0; i < RANGE; i++) {
// The loop index itself is used as a multiplicand.
1173 a[i] = b[i] * i;
1174 }
// Both arrays are handed back to the caller; nothing else is returned.
1175 return new Object[]{ a, b };
1176 }
1177 }