1 /*
2 * Copyright (c) 2022, 2023, Arm Limited. All rights reserved.
3 * Copyright (c) 2023, 2024, Oracle and/or its affiliates. All rights reserved.
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 *
6 * This code is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 only, as
8 * published by the Free Software Foundation.
9 *
10 * This code is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * version 2 for more details (a copy is included in the LICENSE file that
14 * accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License version
17 * 2 along with this work; if not, write to the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19 *
20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21 * or visit www.oracle.com if you need additional information or have any
22 * questions.
23 */
24
25 /*
26 * @test
27 * @summary Vectorization test on combined operations
28 * @library /test/lib /
29 *
30 * @build jdk.test.whitebox.WhiteBox
31 * compiler.vectorization.runner.VectorizationTestRunner
32 *
33 * @requires vm.compiler2.enabled
34 *
35 * @run driver jdk.test.lib.helpers.ClassFileInstaller jdk.test.whitebox.WhiteBox
36 *
37 * @run main/othervm -Xbootclasspath/a:.
38 * -XX:+UnlockDiagnosticVMOptions
39 * -XX:+WhiteBoxAPI
40 * compiler.vectorization.runner.LoopCombinedOpTest nCOH_nAV
41 *
42 * @run main/othervm -Xbootclasspath/a:.
43 * -XX:+UnlockDiagnosticVMOptions
44 * -XX:+WhiteBoxAPI
45 * compiler.vectorization.runner.LoopCombinedOpTest nCOH_yAV
46 *
47 * @run main/othervm -Xbootclasspath/a:.
48 * -XX:+UnlockDiagnosticVMOptions
49 * -XX:+WhiteBoxAPI
50 * compiler.vectorization.runner.LoopCombinedOpTest yCOH_nAV
51 *
52 * @run main/othervm -Xbootclasspath/a:.
53 * -XX:+UnlockDiagnosticVMOptions
54 * -XX:+WhiteBoxAPI
55 * compiler.vectorization.runner.LoopCombinedOpTest yCOH_yAV
56 */
57
58 package compiler.vectorization.runner;
59
60 import compiler.lib.ir_framework.*;
61
62 import java.util.Random;
63
64 public class LoopCombinedOpTest extends VectorizationTestRunner {
65
66 // We must pass the flags directly to the test-VM, and not the driver vm in the @run above.
67 @Override
68 protected String[] testVMFlags(String[] args) {
69 return switch (args[0]) {
70 case "nCOH_nAV" -> new String[]{"-XX:-UseCompactObjectHeaders", "-XX:-AlignVector"};
71 case "nCOH_yAV" -> new String[]{"-XX:-UseCompactObjectHeaders", "-XX:+AlignVector"};
72 case "yCOH_nAV" -> new String[]{"-XX:+UseCompactObjectHeaders", "-XX:-AlignVector"};
73 case "yCOH_yAV" -> new String[]{"-XX:+UseCompactObjectHeaders", "-XX:+AlignVector"};
74 default -> { throw new RuntimeException("Test argument not recognized: " + args[0]); }
75 };
76 }
77
78 private static final int SIZE = 543;
79
80 private int[] a;
81 private int[] b;
82 private int[] c;
83 private int[] d;
84 private long[] l1;
85 private long[] l2;
86 private short[] s1;
87 private short[] s2;
88 private int intInv;
89
90 public LoopCombinedOpTest() {
91 a = new int[SIZE];
92 b = new int[SIZE];
93 c = new int[SIZE];
94 d = new int[SIZE];
95 l1 = new long[SIZE];
96 l2 = new long[SIZE];
97 s1 = new short[SIZE];
98 s2 = new short[SIZE];
99 for (int i = 0; i < SIZE; i++) {
100 a[i] = -654321 * i;
101 b[i] = 123456 * i;
102 c[i] = -998877 * i;
103 d[i] = 778899 * i;
104 l1[i] = 5000000000L * i;
105 l2[i] = -600000000L * i;
106 s1[i] = (short) (3 * i);
107 s2[i] = (short) (-2 * i);
108 }
109 Random ran = new Random(999);
110 intInv = ran.nextInt();
111 }
112
113 @Test
114 @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true", "rvv", "true"},
115 counts = {IRNode.STORE_VECTOR, ">0",
116 IRNode.LOAD_VECTOR_I, "> 0"})
117 public int[] opWithConstant() {
118 int[] res = new int[SIZE];
119 for (int i = 0; i < SIZE; i++) {
120 res[i] = a[i] + 1234567890;
121 }
122 return res;
123 }
124
125 @Test
126 @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true", "rvv", "true"},
127 counts = {IRNode.STORE_VECTOR, ">0",
128 IRNode.LOAD_VECTOR_I, "> 0"})
129 public int[] opWithLoopInvariant() {
130 int[] res = new int[SIZE];
131 for (int i = 0; i < SIZE; i++) {
132 res[i] = b[i] * intInv;
133 }
134 return res;
135 }
136
137 @Test
138 @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true", "rvv", "true"},
139 counts = {IRNode.STORE_VECTOR, ">0",
140 IRNode.LOAD_VECTOR_I, "> 0"})
141 public int[] opWithConstantAndLoopInvariant() {
142 int[] res = new int[SIZE];
143 for (int i = 0; i < SIZE; i++) {
144 res[i] = c[i] * (intInv & 0xfff);
145 }
146 return res;
147 }
148
149 @Test
150 @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true", "rvv", "true"},
151 counts = {IRNode.STORE_VECTOR, ">0",
152 IRNode.LOAD_VECTOR_I, "> 0"})
153 public int[] multipleOps() {
154 int[] res = new int[SIZE];
155 for (int i = 0; i < SIZE; i++) {
156 res[i] = a[i] & b[i] + c[i] & d[i];
157 }
158 return res;
159 }
160
161 @Test
162 @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true", "rvv", "true"},
163 counts = {IRNode.STORE_VECTOR, ">0",
164 IRNode.LOAD_VECTOR_I, "> 0"})
165 public int[] multipleOpsWithMultipleConstants() {
166 int[] res = new int[SIZE];
167 for (int i = 0; i < SIZE; i++) {
168 res[i] = a[i] * 12345678 + 87654321 + b[i] & 0xffff - c[i] * d[i] * 2;
169 }
170 return res;
171 }
172
173 @Test
174 @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true", "rvv", "true"},
175 counts = {IRNode.STORE_VECTOR, ">0",
176 IRNode.LOAD_VECTOR_I, "> 0"})
177 // With sse2, the MulI does not vectorize. This means we have vectorized stores
178 // to res1, but scalar loads from res1. The store-to-load-forwarding failure
179 // detection catches this and rejects vectorization.
180 public int[] multipleStores() {
181 int[] res1 = new int[SIZE];
182 int[] res2 = new int[SIZE];
183 int[] res3 = new int[SIZE];
184 for (int i = 0; i < SIZE; i++) {
185 res1[i] = a[i] & b[i];
186 res2[i] = c[i] | d[i];
187 res3[i] = res1[i] * res2[i];
188 }
189 return res3;
190 }
191
192 @Test
193 @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true", "rvv", "true"},
194 counts = {IRNode.STORE_VECTOR, ">0",
195 IRNode.LOAD_VECTOR_I, "> 0"})
196 public int[] multipleStoresWithCommonSubExpression() {
197 int[] res1 = new int[SIZE];
198 int[] res2 = new int[SIZE];
199 int[] res3 = new int[SIZE];
200 for (int i = 0; i < SIZE; i++) {
201 res1[i] = a[i] * b[i];
202 res2[i] = c[i] * d[i];
203 res3[i] = res1[i] + res2[i];
204 }
205 return res3;
206 }
207
208 @Test
209 @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true", "rvv", "true"},
210 applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false"},
211 counts = {IRNode.STORE_VECTOR, ">0",
212 IRNode.LOAD_VECTOR_S, "> 0",
213 IRNode.LOAD_VECTOR_I, "> 0"})
214 public int[] multipleOpsWith2DifferentTypes() {
215 short[] res1 = new short[SIZE];
216 int[] res2 = new int[SIZE];
217 for (int i = 0; i < SIZE; i++) {
218 res1[i] = (short) (s1[i] + s2[i]);
219 res2[i] = a[i] + b[i];
220 // We have a mix of int and short loads/stores.
221 // With UseCompactObjectHeaders and AlignVector,
222 // we must 8-byte align all vector loads/stores.
223 //
224 // int:
225 // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4*iter
226 // = 16 (or 12 if UseCompactObjectHeaders=true)
227 // If UseCompactObjectHeaders=false: iter % 2 = 0
228 // If UseCompactObjectHeaders=true: iter % 2 = 1
229 //
230 // byte:
231 // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 1*iter
232 // = 16 (or 12 if UseCompactObjectHeaders=true)
233 // If UseCompactObjectHeaders=false: iter % 8 = 0
234 // If UseCompactObjectHeaders=true: iter % 8 = 4
235 //
236 // -> we cannot align both if UseCompactObjectHeaders=true.
237 }
238 return res2;
239 }
240
241 @Test
242 @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true", "rvv", "true"},
243 applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false"},
244 counts = {IRNode.STORE_VECTOR, ">0",
245 IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_ANY, "> 0",
246 IRNode.LOAD_VECTOR_L, "> 0"})
247 public long[] multipleOpsWith3DifferentTypes() {
248 short[] res1 = new short[SIZE];
249 int[] res2 = new int[SIZE];
250 long[] res3 = new long[SIZE];
251 for (int i = 0; i < SIZE; i++) {
252 res1[i] = (short) (s1[i] + s2[i]);
253 res2[i] = a[i] + b[i];
254 res3[i] = l1[i] + l2[i];
255 // We have a mix of int and short loads/stores.
256 // With UseCompactObjectHeaders and AlignVector,
257 // we must 8-byte align all vector loads/stores.
258 //
259 // int:
260 // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4*iter
261 // = 16 (or 12 if UseCompactObjectHeaders=true)
262 // If UseCompactObjectHeaders=false: iter % 2 = 0
263 // If UseCompactObjectHeaders=true: iter % 2 = 1
264 //
265 // byte:
266 // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 1*iter
267 // = 16 (or 12 if UseCompactObjectHeaders=true)
268 // If UseCompactObjectHeaders=false: iter % 8 = 0
269 // If UseCompactObjectHeaders=true: iter % 8 = 4
270 //
271 // -> we cannot align both if UseCompactObjectHeaders=true.
272 }
273 return res3;
274 }
275
276 @Test
277 @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true", "rvv", "true"},
278 counts = {IRNode.STORE_VECTOR, ">0",
279 IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_ANY, "> 0",
280 IRNode.LOAD_VECTOR_L, "> 0"})
281 public long[] multipleOpsWith2NonAdjacentTypes() {
282 short[] res1 = new short[SIZE];
283 long[] res2 = new long[SIZE];
284 for (int i = 0; i < SIZE; i++) {
285 res1[i] = (short) (s1[i] + s2[i]);
286 res2[i] = l1[i] + l2[i];
287 }
288 return res2;
289 }
290
291 @Test
292 @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true", "rvv", "true"},
293 applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false"},
294 counts = {IRNode.STORE_VECTOR, ">0",
295 IRNode.LOAD_VECTOR_S, "> 0",
296 IRNode.LOAD_VECTOR_I, "> 0"})
297 public int[] multipleOpsWith2DifferentTypesAndConstant() {
298 short[] res1 = new short[SIZE];
299 int[] res2 = new int[SIZE];
300 for (int i = 0; i < SIZE; i++) {
301 res1[i] = (short) (s1[i] + s2[i]);
302 res2[i] = a[i] + 88888888;;
303 // We have a mix of int and short loads/stores.
304 // With UseCompactObjectHeaders and AlignVector,
305 // we must 8-byte align all vector loads/stores.
306 //
307 // int:
308 // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4*iter
309 // = 16 (or 12 if UseCompactObjectHeaders=true)
310 // If UseCompactObjectHeaders=false: iter % 2 = 0
311 // If UseCompactObjectHeaders=true: iter % 2 = 1
312 //
313 // byte:
314 // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 1*iter
315 // = 16 (or 12 if UseCompactObjectHeaders=true)
316 // If UseCompactObjectHeaders=false: iter % 8 = 0
317 // If UseCompactObjectHeaders=true: iter % 8 = 4
318 //
319 // -> we cannot align both if UseCompactObjectHeaders=true.
320 }
321 return res2;
322 }
323
324 @Test
325 @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true", "rvv", "true"},
326 applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false"},
327 counts = {IRNode.STORE_VECTOR, ">0",
328 IRNode.LOAD_VECTOR_S, "> 0",
329 IRNode.LOAD_VECTOR_I, "> 0"})
330 public int[] multipleOpsWith2DifferentTypesAndInvariant() {
331 short[] res1 = new short[SIZE];
332 int[] res2 = new int[SIZE];
333 for (int i = 0; i < SIZE; i++) {
334 res1[i] = (short) (s1[i] + s2[i]);
335 res2[i] = a[i] * intInv;
336 // We have a mix of int and short loads/stores.
337 // With UseCompactObjectHeaders and AlignVector,
338 // we must 8-byte align all vector loads/stores.
339 //
340 // int:
341 // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4*iter
342 // = 16 (or 12 if UseCompactObjectHeaders=true)
343 // If UseCompactObjectHeaders=false: iter % 2 = 0
344 // If UseCompactObjectHeaders=true: iter % 2 = 1
345 //
346 // byte:
347 // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 1*iter
348 // = 16 (or 12 if UseCompactObjectHeaders=true)
349 // If UseCompactObjectHeaders=false: iter % 8 = 0
350 // If UseCompactObjectHeaders=true: iter % 8 = 4
351 //
352 // -> we cannot align both if UseCompactObjectHeaders=true.
353 }
354 return res2;
355 }
356
357 @Test
358 @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true", "rvv", "true"},
359 applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false"},
360 counts = {IRNode.STORE_VECTOR, ">0",
361 IRNode.LOAD_VECTOR_S, "> 0",
362 IRNode.LOAD_VECTOR_I, "> 0"})
363 public int[] multipleOpsWith2DifferentTypesAndComplexExpression() {
364 short[] res1 = new short[SIZE];
365 int[] res2 = new int[SIZE];
366 for (int i = 0; i < SIZE; i++) {
367 res1[i] = (short) (s1[i] + s2[i]);
368 res2[i] = a[i] * (b[i] + intInv * c[i] & 0xfffffa);
369 // same argument as in multipleOpsWith2DifferentTypesAndInvariant.
370 }
371 return res2;
372 }
373
374 @Test
375 @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse3", "true", "rvv", "true"},
376 applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false"},
377 counts = {IRNode.STORE_VECTOR, ">0",
378 IRNode.LOAD_VECTOR_S, "> 0",
379 IRNode.LOAD_VECTOR_I, "> 0"})
380 public int[] multipleOpsWith2DifferentTypesAndSharedOp() {
381 int i = 0, sum = 0;
382 int[] res1 = new int[SIZE];
383 short[] res2 = new short[SIZE];
384 while (++i < SIZE) {
385 sum += (res1[i]--);
386 res2[i]++;
387 // We have a mix of int and short loads/stores.
388 // With UseCompactObjectHeaders and AlignVector,
389 // we must 8-byte align all vector loads/stores.
390 //
391 // int:
392 // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4*iter
393 // = 16 (or 12 if UseCompactObjectHeaders=true)
394 // If UseCompactObjectHeaders=false: iter % 2 = 0
395 // If UseCompactObjectHeaders=true: iter % 2 = 1
396 //
397 // byte:
398 // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 1*iter
399 // = 16 (or 12 if UseCompactObjectHeaders=true)
400 // If UseCompactObjectHeaders=false: iter % 8 = 0
401 // If UseCompactObjectHeaders=true: iter % 8 = 4
402 //
403 // -> we cannot align both if UseCompactObjectHeaders=true.
404 }
405 return res1;
406 }
407
408 @Test
409 // POPULATE_INDEX seems to mess with vectorization, see JDK-8332878.
410 public int[] fillIndexPlusStride() {
411 int[] res = new int[SIZE];
412 for (int i = 0; i < SIZE; i++) {
413 res[i] = i + 1;
414 }
415 return res;
416 }
417
418 @Test
419 // POPULATE_INDEX seems to mess with vectorization, see JDK-8332878.
420 public int[] addArrayWithIndex() {
421 int[] res = new int[SIZE];
422 for (int i = 0; i < SIZE; i++) {
423 res[i] = a[i] + i;
424 }
425 return res;
426 }
427
428 @Test
429 // POPULATE_INDEX seems to mess with vectorization, see JDK-8332878.
430 public short[] multiplyAddShortIndex() {
431 short[] res = new short[SIZE];
432 for (int i = 0; i < SIZE; i++) {
433 res[i] = (short) (i * i + i);
434 }
435 return res;
436 }
437
438 @Test
439 // POPULATE_INDEX seems to mess with vectorization, see JDK-8332878.
440 public int[] multiplyBySumOfIndexAndInvariant() {
441 int[] res = new int[SIZE];
442 for (int i = 0; i < SIZE; i++) {
443 res[i] = a[i] * (i + 10 + intInv);
444 }
445 return res;
446 }
447
448 @Test
449 @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true", "rvv", "true"},
450 applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false"},
451 counts = {IRNode.STORE_VECTOR, ">0"})
452 public int[] manuallyUnrolledStride2() {
453 int[] res = new int[SIZE];
454 for (int i = 0; i < SIZE - 1; i += 2) {
455 res[i] = a[i] * b[i];
456 res[i + 1] = a[i + 1] * b[i + 1];
457 // Hand-unrolling can mess with alignment!
458 //
459 // With UseCompactObjectHeaders and AlignVector,
460 // we must 8-byte align all vector loads/stores.
461 //
462 // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 8*iter
463 // = 16 (or 12 if UseCompactObjectHeaders=true)
464 // If UseCompactObjectHeaders=false: 16 divisible by 8 -> vectorize
465 // If UseCompactObjectHeaders=true: 12 not divisibly by 8 -> not vectorize
466 }
467 return res;
468 }
469
470 @Test
471 @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true", "rvv", "true"},
472 counts = {IRNode.STORE_VECTOR, ">0",
473 IRNode.LOAD_VECTOR_I, "> 0"})
474 public int partialVectorizableLoop() {
475 int[] res = new int[SIZE];
476 int k = 9;
477 for (int i = 0; i < SIZE / 2; i++) {
478 res[i] = a[i] * b[i];
479 k = 3 * k + 1;
480 }
481 return k;
482 }
483 }