1 /*
2 * Copyright (c) 2023, 2024, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 */
23
24 /**
25 * @test
26 * @bug 8310886 8325252 8320622
27 * @summary Test MulAddS2I vectorization.
28 * @library /test/lib /
29 * @run driver compiler.loopopts.superword.TestMulAddS2I
30 */
31
32 package compiler.loopopts.superword;
33
34 import compiler.lib.ir_framework.*;
35 import jdk.test.lib.Asserts;
36 import jdk.test.lib.Platform;
37
38 public class TestMulAddS2I {
39 static final int RANGE = 1024*16;
40 static final int ITER = RANGE/2 - 1;
41
42 static short[] sArr1 = new short[RANGE];
43 static short[] sArr2 = new short[RANGE];
44 static final int[] GOLDEN_A;
45 static final int[] GOLDEN_B;
46 static final int[] GOLDEN_C;
47 static final int[] GOLDEN_D;
48 static final int[] GOLDEN_E;
49 static final int[] GOLDEN_F;
50 static final int[] GOLDEN_G;
51 static final int[] GOLDEN_H;
52 static final int[] GOLDEN_I;
53 static final int[] GOLDEN_J;
54 static final int[] GOLDEN_K;
55 static final int[] GOLDEN_L;
56 static final int[] GOLDEN_M;
57
58 static {
59 for (int i = 0; i < RANGE; i++) {
60 sArr1[i] = (short)(AbstractInfo.getRandom().nextInt());
61 sArr2[i] = (short)(AbstractInfo.getRandom().nextInt());
62 }
63 GOLDEN_A = testa();
64 GOLDEN_B = testb();
65 GOLDEN_C = testc(new int[ITER]);
66 GOLDEN_D = testd(new int[ITER]);
67 GOLDEN_E = teste(new int[ITER]);
68 GOLDEN_F = testf(new int[ITER]);
69 GOLDEN_G = testg(new int[ITER]);
70 GOLDEN_H = testh(new int[ITER]);
71 GOLDEN_I = testi(new int[ITER]);
72 GOLDEN_J = testj(new int[ITER]);
73 GOLDEN_K = testk(new int[ITER]);
74 GOLDEN_L = testl(new int[ITER]);
75 GOLDEN_M = testm(new int[ITER]);
76 }
77
78
79 public static void main(String[] args) {
80 TestFramework.runWithFlags("-XX:+IgnoreUnrecognizedVMOptions", "-XX:-AlignVector", "-XX:-UseCompactObjectHeaers");
81 TestFramework.runWithFlags("-XX:+IgnoreUnrecognizedVMOptions", "-XX:+AlignVector", "-XX:-UseCompactObjectHeaers");
82 TestFramework.runWithFlags("-XX:+IgnoreUnrecognizedVMOptions", "-XX:-AlignVector", "-XX:+UseCompactObjectHeaers");
83 TestFramework.runWithFlags("-XX:+IgnoreUnrecognizedVMOptions", "-XX:+AlignVector", "-XX:+UseCompactObjectHeaers");
84 }
85
86 @Run(test = {"testa", "testb", "testc", "testd", "teste", "testf", "testg", "testh",
87 "testi", "testj", "testk", "testl", "testm"})
88 @Warmup(0)
89 public static void run() {
90 compare(testa(), GOLDEN_A, "testa");
91 compare(testb(), GOLDEN_B, "testb");
92 compare(testc(new int[ITER]), GOLDEN_C, "testc");
93 compare(testd(new int[ITER]), GOLDEN_D, "testd");
94 compare(teste(new int[ITER]), GOLDEN_E, "teste");
95 compare(testf(new int[ITER]), GOLDEN_F, "testf");
96 compare(testg(new int[ITER]), GOLDEN_G, "testg");
97 compare(testh(new int[ITER]), GOLDEN_H, "testh");
98 compare(testi(new int[ITER]), GOLDEN_I, "testi");
99 compare(testj(new int[ITER]), GOLDEN_J, "testj");
100 compare(testk(new int[ITER]), GOLDEN_K, "testk");
101 compare(testl(new int[ITER]), GOLDEN_L, "testl");
102 compare(testm(new int[ITER]), GOLDEN_M, "testm");
103 }
104
105 public static void compare(int[] out, int[] golden, String name) {
106 for (int i = 0; i < ITER; i++) {
107 Asserts.assertEQ(out[i], golden[i], "wrong result for '" + name + "' out[" + i + "]");
108 }
109 }
110
111 @Test
112 @IR(applyIfCPUFeature = {"sse2", "true"},
113 applyIfPlatform = {"64-bit", "true"},
114 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
115 @IR(applyIfCPUFeature = {"asimd", "true"},
116 applyIf = {"MaxVectorSize", "16"}, // AD file requires vector_length = 16
117 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
118 @IR(applyIfCPUFeature = {"avx512_vnni", "true"},
119 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI_VNNI, "> 0"})
120 public static int[] testa() {
121 int[] out = new int[ITER];
122 int[] out2 = new int[ITER];
123 for (int i = 0; i < ITER; i++) {
124 out[i] += ((sArr1[2*i] * sArr1[2*i]) + (sArr1[2*i+1] * sArr1[2*i+1]));
125 out2[i] += out[i];
126 }
127 return out;
128 }
129
130 @Test
131 @IR(applyIfCPUFeature = {"sse2", "true"},
132 applyIfPlatform = {"64-bit", "true"},
133 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
134 @IR(applyIfCPUFeature = {"asimd", "true"},
135 applyIf = {"MaxVectorSize", "16"}, // AD file requires vector_length = 16
136 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
137 @IR(applyIfCPUFeature = {"avx512_vnni", "true"},
138 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI_VNNI, "> 0"})
139 public static int[] testb() {
140 int[] out = new int[ITER];
141 int[] out2 = new int[ITER];
142 for (int i = 0; i < ITER; i++) {
143 out[i] += ((sArr1[2*i] * sArr2[2*i]) + (sArr1[2*i+1] * sArr2[2*i+1]));
144 out2[i] += out[i];
145 }
146 return out;
147 }
148
149 @Test
150 @IR(applyIfCPUFeature = {"sse2", "true"},
151 applyIfPlatform = {"64-bit", "true"},
152 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
153 @IR(applyIfCPUFeature = {"asimd", "true"},
154 applyIf = {"MaxVectorSize", "16"}, // AD file requires vector_length = 16
155 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
156 @IR(applyIfCPUFeature = {"avx512_vnni", "true"},
157 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI_VNNI, "> 0"})
158 public static int[] testc(int[] out) {
159 for (int i = 0; i < ITER; i++) {
160 out[i] += ((sArr1[2*i] * sArr2[2*i]) + (sArr1[2*i+1] * sArr2[2*i+1]));
161 }
162 return out;
163 }
164
165 @Test
166 @IR(applyIfCPUFeature = {"sse2", "true"},
167 applyIfPlatform = {"64-bit", "true"},
168 applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false" },
169 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
170 @IR(applyIfCPUFeature = {"asimd", "true"},
171 applyIfAnd = {"MaxVectorSize", "16", "UseCompactObjectHeaders", "false"}, // AD file requires vector_length = 16
172 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
173 @IR(applyIfCPUFeature = {"avx512_vnni", "true"},
174 applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false" },
175 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI_VNNI, "> 0"})
176 public static int[] testd(int[] out) {
177 for (int i = 0; i < ITER-2; i+=2) {
178 // Unrolled, with the same structure.
179 out[i+0] += ((sArr1[2*i+0] * sArr2[2*i+0]) + (sArr1[2*i+1] * sArr2[2*i+1]));
180 out[i+1] += ((sArr1[2*i+2] * sArr2[2*i+2]) + (sArr1[2*i+3] * sArr2[2*i+3]));
181 // Hand-unrolling can mess with AlignVector and UseCompactObjectHeaders.
182 // We need all addresses 8-byte aligned.
183 //
184 // out:
185 // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 8*iter
186 // = 16 (or 12 if UseCompactObjectHeaders=true)
187 // -> never aligned!
188 }
189 return out;
190 }
191
192 @Test
193 @IR(applyIfCPUFeature = {"sse2", "true"},
194 applyIfPlatform = {"64-bit", "true"},
195 applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false" },
196 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
197 @IR(applyIfCPUFeature = {"asimd", "true"},
198 applyIfAnd = {"MaxVectorSize", "16", "UseCompactObjectHeaders", "false" }, // AD file requires vector_length = 16
199 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
200 @IR(applyIfCPUFeature = {"avx512_vnni", "true"},
201 applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false" },
202 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI_VNNI, "> 0"})
203 public static int[] teste(int[] out) {
204 for (int i = 0; i < ITER-2; i+=2) {
205 // Unrolled, with some swaps.
206 out[i+0] += ((sArr1[2*i+0] * sArr2[2*i+0]) + (sArr1[2*i+1] * sArr2[2*i+1]));
207 out[i+1] += ((sArr2[2*i+2] * sArr1[2*i+2]) + (sArr1[2*i+3] * sArr2[2*i+3])); // swap(1 2)
208 // Hand-unrolling can mess with AlignVector and UseCompactObjectHeaders.
209 // We need all addresses 8-byte aligned.
210 //
211 // out:
212 // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 8*iter
213 // = 16 (or 12 if UseCompactObjectHeaders=true)
214 // -> never aligned!
215 }
216 return out;
217 }
218
219 @Test
220 @IR(applyIfCPUFeature = {"sse2", "true"},
221 applyIfPlatform = {"64-bit", "true"},
222 applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false" },
223 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
224 @IR(applyIfCPUFeature = {"asimd", "true"},
225 applyIfAnd = {"MaxVectorSize", "16", "UseCompactObjectHeaders", "false" }, // AD file requires vector_length = 16
226 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
227 @IR(applyIfCPUFeature = {"avx512_vnni", "true"},
228 applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false" },
229 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI_VNNI, "> 0"})
230 public static int[] testf(int[] out) {
231 for (int i = 0; i < ITER-2; i+=2) {
232 // Unrolled, with some swaps.
233 out[i+0] += ((sArr1[2*i+0] * sArr2[2*i+0]) + (sArr1[2*i+1] * sArr2[2*i+1]));
234 out[i+1] += ((sArr2[2*i+2] * sArr1[2*i+2]) + (sArr2[2*i+3] * sArr1[2*i+3])); // swap(1 2), swap(3 4)
235 // Hand-unrolling can mess with AlignVector and UseCompactObjectHeaders.
236 // We need all addresses 8-byte aligned.
237 //
238 // out:
239 // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 8*iter
240 // = 16 (or 12 if UseCompactObjectHeaders=true)
241 // -> never aligned!
242 }
243 return out;
244 }
245
246 @Test
247 @IR(applyIfCPUFeature = {"sse2", "true"},
248 applyIfPlatform = {"64-bit", "true"},
249 applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false" },
250 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
251 @IR(applyIfCPUFeature = {"asimd", "true"},
252 applyIfAnd = {"MaxVectorSize", "16", "UseCompactObjectHeaders", "false" }, // AD file requires vector_length = 16
253 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
254 @IR(applyIfCPUFeature = {"avx512_vnni", "true"},
255 applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false" },
256 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI_VNNI, "> 0"})
257 public static int[] testg(int[] out) {
258 for (int i = 0; i < ITER-2; i+=2) {
259 // Unrolled, with some swaps.
260 out[i+0] += ((sArr1[2*i+0] * sArr2[2*i+0]) + (sArr1[2*i+1] * sArr2[2*i+1]));
261 out[i+1] += ((sArr1[2*i+3] * sArr2[2*i+3]) + (sArr1[2*i+2] * sArr2[2*i+2])); // swap(1 3), swap(2 4)
262 // Hand-unrolling can mess with AlignVector and UseCompactObjectHeaders.
263 // We need all addresses 8-byte aligned.
264 //
265 // out:
266 // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 8*iter
267 // = 16 (or 12 if UseCompactObjectHeaders=true)
268 // -> never aligned!
269 }
270 return out;
271 }
272
273 @Test
274 @IR(applyIfCPUFeature = {"sse2", "true"},
275 applyIfPlatform = {"64-bit", "true"},
276 applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false" },
277 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
278 @IR(applyIfCPUFeature = {"asimd", "true"},
279 applyIfAnd = {"MaxVectorSize", "16", "UseCompactObjectHeaders", "false" }, // AD file requires vector_length = 16
280 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
281 @IR(applyIfCPUFeature = {"avx512_vnni", "true"},
282 applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false" },
283 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI_VNNI, "> 0"})
284 public static int[] testh(int[] out) {
285 for (int i = 0; i < ITER-2; i+=2) {
286 // Unrolled, with some swaps.
287 out[i+0] += ((sArr1[2*i+0] * sArr2[2*i+0]) + (sArr1[2*i+1] * sArr2[2*i+1]));
288 out[i+1] += ((sArr2[2*i+3] * sArr1[2*i+3]) + (sArr2[2*i+2] * sArr1[2*i+2])); // swap(1 4), swap(2 3)
289 // Hand-unrolling can mess with AlignVector and UseCompactObjectHeaders.
290 // We need all addresses 8-byte aligned.
291 //
292 // out:
293 // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 8*iter
294 // = 16 (or 12 if UseCompactObjectHeaders=true)
295 // -> never aligned!
296 }
297 return out;
298 }
299
300 @Test
301 @IR(counts = {IRNode.MUL_ADD_S2I, "> 0"},
302 applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"})
303 @IR(counts = {IRNode.MUL_ADD_VS2VI, "= 0"})
304 public static int[] testi(int[] out) {
305 for (int i = 0; i < ITER-2; i+=2) {
306 // Unrolled, with some swaps that prevent vectorization.
307 out[i+0] += ((sArr1[2*i+0] * sArr2[2*i+0]) + (sArr1[2*i+1] * sArr2[2*i+1])); // ok
308 out[i+1] += ((sArr1[2*i+2] * sArr2[2*i+3]) + (sArr1[2*i+3] * sArr2[2*i+2])); // bad
309 }
310 return out;
311 }
312
313 @Test
314 @IR(counts = {IRNode.MUL_ADD_S2I, "> 0"},
315 applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"})
316 @IR(counts = {IRNode.MUL_ADD_VS2VI, "= 0"})
317 public static int[] testj(int[] out) {
318 for (int i = 0; i < ITER-2; i+=2) {
319 // Unrolled, with some swaps that prevent vectorization.
320 out[i+0] += ((sArr1[2*i+0] * sArr2[2*i+1]) + (sArr1[2*i+1] * sArr2[2*i+0])); // bad
321 out[i+1] += ((sArr1[2*i+2] * sArr2[2*i+3]) + (sArr1[2*i+3] * sArr2[2*i+2])); // bad
322 }
323 return out;
324 }
325
326 @Test
327 @IR(counts = {IRNode.MUL_ADD_S2I, "> 0"},
328 applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"})
329 @IR(counts = {IRNode.MUL_ADD_VS2VI, "= 0"})
330 public static int[] testk(int[] out) {
331 for (int i = 0; i < ITER-2; i+=2) {
332 // Unrolled, with some swaps that prevent vectorization.
333 out[i+0] += ((sArr1[2*i+0] * sArr2[2*i+1]) + (sArr1[2*i+1] * sArr2[2*i+0])); // bad
334 out[i+1] += ((sArr1[2*i+2] * sArr2[2*i+2]) + (sArr1[2*i+3] * sArr2[2*i+3])); // ok
335 }
336 return out;
337 }
338
339 @Test
340 @IR(counts = {IRNode.MUL_ADD_S2I, "> 0"},
341 applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"})
342 @IR(counts = {IRNode.MUL_ADD_VS2VI, "= 0"})
343 public static int[] testl(int[] out) {
344 for (int i = 0; i < ITER-2; i+=2) {
345 // Unrolled, with some swaps that prevent vectorization.
346 out[i+0] += ((sArr1[2*i+1] * sArr2[2*i+1]) + (sArr1[2*i+0] * sArr2[2*i+0])); // ok
347 out[i+1] += ((sArr1[2*i+2] * sArr2[2*i+3]) + (sArr1[2*i+3] * sArr2[2*i+2])); // bad
348 }
349 return out;
350 }
351
352 @Test
353 @IR(counts = {IRNode.MUL_ADD_S2I, "> 0"},
354 applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"})
355 @IR(counts = {IRNode.MUL_ADD_VS2VI, "= 0"})
356 public static int[] testm(int[] out) {
357 for (int i = 0; i < ITER-4; i+=4) {
358 // Unrolled, with some swaps that prevent vectorization.
359 out[i+0] += ((sArr1[2*i+0] * sArr2[2*i+1]) + (sArr1[2*i+1] * sArr2[2*i+0])); // bad
360 out[i+1] += ((sArr1[2*i+2] * sArr2[2*i+2]) + (sArr1[2*i+3] * sArr2[2*i+3])); // ok
361 // 2-element gap
362 }
363 return out;
364 }
365 }