1 /*
  2  * Copyright (c) 2023, 2024, Oracle and/or its affiliates. All rights reserved.
  3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4  *
  5  * This code is free software; you can redistribute it and/or modify it
  6  * under the terms of the GNU General Public License version 2 only, as
  7  * published by the Free Software Foundation.
  8  *
  9  * This code is distributed in the hope that it will be useful, but WITHOUT
 10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 12  * version 2 for more details (a copy is included in the LICENSE file that
 13  * accompanied this code).
 14  *
 15  * You should have received a copy of the GNU General Public License version
 16  * 2 along with this work; if not, write to the Free Software Foundation,
 17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 18  *
 19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 20  * or visit www.oracle.com if you need additional information or have any
 21  * questions.
 22  */
 23 
 24 /**
 25  * @test
 26  * @bug 8310886 8325252 8320622
 27  * @summary Test MulAddS2I vectorization.
 28  * @library /test/lib /
 29  * @run driver compiler.loopopts.superword.TestMulAddS2I
 30  */
 31 
 32 package compiler.loopopts.superword;
 33 
 34 import compiler.lib.ir_framework.*;
 35 import jdk.test.lib.Asserts;
 36 import jdk.test.lib.Platform;
 37 
 38 public class TestMulAddS2I {
    // Number of short elements in each input array.
    static final int RANGE = 1024*16;
    // Length of every output array. The kernels below read shorts at indices up to
    // 2*i+3 while writing out[i] / out[i+1], so ITER stays below RANGE/2.
    static final int ITER  = RANGE/2 - 1;

    // Shared input data, filled with random values in the static initializer below.
    static short[] sArr1 = new short[RANGE];
    static short[] sArr2 = new short[RANGE];
    // Reference results, one per test method (GOLDEN_A for testa, ..., GOLDEN_M for testm).
    static final int[] GOLDEN_A;
    static final int[] GOLDEN_B;
    static final int[] GOLDEN_C;
    static final int[] GOLDEN_D;
    static final int[] GOLDEN_E;
    static final int[] GOLDEN_F;
    static final int[] GOLDEN_G;
    static final int[] GOLDEN_H;
    static final int[] GOLDEN_I;
    static final int[] GOLDEN_J;
    static final int[] GOLDEN_K;
    static final int[] GOLDEN_L;
    static final int[] GOLDEN_M;

    static {
        // Fill the inputs first: every golden result below depends on them.
        for (int i = 0; i < RANGE; i++) {
            sArr1[i] = (short)(AbstractInfo.getRandom().nextInt());
            sArr2[i] = (short)(AbstractInfo.getRandom().nextInt());
        }
        // Compute each reference result once, at class initialization, by calling the
        // test method itself. run() later compares fresh results against these.
        // NOTE(review): presumably these calls execute before the framework compiles
        // the test methods, making them the non-vectorized reference - confirm.
        GOLDEN_A = testa();
        GOLDEN_B = testb();
        GOLDEN_C = testc(new int[ITER]);
        GOLDEN_D = testd(new int[ITER]);
        GOLDEN_E = teste(new int[ITER]);
        GOLDEN_F = testf(new int[ITER]);
        GOLDEN_G = testg(new int[ITER]);
        GOLDEN_H = testh(new int[ITER]);
        GOLDEN_I = testi(new int[ITER]);
        GOLDEN_J = testj(new int[ITER]);
        GOLDEN_K = testk(new int[ITER]);
        GOLDEN_L = testl(new int[ITER]);
        GOLDEN_M = testm(new int[ITER]);
    }
 77 
 78 
 79     public static void main(String[] args) {
 80         TestFramework.runWithFlags("-XX:+IgnoreUnrecognizedVMOptions", "-XX:-AlignVector", "-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaers");
 81         TestFramework.runWithFlags("-XX:+IgnoreUnrecognizedVMOptions", "-XX:+AlignVector", "-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaers");
 82         TestFramework.runWithFlags("-XX:+IgnoreUnrecognizedVMOptions", "-XX:-AlignVector", "-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaers");
 83         TestFramework.runWithFlags("-XX:+IgnoreUnrecognizedVMOptions", "-XX:+AlignVector", "-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaers");
 84     }
 85 
 86     @Run(test = {"testa", "testb", "testc", "testd", "teste", "testf", "testg", "testh",
 87                  "testi", "testj", "testk", "testl", "testm"})
 88     @Warmup(0)
 89     public static void run() {
 90         compare(testa(), GOLDEN_A, "testa");
 91         compare(testb(), GOLDEN_B, "testb");
 92         compare(testc(new int[ITER]), GOLDEN_C, "testc");
 93         compare(testd(new int[ITER]), GOLDEN_D, "testd");
 94         compare(teste(new int[ITER]), GOLDEN_E, "teste");
 95         compare(testf(new int[ITER]), GOLDEN_F, "testf");
 96         compare(testg(new int[ITER]), GOLDEN_G, "testg");
 97         compare(testh(new int[ITER]), GOLDEN_H, "testh");
 98         compare(testi(new int[ITER]), GOLDEN_I, "testi");
 99         compare(testj(new int[ITER]), GOLDEN_J, "testj");
100         compare(testk(new int[ITER]), GOLDEN_K, "testk");
101         compare(testl(new int[ITER]), GOLDEN_L, "testl");
102         compare(testm(new int[ITER]), GOLDEN_M, "testm");
103     }
104 
105     public static void compare(int[] out, int[] golden, String name) {
106         for (int i = 0; i < ITER; i++) {
107             Asserts.assertEQ(out[i], golden[i], "wrong result for '" + name + "' out[" + i + "]");
108         }
109     }
110 
    /**
     * Sum of squares of adjacent short pairs from sArr1:
     * out[i] = sArr1[2i]*sArr1[2i] + sArr1[2i+1]*sArr1[2i+1].
     *
     * The IR rules expect both the scalar MulAddS2I node and its vector form:
     * MUL_ADD_VS2VI with sse2 on 64-bit platforms and with asimd when
     * MaxVectorSize is 16, MUL_ADD_VS2VI_VNNI with avx512_vnni.
     *
     * @return the freshly allocated result array of length ITER
     */
    @Test
    @IR(applyIfCPUFeature = {"sse2", "true"},
        applyIfPlatform = {"64-bit", "true"},
        counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
    @IR(applyIfCPUFeature = {"asimd", "true"},
        applyIf = {"MaxVectorSize", "16"}, // AD file requires vector_length = 16
        counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
    @IR(applyIfCPUFeature = {"avx512_vnni", "true"},
        counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI_VNNI, "> 0"})
    public static int[] testa() {
        int[] out = new int[ITER];
        // out2 is written in the loop but never returned.
        // NOTE(review): presumably kept on purpose to give the loop a second
        // consumer of out[i] - do not remove without checking the listed bugs.
        int[] out2 = new int[ITER];
        for (int i = 0; i < ITER; i++) {
            // Both multiplications use the same array (sArr1) for both operands.
            out[i] += ((sArr1[2*i] * sArr1[2*i]) + (sArr1[2*i+1] * sArr1[2*i+1]));
            out2[i] += out[i];
        }
        return out;
    }
129 
    /**
     * Pairwise multiply-add of sArr1 and sArr2:
     * out[i] = sArr1[2i]*sArr2[2i] + sArr1[2i+1]*sArr2[2i+1].
     *
     * The IR rules expect both the scalar MulAddS2I node and its vector form:
     * MUL_ADD_VS2VI with sse2 on 64-bit platforms and with asimd when
     * MaxVectorSize is 16, MUL_ADD_VS2VI_VNNI with avx512_vnni.
     *
     * @return the freshly allocated result array of length ITER
     */
    @Test
    @IR(applyIfCPUFeature = {"sse2", "true"},
        applyIfPlatform = {"64-bit", "true"},
        counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
    @IR(applyIfCPUFeature = {"asimd", "true"},
        applyIf = {"MaxVectorSize", "16"}, // AD file requires vector_length = 16
        counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
    @IR(applyIfCPUFeature = {"avx512_vnni", "true"},
        counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI_VNNI, "> 0"})
    public static int[] testb() {
        int[] out = new int[ITER];
        // out2 is written in the loop but never returned.
        // NOTE(review): presumably kept on purpose to give the loop a second
        // consumer of out[i] - do not remove without checking the listed bugs.
        int[] out2 = new int[ITER];
        for (int i = 0; i < ITER; i++) {
            // Same shape as testa, but the two operands come from different arrays.
            out[i] += ((sArr1[2*i] * sArr2[2*i]) + (sArr1[2*i+1] * sArr2[2*i+1]));
            out2[i] += out[i];
        }
        return out;
    }
148 
    /**
     * Pairwise multiply-add of sArr1 and sArr2, accumulated into a caller-supplied array:
     * out[i] += sArr1[2i]*sArr2[2i] + sArr1[2i+1]*sArr2[2i+1].
     *
     * The IR rules expect both the scalar MulAddS2I node and its vector form:
     * MUL_ADD_VS2VI with sse2 on 64-bit platforms and with asimd when
     * MaxVectorSize is 16, MUL_ADD_VS2VI_VNNI with avx512_vnni.
     *
     * @param out accumulator with at least ITER elements; modified in place
     * @return the same array that was passed in
     */
    @Test
    @IR(applyIfCPUFeature = {"sse2", "true"},
        applyIfPlatform = {"64-bit", "true"},
        counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
    @IR(applyIfCPUFeature = {"asimd", "true"},
        applyIf = {"MaxVectorSize", "16"}, // AD file requires vector_length = 16
        counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
    @IR(applyIfCPUFeature = {"avx512_vnni", "true"},
        counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI_VNNI, "> 0"})
    public static int[] testc(int[] out) {
        for (int i = 0; i < ITER; i++) {
            out[i] += ((sArr1[2*i] * sArr2[2*i]) + (sArr1[2*i+1] * sArr2[2*i+1]));
        }
        return out;
    }
164 
    /**
     * Hand-unrolled (by two) variant of the pairwise multiply-add: each iteration
     * updates out[i] and out[i+1] with the same operand order in both statements.
     *
     * The IR rules expect the scalar MulAddS2I node and its vector form. On sse2
     * (64-bit) and avx512_vnni they apply unless both UseCompactObjectHeaders and
     * AlignVector are enabled; on asimd they apply only with MaxVectorSize 16 and
     * UseCompactObjectHeaders disabled.
     *
     * @param out accumulator with at least ITER elements; modified in place
     * @return the same array that was passed in
     */
    @Test
    @IR(applyIfCPUFeature = {"sse2", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false" },
        counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
    @IR(applyIfCPUFeature = {"asimd", "true"},
        applyIfAnd = {"MaxVectorSize", "16", "UseCompactObjectHeaders", "false"}, // AD file requires vector_length = 16
        counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
    @IR(applyIfCPUFeature = {"avx512_vnni", "true"},
        applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false" },
        counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI_VNNI, "> 0"})
    public static int[] testd(int[] out) {
        for (int i = 0; i < ITER-2; i+=2) {
            // Unrolled, with the same structure.
            out[i+0] += ((sArr1[2*i+0] * sArr2[2*i+0]) + (sArr1[2*i+1] * sArr2[2*i+1]));
            out[i+1] += ((sArr1[2*i+2] * sArr2[2*i+2]) + (sArr1[2*i+3] * sArr2[2*i+3]));
            // Hand-unrolling can mess with AlignVector and UseCompactObjectHeaders.
            // We need all addresses 8-byte aligned.
            //
            // out:
            //   adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 8*iter
            //                = 16 (or 12 if UseCompactObjectHeaders=true)
            // -> with the 12-byte offset, 12 + 8*iter is never a multiple of 8,
            //    so (assuming an 8-byte aligned base) the address is never aligned.
            //    The IR rules above are therefore conditioned on
            //    UseCompactObjectHeaders / AlignVector.
        }
        return out;
    }
191 
    /**
     * Hand-unrolled (by two) pairwise multiply-add where the second statement
     * swaps the operands of its first multiplication (sArr2 * sArr1 instead of
     * sArr1 * sArr2). The result is arithmetically the same; the IR rules expect
     * vectorization to still happen.
     *
     * On sse2 (64-bit) and avx512_vnni the rules apply unless both
     * UseCompactObjectHeaders and AlignVector are enabled; on asimd they apply
     * only with MaxVectorSize 16 and UseCompactObjectHeaders disabled.
     *
     * @param out accumulator with at least ITER elements; modified in place
     * @return the same array that was passed in
     */
    @Test
    @IR(applyIfCPUFeature = {"sse2", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false" },
        counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
    @IR(applyIfCPUFeature = {"asimd", "true"},
        applyIfAnd = {"MaxVectorSize", "16", "UseCompactObjectHeaders", "false" }, // AD file requires vector_length = 16
        counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
    @IR(applyIfCPUFeature = {"avx512_vnni", "true"},
        applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false" },
        counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI_VNNI, "> 0"})
    public static int[] teste(int[] out) {
        for (int i = 0; i < ITER-2; i+=2) {
            // Unrolled, with some swaps.
            out[i+0] += ((sArr1[2*i+0] * sArr2[2*i+0]) + (sArr1[2*i+1] * sArr2[2*i+1]));
            out[i+1] += ((sArr2[2*i+2] * sArr1[2*i+2]) + (sArr1[2*i+3] * sArr2[2*i+3])); // swap(1 2)
            // Hand-unrolling can mess with AlignVector and UseCompactObjectHeaders.
            // We need all addresses 8-byte aligned.
            //
            // out:
            //   adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 8*iter
            //                = 16 (or 12 if UseCompactObjectHeaders=true)
            // -> with the 12-byte offset, 12 + 8*iter is never a multiple of 8,
            //    so (assuming an 8-byte aligned base) the address is never aligned.
            //    The IR rules above are therefore conditioned on
            //    UseCompactObjectHeaders / AlignVector.
        }
        return out;
    }
218 
    /**
     * Hand-unrolled (by two) pairwise multiply-add where the second statement
     * swaps the operands of both multiplications (sArr2 * sArr1 in each).
     * The result is arithmetically the same; the IR rules expect vectorization
     * to still happen.
     *
     * On sse2 (64-bit) and avx512_vnni the rules apply unless both
     * UseCompactObjectHeaders and AlignVector are enabled; on asimd they apply
     * only with MaxVectorSize 16 and UseCompactObjectHeaders disabled.
     *
     * @param out accumulator with at least ITER elements; modified in place
     * @return the same array that was passed in
     */
    @Test
    @IR(applyIfCPUFeature = {"sse2", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false" },
        counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
    @IR(applyIfCPUFeature = {"asimd", "true"},
        applyIfAnd = {"MaxVectorSize", "16", "UseCompactObjectHeaders", "false" }, // AD file requires vector_length = 16
        counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
    @IR(applyIfCPUFeature = {"avx512_vnni", "true"},
        applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false" },
        counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI_VNNI, "> 0"})
    public static int[] testf(int[] out) {
        for (int i = 0; i < ITER-2; i+=2) {
            // Unrolled, with some swaps.
            out[i+0] += ((sArr1[2*i+0] * sArr2[2*i+0]) + (sArr1[2*i+1] * sArr2[2*i+1]));
            out[i+1] += ((sArr2[2*i+2] * sArr1[2*i+2]) + (sArr2[2*i+3] * sArr1[2*i+3])); // swap(1 2), swap(3 4)
            // Hand-unrolling can mess with AlignVector and UseCompactObjectHeaders.
            // We need all addresses 8-byte aligned.
            //
            // out:
            //   adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 8*iter
            //                = 16 (or 12 if UseCompactObjectHeaders=true)
            // -> with the 12-byte offset, 12 + 8*iter is never a multiple of 8,
            //    so (assuming an 8-byte aligned base) the address is never aligned.
            //    The IR rules above are therefore conditioned on
            //    UseCompactObjectHeaders / AlignVector.
        }
        return out;
    }
245 
    /**
     * Hand-unrolled (by two) pairwise multiply-add where the second statement
     * swaps the two products of the addition (the 2i+3 product comes first,
     * the 2i+2 product second). The result is arithmetically the same; the IR
     * rules expect vectorization to still happen.
     *
     * On sse2 (64-bit) and avx512_vnni the rules apply unless both
     * UseCompactObjectHeaders and AlignVector are enabled; on asimd they apply
     * only with MaxVectorSize 16 and UseCompactObjectHeaders disabled.
     *
     * @param out accumulator with at least ITER elements; modified in place
     * @return the same array that was passed in
     */
    @Test
    @IR(applyIfCPUFeature = {"sse2", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false" },
        counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
    @IR(applyIfCPUFeature = {"asimd", "true"},
        applyIfAnd = {"MaxVectorSize", "16", "UseCompactObjectHeaders", "false" }, // AD file requires vector_length = 16
        counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
    @IR(applyIfCPUFeature = {"avx512_vnni", "true"},
        applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false" },
        counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI_VNNI, "> 0"})
    public static int[] testg(int[] out) {
        for (int i = 0; i < ITER-2; i+=2) {
            // Unrolled, with some swaps.
            out[i+0] += ((sArr1[2*i+0] * sArr2[2*i+0]) + (sArr1[2*i+1] * sArr2[2*i+1]));
            out[i+1] += ((sArr1[2*i+3] * sArr2[2*i+3]) + (sArr1[2*i+2] * sArr2[2*i+2])); // swap(1 3), swap(2 4)
            // Hand-unrolling can mess with AlignVector and UseCompactObjectHeaders.
            // We need all addresses 8-byte aligned.
            //
            // out:
            //   adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 8*iter
            //                = 16 (or 12 if UseCompactObjectHeaders=true)
            // -> with the 12-byte offset, 12 + 8*iter is never a multiple of 8,
            //    so (assuming an 8-byte aligned base) the address is never aligned.
            //    The IR rules above are therefore conditioned on
            //    UseCompactObjectHeaders / AlignVector.
        }
        return out;
    }
272 
    /**
     * Hand-unrolled (by two) pairwise multiply-add where the second statement
     * swaps both the order of the two products and the operands inside each
     * product. The result is arithmetically the same; the IR rules expect
     * vectorization to still happen.
     *
     * On sse2 (64-bit) and avx512_vnni the rules apply unless both
     * UseCompactObjectHeaders and AlignVector are enabled; on asimd they apply
     * only with MaxVectorSize 16 and UseCompactObjectHeaders disabled.
     *
     * @param out accumulator with at least ITER elements; modified in place
     * @return the same array that was passed in
     */
    @Test
    @IR(applyIfCPUFeature = {"sse2", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false" },
        counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
    @IR(applyIfCPUFeature = {"asimd", "true"},
        applyIfAnd = {"MaxVectorSize", "16", "UseCompactObjectHeaders", "false" }, // AD file requires vector_length = 16
        counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
    @IR(applyIfCPUFeature = {"avx512_vnni", "true"},
        applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false" },
        counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI_VNNI, "> 0"})
    public static int[] testh(int[] out) {
        for (int i = 0; i < ITER-2; i+=2) {
            // Unrolled, with some swaps.
            out[i+0] += ((sArr1[2*i+0] * sArr2[2*i+0]) + (sArr1[2*i+1] * sArr2[2*i+1]));
            out[i+1] += ((sArr2[2*i+3] * sArr1[2*i+3]) + (sArr2[2*i+2] * sArr1[2*i+2])); // swap(1 4), swap(2 3)
            // Hand-unrolling can mess with AlignVector and UseCompactObjectHeaders.
            // We need all addresses 8-byte aligned.
            //
            // out:
            //   adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 8*iter
            //                = 16 (or 12 if UseCompactObjectHeaders=true)
            // -> with the 12-byte offset, 12 + 8*iter is never a multiple of 8,
            //    so (assuming an 8-byte aligned base) the address is never aligned.
            //    The IR rules above are therefore conditioned on
            //    UseCompactObjectHeaders / AlignVector.
        }
        return out;
    }
299 
    /**
     * Negative test: hand-unrolled (by two) loop whose second statement multiplies
     * elements at different indices (sArr1[2i+2] with sArr2[2i+3], and
     * sArr1[2i+3] with sArr2[2i+2]).
     *
     * The IR rules expect scalar MulAddS2I nodes (on sse2 or asimd) but no
     * MUL_ADD_VS2VI vector nodes at all.
     *
     * @param out accumulator with at least ITER elements; modified in place
     * @return the same array that was passed in
     */
    @Test
    @IR(counts = {IRNode.MUL_ADD_S2I, "> 0"},
        applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"})
    @IR(counts = {IRNode.MUL_ADD_VS2VI, "= 0"})
    public static int[] testi(int[] out) {
        for (int i = 0; i < ITER-2; i+=2) {
            // Unrolled, with some swaps that prevent vectorization.
            out[i+0] += ((sArr1[2*i+0] * sArr2[2*i+0]) + (sArr1[2*i+1] * sArr2[2*i+1])); // ok
            out[i+1] += ((sArr1[2*i+2] * sArr2[2*i+3]) + (sArr1[2*i+3] * sArr2[2*i+2])); // bad
        }
        return out;
    }
312 
    /**
     * Negative test: hand-unrolled (by two) loop in which both statements multiply
     * elements at different indices (each sArr1 element is paired with the
     * neighbouring sArr2 element instead of the one at the same index).
     *
     * The IR rules expect scalar MulAddS2I nodes (on sse2 or asimd) but no
     * MUL_ADD_VS2VI vector nodes at all.
     *
     * @param out accumulator with at least ITER elements; modified in place
     * @return the same array that was passed in
     */
    @Test
    @IR(counts = {IRNode.MUL_ADD_S2I, "> 0"},
        applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"})
    @IR(counts = {IRNode.MUL_ADD_VS2VI, "= 0"})
    public static int[] testj(int[] out) {
        for (int i = 0; i < ITER-2; i+=2) {
            // Unrolled, with some swaps that prevent vectorization.
            out[i+0] += ((sArr1[2*i+0] * sArr2[2*i+1]) + (sArr1[2*i+1] * sArr2[2*i+0])); // bad
            out[i+1] += ((sArr1[2*i+2] * sArr2[2*i+3]) + (sArr1[2*i+3] * sArr2[2*i+2])); // bad
        }
        return out;
    }
325 
    /**
     * Negative test: hand-unrolled (by two) loop whose first statement multiplies
     * elements at different indices (sArr1[2i+0] with sArr2[2i+1], and
     * sArr1[2i+1] with sArr2[2i+0]), while the second statement is well-formed.
     *
     * The IR rules expect scalar MulAddS2I nodes (on sse2 or asimd) but no
     * MUL_ADD_VS2VI vector nodes at all.
     *
     * @param out accumulator with at least ITER elements; modified in place
     * @return the same array that was passed in
     */
    @Test
    @IR(counts = {IRNode.MUL_ADD_S2I, "> 0"},
        applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"})
    @IR(counts = {IRNode.MUL_ADD_VS2VI, "= 0"})
    public static int[] testk(int[] out) {
        for (int i = 0; i < ITER-2; i+=2) {
            // Unrolled, with some swaps that prevent vectorization.
            out[i+0] += ((sArr1[2*i+0] * sArr2[2*i+1]) + (sArr1[2*i+1] * sArr2[2*i+0])); // bad
            out[i+1] += ((sArr1[2*i+2] * sArr2[2*i+2]) + (sArr1[2*i+3] * sArr2[2*i+3])); // ok
        }
        return out;
    }
338 
    /**
     * Negative test: hand-unrolled (by two) loop whose first statement only swaps
     * the order of its two products (marked ok), while the second statement
     * multiplies elements at different indices (marked bad).
     *
     * The IR rules expect scalar MulAddS2I nodes (on sse2 or asimd) but no
     * MUL_ADD_VS2VI vector nodes at all.
     *
     * @param out accumulator with at least ITER elements; modified in place
     * @return the same array that was passed in
     */
    @Test
    @IR(counts = {IRNode.MUL_ADD_S2I, "> 0"},
        applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"})
    @IR(counts = {IRNode.MUL_ADD_VS2VI, "= 0"})
    public static int[] testl(int[] out) {
        for (int i = 0; i < ITER-2; i+=2) {
            // Unrolled, with some swaps that prevent vectorization.
            out[i+0] += ((sArr1[2*i+1] * sArr2[2*i+1]) + (sArr1[2*i+0] * sArr2[2*i+0])); // ok
            out[i+1] += ((sArr1[2*i+2] * sArr2[2*i+3]) + (sArr1[2*i+3] * sArr2[2*i+2])); // bad
        }
        return out;
    }
351 
    /**
     * Negative test: the loop advances by four but only updates out[i] and
     * out[i+1], leaving out[i+2] and out[i+3] untouched. The first statement
     * also multiplies elements at different indices (marked bad).
     *
     * The IR rules expect scalar MulAddS2I nodes (on sse2 or asimd) but no
     * MUL_ADD_VS2VI vector nodes at all.
     *
     * @param out accumulator with at least ITER elements; modified in place
     * @return the same array that was passed in
     */
    @Test
    @IR(counts = {IRNode.MUL_ADD_S2I, "> 0"},
        applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"})
    @IR(counts = {IRNode.MUL_ADD_VS2VI, "= 0"})
    public static int[] testm(int[] out) {
        for (int i = 0; i < ITER-4; i+=4) {
            // Unrolled, with some swaps that prevent vectorization.
            out[i+0] += ((sArr1[2*i+0] * sArr2[2*i+1]) + (sArr1[2*i+1] * sArr2[2*i+0])); // bad
            out[i+1] += ((sArr1[2*i+2] * sArr2[2*i+2]) + (sArr1[2*i+3] * sArr2[2*i+3])); // ok
            // 2-element gap
        }
        return out;
    }
365 }