1 /*
  2  * Copyright (c) 2023, 2024, Oracle and/or its affiliates. All rights reserved.
  3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4  *
  5  * This code is free software; you can redistribute it and/or modify it
  6  * under the terms of the GNU General Public License version 2 only, as
  7  * published by the Free Software Foundation.
  8  *
  9  * This code is distributed in the hope that it will be useful, but WITHOUT
 10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 12  * version 2 for more details (a copy is included in the LICENSE file that
 13  * accompanied this code).
 14  *
 15  * You should have received a copy of the GNU General Public License version
 16  * 2 along with this work; if not, write to the Free Software Foundation,
 17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 18  *
 19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 20  * or visit www.oracle.com if you need additional information or have any
 21  * questions.
 22  */
 23 
 24 /**
 25  * @test
 26  * @bug 8310886 8325252 8320622
 27  * @summary Test MulAddS2I vectorization.
 28  * @library /test/lib /
 29  * @run driver compiler.loopopts.superword.TestMulAddS2I
 30  */
 31 
 32 package compiler.loopopts.superword;
 33 
 34 import compiler.lib.ir_framework.*;
 35 import jdk.test.lib.Asserts;
 36 import jdk.test.lib.Platform;
 37 
 38 public class TestMulAddS2I {
 39     static final int RANGE = 1024*16;
 40     static final int ITER  = RANGE/2 - 1;
 41 
 42     static short[] sArr1 = new short[RANGE];
 43     static short[] sArr2 = new short[RANGE];
 44     static final int[] GOLDEN_A;
 45     static final int[] GOLDEN_B;
 46     static final int[] GOLDEN_C;
 47     static final int[] GOLDEN_D;
 48     static final int[] GOLDEN_E;
 49     static final int[] GOLDEN_F;
 50     static final int[] GOLDEN_G;
 51     static final int[] GOLDEN_H;
 52     static final int[] GOLDEN_I;
 53     static final int[] GOLDEN_J;
 54     static final int[] GOLDEN_K;
 55     static final int[] GOLDEN_L;
 56     static final int[] GOLDEN_M;
 57 
 58     static {
 59         for (int i = 0; i < RANGE; i++) {
 60             sArr1[i] = (short)(AbstractInfo.getRandom().nextInt());
 61             sArr2[i] = (short)(AbstractInfo.getRandom().nextInt());
 62         }
 63         GOLDEN_A = testa();
 64         GOLDEN_B = testb();
 65         GOLDEN_C = testc(new int[ITER]);
 66         GOLDEN_D = testd(new int[ITER]);
 67         GOLDEN_E = teste(new int[ITER]);
 68         GOLDEN_F = testf(new int[ITER]);
 69         GOLDEN_G = testg(new int[ITER]);
 70         GOLDEN_H = testh(new int[ITER]);
 71         GOLDEN_I = testi(new int[ITER]);
 72         GOLDEN_J = testj(new int[ITER]);
 73         GOLDEN_K = testk(new int[ITER]);
 74         GOLDEN_L = testl(new int[ITER]);
 75         GOLDEN_M = testm(new int[ITER]);
 76     }
 77 
 78 
 79     public static void main(String[] args) {
 80         TestFramework.runWithFlags("-XX:+IgnoreUnrecognizedVMOptions", "-XX:-AlignVector", "-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaers");
 81         TestFramework.runWithFlags("-XX:+IgnoreUnrecognizedVMOptions", "-XX:+AlignVector", "-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaers");
 82         TestFramework.runWithFlags("-XX:+IgnoreUnrecognizedVMOptions", "-XX:-AlignVector", "-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaers");
 83         TestFramework.runWithFlags("-XX:+IgnoreUnrecognizedVMOptions", "-XX:+AlignVector", "-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaers");
 84     }
 85 
 86     @Run(test = {"testa", "testb", "testc", "testd", "teste", "testf", "testg", "testh",
 87                  "testi", "testj", "testk", "testl", "testm"})
 88     @Warmup(0)
 89     public static void run() {
 90         compare(testa(), GOLDEN_A, "testa");
 91         compare(testb(), GOLDEN_B, "testb");
 92         compare(testc(new int[ITER]), GOLDEN_C, "testc");
 93         compare(testd(new int[ITER]), GOLDEN_D, "testd");
 94         compare(teste(new int[ITER]), GOLDEN_E, "teste");
 95         compare(testf(new int[ITER]), GOLDEN_F, "testf");
 96         compare(testg(new int[ITER]), GOLDEN_G, "testg");
 97         compare(testh(new int[ITER]), GOLDEN_H, "testh");
 98         compare(testi(new int[ITER]), GOLDEN_I, "testi");
 99         compare(testj(new int[ITER]), GOLDEN_J, "testj");
100         compare(testk(new int[ITER]), GOLDEN_K, "testk");
101         compare(testl(new int[ITER]), GOLDEN_L, "testl");
102         compare(testm(new int[ITER]), GOLDEN_M, "testm");
103     }
104 
105     public static void compare(int[] out, int[] golden, String name) {
106         for (int i = 0; i < ITER; i++) {
107             Asserts.assertEQ(out[i], golden[i], "wrong result for '" + name + "' out[" + i + "]");
108         }
109     }
110 
111     @Test
112     @IR(applyIfCPUFeature = {"sse2", "true"},
113         applyIfPlatform = {"64-bit", "true"},
114         counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
115     @IR(applyIfCPUFeature = {"asimd", "true"},
116         applyIf = {"MaxVectorSize", "16"}, // AD file requires vector_length = 16
117         counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
118     @IR(applyIfCPUFeature = {"avx512_vnni", "true"},
119         counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI_VNNI, "> 0"})
120     public static int[] testa() {
121         int[] out = new int[ITER];
122         int[] out2 = new int[ITER];
123         for (int i = 0; i < ITER; i++) {
124             out[i] += ((sArr1[2*i] * sArr1[2*i]) + (sArr1[2*i+1] * sArr1[2*i+1]));
125             out2[i] += out[i];
126         }
127         return out;
128     }
129 
130     @Test
131     @IR(applyIfCPUFeature = {"sse2", "true"},
132         applyIfPlatform = {"64-bit", "true"},
133         counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
134     @IR(applyIfCPUFeature = {"asimd", "true"},
135         applyIf = {"MaxVectorSize", "16"}, // AD file requires vector_length = 16
136         counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
137     @IR(applyIfCPUFeature = {"avx512_vnni", "true"},
138         counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI_VNNI, "> 0"})
139     public static int[] testb() {
140         int[] out = new int[ITER];
141         int[] out2 = new int[ITER];
142         for (int i = 0; i < ITER; i++) {
143             out[i] += ((sArr1[2*i] * sArr2[2*i]) + (sArr1[2*i+1] * sArr2[2*i+1]));
144             out2[i] += out[i];
145         }
146         return out;
147     }
148 
149     @Test
150     @IR(applyIfCPUFeature = {"sse2", "true"},
151         applyIfPlatform = {"64-bit", "true"},
152         counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
153     @IR(applyIfCPUFeature = {"asimd", "true"},
154         applyIf = {"MaxVectorSize", "16"}, // AD file requires vector_length = 16
155         counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
156     @IR(applyIfCPUFeature = {"avx512_vnni", "true"},
157         counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI_VNNI, "> 0"})
158     public static int[] testc(int[] out) {
159         for (int i = 0; i < ITER; i++) {
160             out[i] += ((sArr1[2*i] * sArr2[2*i]) + (sArr1[2*i+1] * sArr2[2*i+1]));
161         }
162         return out;
163     }
164 
165     @Test
166     @IR(applyIfCPUFeature = {"sse2", "true"},
167         applyIfPlatform = {"64-bit", "true"},
168         counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
169     @IR(applyIfCPUFeature = {"asimd", "true"},
170         applyIf = {"MaxVectorSize", "16"}, // AD file requires vector_length = 16
171         counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
172     @IR(applyIfCPUFeature = {"avx512_vnni", "true"},
173         counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI_VNNI, "> 0"})
174     public static int[] testd(int[] out) {
175         for (int i = 0; i < ITER-2; i+=2) {
176             // Unrolled, with the same structure.
177             out[i+0] += ((sArr1[2*i+0] * sArr2[2*i+0]) + (sArr1[2*i+1] * sArr2[2*i+1]));
178             out[i+1] += ((sArr1[2*i+2] * sArr2[2*i+2]) + (sArr1[2*i+3] * sArr2[2*i+3]));
179         }
180         return out;
181     }
182 
183     @Test
184     @IR(applyIfCPUFeature = {"sse2", "true"},
185         applyIfPlatform = {"64-bit", "true"},
186         counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
187     @IR(applyIfCPUFeature = {"asimd", "true"},
188         applyIf = {"MaxVectorSize", "16"}, // AD file requires vector_length = 16
189         counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
190     @IR(applyIfCPUFeature = {"avx512_vnni", "true"},
191         counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI_VNNI, "> 0"})
192     public static int[] teste(int[] out) {
193         for (int i = 0; i < ITER-2; i+=2) {
194             // Unrolled, with some swaps.
195             out[i+0] += ((sArr1[2*i+0] * sArr2[2*i+0]) + (sArr1[2*i+1] * sArr2[2*i+1]));
196             out[i+1] += ((sArr2[2*i+2] * sArr1[2*i+2]) + (sArr1[2*i+3] * sArr2[2*i+3])); // swap(1 2)
197         }
198         return out;
199     }
200 
201     @Test
202     @IR(applyIfCPUFeature = {"sse2", "true"},
203         applyIfPlatform = {"64-bit", "true"},
204         counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
205     @IR(applyIfCPUFeature = {"asimd", "true"},
206         applyIf = {"MaxVectorSize", "16"}, // AD file requires vector_length = 16
207         counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
208     @IR(applyIfCPUFeature = {"avx512_vnni", "true"},
209         counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI_VNNI, "> 0"})
210     public static int[] testf(int[] out) {
211         for (int i = 0; i < ITER-2; i+=2) {
212             // Unrolled, with some swaps.
213             out[i+0] += ((sArr1[2*i+0] * sArr2[2*i+0]) + (sArr1[2*i+1] * sArr2[2*i+1]));
214             out[i+1] += ((sArr2[2*i+2] * sArr1[2*i+2]) + (sArr2[2*i+3] * sArr1[2*i+3])); // swap(1 2), swap(3 4)
215         }
216         return out;
217     }
218 
219     @Test
220     @IR(applyIfCPUFeature = {"sse2", "true"},
221         applyIfPlatform = {"64-bit", "true"},
222         counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
223     @IR(applyIfCPUFeature = {"asimd", "true"},
224         applyIf = {"MaxVectorSize", "16"}, // AD file requires vector_length = 16
225         counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
226     @IR(applyIfCPUFeature = {"avx512_vnni", "true"},
227         counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI_VNNI, "> 0"})
228     public static int[] testg(int[] out) {
229         for (int i = 0; i < ITER-2; i+=2) {
230             // Unrolled, with some swaps.
231             out[i+0] += ((sArr1[2*i+0] * sArr2[2*i+0]) + (sArr1[2*i+1] * sArr2[2*i+1]));
232             out[i+1] += ((sArr1[2*i+3] * sArr2[2*i+3]) + (sArr1[2*i+2] * sArr2[2*i+2])); // swap(1 3), swap(2 4)
233         }
234         return out;
235     }
236 
237     @Test
238     @IR(applyIfCPUFeature = {"sse2", "true"},
239         applyIfPlatform = {"64-bit", "true"},
240         counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
241     @IR(applyIfCPUFeature = {"asimd", "true"},
242         applyIf = {"MaxVectorSize", "16"}, // AD file requires vector_length = 16
243         counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
244     @IR(applyIfCPUFeature = {"avx512_vnni", "true"},
245         counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI_VNNI, "> 0"})
246     public static int[] testh(int[] out) {
247         for (int i = 0; i < ITER-2; i+=2) {
248             // Unrolled, with some swaps.
249             out[i+0] += ((sArr1[2*i+0] * sArr2[2*i+0]) + (sArr1[2*i+1] * sArr2[2*i+1]));
250             out[i+1] += ((sArr2[2*i+3] * sArr1[2*i+3]) + (sArr2[2*i+2] * sArr1[2*i+2])); // swap(1 4), swap(2 3)
251         }
252         return out;
253     }
254 
255     @Test
256     @IR(counts = {IRNode.MUL_ADD_S2I, "> 0"},
257         applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"})
258     @IR(counts = {IRNode.MUL_ADD_VS2VI, "= 0"})
259     public static int[] testi(int[] out) {
260         for (int i = 0; i < ITER-2; i+=2) {
261             // Unrolled, with some swaps that prevent vectorization.
262             out[i+0] += ((sArr1[2*i+0] * sArr2[2*i+0]) + (sArr1[2*i+1] * sArr2[2*i+1])); // ok
263             out[i+1] += ((sArr1[2*i+2] * sArr2[2*i+3]) + (sArr1[2*i+3] * sArr2[2*i+2])); // bad
264         }
265         return out;
266     }
267 
268     @Test
269     @IR(counts = {IRNode.MUL_ADD_S2I, "> 0"},
270         applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"})
271     @IR(counts = {IRNode.MUL_ADD_VS2VI, "= 0"})
272     public static int[] testj(int[] out) {
273         for (int i = 0; i < ITER-2; i+=2) {
274             // Unrolled, with some swaps that prevent vectorization.
275             out[i+0] += ((sArr1[2*i+0] * sArr2[2*i+1]) + (sArr1[2*i+1] * sArr2[2*i+0])); // bad
276             out[i+1] += ((sArr1[2*i+2] * sArr2[2*i+3]) + (sArr1[2*i+3] * sArr2[2*i+2])); // bad
277         }
278         return out;
279     }
280 
281     @Test
282     @IR(counts = {IRNode.MUL_ADD_S2I, "> 0"},
283         applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"})
284     @IR(counts = {IRNode.MUL_ADD_VS2VI, "= 0"})
285     public static int[] testk(int[] out) {
286         for (int i = 0; i < ITER-2; i+=2) {
287             // Unrolled, with some swaps that prevent vectorization.
288             out[i+0] += ((sArr1[2*i+0] * sArr2[2*i+1]) + (sArr1[2*i+1] * sArr2[2*i+0])); // bad
289             out[i+1] += ((sArr1[2*i+2] * sArr2[2*i+2]) + (sArr1[2*i+3] * sArr2[2*i+3])); // ok
290         }
291         return out;
292     }
293 
294     @Test
295     @IR(counts = {IRNode.MUL_ADD_S2I, "> 0"},
296         applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"})
297     @IR(counts = {IRNode.MUL_ADD_VS2VI, "= 0"})
298     public static int[] testl(int[] out) {
299         for (int i = 0; i < ITER-2; i+=2) {
300             // Unrolled, with some swaps that prevent vectorization.
301             out[i+0] += ((sArr1[2*i+1] * sArr2[2*i+1]) + (sArr1[2*i+0] * sArr2[2*i+0])); // ok
302             out[i+1] += ((sArr1[2*i+2] * sArr2[2*i+3]) + (sArr1[2*i+3] * sArr2[2*i+2])); // bad
303         }
304         return out;
305     }
306 
307     @Test
308     @IR(counts = {IRNode.MUL_ADD_S2I, "> 0"},
309         applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"})
310     @IR(counts = {IRNode.MUL_ADD_VS2VI, "= 0"})
311     public static int[] testm(int[] out) {
312         for (int i = 0; i < ITER-4; i+=4) {
313             // Unrolled, with some swaps that prevent vectorization.
314             out[i+0] += ((sArr1[2*i+0] * sArr2[2*i+1]) + (sArr1[2*i+1] * sArr2[2*i+0])); // bad
315             out[i+1] += ((sArr1[2*i+2] * sArr2[2*i+2]) + (sArr1[2*i+3] * sArr2[2*i+3])); // ok
316             // 2-element gap
317         }
318         return out;
319     }
320 }