1 /*
  2  * Copyright (c) 2023, 2024, Oracle and/or its affiliates. All rights reserved.
  3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4  *
  5  * This code is free software; you can redistribute it and/or modify it
  6  * under the terms of the GNU General Public License version 2 only, as
  7  * published by the Free Software Foundation.
  8  *
  9  * This code is distributed in the hope that it will be useful, but WITHOUT
 10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 12  * version 2 for more details (a copy is included in the LICENSE file that
 13  * accompanied this code).
 14  *
 15  * You should have received a copy of the GNU General Public License version
 16  * 2 along with this work; if not, write to the Free Software Foundation,
 17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 18  *
 19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 20  * or visit www.oracle.com if you need additional information or have any
 21  * questions.
 22  */
 23 
 24 /**
 25  * @test
 26  * @bug 8310886 8325252 8320622
 27  * @summary Test MulAddS2I vectorization.
 28  * @library /test/lib /
 29  * @run driver compiler.loopopts.superword.TestMulAddS2I
 30  */
 31 
 32 package compiler.loopopts.superword;
 33 
 34 import compiler.lib.ir_framework.*;
 35 import jdk.test.lib.Asserts;
 36 import jdk.test.lib.Platform;
 37 
 38 public class TestMulAddS2I {
        // Test for MulAddS2I vectorization. Every kernel below accumulates, into an int
        // array, the sum of two products of neighbouring short elements:
        //     out[i] += a[2*i] * b[2*i] + a[2*i+1] * b[2*i+1]
        // or a permutation of those operands. The @IR rules attached to each kernel state
        // which MUL_ADD_* nodes are expected in the compiled code, and run() checks the
        // numeric results against the GOLDEN_* arrays captured in the static initializer.
        //
        // NOTE(review): the operand and statement order inside the kernels is what the IR
        // rules are written against; presumably any restructuring can change the compiled
        // shape, so treat the loop bodies as fixed.

        // Length of the two short input arrays.
 39     static final int RANGE = 1024*16;
        // Length of every int output array. With RANGE/2 - 1 outputs, the highest input
        // index read by any kernel (2*i+1 in the plain loops, 2*i+3 in the unrolled ones)
        // stays below RANGE.
 40     static final int ITER  = RANGE/2 - 1;
 41 
        // Input data shared by all kernels; filled with random values in the static initializer.
 42     static short[] sArr1 = new short[RANGE];
 43     static short[] sArr2 = new short[RANGE];
        // Reference results, one per kernel (GOLDEN_A for testa ... GOLDEN_L for testl).
        // Each is assigned exactly once in the static initializer.
 44     static final int[] GOLDEN_A;
 45     static final int[] GOLDEN_B;
 46     static final int[] GOLDEN_C;
 47     static final int[] GOLDEN_D;
 48     static final int[] GOLDEN_E;
 49     static final int[] GOLDEN_F;
 50     static final int[] GOLDEN_G;
 51     static final int[] GOLDEN_H;
 52     static final int[] GOLDEN_I;
 53     static final int[] GOLDEN_J;
 54     static final int[] GOLDEN_K;
 55     static final int[] GOLDEN_L;
 56 
 57     static {
            // Fill both inputs; the cast truncates each random int to a short.
 58         for (int i = 0; i < RANGE; i++) {
 59             sArr1[i] = (short)(AbstractInfo.getRandom().nextInt());
 60             sArr2[i] = (short)(AbstractInfo.getRandom().nextInt());
 61         }
            // Record each kernel's result once as its reference value. The kernels that
            // take an output array get a fresh, zero-filled one.
            // NOTE(review): presumably these calls execute before the kernels have been
            // compiled by the JIT under test, which is what makes them an independent
            // reference - confirm against the IR framework's execution model.
 62         GOLDEN_A = testa();
 63         GOLDEN_B = testb();
 64         GOLDEN_C = testc(new int[ITER]);
 65         GOLDEN_D = testd(new int[ITER]);
 66         GOLDEN_E = teste(new int[ITER]);
 67         GOLDEN_F = testf(new int[ITER]);
 68         GOLDEN_G = testg(new int[ITER]);
 69         GOLDEN_H = testh(new int[ITER]);
 70         GOLDEN_I = testi(new int[ITER]);
 71         GOLDEN_J = testj(new int[ITER]);
 72         GOLDEN_K = testk(new int[ITER]);
 73         GOLDEN_L = testl(new int[ITER]);
 74     }
 75 
 76 
        // Entry point used by the jtreg "@run driver" tag. Starts the IR framework twice,
        // once with -XX:+AlignVector and once with -XX:-AlignVector, so the kernels are
        // checked under both settings. -XX:+IgnoreUnrecognizedVMOptions is passed with
        // both runs.
        // NOTE(review): presumably so that VMs which do not recognize AlignVector still
        // start - confirm.
 77     public static void main(String[] args) {
 78         TestFramework.runWithFlags("-XX:+IgnoreUnrecognizedVMOptions", "-XX:+AlignVector");
 79         TestFramework.runWithFlags("-XX:+IgnoreUnrecognizedVMOptions", "-XX:-AlignVector");
 80     }
 81 
        // Shared run method for all twelve kernels, declared with zero warm-up iterations.
        // Each invocation calls every kernel once (with a fresh output array where one is
        // required) and compares the result with the matching golden array.
 82     @Run(test = {"testa", "testb", "testc", "testd", "teste", "testf", "testg", "testh",
 83                  "testi", "testj", "testk", "testl"})
 84     @Warmup(0)
 85     public static void run() {
 86         compare(testa(), GOLDEN_A, "testa");
 87         compare(testb(), GOLDEN_B, "testb");
 88         compare(testc(new int[ITER]), GOLDEN_C, "testc");
 89         compare(testd(new int[ITER]), GOLDEN_D, "testd");
 90         compare(teste(new int[ITER]), GOLDEN_E, "teste");
 91         compare(testf(new int[ITER]), GOLDEN_F, "testf");
 92         compare(testg(new int[ITER]), GOLDEN_G, "testg");
 93         compare(testh(new int[ITER]), GOLDEN_H, "testh");
 94         compare(testi(new int[ITER]), GOLDEN_I, "testi");
 95         compare(testj(new int[ITER]), GOLDEN_J, "testj");
 96         compare(testk(new int[ITER]), GOLDEN_K, "testk");
 97         compare(testl(new int[ITER]), GOLDEN_L, "testl");
 98     }
 99 
        /**
         * Asserts that the first ITER elements of {@code out} equal those of {@code golden}.
         *
         * @param out    result produced by the kernel under test
         * @param golden reference result for the same kernel
         * @param name   kernel name, used only in the failure message together with the
         *               index of the mismatching element
         */
100     public static void compare(int[] out, int[] golden, String name) {
101         for (int i = 0; i < ITER; i++) {
102             Asserts.assertEQ(out[i], golden[i], "wrong result for '" + name + "' out[" + i + "]");
103         }
104     }
105 
        // testa: both factors of each product come from sArr1 (sum of two squares). The
        // kernel allocates its own output arrays.
        // IR rules: MUL_ADD_S2I and MUL_ADD_VS2VI are both expected on sse2 (64-bit only)
        // and on asimd with MaxVectorSize=16; on avx512_vnni MUL_ADD_S2I and
        // MUL_ADD_VS2VI_VNNI are expected.
        // NOTE(review): out2 is written but never read or returned. Presumably the extra
        // use of out[i] is intentional and shapes the loop for one of the bugs listed in
        // the @bug tag - confirm before removing it.
106     @Test
107     @IR(applyIfCPUFeature = {"sse2", "true"},
108         applyIfPlatform = {"64-bit", "true"},
109         counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
110     @IR(applyIfCPUFeature = {"asimd", "true"},
111         applyIf = {"MaxVectorSize", "16"}, // AD file requires vector_length = 16
112         counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
113     @IR(applyIfCPUFeature = {"avx512_vnni", "true"},
114         counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI_VNNI, "> 0"})
115     public static int[] testa() {
116         int[] out = new int[ITER];
117         int[] out2 = new int[ITER];
118         for (int i = 0; i < ITER; i++) {
119             out[i] += ((sArr1[2*i] * sArr1[2*i]) + (sArr1[2*i+1] * sArr1[2*i+1]));
120             out2[i] += out[i];
121         }
122         return out;
123     }
124 
        // testb: same loop as testa, but each product multiplies an sArr1 element with
        // the sArr2 element at the same index. Same IR rules as testa.
        // NOTE(review): as in testa, out2 is never read; presumably intentional - confirm.
125     @Test
126     @IR(applyIfCPUFeature = {"sse2", "true"},
127         applyIfPlatform = {"64-bit", "true"},
128         counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
129     @IR(applyIfCPUFeature = {"asimd", "true"},
130         applyIf = {"MaxVectorSize", "16"}, // AD file requires vector_length = 16
131         counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
132     @IR(applyIfCPUFeature = {"avx512_vnni", "true"},
133         counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI_VNNI, "> 0"})
134     public static int[] testb() {
135         int[] out = new int[ITER];
136         int[] out2 = new int[ITER];
137         for (int i = 0; i < ITER; i++) {
138             out[i] += ((sArr1[2*i] * sArr2[2*i]) + (sArr1[2*i+1] * sArr2[2*i+1]));
139             out2[i] += out[i];
140         }
141         return out;
142     }
143 
        // testc: the sArr1 * sArr2 kernel of testb without the second array; the output
        // array is supplied by the caller, accumulated into, and returned.
        // Same IR rules as testa and testb.
144     @Test
145     @IR(applyIfCPUFeature = {"sse2", "true"},
146         applyIfPlatform = {"64-bit", "true"},
147         counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
148     @IR(applyIfCPUFeature = {"asimd", "true"},
149         applyIf = {"MaxVectorSize", "16"}, // AD file requires vector_length = 16
150         counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
151     @IR(applyIfCPUFeature = {"avx512_vnni", "true"},
152         counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI_VNNI, "> 0"})
153     public static int[] testc(int[] out) {
154         for (int i = 0; i < ITER; i++) {
155             out[i] += ((sArr1[2*i] * sArr2[2*i]) + (sArr1[2*i+1] * sArr2[2*i+1]));
156         }
157         return out;
158     }
159 
        // testd: testc unrolled by hand (stride 2, bound ITER-2); both statements keep the
        // operand order of testc. Elements of out at index ITER-2 and above are not written.
        // IR rules: as for testc, but every rule additionally requires
        // UseCompactObjectHeaders to be false.
        // NOTE(review): the reason for the UseCompactObjectHeaders condition is not stated
        // here; presumably it relates to array alignment when AlignVector is on - confirm.
160     @Test
161     @IR(applyIfCPUFeature = {"sse2", "true"},
162         applyIfPlatform = {"64-bit", "true"},
163         applyIf = { "UseCompactObjectHeaders", "false" },
164         counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
165     @IR(applyIfCPUFeature = {"asimd", "true"},
166         applyIfAnd = {"MaxVectorSize", "16", "UseCompactObjectHeaders", "false" }, // AD file requires vector_length = 16
167         counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
168     @IR(applyIfCPUFeature = {"avx512_vnni", "true"},
169         applyIf = { "UseCompactObjectHeaders", "false" },
170         counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI_VNNI, "> 0"})
171     public static int[] testd(int[] out) {
172         for (int i = 0; i < ITER-2; i+=2) {
173             // Unrolled, with the same structure.
174             out[i+0] += ((sArr1[2*i+0] * sArr2[2*i+0]) + (sArr1[2*i+1] * sArr2[2*i+1]));
175             out[i+1] += ((sArr1[2*i+2] * sArr2[2*i+2]) + (sArr1[2*i+3] * sArr2[2*i+3]));
176         }
177         return out;
178     }
179 
        // teste: like testd, but in the second statement the two factors of the first
        // product are exchanged (sArr2 * sArr1 instead of sArr1 * sArr2). The value is
        // unchanged; the IR rules (same as testd) still expect the vector node.
180     @Test
181     @IR(applyIfCPUFeature = {"sse2", "true"},
182         applyIfPlatform = {"64-bit", "true"},
183         applyIf = { "UseCompactObjectHeaders", "false" },
184         counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
185     @IR(applyIfCPUFeature = {"asimd", "true"},
186         applyIfAnd = {"MaxVectorSize", "16", "UseCompactObjectHeaders", "false" }, // AD file requires vector_length = 16
187         counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
188     @IR(applyIfCPUFeature = {"avx512_vnni", "true"},
189         applyIf = { "UseCompactObjectHeaders", "false" },
190         counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI_VNNI, "> 0"})
191     public static int[] teste(int[] out) {
192         for (int i = 0; i < ITER-2; i+=2) {
193             // Unrolled, with some swaps.
194             out[i+0] += ((sArr1[2*i+0] * sArr2[2*i+0]) + (sArr1[2*i+1] * sArr2[2*i+1]));
195             out[i+1] += ((sArr2[2*i+2] * sArr1[2*i+2]) + (sArr1[2*i+3] * sArr2[2*i+3])); // swap(1 2)
196         }
197         return out;
198     }
199 
        // testf: like testd, but in the second statement the factors of both products are
        // exchanged (sArr2 * sArr1 in each). Same IR rules as testd.
200     @Test
201     @IR(applyIfCPUFeature = {"sse2", "true"},
202         applyIfPlatform = {"64-bit", "true"},
203         applyIf = { "UseCompactObjectHeaders", "false" },
204         counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
205     @IR(applyIfCPUFeature = {"asimd", "true"},
206         applyIfAnd = {"MaxVectorSize", "16", "UseCompactObjectHeaders", "false" }, // AD file requires vector_length = 16
207         counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
208     @IR(applyIfCPUFeature = {"avx512_vnni", "true"},
209         applyIf = { "UseCompactObjectHeaders", "false" },
210         counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI_VNNI, "> 0"})
211     public static int[] testf(int[] out) {
212         for (int i = 0; i < ITER-2; i+=2) {
213             // Unrolled, with some swaps.
214             out[i+0] += ((sArr1[2*i+0] * sArr2[2*i+0]) + (sArr1[2*i+1] * sArr2[2*i+1]));
215             out[i+1] += ((sArr2[2*i+2] * sArr1[2*i+2]) + (sArr2[2*i+3] * sArr1[2*i+3])); // swap(1 2), swap(3 4)
216         }
217         return out;
218     }
219 
        // testg: like testd, but in the second statement the two products appear in the
        // opposite order (the 2*i+3 product first, then the 2*i+2 product). Same IR rules
        // as testd.
220     @Test
221     @IR(applyIfCPUFeature = {"sse2", "true"},
222         applyIfPlatform = {"64-bit", "true"},
223         applyIf = { "UseCompactObjectHeaders", "false" },
224         counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
225     @IR(applyIfCPUFeature = {"asimd", "true"},
226         applyIfAnd = {"MaxVectorSize", "16", "UseCompactObjectHeaders", "false" }, // AD file requires vector_length = 16
227         counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
228     @IR(applyIfCPUFeature = {"avx512_vnni", "true"},
229         applyIf = { "UseCompactObjectHeaders", "false" },
230         counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI_VNNI, "> 0"})
231     public static int[] testg(int[] out) {
232         for (int i = 0; i < ITER-2; i+=2) {
233             // Unrolled, with some swaps.
234             out[i+0] += ((sArr1[2*i+0] * sArr2[2*i+0]) + (sArr1[2*i+1] * sArr2[2*i+1]));
235             out[i+1] += ((sArr1[2*i+3] * sArr2[2*i+3]) + (sArr1[2*i+2] * sArr2[2*i+2])); // swap(1 3), swap(2 4)
236         }
237         return out;
238     }
239 
        // testh: like testd, but in the second statement both the order of the two
        // products and the factors inside each product are exchanged. Same IR rules as
        // testd.
240     @Test
241     @IR(applyIfCPUFeature = {"sse2", "true"},
242         applyIfPlatform = {"64-bit", "true"},
243         applyIf = { "UseCompactObjectHeaders", "false" },
244         counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
245     @IR(applyIfCPUFeature = {"asimd", "true"},
246         applyIfAnd = {"MaxVectorSize", "16", "UseCompactObjectHeaders", "false" }, // AD file requires vector_length = 16
247         counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
248     @IR(applyIfCPUFeature = {"avx512_vnni", "true"},
249         applyIf = { "UseCompactObjectHeaders", "false" },
250         counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI_VNNI, "> 0"})
251     public static int[] testh(int[] out) {
252         for (int i = 0; i < ITER-2; i+=2) {
253             // Unrolled, with some swaps.
254             out[i+0] += ((sArr1[2*i+0] * sArr2[2*i+0]) + (sArr1[2*i+1] * sArr2[2*i+1]));
255             out[i+1] += ((sArr2[2*i+3] * sArr1[2*i+3]) + (sArr2[2*i+2] * sArr1[2*i+2])); // swap(1 4), swap(2 3)
256         }
257         return out;
258     }
259 
        // testi: negative case. The first statement has the regular shape, but the second
        // multiplies elements at different indices (sArr1[2*i+2] with sArr2[2*i+3], and
        // sArr1[2*i+3] with sArr2[2*i+2]).
        // IR rules: MUL_ADD_S2I is expected when sse2 or asimd is available, and
        // MUL_ADD_VS2VI must not appear at all (that rule has no platform condition).
260     @Test
261     @IR(counts = {IRNode.MUL_ADD_S2I, "> 0"},
262         applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"})
263     @IR(counts = {IRNode.MUL_ADD_VS2VI, "= 0"})
264     public static int[] testi(int[] out) {
265         for (int i = 0; i < ITER-2; i+=2) {
266             // Unrolled, with some swaps that prevent vectorization.
267             out[i+0] += ((sArr1[2*i+0] * sArr2[2*i+0]) + (sArr1[2*i+1] * sArr2[2*i+1])); // ok
268             out[i+1] += ((sArr1[2*i+2] * sArr2[2*i+3]) + (sArr1[2*i+3] * sArr2[2*i+2])); // bad
269         }
270         return out;
271     }
272 
        // testj: negative case in which both statements multiply elements at different
        // indices (index k of sArr1 with index k+1 or k-1 of sArr2). Same IR rules as testi.
273     @Test
274     @IR(counts = {IRNode.MUL_ADD_S2I, "> 0"},
275         applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"})
276     @IR(counts = {IRNode.MUL_ADD_VS2VI, "= 0"})
277     public static int[] testj(int[] out) {
278         for (int i = 0; i < ITER-2; i+=2) {
279             // Unrolled, with some swaps that prevent vectorization.
280             out[i+0] += ((sArr1[2*i+0] * sArr2[2*i+1]) + (sArr1[2*i+1] * sArr2[2*i+0])); // bad
281             out[i+1] += ((sArr1[2*i+2] * sArr2[2*i+3]) + (sArr1[2*i+3] * sArr2[2*i+2])); // bad
282         }
283         return out;
284     }
285 
        // testk: negative case that mirrors testi: the first statement multiplies elements
        // at different indices, the second has the regular shape. Same IR rules as testi.
286     @Test
287     @IR(counts = {IRNode.MUL_ADD_S2I, "> 0"},
288         applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"})
289     @IR(counts = {IRNode.MUL_ADD_VS2VI, "= 0"})
290     public static int[] testk(int[] out) {
291         for (int i = 0; i < ITER-2; i+=2) {
292             // Unrolled, with some swaps that prevent vectorization.
293             out[i+0] += ((sArr1[2*i+0] * sArr2[2*i+1]) + (sArr1[2*i+1] * sArr2[2*i+0])); // bad
294             out[i+1] += ((sArr1[2*i+2] * sArr2[2*i+2]) + (sArr1[2*i+3] * sArr2[2*i+3])); // ok
295         }
296         return out;
297     }
298 
        // testl: negative case. The first statement keeps matching indices but lists the
        // 2*i+1 product before the 2*i+0 product; the second statement multiplies elements
        // at different indices. Same IR rules as testi.
299     @Test
300     @IR(counts = {IRNode.MUL_ADD_S2I, "> 0"},
301         applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"})
302     @IR(counts = {IRNode.MUL_ADD_VS2VI, "= 0"})
303     public static int[] testl(int[] out) {
304         for (int i = 0; i < ITER-2; i+=2) {
305             // Unrolled, with some swaps that prevent vectorization.
306             out[i+0] += ((sArr1[2*i+1] * sArr2[2*i+1]) + (sArr1[2*i+0] * sArr2[2*i+0])); // ok
307             out[i+1] += ((sArr1[2*i+2] * sArr2[2*i+3]) + (sArr1[2*i+3] * sArr2[2*i+2])); // bad
308         }
309         return out;
310     }
311 
312 }