1 /*
  2  * Copyright (c) 2022, 2023, Arm Limited. All rights reserved.
  3  * Copyright (c) 2023, 2024, Oracle and/or its affiliates. All rights reserved.
  4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  5  *
  6  * This code is free software; you can redistribute it and/or modify it
  7  * under the terms of the GNU General Public License version 2 only, as
  8  * published by the Free Software Foundation.
  9  *
 10  * This code is distributed in the hope that it will be useful, but WITHOUT
 11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 13  * version 2 for more details (a copy is included in the LICENSE file that
 14  * accompanied this code).
 15  *
 16  * You should have received a copy of the GNU General Public License version
 17  * 2 along with this work; if not, write to the Free Software Foundation,
 18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 19  *
 20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 21  * or visit www.oracle.com if you need additional information or have any
 22  * questions.
 23  */
 24 
 25 /*
 26  * @test
 27  * @summary Vectorization test on combined operations
 28  * @library /test/lib /
 29  *
 30  * @build jdk.test.whitebox.WhiteBox
 31  *        compiler.vectorization.runner.VectorizationTestRunner
 32  *
 33  * @requires vm.compiler2.enabled
 34  *
 35  * @run driver jdk.test.lib.helpers.ClassFileInstaller jdk.test.whitebox.WhiteBox
 36  *
 37  * @run main/othervm -Xbootclasspath/a:.
 38  *                   -XX:+UnlockDiagnosticVMOptions
 39  *                   -XX:+WhiteBoxAPI
 40  *                   compiler.vectorization.runner.LoopCombinedOpTest nCOH_nAV
 41  *
 42  * @run main/othervm -Xbootclasspath/a:.
 43  *                   -XX:+UnlockDiagnosticVMOptions
 44  *                   -XX:+WhiteBoxAPI
 45  *                   compiler.vectorization.runner.LoopCombinedOpTest nCOH_yAV
 46  *
 47  * @run main/othervm -Xbootclasspath/a:.
 48  *                   -XX:+UnlockDiagnosticVMOptions
 49  *                   -XX:+WhiteBoxAPI
 50  *                   compiler.vectorization.runner.LoopCombinedOpTest yCOH_nAV
 51  *
 52  * @run main/othervm -Xbootclasspath/a:.
 53  *                   -XX:+UnlockDiagnosticVMOptions
 54  *                   -XX:+WhiteBoxAPI
 55  *                   compiler.vectorization.runner.LoopCombinedOpTest yCOH_yAV
 56  */
 57 
 58 package compiler.vectorization.runner;
 59 
 60 import compiler.lib.ir_framework.*;
 61 
 62 import java.util.Random;
 63 
 64 public class LoopCombinedOpTest extends VectorizationTestRunner {
 65 
 66     // We must pass the flags directly to the test-VM, and not the driver vm in the @run above.
 67     @Override
 68     protected String[] testVMFlags(String[] args) {
 69         return switch (args[0]) {
 70             case "nCOH_nAV" -> new String[]{"-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:-AlignVector"};
 71             case "nCOH_yAV" -> new String[]{"-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:+AlignVector"};
 72             case "yCOH_nAV" -> new String[]{"-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:-AlignVector"};
 73             case "yCOH_yAV" -> new String[]{"-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:+AlignVector"};
 74             default -> { throw new RuntimeException("Test argument not recognized: " + args[0]); }
 75         };
 76     }
 77 
 78     private static final int SIZE = 543;
 79 
 80     private int[] a;
 81     private int[] b;
 82     private int[] c;
 83     private int[] d;
 84     private long[] l1;
 85     private long[] l2;
 86     private short[] s1;
 87     private short[] s2;
 88     private int intInv;
 89 
 90     public LoopCombinedOpTest() {
 91         a = new int[SIZE];
 92         b = new int[SIZE];
 93         c = new int[SIZE];
 94         d = new int[SIZE];
 95         l1 = new long[SIZE];
 96         l2 = new long[SIZE];
 97         s1 = new short[SIZE];
 98         s2 = new short[SIZE];
 99         for (int i = 0; i < SIZE; i++) {
100             a[i] = -654321 * i;
101             b[i] =  123456 * i;
102             c[i] = -998877 * i;
103             d[i] =  778899 * i;
104             l1[i] = 5000000000L * i;
105             l2[i] = -600000000L * i;
106             s1[i] = (short) (3 * i);
107             s2[i] = (short) (-2 * i);
108         }
109         Random ran = new Random(999);
110         intInv = ran.nextInt();
111     }
112 
113     @Test
114     @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
115         counts = {IRNode.STORE_VECTOR, ">0",
116                   IRNode.LOAD_VECTOR_I, "> 0"})
117     public int[] opWithConstant() {
118         int[] res = new int[SIZE];
119         for (int i = 0; i < SIZE; i++) {
120             res[i] = a[i] + 1234567890;
121         }
122         return res;
123     }
124 
125     @Test
126     @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"},
127         counts = {IRNode.STORE_VECTOR, ">0",
128                   IRNode.LOAD_VECTOR_I, "> 0"})
129     public int[] opWithLoopInvariant() {
130         int[] res = new int[SIZE];
131         for (int i = 0; i < SIZE; i++) {
132             res[i] = b[i] * intInv;
133         }
134         return res;
135     }
136 
137     @Test
138     @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"},
139         counts = {IRNode.STORE_VECTOR, ">0",
140                   IRNode.LOAD_VECTOR_I, "> 0"})
141     public int[] opWithConstantAndLoopInvariant() {
142         int[] res = new int[SIZE];
143         for (int i = 0; i < SIZE; i++) {
144             res[i] = c[i] * (intInv & 0xfff);
145         }
146         return res;
147     }
148 
149     @Test
150     @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
151         counts = {IRNode.STORE_VECTOR, ">0",
152                   IRNode.LOAD_VECTOR_I, "> 0"})
153     public int[] multipleOps() {
154         int[] res = new int[SIZE];
155         for (int i = 0; i < SIZE; i++) {
156             res[i] = a[i] & b[i] + c[i] & d[i];
157         }
158         return res;
159     }
160 
161     @Test
162     @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"},
163         counts = {IRNode.STORE_VECTOR, ">0",
164                   IRNode.LOAD_VECTOR_I, "> 0"})
165     public int[] multipleOpsWithMultipleConstants() {
166         int[] res = new int[SIZE];
167         for (int i = 0; i < SIZE; i++) {
168             res[i] = a[i] * 12345678 + 87654321 + b[i] & 0xffff - c[i] * d[i] * 2;
169         }
170         return res;
171     }
172 
173     @Test
174     @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"},
175         counts = {IRNode.STORE_VECTOR, ">0",
176                   IRNode.LOAD_VECTOR_I, "> 0"})
177     // With sse2, the MulI does not vectorize. This means we have vectorized stores
178     // to res1, but scalar loads from res1. The store-to-load-forwarding failure
179     // detection catches this and rejects vectorization.
180     public int[] multipleStores() {
181         int[] res1 = new int[SIZE];
182         int[] res2 = new int[SIZE];
183         int[] res3 = new int[SIZE];
184         for (int i = 0; i < SIZE; i++) {
185             res1[i] = a[i] & b[i];
186             res2[i] = c[i] | d[i];
187             res3[i] = res1[i] * res2[i];
188         }
189         return res3;
190     }
191 
192     @Test
193     @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"},
194         counts = {IRNode.STORE_VECTOR, ">0",
195                   IRNode.LOAD_VECTOR_I, "> 0"})
196     public int[] multipleStoresWithCommonSubExpression() {
197         int[] res1 = new int[SIZE];
198         int[] res2 = new int[SIZE];
199         int[] res3 = new int[SIZE];
200         for (int i = 0; i < SIZE; i++) {
201             res1[i] = a[i] * b[i];
202             res2[i] = c[i] * d[i];
203             res3[i] = res1[i] + res2[i];
204         }
205         return res3;
206     }
207 
208     @Test
209     @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
210         counts = {IRNode.STORE_VECTOR, ">0",
211                   IRNode.LOAD_VECTOR_S, "> 0",
212                   IRNode.LOAD_VECTOR_I, "> 0"})
213     public int[] multipleOpsWith2DifferentTypes() {
214         short[] res1 = new short[SIZE];
215         int[] res2 = new int[SIZE];
216         for (int i = 0; i < SIZE; i++) {
217             res1[i] = (short) (s1[i] + s2[i]);
218             res2[i] = a[i] + b[i];
219         }
220         return res2;
221     }
222 
223     @Test
224     @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
225         counts = {IRNode.STORE_VECTOR, ">0",
226                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_ANY, "> 0",
227                   IRNode.LOAD_VECTOR_L,                         "> 0"})
228     public long[] multipleOpsWith3DifferentTypes() {
229         short[] res1 = new short[SIZE];
230         int[] res2 = new int[SIZE];
231         long[] res3 = new long[SIZE];
232         for (int i = 0; i < SIZE; i++) {
233             res1[i] = (short) (s1[i] + s2[i]);
234             res2[i] = a[i] + b[i];
235             res3[i] = l1[i] + l2[i];
236         }
237         return res3;
238     }
239 
240     @Test
241     @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
242         counts = {IRNode.STORE_VECTOR, ">0",
243                   IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_ANY, "> 0",
244                   IRNode.LOAD_VECTOR_L,                         "> 0"})
245     public long[] multipleOpsWith2NonAdjacentTypes() {
246         short[] res1 = new short[SIZE];
247         long[] res2 = new long[SIZE];
248         for (int i = 0; i < SIZE; i++) {
249             res1[i] = (short) (s1[i] + s2[i]);
250             res2[i] = l1[i] + l2[i];
251         }
252         return res2;
253     }
254 
255     @Test
256     @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
257         counts = {IRNode.STORE_VECTOR, ">0",
258                   IRNode.LOAD_VECTOR_S, "> 0",
259                   IRNode.LOAD_VECTOR_I, "> 0"})
260     public int[] multipleOpsWith2DifferentTypesAndConstant() {
261         short[] res1 = new short[SIZE];
262         int[] res2 = new int[SIZE];
263         for (int i = 0; i < SIZE; i++) {
264             res1[i] = (short) (s1[i] + s2[i]);
265             res2[i] = a[i] + 88888888;;
266         }
267         return res2;
268     }
269 
270     @Test
271     @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"},
272         counts = {IRNode.STORE_VECTOR, ">0",
273                   IRNode.LOAD_VECTOR_S, "> 0",
274                   IRNode.LOAD_VECTOR_I, "> 0"})
275     public int[] multipleOpsWith2DifferentTypesAndInvariant() {
276         short[] res1 = new short[SIZE];
277         int[] res2 = new int[SIZE];
278         for (int i = 0; i < SIZE; i++) {
279             res1[i] = (short) (s1[i] + s2[i]);
280             res2[i] = a[i] * intInv;
281         }
282         return res2;
283     }
284 
285     @Test
286     @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"},
287         counts = {IRNode.STORE_VECTOR, ">0",
288                   IRNode.LOAD_VECTOR_S, "> 0",
289                   IRNode.LOAD_VECTOR_I, "> 0"})
290     public int[] multipleOpsWith2DifferentTypesAndComplexExpression() {
291         short[] res1 = new short[SIZE];
292         int[] res2 = new int[SIZE];
293         for (int i = 0; i < SIZE; i++) {
294             res1[i] = (short) (s1[i] + s2[i]);
295             res2[i] = a[i] * (b[i] + intInv * c[i] & 0xfffffa);
296         }
297         return res2;
298     }
299 
300     @Test
301     @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse3", "true"},
302         counts = {IRNode.STORE_VECTOR, ">0",
303                   IRNode.LOAD_VECTOR_S, "> 0",
304                   IRNode.LOAD_VECTOR_I, "> 0"})
305     public int[] multipleOpsWith2DifferentTypesAndSharedOp() {
306         int i = 0, sum = 0;
307         int[] res1 = new int[SIZE];
308         short[] res2 = new short[SIZE];
309         while (++i < SIZE) {
310             sum += (res1[i]--);
311             res2[i]++;
312         }
313         return res1;
314     }
315 
316     @Test
317     // POPULATE_INDEX seems to mess with vectorization, see JDK-8332878.
318     public int[] fillIndexPlusStride() {
319         int[] res = new int[SIZE];
320         for (int i = 0; i < SIZE; i++) {
321             res[i] = i + 1;
322         }
323         return res;
324     }
325 
326     @Test
327     // POPULATE_INDEX seems to mess with vectorization, see JDK-8332878.
328     public int[] addArrayWithIndex() {
329         int[] res = new int[SIZE];
330         for (int i = 0; i < SIZE; i++) {
331             res[i] = a[i] + i;
332         }
333         return res;
334     }
335 
336     @Test
337     // POPULATE_INDEX seems to mess with vectorization, see JDK-8332878.
338     public short[] multiplyAddShortIndex() {
339         short[] res = new short[SIZE];
340         for (int i = 0; i < SIZE; i++) {
341             res[i] = (short) (i * i + i);
342         }
343         return res;
344     }
345 
346     @Test
347     // POPULATE_INDEX seems to mess with vectorization, see JDK-8332878.
348     public int[] multiplyBySumOfIndexAndInvariant() {
349         int[] res = new int[SIZE];
350         for (int i = 0; i < SIZE; i++) {
351             res[i] = a[i] * (i + 10 + intInv);
352         }
353         return res;
354     }
355 
356     @Test
357     @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"},
358         counts = {IRNode.STORE_VECTOR, ">0"})
359     public int[] manuallyUnrolledStride2() {
360         int[] res = new int[SIZE];
361         for (int i = 0; i < SIZE - 1; i += 2) {
362             res[i] = a[i] * b[i];
363             res[i + 1] = a[i + 1] * b[i + 1];
364         }
365         return res;
366     }
367 
368     @Test
369     @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"},
370         counts = {IRNode.STORE_VECTOR, ">0",
371                   IRNode.LOAD_VECTOR_I, "> 0"})
372     public int partialVectorizableLoop() {
373         int[] res = new int[SIZE];
374         int k = 9;
375         for (int i = 0; i < SIZE / 2; i++) {
376             res[i] = a[i] * b[i];
377             k = 3 * k + 1;
378         }
379         return k;
380     }
381 }