1 /*
  2  * Copyright (c) 2022, 2023, Arm Limited. All rights reserved.
  3  * Copyright (c) 2023, 2024, Oracle and/or its affiliates. All rights reserved.
  4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  5  *
  6  * This code is free software; you can redistribute it and/or modify it
  7  * under the terms of the GNU General Public License version 2 only, as
  8  * published by the Free Software Foundation.
  9  *
 10  * This code is distributed in the hope that it will be useful, but WITHOUT
 11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 13  * version 2 for more details (a copy is included in the LICENSE file that
 14  * accompanied this code).
 15  *
 16  * You should have received a copy of the GNU General Public License version
 17  * 2 along with this work; if not, write to the Free Software Foundation,
 18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 19  *
 20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 21  * or visit www.oracle.com if you need additional information or have any
 22  * questions.
 23  */
 24 
 25 /*
 26  * @test
 27  * @summary Vectorization test on combined operations
 28  * @library /test/lib /
 29  *
 30  * @build jdk.test.whitebox.WhiteBox
 31  *        compiler.vectorization.runner.VectorizationTestRunner
 32  *
 33  * @requires vm.compiler2.enabled
 34  *
 35  * @run driver jdk.test.lib.helpers.ClassFileInstaller jdk.test.whitebox.WhiteBox
 36  *
 37  * @run main/othervm -Xbootclasspath/a:.
 38  *                   -XX:+UnlockDiagnosticVMOptions
 39  *                   -XX:+WhiteBoxAPI
 40  *                   compiler.vectorization.runner.LoopCombinedOpTest nCOH_nAV
 41  *
 42  * @run main/othervm -Xbootclasspath/a:.
 43  *                   -XX:+UnlockDiagnosticVMOptions
 44  *                   -XX:+WhiteBoxAPI
 45  *                   compiler.vectorization.runner.LoopCombinedOpTest nCOH_yAV
 46  *
 47  * @run main/othervm -Xbootclasspath/a:.
 48  *                   -XX:+UnlockDiagnosticVMOptions
 49  *                   -XX:+WhiteBoxAPI
 50  *                   compiler.vectorization.runner.LoopCombinedOpTest yCOH_nAV
 51  *
 52  * @run main/othervm -Xbootclasspath/a:.
 53  *                   -XX:+UnlockDiagnosticVMOptions
 54  *                   -XX:+WhiteBoxAPI
 55  *                   compiler.vectorization.runner.LoopCombinedOpTest yCOH_yAV
 56  */
 57 
 58 package compiler.vectorization.runner;
 59 
 60 import compiler.lib.ir_framework.*;
 61 
 62 import java.util.Random;
 63 
 64 public class LoopCombinedOpTest extends VectorizationTestRunner {
 65 
 66     // We must pass the flags directly to the test-VM, and not the driver vm in the @run above.
 67     @Override
 68     protected String[] testVMFlags(String[] args) {
 69         return switch (args[0]) {
 70             case "nCOH_nAV" -> new String[]{"-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:-AlignVector"};
 71             case "nCOH_yAV" -> new String[]{"-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:+AlignVector"};
 72             case "yCOH_nAV" -> new String[]{"-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:-AlignVector"};
 73             case "yCOH_yAV" -> new String[]{"-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:+AlignVector"};
 74             default -> { throw new RuntimeException("Test argument not recognized: " + args[0]); }
 75         };
 76     }
 77 
 78     private static final int SIZE = 543;
 79 
 80     private int[] a;
 81     private int[] b;
 82     private int[] c;
 83     private int[] d;
 84     private long[] l1;
 85     private long[] l2;
 86     private short[] s1;
 87     private short[] s2;
 88     private int intInv;
 89 
 90     public LoopCombinedOpTest() {
 91         a = new int[SIZE];
 92         b = new int[SIZE];
 93         c = new int[SIZE];
 94         d = new int[SIZE];
 95         l1 = new long[SIZE];
 96         l2 = new long[SIZE];
 97         s1 = new short[SIZE];
 98         s2 = new short[SIZE];
 99         for (int i = 0; i < SIZE; i++) {
100             a[i] = -654321 * i;
101             b[i] =  123456 * i;
102             c[i] = -998877 * i;
103             d[i] =  778899 * i;
104             l1[i] = 5000000000L * i;
105             l2[i] = -600000000L * i;
106             s1[i] = (short) (3 * i);
107             s2[i] = (short) (-2 * i);
108         }
109         Random ran = new Random(999);
110         intInv = ran.nextInt();
111     }
112 
113     @Test
114     @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
115         counts = {IRNode.STORE_VECTOR, ">0",
116                   IRNode.LOAD_VECTOR_I, "> 0"})
117     public int[] opWithConstant() {
118         int[] res = new int[SIZE];
119         for (int i = 0; i < SIZE; i++) {
120             res[i] = a[i] + 1234567890;
121         }
122         return res;
123     }
124 
125     @Test
126     @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"},
127         counts = {IRNode.STORE_VECTOR, ">0",
128                   IRNode.LOAD_VECTOR_I, "> 0"})
129     public int[] opWithLoopInvariant() {
130         int[] res = new int[SIZE];
131         for (int i = 0; i < SIZE; i++) {
132             res[i] = b[i] * intInv;
133         }
134         return res;
135     }
136 
137     @Test
138     @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"},
139         counts = {IRNode.STORE_VECTOR, ">0",
140                   IRNode.LOAD_VECTOR_I, "> 0"})
141     public int[] opWithConstantAndLoopInvariant() {
142         int[] res = new int[SIZE];
143         for (int i = 0; i < SIZE; i++) {
144             res[i] = c[i] * (intInv & 0xfff);
145         }
146         return res;
147     }
148 
149     @Test
150     @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
151         counts = {IRNode.STORE_VECTOR, ">0",
152                   IRNode.LOAD_VECTOR_I, "> 0"})
153     public int[] multipleOps() {
154         int[] res = new int[SIZE];
155         for (int i = 0; i < SIZE; i++) {
156             res[i] = a[i] & b[i] + c[i] & d[i];
157         }
158         return res;
159     }
160 
161     @Test
162     @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"},
163         counts = {IRNode.STORE_VECTOR, ">0",
164                   IRNode.LOAD_VECTOR_I, "> 0"})
165     public int[] multipleOpsWithMultipleConstants() {
166         int[] res = new int[SIZE];
167         for (int i = 0; i < SIZE; i++) {
168             res[i] = a[i] * 12345678 + 87654321 + b[i] & 0xffff - c[i] * d[i] * 2;
169         }
170         return res;
171     }
172 
173     @Test
174     @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"},
175         counts = {IRNode.STORE_VECTOR, ">0",
176                   IRNode.LOAD_VECTOR_I, "> 0"})
177     // With sse2, the MulI does not vectorize. This means we have vectorized stores
178     // to res1, but scalar loads from res1. The store-to-load-forwarding failure
179     // detection catches this and rejects vectorization.
180     public int[] multipleStores() {
181         int[] res1 = new int[SIZE];
182         int[] res2 = new int[SIZE];
183         int[] res3 = new int[SIZE];
184         for (int i = 0; i < SIZE; i++) {
185             res1[i] = a[i] & b[i];
186             res2[i] = c[i] | d[i];
187             res3[i] = res1[i] * res2[i];
188         }
189         return res3;
190     }
191 
192     @Test
193     @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"},
194         counts = {IRNode.STORE_VECTOR, ">0",
195                   IRNode.LOAD_VECTOR_I, "> 0"})
196     public int[] multipleStoresWithCommonSubExpression() {
197         int[] res1 = new int[SIZE];
198         int[] res2 = new int[SIZE];
199         int[] res3 = new int[SIZE];
200         for (int i = 0; i < SIZE; i++) {
201             res1[i] = a[i] * b[i];
202             res2[i] = c[i] * d[i];
203             res3[i] = res1[i] + res2[i];
204         }
205         return res3;
206     }
207 
208     @Test
209     @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
210         applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false"},
211         counts = {IRNode.STORE_VECTOR, ">0",
212                   IRNode.LOAD_VECTOR_S, "> 0",
213                   IRNode.LOAD_VECTOR_I, "> 0"})
214     public int[] multipleOpsWith2DifferentTypes() {
215         short[] res1 = new short[SIZE];
216         int[] res2 = new int[SIZE];
217         for (int i = 0; i < SIZE; i++) {
218             res1[i] = (short) (s1[i] + s2[i]);
219             res2[i] = a[i] + b[i];
220             // We have a mix of int and short loads/stores.
221             // With UseCompactObjectHeaders and AlignVector,
222             // we must 8-byte align all vector loads/stores.
223             //
224             // int:
225             // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4*iter
226             //              = 16 (or 12 if UseCompactObjectHeaders=true)
227             // If UseCompactObjectHeaders=false: iter % 2 = 0
228             // If UseCompactObjectHeaders=true:  iter % 2 = 1
229             //
230             // byte:
231             // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 1*iter
232             //              = 16 (or 12 if UseCompactObjectHeaders=true)
233             // If UseCompactObjectHeaders=false: iter % 8 = 0
234             // If UseCompactObjectHeaders=true:  iter % 8 = 4
235             //
236             // -> we cannot align both if UseCompactObjectHeaders=true.
237         }
238         return res2;
239     }
240 
241     @Test
242     @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
243         applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false"},
244         counts = {IRNode.STORE_VECTOR, ">0",
245                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_ANY, "> 0",
246                   IRNode.LOAD_VECTOR_L,                         "> 0"})
247     public long[] multipleOpsWith3DifferentTypes() {
248         short[] res1 = new short[SIZE];
249         int[] res2 = new int[SIZE];
250         long[] res3 = new long[SIZE];
251         for (int i = 0; i < SIZE; i++) {
252             res1[i] = (short) (s1[i] + s2[i]);
253             res2[i] = a[i] + b[i];
254             res3[i] = l1[i] + l2[i];
255             // We have a mix of int and short loads/stores.
256             // With UseCompactObjectHeaders and AlignVector,
257             // we must 8-byte align all vector loads/stores.
258             //
259             // int:
260             // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4*iter
261             //              = 16 (or 12 if UseCompactObjectHeaders=true)
262             // If UseCompactObjectHeaders=false: iter % 2 = 0
263             // If UseCompactObjectHeaders=true:  iter % 2 = 1
264             //
265             // byte:
266             // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 1*iter
267             //              = 16 (or 12 if UseCompactObjectHeaders=true)
268             // If UseCompactObjectHeaders=false: iter % 8 = 0
269             // If UseCompactObjectHeaders=true:  iter % 8 = 4
270             //
271             // -> we cannot align both if UseCompactObjectHeaders=true.
272         }
273         return res3;
274     }
275 
276     @Test
277     @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
278         counts = {IRNode.STORE_VECTOR, ">0",
279                   IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_ANY, "> 0",
280                   IRNode.LOAD_VECTOR_L,                         "> 0"})
281     public long[] multipleOpsWith2NonAdjacentTypes() {
282         short[] res1 = new short[SIZE];
283         long[] res2 = new long[SIZE];
284         for (int i = 0; i < SIZE; i++) {
285             res1[i] = (short) (s1[i] + s2[i]);
286             res2[i] = l1[i] + l2[i];
287         }
288         return res2;
289     }
290 
291     @Test
292     @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
293         applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false"},
294         counts = {IRNode.STORE_VECTOR, ">0",
295                   IRNode.LOAD_VECTOR_S, "> 0",
296                   IRNode.LOAD_VECTOR_I, "> 0"})
297     public int[] multipleOpsWith2DifferentTypesAndConstant() {
298         short[] res1 = new short[SIZE];
299         int[] res2 = new int[SIZE];
300         for (int i = 0; i < SIZE; i++) {
301             res1[i] = (short) (s1[i] + s2[i]);
302             res2[i] = a[i] + 88888888;;
303             // We have a mix of int and short loads/stores.
304             // With UseCompactObjectHeaders and AlignVector,
305             // we must 8-byte align all vector loads/stores.
306             //
307             // int:
308             // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4*iter
309             //              = 16 (or 12 if UseCompactObjectHeaders=true)
310             // If UseCompactObjectHeaders=false: iter % 2 = 0
311             // If UseCompactObjectHeaders=true:  iter % 2 = 1
312             //
313             // byte:
314             // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 1*iter
315             //              = 16 (or 12 if UseCompactObjectHeaders=true)
316             // If UseCompactObjectHeaders=false: iter % 8 = 0
317             // If UseCompactObjectHeaders=true:  iter % 8 = 4
318             //
319             // -> we cannot align both if UseCompactObjectHeaders=true.
320         }
321         return res2;
322     }
323 
324     @Test
325     @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"},
326         applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false"},
327         counts = {IRNode.STORE_VECTOR, ">0",
328                   IRNode.LOAD_VECTOR_S, "> 0",
329                   IRNode.LOAD_VECTOR_I, "> 0"})
330     public int[] multipleOpsWith2DifferentTypesAndInvariant() {
331         short[] res1 = new short[SIZE];
332         int[] res2 = new int[SIZE];
333         for (int i = 0; i < SIZE; i++) {
334             res1[i] = (short) (s1[i] + s2[i]);
335             res2[i] = a[i] * intInv;
336             // We have a mix of int and short loads/stores.
337             // With UseCompactObjectHeaders and AlignVector,
338             // we must 8-byte align all vector loads/stores.
339             //
340             // int:
341             // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4*iter
342             //              = 16 (or 12 if UseCompactObjectHeaders=true)
343             // If UseCompactObjectHeaders=false: iter % 2 = 0
344             // If UseCompactObjectHeaders=true:  iter % 2 = 1
345             //
346             // byte:
347             // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 1*iter
348             //              = 16 (or 12 if UseCompactObjectHeaders=true)
349             // If UseCompactObjectHeaders=false: iter % 8 = 0
350             // If UseCompactObjectHeaders=true:  iter % 8 = 4
351             //
352             // -> we cannot align both if UseCompactObjectHeaders=true.
353         }
354         return res2;
355     }
356 
357     @Test
358     @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"},
359         applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false"},
360         counts = {IRNode.STORE_VECTOR, ">0",
361                   IRNode.LOAD_VECTOR_S, "> 0",
362                   IRNode.LOAD_VECTOR_I, "> 0"})
363     public int[] multipleOpsWith2DifferentTypesAndComplexExpression() {
364         short[] res1 = new short[SIZE];
365         int[] res2 = new int[SIZE];
366         for (int i = 0; i < SIZE; i++) {
367             res1[i] = (short) (s1[i] + s2[i]);
368             res2[i] = a[i] * (b[i] + intInv * c[i] & 0xfffffa);
369             // same argument as in multipleOpsWith2DifferentTypesAndInvariant.
370         }
371         return res2;
372     }
373 
374     @Test
375     @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse3", "true"},
376         applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false"},
377         counts = {IRNode.STORE_VECTOR, ">0",
378                   IRNode.LOAD_VECTOR_S, "> 0",
379                   IRNode.LOAD_VECTOR_I, "> 0"})
380     public int[] multipleOpsWith2DifferentTypesAndSharedOp() {
381         int i = 0, sum = 0;
382         int[] res1 = new int[SIZE];
383         short[] res2 = new short[SIZE];
384         while (++i < SIZE) {
385             sum += (res1[i]--);
386             res2[i]++;
387             // We have a mix of int and short loads/stores.
388             // With UseCompactObjectHeaders and AlignVector,
389             // we must 8-byte align all vector loads/stores.
390             //
391             // int:
392             // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4*iter
393             //              = 16 (or 12 if UseCompactObjectHeaders=true)
394             // If UseCompactObjectHeaders=false: iter % 2 = 0
395             // If UseCompactObjectHeaders=true:  iter % 2 = 1
396             //
397             // byte:
398             // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 1*iter
399             //              = 16 (or 12 if UseCompactObjectHeaders=true)
400             // If UseCompactObjectHeaders=false: iter % 8 = 0
401             // If UseCompactObjectHeaders=true:  iter % 8 = 4
402             //
403             // -> we cannot align both if UseCompactObjectHeaders=true.
404         }
405         return res1;
406     }
407 
408     @Test
409     // POPULATE_INDEX seems to mess with vectorization, see JDK-8332878.
410     public int[] fillIndexPlusStride() {
411         int[] res = new int[SIZE];
412         for (int i = 0; i < SIZE; i++) {
413             res[i] = i + 1;
414         }
415         return res;
416     }
417 
418     @Test
419     // POPULATE_INDEX seems to mess with vectorization, see JDK-8332878.
420     public int[] addArrayWithIndex() {
421         int[] res = new int[SIZE];
422         for (int i = 0; i < SIZE; i++) {
423             res[i] = a[i] + i;
424         }
425         return res;
426     }
427 
428     @Test
429     // POPULATE_INDEX seems to mess with vectorization, see JDK-8332878.
430     public short[] multiplyAddShortIndex() {
431         short[] res = new short[SIZE];
432         for (int i = 0; i < SIZE; i++) {
433             res[i] = (short) (i * i + i);
434         }
435         return res;
436     }
437 
438     @Test
439     // POPULATE_INDEX seems to mess with vectorization, see JDK-8332878.
440     public int[] multiplyBySumOfIndexAndInvariant() {
441         int[] res = new int[SIZE];
442         for (int i = 0; i < SIZE; i++) {
443             res[i] = a[i] * (i + 10 + intInv);
444         }
445         return res;
446     }
447 
448     @Test
449     @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"},
450         applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false"},
451         counts = {IRNode.STORE_VECTOR, ">0"})
452     public int[] manuallyUnrolledStride2() {
453         int[] res = new int[SIZE];
454         for (int i = 0; i < SIZE - 1; i += 2) {
455             res[i] = a[i] * b[i];
456             res[i + 1] = a[i + 1] * b[i + 1];
457             // Hand-unrolling can mess with alignment!
458             //
459             // With UseCompactObjectHeaders and AlignVector,
460             // we must 8-byte align all vector loads/stores.
461             //
462             // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 8*iter
463             //              = 16 (or 12 if UseCompactObjectHeaders=true)
464             // If UseCompactObjectHeaders=false: 16 divisible by 8 -> vectorize
465             // If UseCompactObjectHeaders=true:  12 not divisibly by 8 -> not vectorize
466         }
467         return res;
468     }
469 
470     @Test
471     @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"},
472         counts = {IRNode.STORE_VECTOR, ">0",
473                   IRNode.LOAD_VECTOR_I, "> 0"})
474     public int partialVectorizableLoop() {
475         int[] res = new int[SIZE];
476         int k = 9;
477         for (int i = 0; i < SIZE / 2; i++) {
478             res[i] = a[i] * b[i];
479             k = 3 * k + 1;
480         }
481         return k;
482     }
483 }