1 /*
  2  * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
  3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4  *
  5  * This code is free software; you can redistribute it and/or modify it
  6  * under the terms of the GNU General Public License version 2 only, as
  7  * published by the Free Software Foundation.
  8  *
  9  * This code is distributed in the hope that it will be useful, but WITHOUT
 10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 12  * version 2 for more details (a copy is included in the LICENSE file that
 13  * accompanied this code).
 14  *
 15  * You should have received a copy of the GNU General Public License version
 16  * 2 along with this work; if not, write to the Free Software Foundation,
 17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 18  *
 19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 20  * or visit www.oracle.com if you need additional information or have any
 21  * questions.
 22  *
 23  */
 24 
 25 
 26 /*
 27  * @test
 28  * @bug 8304720
 29  * @summary Test some examples where non-vectorized memops also need to
 30  *          be reordered during SuperWord::schedule.
 31  * @modules java.base/jdk.internal.misc
 32  * @library /test/lib /
 33  * @run driver compiler.loopopts.superword.TestScheduleReordersScalarMemops nCOH_nAV
 34  * @run driver compiler.loopopts.superword.TestScheduleReordersScalarMemops nCOH_yAV
 35  * @run driver compiler.loopopts.superword.TestScheduleReordersScalarMemops yCOH_nAV
 36  * @run driver compiler.loopopts.superword.TestScheduleReordersScalarMemops yCOH_yAV
 37  */
 38 
 39 package compiler.loopopts.superword;
 40 
 41 import jdk.internal.misc.Unsafe;
 42 import jdk.test.lib.Asserts;
 43 import compiler.lib.ir_framework.*;
 44 
 45 public class TestScheduleReordersScalarMemops {
 46     static final int RANGE = 1024;
 47     static final int ITER  = 10_000;
 48     static Unsafe unsafe = Unsafe.getUnsafe();
 49 
 50     int[]   goldI0 = new int[RANGE];
 51     float[] goldF0 = new float[RANGE];
 52     int[]   goldI1 = new int[RANGE];
 53     float[] goldF1 = new float[RANGE];
 54 
 55     public static void main(String args[]) {
 56         TestFramework framework = new TestFramework(TestScheduleReordersScalarMemops.class);
 57         framework.addFlags("--add-modules", "java.base", "--add-exports", "java.base/jdk.internal.misc=ALL-UNNAMED",
 58                            "-XX:CompileCommand=compileonly,compiler.loopopts.superword.TestScheduleReordersScalarMemops::test*",
 59                            "-XX:CompileCommand=compileonly,compiler.loopopts.superword.TestScheduleReordersScalarMemops::verify",
 60                            "-XX:CompileCommand=compileonly,compiler.loopopts.superword.TestScheduleReordersScalarMemops::init",
 61                            "-XX:-TieredCompilation", "-Xbatch",
 62                            "-XX:+IgnoreUnrecognizedVMOptions", "-XX:LoopUnrollLimit=1000");
 63         switch (args[0]) {
 64             case "nCOH_nAV" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:-AlignVector"); }
 65             case "nCOH_yAV" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:+AlignVector"); }
 66             case "yCOH_nAV" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:-AlignVector"); }
 67             case "yCOH_yAV" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:+AlignVector"); }
 68             default -> { throw new RuntimeException("Test argument not recognized: " + args[0]); }
 69         };
 70         framework.start();
 71     }
 72 
 73     TestScheduleReordersScalarMemops() {
 74         // compute the gold standard in interpreter mode
 75         init(goldI0, goldF0);
 76         test0(goldI0, goldI0, goldF0, goldF0);
 77         init(goldI1, goldF1);
 78         test1(goldI1, goldI1, goldF1, goldF1);
 79     }
 80 
 81     @Run(test = "test0")
 82     @Warmup(100)
 83     public void runTest0() {
 84         int[] dataI = new int[RANGE];
 85         float[] dataF = new float[RANGE];
 86         init(dataI, dataF);
 87         test0(dataI, dataI, dataF, dataF);
 88         verify("test0", dataI, goldI0);
 89         verify("test0", dataF, goldF0);
 90     }
 91 
 92     @Test
 93     @IR(counts = {IRNode.MUL_VI, "> 0"},
 94         applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
 95         applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
 96     static void test0(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb) {
 97         for (int i = 0; i < RANGE; i+=2) {
 98             // We have dependency edges:
 99             //  A -> X
100             //  Y -> B
101             // Still, we can vectorize [X,Y].
102             // We do not vectorize A and B, because they are not isomorphic (add vs mul).
103             //
104             // Imagine this is unrolled at least 2x.
105             // We get order: A0 X0 Y0 B0 A1 X1 Y1 B1
106             // Vectorized:      X0 Y0       X1 Y1
107             // Scalar:       A0       B0 A1       B1
108             //
109             // However, since the As need to be before, and the Bs after the vector operations,
110             // we need to have all As before all Bs. This means we need to reorder the scalar
111             // operations, and not just the vectorized ones.
112             //
113             // A correct reordering would be: A0 A1 [X0, Y0, X1, Y1] B0 B1
114             //
115             dataFa[i + 0] = dataIa[i + 0] * 1.3f;     // A *1.3
116             dataIb[i + 0] = (int)dataFb[i + 0] * 11;  // X *11
117             dataIb[i + 1] = (int)dataFb[i + 1] * 11;  // Y *11
118             dataFa[i + 1] = dataIa[i + 1] + 1.2f;     // B +1.2
119             // With AlignVector, we need 8-byte alignment of vector loads/stores.
120             // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
121             // adr = base + 16 + 8*i   ->  always            adr = base + 12 + 8*i   ->  never
122             // -> vectorize                                  -> no vectorization
123         }
124     }
125 
126     @Run(test = "test1")
127     @Warmup(100)
128     public void runTest1() {
129         int[] dataI = new int[RANGE];
130         float[] dataF = new float[RANGE];
131         init(dataI, dataF);
132         test1(dataI, dataI, dataF, dataF);
133         verify("test1", dataI, goldI1);
134         verify("test1", dataF, goldF1);
135     }
136 
137     @Test
138     @IR(counts = {IRNode.MUL_VI, "> 0"},
139         applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
140         applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
141     static void test1(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb) {
142         for (int i = 0; i < RANGE; i+=2) {
143             // Do the same as test0, but without int-float conversion.
144             // This should reproduce on machines where conversion is not implemented.
145             unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0, dataIa[i+0] + 1);  // A +1
146             dataIb[i+0] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0);  // X
147             dataIb[i+1] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4);  // Y
148             unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, dataIa[i+1] * 11); // B *11
149             // With AlignVector, we need 8-byte alignment of vector loads/stores.
150             // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
151             // adr = base + 16 + 8*i   ->  always            adr = base + 12 + 8*i   ->  never
152             // -> vectorize                                  -> no vectorization
153         }
154     }
155 
156     static void init(int[] dataI, float[] dataF) {
157         for (int i = 0; i < RANGE; i++) {
158             dataI[i] = i + 1;
159             dataF[i] = i + 0.1f;
160         }
161     }
162 
163     static void verify(String name, int[] data, int[] gold) {
164         for (int i = 0; i < RANGE; i++) {
165             if (data[i] != gold[i]) {
166                 throw new RuntimeException(" Invalid " + name + " result: data[" + i + "]: " + data[i] + " != " + gold[i]);
167             }
168         }
169     }
170 
171     static void verify(String name, float[] data, float[] gold) {
172         for (int i = 0; i < RANGE; i++) {
173             int datav = unsafe.getInt(data, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i);
174             int goldv = unsafe.getInt(gold, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i);
175             if (datav != goldv) {
176                 throw new RuntimeException(" Invalid " + name + " result: dataF[" + i + "]: " + datav + " != " + goldv);
177             }
178         }
179     }
180 }