1 /* 2 * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 26 /* 27 * @test 28 * @bug 8304720 29 * @summary Test some examples where non-vectorized memops also need to 30 * be reordered during SuperWord::schedule. 
* @modules java.base/jdk.internal.misc
 * @library /test/lib /
 * @run driver compiler.loopopts.superword.TestScheduleReordersScalarMemops nCOH_nAV
 * @run driver compiler.loopopts.superword.TestScheduleReordersScalarMemops nCOH_yAV
 * @run driver compiler.loopopts.superword.TestScheduleReordersScalarMemops yCOH_nAV
 * @run driver compiler.loopopts.superword.TestScheduleReordersScalarMemops yCOH_yAV
 */

package compiler.loopopts.superword;

import jdk.internal.misc.Unsafe;
import jdk.test.lib.Asserts;
import compiler.lib.ir_framework.*;

/**
 * IR-framework test with two kernels ({@code test0}, {@code test1}) in which a
 * vectorizable pair of int multiplications is surrounded by scalar memory
 * operations that must be reordered together with the vectorized ones.
 *
 * <p>The single command-line argument selects the VM flag scenario:
 * {@code n}/{@code y} for UseCompactObjectHeaders (COH) and AlignVector (AV),
 * i.e. one of {@code nCOH_nAV}, {@code nCOH_yAV}, {@code yCOH_nAV}, {@code yCOH_yAV}.
 *
 * <p>Each kernel's result is compared against a "gold" result computed once in
 * the constructor.
 */
public class TestScheduleReordersScalarMemops {
    /** Number of elements in every test array; also the kernel loop bound. */
    static final int RANGE = 1024;
    /** Not referenced anywhere in this file; kept so the class members stay unchanged. */
    static final int ITER  = 10_000;
    // Used for mismatched (int on float[]) accesses in test1 and for bitwise float comparison in verify.
    static Unsafe unsafe = Unsafe.getUnsafe();

    // Reference results, filled in by the constructor.
    int[]   goldI0 = new int[RANGE];
    float[] goldF0 = new float[RANGE];
    int[]   goldI1 = new int[RANGE];
    float[] goldF1 = new float[RANGE];

    /**
     * Configures the IR test framework for the requested scenario and starts it.
     *
     * @param args {@code args[0]} is the scenario name; further arguments are ignored
     * @throws IllegalArgumentException if no scenario argument is given
     * @throws RuntimeException if the scenario name is not recognized
     */
    public static void main(String[] args) {
        // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
        if (args.length == 0) {
            throw new IllegalArgumentException(
                "Missing test argument, expected one of: nCOH_nAV, nCOH_yAV, yCOH_nAV, yCOH_yAV");
        }
        final String scenario = args[0];

        TestFramework framework = new TestFramework(TestScheduleReordersScalarMemops.class);
        framework.addFlags("--add-modules", "java.base", "--add-exports", "java.base/jdk.internal.misc=ALL-UNNAMED",
                           "-XX:CompileCommand=compileonly,compiler.loopopts.superword.TestScheduleReordersScalarMemops::test*",
                           "-XX:CompileCommand=compileonly,compiler.loopopts.superword.TestScheduleReordersScalarMemops::verify",
                           "-XX:CompileCommand=compileonly,compiler.loopopts.superword.TestScheduleReordersScalarMemops::init",
                           "-XX:-TieredCompilation", "-Xbatch",
                           "-XX:+IgnoreUnrecognizedVMOptions", "-XX:LoopUnrollLimit=1000");
        switch (scenario) {
            case "nCOH_nAV" -> framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:-AlignVector");
            case "nCOH_yAV" -> framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:+AlignVector");
            case "yCOH_nAV" -> framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:-AlignVector");
            case "yCOH_yAV" -> framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:+AlignVector");
            default -> throw new RuntimeException("Test argument not recognized: " + scenario);
        }
        framework.start();
    }

    /**
     * Fills the gold arrays by running each kernel once on freshly initialized data.
     * The same array is passed for both the "a" and "b" parameter of each type,
     * exactly as the run methods do.
     */
    TestScheduleReordersScalarMemops() {
        // compute the gold standard in interpreter mode
        init(goldI0, goldF0);
        test0(goldI0, goldI0, goldF0, goldF0);
        init(goldI1, goldF1);
        test1(goldI1, goldI1, goldF1, goldF1);
    }

    /** Runs {@code test0} on fresh input and checks both output arrays against the gold result. */
    @Run(test = "test0")
    @Warmup(100)
    public void runTest0() {
        int[] dataI = new int[RANGE];
        float[] dataF = new float[RANGE];
        init(dataI, dataF);
        test0(dataI, dataI, dataF, dataF);
        verify("test0", dataI, goldI0);
        verify("test0", dataF, goldF0);
    }

    /**
     * Kernel with int/float conversions. The statements below are intentionally
     * left exactly as in the reproducer; do not restructure them.
     *
     * @param dataIa int source for the scalar operations A and B
     * @param dataIb int destination of the vectorizable operations X and Y
     * @param dataFa float destination of the scalar operations A and B
     * @param dataFb float source for the vectorizable operations X and Y
     */
    @Test
    @IR(counts = {IRNode.MUL_VI, "> 0"},
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
    static void test0(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb) {
        for (int i = 0; i < RANGE; i+=2) {
            // We have dependency edges:
            //  A -> X
            //  Y -> B
            // Still, we can vectorize [X,Y].
            // We do not vectorize A and B, because they are not isomorphic (add vs mul).
            //
            // Imagine this is unrolled at least 2x.
            // We get order: A0 X0 Y0 B0 A1 X1 Y1 B1
            // Vectorized:      X0 Y0       X1 Y1
            // Scalar:       A0       B0 A1       B1
            //
            // However, since the As need to be before, and the Bs after the vector operations,
            // we need to have all As before all Bs. This means we need to reorder the scalar
            // operations, and not just the vectorized ones.
            //
            // A correct reordering would be: A0 A1 [X0, Y0, X1, Y1] B0 B1
            //
            dataFa[i + 0] = dataIa[i + 0] * 1.3f;     // A *1.3
            dataIb[i + 0] = (int)dataFb[i + 0] * 11;  // X *11
            dataIb[i + 1] = (int)dataFb[i + 1] * 11;  // Y *11
            dataFa[i + 1] = dataIa[i + 1] + 1.2f;     // B +1.2
            // With AlignVector, we need 8-byte alignment of vector loads/stores.
            // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
            // adr = base + 16 + 8*i  ->  always             adr = base + 12 + 8*i  ->  never
            // -> vectorize                                  -> no vectorization
        }
    }

    /** Runs {@code test1} on fresh input and checks both output arrays against the gold result. */
    @Run(test = "test1")
    @Warmup(100)
    public void runTest1() {
        int[] dataI = new int[RANGE];
        float[] dataF = new float[RANGE];
        init(dataI, dataF);
        test1(dataI, dataI, dataF, dataF);
        verify("test1", dataI, goldI1);
        verify("test1", dataF, goldF1);
    }

    /**
     * Same pattern as {@code test0}, but the float arrays are accessed as raw
     * ints through Unsafe, so no int/float conversion is involved.
     *
     * @param dataIa int source for the scalar operations A and B
     * @param dataIb int destination of the vectorizable operations X and Y
     * @param dataFa float array written as raw ints by A and B
     * @param dataFb float array read as raw ints by X and Y
     */
    @Test
    @IR(counts = {IRNode.MUL_VI, "> 0"},
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
    static void test1(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb) {
        for (int i = 0; i < RANGE; i+=2) {
            // Do the same as test0, but without int-float conversion.
            // This should reproduce on machines where conversion is not implemented.
            //
            // dataFb is a float[], so its element offsets are derived from the float
            // base offset, matching the stores to dataFa and the reads in verify.
            // NOTE(review): the int and float base offsets are expected to be equal
            // (both element types are 4 bytes), so this should not change the
            // addresses the kernel touches - confirm against Unsafe.
            unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0, dataIa[i+0] + 1);  // A +1
            dataIb[i+0] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0);  // X
            dataIb[i+1] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4);  // Y
            unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, dataIa[i+1] * 11); // B *11
            // With AlignVector, we need 8-byte alignment of vector loads/stores.
            // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
            // adr = base + 16 + 8*i  ->  always             adr = base + 12 + 8*i  ->  never
            // -> vectorize                                  -> no vectorization
        }
    }

    /**
     * Fills the first {@code RANGE} elements with deterministic input values:
     * {@code dataI[i] = i + 1} and {@code dataF[i] = i + 0.1f}.
     *
     * @param dataI int array of at least {@code RANGE} elements
     * @param dataF float array of at least {@code RANGE} elements
     */
    static void init(int[] dataI, float[] dataF) {
        for (int i = 0; i < RANGE; i++) {
            dataI[i] = i + 1;
            dataF[i] = i + 0.1f;
        }
    }

    /**
     * Checks that the first {@code RANGE} ints of {@code data} equal those of {@code gold}.
     *
     * @param name kernel name used in the failure message
     * @param data result to check
     * @param gold expected result
     * @throws RuntimeException on the first mismatching element
     */
    static void verify(String name, int[] data, int[] gold) {
        for (int i = 0; i < RANGE; i++) {
            if (data[i] != gold[i]) {
                throw new RuntimeException(" Invalid " + name + " result: data[" + i + "]: " + data[i] + " != " + gold[i]);
            }
        }
    }

    /**
     * Checks that the first {@code RANGE} floats of {@code data} and {@code gold}
     * have identical raw 32-bit patterns. Elements are read as ints through Unsafe,
     * so the values reported in the failure message are those raw ints.
     *
     * @param name kernel name used in the failure message
     * @param data result to check
     * @param gold expected result
     * @throws RuntimeException on the first mismatching element
     */
    static void verify(String name, float[] data, float[] gold) {
        for (int i = 0; i < RANGE; i++) {
            // long arithmetic for the byte offset, as in the kernel
            final long offset = Unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i;
            int datav = unsafe.getInt(data, offset);
            int goldv = unsafe.getInt(gold, offset);
            if (datav != goldv) {
                throw new RuntimeException(" Invalid " + name + " result: dataF[" + i + "]: " + datav + " != " + goldv);
            }
        }
    }
}