1 /* 2 * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 /* 26 * @test 27 * @bug 8304042 28 * @summary Test some examples with independent packs with cyclic dependency 29 * between the packs. 30 * @modules java.base/jdk.internal.misc 31 * @library /test/lib / 32 * @run driver compiler.loopopts.superword.TestIndependentPacksWithCyclicDependency nCOH_nAV 33 * @run driver compiler.loopopts.superword.TestIndependentPacksWithCyclicDependency nCOH_yAV 34 * @run driver compiler.loopopts.superword.TestIndependentPacksWithCyclicDependency yCOH_nAV 35 * @run driver compiler.loopopts.superword.TestIndependentPacksWithCyclicDependency yCOH_yAV 36 */ 37 38 package compiler.loopopts.superword; 39 40 import jdk.internal.misc.Unsafe; 41 import jdk.test.lib.Asserts; 42 import compiler.lib.ir_framework.*; 43 44 public class TestIndependentPacksWithCyclicDependency { 45 static final int RANGE = 1024; 46 static final int ITER = 10_000; 47 static Unsafe unsafe = Unsafe.getUnsafe(); 48 49 int[] goldI0 = new int[RANGE]; 50 float[] goldF0 = new float[RANGE]; 51 int[] goldI1 = new int[RANGE]; 52 float[] goldF1 = new float[RANGE]; 53 int[] goldI2 = new int[RANGE]; 54 float[] goldF2 = new float[RANGE]; 55 int[] goldI3 = new int[RANGE]; 56 float[] goldF3 = new float[RANGE]; 57 int[] goldI4 = new int[RANGE]; 58 float[] goldF4 = new float[RANGE]; 59 int[] goldI5 = new int[RANGE]; 60 float[] goldF5 = new float[RANGE]; 61 int[] goldI6 = new int[RANGE]; 62 float[] goldF6 = new float[RANGE]; 63 long[] goldL6 = new long[RANGE]; 64 int[] goldI7 = new int[RANGE]; 65 float[] goldF7 = new float[RANGE]; 66 long[] goldL7 = new long[RANGE]; 67 int[] goldI8 = new int[RANGE]; 68 float[] goldF8 = new float[RANGE]; 69 long[] goldL8 = new long[RANGE]; 70 int[] goldI9 = new int[RANGE]; 71 float[] goldF9 = new float[RANGE]; 72 long[] goldL9 = new long[RANGE]; 73 int[] goldI10 = new int[RANGE]; 74 float[] goldF10 = new float[RANGE]; 75 long[] goldL10 = new long[RANGE]; 76 77 public static void main(String args[]) { 78 TestFramework framework = new TestFramework(TestIndependentPacksWithCyclicDependency.class); 79 framework.addFlags("--add-modules", "java.base", "--add-exports", "java.base/jdk.internal.misc=ALL-UNNAMED", 80 "-XX:CompileCommand=compileonly,compiler.loopopts.superword.TestIndependentPacksWithCyclicDependency::test*", 81 "-XX:CompileCommand=compileonly,compiler.loopopts.superword.TestIndependentPacksWithCyclicDependency::verify", 82 "-XX:CompileCommand=compileonly,compiler.loopopts.superword.TestIndependentPacksWithCyclicDependency::init", 83 "-XX:+IgnoreUnrecognizedVMOptions", "-XX:LoopUnrollLimit=1000"); 84 switch (args[0]) { 85 case "nCOH_nAV" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:-AlignVector"); } 86 case "nCOH_yAV" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:+AlignVector"); } 87 case "yCOH_nAV" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:-AlignVector"); } 88 case "yCOH_yAV" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:+AlignVector"); } 89 default -> { throw new RuntimeException("Test argument not recognized: " + args[0]); } 90 }; 91 framework.start(); 92 } 93 94 TestIndependentPacksWithCyclicDependency() { 95 // compute the gold standard in interpreter mode 96 init(goldI0, goldF0); 97 test0(goldI0, goldI0, goldF0, goldF0); 98 init(goldI1, goldF1); 99 test1(goldI1, goldI1, goldF1, goldF1); 100 init(goldI2, goldF2); 101 test2(goldI2, goldI2, goldF2, goldF2); 102 init(goldI3, goldF3); 103 test3(goldI3, goldI3, goldF3, goldF3); 104 init(goldI4, goldF4); 105 test4(goldI4, goldI4, goldF4, goldF4); 106 init(goldI5, goldF5); 107 test5(goldI5, goldI5, goldF5, goldF5); 108 init(goldI6, goldF6, goldL6); 109 test6(goldI6, goldI6, goldF6, goldF6, goldL6, goldL6); 110 init(goldI7, goldF7, goldL7); 111 test7(goldI7, goldI7, goldF7, goldF7, goldL7, goldL7); 112 init(goldI8, goldF8, goldL8); 113 test8(goldI8, goldI8, goldF8, goldF8, goldL8, goldL8); 114 init(goldI9, goldF9, goldL9); 115 test9(goldI9, goldI9, goldF9, goldF9, goldL9, goldL9); 116 init(goldI10, goldF10, goldL10); 117 test10(goldI10, goldI10, goldF10, goldF10, goldL10, goldL10); 118 } 119 120 @Run(test = "test0") 121 @Warmup(100) 122 public void runTest0() { 123 int[] dataI = new int[RANGE]; 124 float[] dataF = new float[RANGE]; 125 init(dataI, dataF); 126 test0(dataI, dataI, dataF, dataF); 127 verify("test0", dataI, goldI0); 128 verify("test0", dataF, goldF0); 129 } 130 131 @Test 132 @IR(counts = {IRNode.ADD_VI, "> 0", IRNode.MUL_VF, "> 0"}, 133 applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"}, 134 applyIfPlatform = {"64-bit", "true"}, 135 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) 136 static void test0(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb) { 137 for (int i = 0; i < RANGE; i+=2) { 138 // Hand-unrolled 2x. Int and Float slice are completely separate. 139 dataIb[i+0] = dataIa[i+0] + 3; 140 dataIb[i+1] = dataIa[i+1] + 3; 141 dataFb[i+0] = dataFa[i+0] * 1.3f; 142 dataFb[i+1] = dataFa[i+1] * 1.3f; 143 // With AlignVector, we need 8-byte alignment of vector loads/stores. 144 // UseCompactObjectHeaders=false UseCompactObjectHeaders=true 145 // adr = base + 16 + 8*i -> always adr = base + 12 + 8*i -> never 146 // -> vectorize -> no vectorization 147 } 148 } 149 150 @Run(test = "test1") 151 @Warmup(100) 152 public void runTest1() { 153 int[] dataI = new int[RANGE]; 154 float[] dataF = new float[RANGE]; 155 init(dataI, dataF); 156 test1(dataI, dataI, dataF, dataF); 157 verify("test1", dataI, goldI1); 158 verify("test1", dataF, goldF1); 159 } 160 161 @Test 162 @IR(counts = {IRNode.ADD_VI, "> 0", IRNode.MUL_VF, "> 0", IRNode.VECTOR_CAST_F2I, "> 0", IRNode.VECTOR_CAST_I2F, "> 0"}, 163 applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"}, 164 applyIfPlatform = {"64-bit", "true"}, 165 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"}) 166 static void test1(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb) { 167 for (int i = 0; i < RANGE; i+=2) { 168 // Hand-unrolled 2x. Converst to and from. StoreF -> LoadF dependency. 169 dataFa[i+0] = dataIa[i+0] + 3; 170 dataFa[i+1] = dataIa[i+1] + 3; 171 dataIb[i+0] = (int)(dataFb[i+0] * 1.3f); 172 dataIb[i+1] = (int)(dataFb[i+1] * 1.3f); 173 // With AlignVector, we need 8-byte alignment of vector loads/stores. 174 // UseCompactObjectHeaders=false UseCompactObjectHeaders=true 175 // adr = base + 16 + 8*i -> always adr = base + 12 + 8*i -> never 176 // -> vectorize -> no vectorization 177 } 178 } 179 180 @Run(test = "test2") 181 public void runTest2() { 182 int[] dataI = new int[RANGE]; 183 float[] dataF = new float[RANGE]; 184 init(dataI, dataF); 185 test2(dataI, dataI, dataF, dataF); 186 verify("test2", dataI, goldI2); 187 verify("test2", dataF, goldF2); 188 } 189 190 @Test 191 @IR(counts = {IRNode.ADD_VI, "> 0", IRNode.MUL_VI, "> 0"}, 192 applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"}, 193 applyIfPlatform = {"64-bit", "true"}, 194 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) 195 static void test2(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb) { 196 for (int i = 0; i < RANGE; i+=2) { 197 // int and float arrays are two slices. But we pretend both are of type int. 198 unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0, dataIa[i+0] + 1); 199 unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, dataIa[i+1] + 1); 200 dataIb[i+0] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0); 201 dataIb[i+1] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4); 202 // With AlignVector, we need 8-byte alignment of vector loads/stores. 203 // UseCompactObjectHeaders=false UseCompactObjectHeaders=true 204 // adr = base + 16 + 8*i -> always adr = base + 12 + 8*i -> never 205 // -> vectorize -> no vectorization 206 } 207 } 208 209 @Run(test = "test3") 210 @Warmup(100) 211 public void runTest3() { 212 int[] dataI = new int[RANGE]; 213 float[] dataF = new float[RANGE]; 214 init(dataI, dataF); 215 test3(dataI, dataI, dataF, dataF); 216 verify("test3", dataI, goldI3); 217 verify("test3", dataF, goldF3); 218 } 219 220 @Test 221 @IR(counts = {IRNode.ADD_VI, "> 0", IRNode.MUL_VF, "> 0"}, 222 applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"}, 223 applyIfPlatform = {"64-bit", "true"}, 224 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) 225 static void test3(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb) { 226 for (int i = 0; i < RANGE; i+=2) { 227 // Inversion of orders. But because we operate on separate slices, this should 228 // safely vectorize. It should detect that each line is independent, so it can 229 // reorder them. 230 dataIb[i+0] = dataIa[i+0] + 3; 231 dataFb[i+1] = dataFa[i+1] * 1.3f; 232 dataFb[i+0] = dataFa[i+0] * 1.3f; 233 dataIb[i+1] = dataIa[i+1] + 3; 234 // With AlignVector, we need 8-byte alignment of vector loads/stores. 235 // UseCompactObjectHeaders=false UseCompactObjectHeaders=true 236 // adr = base + 16 + 8*i -> always adr = base + 12 + 8*i -> never 237 // -> vectorize -> no vectorization 238 } 239 } 240 241 @Run(test = "test4") 242 @Warmup(100) 243 public void runTest4() { 244 int[] dataI = new int[RANGE]; 245 float[] dataF = new float[RANGE]; 246 init(dataI, dataF); 247 test4(dataI, dataI, dataF, dataF); 248 verify("test4", dataI, goldI4); 249 verify("test4", dataF, goldF4); 250 } 251 252 @Test 253 static void test4(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb) { 254 for (int i = 0; i < RANGE; i+=2) { 255 // same as test1, except that reordering leads to different semantics 256 // [A,B] and [X,Y] are both packs that are internally independent 257 // But we have dependencies A -> X (StoreF -> LoadF) 258 // and Y -> B (StoreI -> LoadI) 259 // Hence the two packs have a cyclic dependency, we cannot schedule 260 // one before the other. 261 dataFa[i+0] = dataIa[i+0] + 3; // A 262 dataIb[i+0] = (int)(dataFb[i+0] * 1.3f); // X 263 dataIb[i+1] = (int)(dataFb[i+1] * 1.3f); // Y 264 dataFa[i+1] = dataIa[i+1] + 3; // B 265 } 266 } 267 268 @Run(test = "test5") 269 public void runTest5() { 270 int[] dataI = new int[RANGE]; 271 float[] dataF = new float[RANGE]; 272 init(dataI, dataF); 273 test5(dataI, dataI, dataF, dataF); 274 verify("test5", dataI, goldI5); 275 verify("test5", dataF, goldF5); 276 } 277 278 @Test 279 static void test5(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb) { 280 for (int i = 0; i < RANGE; i+=2) { 281 // same as test2, except that reordering leads to different semantics 282 // explanation analogue to test4 283 unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0, dataIa[i+0] + 1); // A 284 dataIb[i+0] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0); // X 285 dataIb[i+1] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4); // Y 286 unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, dataIa[i+1] + 1); // B 287 } 288 } 289 290 @Run(test = "test6") 291 public void runTest6() { 292 int[] dataI = new int[RANGE]; 293 float[] dataF = new float[RANGE]; 294 long[] dataL = new long[RANGE]; 295 init(dataI, dataF, dataL); 296 test6(dataI, dataI, dataF, dataF, dataL, dataL); 297 verify("test6", dataI, goldI6); 298 verify("test6", dataF, goldF6); 299 verify("test6", dataL, goldL6); 300 } 301 302 @Test 303 @IR(counts = {IRNode.ADD_VI, "> 0", IRNode.MUL_VI, "> 0", IRNode.ADD_VF, "> 0"}, 304 applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"}, 305 applyIfPlatform = {"64-bit", "true"}, 306 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) 307 static void test6(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb, 308 long[] dataLa, long[] dataLb) { 309 for (int i = 0; i < RANGE; i+=2) { 310 // Chain of parallelizable op and conversion 311 int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0) + 3; 312 int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4) + 3; 313 unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0, v00); 314 unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, v01); 315 int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0) * 45; 316 int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4) * 45; 317 unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0, v10); 318 unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4, v11); 319 float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0) + 0.55f; 320 float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4) + 0.55f; 321 unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0, v20); 322 unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4, v21); 323 // With AlignVector, we need 8-byte alignment of vector loads/stores. 324 // UseCompactObjectHeaders=false UseCompactObjectHeaders=true 325 // adr = base + 16 + 8*i -> always adr = base + 12 + 8*i -> never 326 // -> vectorize -> no vectorization 327 } 328 } 329 330 @Run(test = "test7") 331 public void runTest7() { 332 int[] dataI = new int[RANGE]; 333 float[] dataF = new float[RANGE]; 334 long[] dataL = new long[RANGE]; 335 init(dataI, dataF, dataL); 336 test7(dataI, dataI, dataF, dataF, dataL, dataL); 337 verify("test7", dataI, goldI7); 338 verify("test7", dataF, goldF7); 339 verify("test7", dataL, goldL7); 340 } 341 342 @Test 343 static void test7(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb, 344 long[] dataLa, long[] dataLb) { 345 for (int i = 0; i < RANGE; i+=2) { 346 // Cycle involving 3 memory slices 347 int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0) + 3; 348 unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0, v00); 349 int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0) * 45; 350 int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4) * 45; 351 unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0, v10); 352 unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4, v11); 353 float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0) + 0.55f; 354 float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4) + 0.55f; 355 unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0, v20); 356 unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4, v21); 357 int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4) + 3; // moved down 358 unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, v01); 359 } 360 } 361 362 363 @Run(test = "test8") 364 public void runTest8() { 365 int[] dataI = new int[RANGE]; 366 float[] dataF = new float[RANGE]; 367 long[] dataL = new long[RANGE]; 368 init(dataI, dataF, dataL); 369 test8(dataI, dataI, dataF, dataF, dataL, dataL); 370 verify("test8", dataI, goldI8); 371 verify("test8", dataF, goldF8); 372 verify("test8", dataL, goldL8); 373 } 374 375 @Test 376 static void test8(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb, 377 long[] dataLa, long[] dataLb) { 378 for (int i = 0; i < RANGE; i+=2) { 379 // 2-cycle, with more ops after 380 int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0) + 3; 381 unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0, v00); 382 int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0) * 45; 383 int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4) * 45; 384 unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0, v10); 385 unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4, v11); 386 int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4) + 3; 387 unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, v01); 388 // more stuff after 389 float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0) + 0.55f; 390 float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4) + 0.55f; 391 unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0, v20); 392 unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4, v21); 393 } 394 } 395 396 @Run(test = "test9") 397 public void runTest9() { 398 int[] dataI = new int[RANGE]; 399 float[] dataF = new float[RANGE]; 400 long[] dataL = new long[RANGE]; 401 init(dataI, dataF, dataL); 402 test9(dataI, dataI, dataF, dataF, dataL, dataL); 403 verify("test9", dataI, goldI9); 404 verify("test9", dataF, goldF9); 405 verify("test9", dataL, goldL9); 406 } 407 408 @Test 409 static void test9(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb, 410 long[] dataLa, long[] dataLb) { 411 for (int i = 0; i < RANGE; i+=2) { 412 // 2-cycle, with more stuff before 413 float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0) + 0.55f; 414 float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4) + 0.55f; 415 unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0, v20); 416 unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4, v21); 417 // 2-cycle 418 int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0) + 3; 419 unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0, v00); 420 int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0) * 45; 421 int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4) * 45; 422 unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0, v10); 423 unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4, v11); 424 int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4) + 3; 425 unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, v01); 426 } 427 } 428 429 @Run(test = "test10") 430 public void runTest10() { 431 int[] dataI = new int[RANGE]; 432 float[] dataF = new float[RANGE]; 433 long[] dataL = new long[RANGE]; 434 init(dataI, dataF, dataL); 435 test10(dataI, dataI, dataF, dataF, dataL, dataL); 436 verify("test10", dataI, goldI10); 437 verify("test10", dataF, goldF10); 438 verify("test10", dataL, goldL10); 439 } 440 441 @Test 442 static void test10(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb, 443 long[] dataLa, long[] dataLb) { 444 for (int i = 0; i < RANGE; i+=2) { 445 // This creates the following graph before SuperWord: 446 // 447 // A -> R -> U 448 // S -> V -> B 449 // 450 // SuperWord analyzes the graph, and sees that [A,B] and [U,V] 451 // are adjacent, isomorphic and independent packs. However, 452 // [R,S] are not isomorphic (R mul, S add). 453 // So it vectorizes [A,B] and [U,V] this gives us this graph: 454 // 455 // -> R 456 // [A,B] -> [U,V] -+ 457 // ^ -> S | 458 // | | 459 // +------------------+ 460 // 461 // The cycle thus does not only go via packs, but also scalar ops. 462 // 463 int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0) + 3; // A 464 unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0, v00); 465 int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0) * 45; // R: constant mismatch 466 int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4) + 43; // S 467 unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0, v10); 468 unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4, v11); 469 float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0) + 0.55f; // U 470 float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4) + 0.55f; // V 471 unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0, v20); 472 unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4, v21); 473 int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4) + 3; // B: moved down 474 unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, v01); 475 } 476 } 477 478 static void init(int[] dataI, float[] dataF) { 479 for (int i = 0; i < RANGE; i++) { 480 dataI[i] = i + 1; 481 dataF[i] = i + 0.1f; 482 } 483 } 484 485 static void init(int[] dataI, float[] dataF, long[] dataL) { 486 for (int i = 0; i < RANGE; i++) { 487 dataI[i] = i + 1; 488 dataF[i] = i + 0.1f; 489 dataL[i] = i + 1; 490 } 491 } 492 493 static void verify(String name, int[] data, int[] gold) { 494 for (int i = 0; i < RANGE; i++) { 495 if (data[i] != gold[i]) { 496 throw new RuntimeException(" Invalid " + name + " result: dataI[" + i + "]: " + data[i] + " != " + gold[i]); 497 } 498 } 499 } 500 501 static void verify(String name, float[] data, float[] gold) { 502 for (int i = 0; i < RANGE; i++) { 503 int datav = unsafe.getInt(data, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i); 504 int goldv = unsafe.getInt(gold, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i); 505 if (datav != goldv) { 506 throw new RuntimeException(" Invalid " + name + " result: dataF[" + i + "]: " + datav + " != " + goldv); 507 } 508 } 509 } 510 511 static void verify(String name, long[] data, long[] gold) { 512 for (int i = 0; i < RANGE; i++) { 513 if (data[i] != gold[i]) { 514 throw new RuntimeException(" Invalid " + name + " result: dataL[" + i + "]: " + data[i] + " != " + gold[i]); 515 } 516 } 517 } 518 } 519