1 /* 2 * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 /* 26 * @test 27 * @bug 8304042 28 * @summary Test some examples with independent packs with cyclic dependency 29 * between the packs. 30 * @modules java.base/jdk.internal.misc 31 * @library /test/lib / 32 * @run driver compiler.loopopts.superword.TestIndependentPacksWithCyclicDependency nCOH_nAV 33 * @run driver compiler.loopopts.superword.TestIndependentPacksWithCyclicDependency nCOH_yAV 34 * @run driver compiler.loopopts.superword.TestIndependentPacksWithCyclicDependency yCOH_nAV 35 * @run driver compiler.loopopts.superword.TestIndependentPacksWithCyclicDependency yCOH_yAV 36 */ 37 38 package compiler.loopopts.superword; 39 40 import jdk.internal.misc.Unsafe; 41 import jdk.test.lib.Asserts; 42 import compiler.lib.ir_framework.*; 43 44 public class TestIndependentPacksWithCyclicDependency { 45 static final int RANGE = 1024; 46 static final int ITER = 10_000; 47 static Unsafe unsafe = Unsafe.getUnsafe(); 48 49 int[] goldI0 = new int[RANGE]; 50 float[] goldF0 = new float[RANGE]; 51 int[] goldI1 = new int[RANGE]; 52 float[] goldF1 = new float[RANGE]; 53 int[] goldI2 = new int[RANGE]; 54 float[] goldF2 = new float[RANGE]; 55 int[] goldI3 = new int[RANGE]; 56 float[] goldF3 = new float[RANGE]; 57 int[] goldI4 = new int[RANGE]; 58 float[] goldF4 = new float[RANGE]; 59 int[] goldI5 = new int[RANGE]; 60 float[] goldF5 = new float[RANGE]; 61 int[] goldI6 = new int[RANGE]; 62 float[] goldF6 = new float[RANGE]; 63 long[] goldL6 = new long[RANGE]; 64 int[] goldI7 = new int[RANGE]; 65 float[] goldF7 = new float[RANGE]; 66 long[] goldL7 = new long[RANGE]; 67 int[] goldI8 = new int[RANGE]; 68 float[] goldF8 = new float[RANGE]; 69 long[] goldL8 = new long[RANGE]; 70 int[] goldI9 = new int[RANGE]; 71 float[] goldF9 = new float[RANGE]; 72 long[] goldL9 = new long[RANGE]; 73 int[] goldI10 = new int[RANGE]; 74 float[] goldF10 = new float[RANGE]; 75 long[] goldL10 = new long[RANGE]; 76 77 public static void main(String args[]) { 78 TestFramework framework = new TestFramework(TestIndependentPacksWithCyclicDependency.class); 79 framework.addFlags("--add-modules", "java.base", "--add-exports", "java.base/jdk.internal.misc=ALL-UNNAMED", 80 "-XX:CompileCommand=compileonly,compiler.loopopts.superword.TestIndependentPacksWithCyclicDependency::test*", 81 "-XX:CompileCommand=compileonly,compiler.loopopts.superword.TestIndependentPacksWithCyclicDependency::verify", 82 "-XX:CompileCommand=compileonly,compiler.loopopts.superword.TestIndependentPacksWithCyclicDependency::init", 83 "-XX:+IgnoreUnrecognizedVMOptions", "-XX:LoopUnrollLimit=1000"); 84 switch (args[0]) { 85 case "nCOH_nAV" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:-AlignVector"); } 86 case "nCOH_yAV" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:+AlignVector"); } 87 case "yCOH_nAV" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:-AlignVector"); } 88 case "yCOH_yAV" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:+AlignVector"); } 89 default -> { throw new RuntimeException("Test argument not recognized: " + args[0]); } 90 }; 91 framework.start(); 92 } 93 94 TestIndependentPacksWithCyclicDependency() { 95 // compute the gold standard in interpreter mode 96 init(goldI0, goldF0); 97 test0(goldI0, goldI0, goldF0, goldF0); 98 init(goldI1, goldF1); 99 test1(goldI1, goldI1, goldF1, goldF1); 100 init(goldI2, goldF2); 101 test2(goldI2, goldI2, goldF2, goldF2); 102 init(goldI3, goldF3); 103 test3(goldI3, goldI3, goldF3, goldF3); 104 init(goldI4, goldF4); 105 test4(goldI4, goldI4, goldF4, goldF4); 106 init(goldI5, goldF5); 107 test5(goldI5, goldI5, goldF5, goldF5); 108 init(goldI6, goldF6, goldL6); 109 test6(goldI6, goldI6, goldF6, goldF6, goldL6, goldL6); 110 init(goldI7, goldF7, goldL7); 111 test7(goldI7, goldI7, goldF7, goldF7, goldL7, goldL7); 112 init(goldI8, goldF8, goldL8); 113 test8(goldI8, goldI8, goldF8, goldF8, goldL8, goldL8); 114 init(goldI9, goldF9, goldL9); 115 test9(goldI9, goldI9, goldF9, goldF9, goldL9, goldL9); 116 init(goldI10, goldF10, goldL10); 117 test10(goldI10, goldI10, goldF10, goldF10, goldL10, goldL10); 118 } 119 120 @Run(test = "test0") 121 @Warmup(100) 122 public void runTest0() { 123 int[] dataI = new int[RANGE]; 124 float[] dataF = new float[RANGE]; 125 init(dataI, dataF); 126 test0(dataI, dataI, dataF, dataF); 127 verify("test0", dataI, goldI0); 128 verify("test0", dataF, goldF0); 129 } 130 131 @Test 132 @IR(counts = {IRNode.ADD_VI, "> 0", IRNode.MUL_VF, "> 0"}, 133 applyIfPlatform = {"64-bit", "true"}, 134 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) 135 static void test0(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb) { 136 for (int i = 0; i < RANGE; i+=2) { 137 // Hand-unrolled 2x. Int and Float slice are completely separate. 138 dataIb[i+0] = dataIa[i+0] + 3; 139 dataIb[i+1] = dataIa[i+1] + 3; 140 dataFb[i+0] = dataFa[i+0] * 1.3f; 141 dataFb[i+1] = dataFa[i+1] * 1.3f; 142 } 143 } 144 145 @Run(test = "test1") 146 @Warmup(100) 147 public void runTest1() { 148 int[] dataI = new int[RANGE]; 149 float[] dataF = new float[RANGE]; 150 init(dataI, dataF); 151 test1(dataI, dataI, dataF, dataF); 152 verify("test1", dataI, goldI1); 153 verify("test1", dataF, goldF1); 154 } 155 156 @Test 157 @IR(counts = {IRNode.ADD_VI, "> 0", IRNode.MUL_VF, "> 0", IRNode.VECTOR_CAST_F2I, "> 0", IRNode.VECTOR_CAST_I2F, "> 0"}, 158 applyIfPlatform = {"64-bit", "true"}, 159 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"}) 160 static void test1(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb) { 161 for (int i = 0; i < RANGE; i+=2) { 162 // Hand-unrolled 2x. Converst to and from. StoreF -> LoadF dependency. 163 dataFa[i+0] = dataIa[i+0] + 3; 164 dataFa[i+1] = dataIa[i+1] + 3; 165 dataIb[i+0] = (int)(dataFb[i+0] * 1.3f); 166 dataIb[i+1] = (int)(dataFb[i+1] * 1.3f); 167 } 168 } 169 170 @Run(test = "test2") 171 public void runTest2() { 172 int[] dataI = new int[RANGE]; 173 float[] dataF = new float[RANGE]; 174 init(dataI, dataF); 175 test2(dataI, dataI, dataF, dataF); 176 verify("test2", dataI, goldI2); 177 verify("test2", dataF, goldF2); 178 } 179 180 @Test 181 @IR(counts = {IRNode.ADD_VI, "> 0", IRNode.MUL_VI, "> 0"}, 182 applyIfPlatform = {"64-bit", "true"}, 183 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) 184 static void test2(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb) { 185 for (int i = 0; i < RANGE; i+=2) { 186 // int and float arrays are two slices. But we pretend both are of type int. 187 unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0, dataIa[i+0] + 1); 188 unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, dataIa[i+1] + 1); 189 dataIb[i+0] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0); 190 dataIb[i+1] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4); 191 } 192 } 193 194 @Run(test = "test3") 195 @Warmup(100) 196 public void runTest3() { 197 int[] dataI = new int[RANGE]; 198 float[] dataF = new float[RANGE]; 199 init(dataI, dataF); 200 test3(dataI, dataI, dataF, dataF); 201 verify("test3", dataI, goldI3); 202 verify("test3", dataF, goldF3); 203 } 204 205 @Test 206 @IR(counts = {IRNode.ADD_VI, "> 0", IRNode.MUL_VF, "> 0"}, 207 applyIfPlatform = {"64-bit", "true"}, 208 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) 209 static void test3(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb) { 210 for (int i = 0; i < RANGE; i+=2) { 211 // Inversion of orders. But because we operate on separate slices, this should 212 // safely vectorize. It should detect that each line is independent, so it can 213 // reorder them. 214 dataIb[i+0] = dataIa[i+0] + 3; 215 dataFb[i+1] = dataFa[i+1] * 1.3f; 216 dataFb[i+0] = dataFa[i+0] * 1.3f; 217 dataIb[i+1] = dataIa[i+1] + 3; 218 } 219 } 220 221 @Run(test = "test4") 222 @Warmup(100) 223 public void runTest4() { 224 int[] dataI = new int[RANGE]; 225 float[] dataF = new float[RANGE]; 226 init(dataI, dataF); 227 test4(dataI, dataI, dataF, dataF); 228 verify("test4", dataI, goldI4); 229 verify("test4", dataF, goldF4); 230 } 231 232 @Test 233 static void test4(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb) { 234 for (int i = 0; i < RANGE; i+=2) { 235 // same as test1, except that reordering leads to different semantics 236 // [A,B] and [X,Y] are both packs that are internally independent 237 // But we have dependencies A -> X (StoreF -> LoadF) 238 // and Y -> B (StoreI -> LoadI) 239 // Hence the two packs have a cyclic dependency, we cannot schedule 240 // one before the other. 241 dataFa[i+0] = dataIa[i+0] + 3; // A 242 dataIb[i+0] = (int)(dataFb[i+0] * 1.3f); // X 243 dataIb[i+1] = (int)(dataFb[i+1] * 1.3f); // Y 244 dataFa[i+1] = dataIa[i+1] + 3; // B 245 } 246 } 247 248 @Run(test = "test5") 249 public void runTest5() { 250 int[] dataI = new int[RANGE]; 251 float[] dataF = new float[RANGE]; 252 init(dataI, dataF); 253 test5(dataI, dataI, dataF, dataF); 254 verify("test5", dataI, goldI5); 255 verify("test5", dataF, goldF5); 256 } 257 258 @Test 259 static void test5(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb) { 260 for (int i = 0; i < RANGE; i+=2) { 261 // same as test2, except that reordering leads to different semantics 262 // explanation analogue to test4 263 unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0, dataIa[i+0] + 1); // A 264 dataIb[i+0] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0); // X 265 dataIb[i+1] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4); // Y 266 unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, dataIa[i+1] + 1); // B 267 } 268 } 269 270 @Run(test = "test6") 271 public void runTest6() { 272 int[] dataI = new int[RANGE]; 273 float[] dataF = new float[RANGE]; 274 long[] dataL = new long[RANGE]; 275 init(dataI, dataF, dataL); 276 test6(dataI, dataI, dataF, dataF, dataL, dataL); 277 verify("test6", dataI, goldI6); 278 verify("test6", dataF, goldF6); 279 verify("test6", dataL, goldL6); 280 } 281 282 @Test 283 @IR(counts = {IRNode.ADD_VI, "> 0", IRNode.MUL_VI, "> 0", IRNode.ADD_VF, "> 0"}, 284 applyIfPlatform = {"64-bit", "true"}, 285 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) 286 static void test6(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb, 287 long[] dataLa, long[] dataLb) { 288 for (int i = 0; i < RANGE; i+=2) { 289 // Chain of parallelizable op and conversion 290 int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0) + 3; 291 int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4) + 3; 292 unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0, v00); 293 unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, v01); 294 int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0) * 45; 295 int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4) * 45; 296 unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0, v10); 297 unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4, v11); 298 float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0) + 0.55f; 299 float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4) + 0.55f; 300 unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0, v20); 301 unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4, v21); 302 } 303 } 304 305 @Run(test = "test7") 306 public void runTest7() { 307 int[] dataI = new int[RANGE]; 308 float[] dataF = new float[RANGE]; 309 long[] dataL = new long[RANGE]; 310 init(dataI, dataF, dataL); 311 test7(dataI, dataI, dataF, dataF, dataL, dataL); 312 verify("test7", dataI, goldI7); 313 verify("test7", dataF, goldF7); 314 verify("test7", dataL, goldL7); 315 } 316 317 @Test 318 static void test7(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb, 319 long[] dataLa, long[] dataLb) { 320 for (int i = 0; i < RANGE; i+=2) { 321 // Cycle involving 3 memory slices 322 int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0) + 3; 323 unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0, v00); 324 int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0) * 45; 325 int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4) * 45; 326 unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0, v10); 327 unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4, v11); 328 float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0) + 0.55f; 329 float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4) + 0.55f; 330 unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0, v20); 331 unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4, v21); 332 int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4) + 3; // moved down 333 unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, v01); 334 } 335 } 336 337 338 @Run(test = "test8") 339 public void runTest8() { 340 int[] dataI = new int[RANGE]; 341 float[] dataF = new float[RANGE]; 342 long[] dataL = new long[RANGE]; 343 init(dataI, dataF, dataL); 344 test8(dataI, dataI, dataF, dataF, dataL, dataL); 345 verify("test8", dataI, goldI8); 346 verify("test8", dataF, goldF8); 347 verify("test8", dataL, goldL8); 348 } 349 350 @Test 351 static void test8(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb, 352 long[] dataLa, long[] dataLb) { 353 for (int i = 0; i < RANGE; i+=2) { 354 // 2-cycle, with more ops after 355 int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0) + 3; 356 unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0, v00); 357 int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0) * 45; 358 int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4) * 45; 359 unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0, v10); 360 unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4, v11); 361 int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4) + 3; 362 unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, v01); 363 // more stuff after 364 float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0) + 0.55f; 365 float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4) + 0.55f; 366 unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0, v20); 367 unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4, v21); 368 } 369 } 370 371 @Run(test = "test9") 372 public void runTest9() { 373 int[] dataI = new int[RANGE]; 374 float[] dataF = new float[RANGE]; 375 long[] dataL = new long[RANGE]; 376 init(dataI, dataF, dataL); 377 test9(dataI, dataI, dataF, dataF, dataL, dataL); 378 verify("test9", dataI, goldI9); 379 verify("test9", dataF, goldF9); 380 verify("test9", dataL, goldL9); 381 } 382 383 @Test 384 static void test9(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb, 385 long[] dataLa, long[] dataLb) { 386 for (int i = 0; i < RANGE; i+=2) { 387 // 2-cycle, with more stuff before 388 float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0) + 0.55f; 389 float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4) + 0.55f; 390 unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0, v20); 391 unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4, v21); 392 // 2-cycle 393 int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0) + 3; 394 unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0, v00); 395 int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0) * 45; 396 int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4) * 45; 397 unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0, v10); 398 unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4, v11); 399 int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4) + 3; 400 unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, v01); 401 } 402 } 403 404 @Run(test = "test10") 405 public void runTest10() { 406 int[] dataI = new int[RANGE]; 407 float[] dataF = new float[RANGE]; 408 long[] dataL = new long[RANGE]; 409 init(dataI, dataF, dataL); 410 test10(dataI, dataI, dataF, dataF, dataL, dataL); 411 verify("test10", dataI, goldI10); 412 verify("test10", dataF, goldF10); 413 verify("test10", dataL, goldL10); 414 } 415 416 @Test 417 static void test10(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb, 418 long[] dataLa, long[] dataLb) { 419 for (int i = 0; i < RANGE; i+=2) { 420 // This creates the following graph before SuperWord: 421 // 422 // A -> R -> U 423 // S -> V -> B 424 // 425 // SuperWord analyzes the graph, and sees that [A,B] and [U,V] 426 // are adjacent, isomorphic and independent packs. However, 427 // [R,S] are not isomorphic (R mul, S add). 428 // So it vectorizes [A,B] and [U,V] this gives us this graph: 429 // 430 // -> R 431 // [A,B] -> [U,V] -+ 432 // ^ -> S | 433 // | | 434 // +------------------+ 435 // 436 // The cycle thus does not only go via packs, but also scalar ops. 437 // 438 int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0) + 3; // A 439 unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0, v00); 440 int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0) * 45; // R: constant mismatch 441 int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4) + 43; // S 442 unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0, v10); 443 unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4, v11); 444 float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0) + 0.55f; // U 445 float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4) + 0.55f; // V 446 unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0, v20); 447 unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4, v21); 448 int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4) + 3; // B: moved down 449 unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, v01); 450 } 451 } 452 453 static void init(int[] dataI, float[] dataF) { 454 for (int i = 0; i < RANGE; i++) { 455 dataI[i] = i + 1; 456 dataF[i] = i + 0.1f; 457 } 458 } 459 460 static void init(int[] dataI, float[] dataF, long[] dataL) { 461 for (int i = 0; i < RANGE; i++) { 462 dataI[i] = i + 1; 463 dataF[i] = i + 0.1f; 464 dataL[i] = i + 1; 465 } 466 } 467 468 static void verify(String name, int[] data, int[] gold) { 469 for (int i = 0; i < RANGE; i++) { 470 if (data[i] != gold[i]) { 471 throw new RuntimeException(" Invalid " + name + " result: dataI[" + i + "]: " + data[i] + " != " + gold[i]); 472 } 473 } 474 } 475 476 static void verify(String name, float[] data, float[] gold) { 477 for (int i = 0; i < RANGE; i++) { 478 int datav = unsafe.getInt(data, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i); 479 int goldv = unsafe.getInt(gold, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i); 480 if (datav != goldv) { 481 throw new RuntimeException(" Invalid " + name + " result: dataF[" + i + "]: " + datav + " != " + goldv); 482 } 483 } 484 } 485 486 static void verify(String name, long[] data, long[] gold) { 487 for (int i = 0; i < RANGE; i++) { 488 if (data[i] != gold[i]) { 489 throw new RuntimeException(" Invalid " + name + " result: dataL[" + i + "]: " + data[i] + " != " + gold[i]); 490 } 491 } 492 } 493 } 494