1 /* 2 * Copyright (c) 2024, 2025, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 */ 23 24 package compiler.loopopts.superword; 25 26 import compiler.lib.ir_framework.*; 27 import jdk.test.lib.Utils; 28 import jdk.test.whitebox.WhiteBox; 29 import jdk.internal.misc.Unsafe; 30 import java.lang.reflect.Array; 31 import java.util.Map; 32 import java.util.HashMap; 33 import java.util.Random; 34 import java.nio.ByteOrder; 35 36 /* 37 * @test id=NoAlignVector 38 * @bug 8310190 39 * @summary Test AlignVector with various loop init, stride, scale, invar, etc. 40 * @modules java.base/jdk.internal.misc 41 * @library /test/lib / 42 * @run driver compiler.loopopts.superword.TestAlignVector NoAlignVector 43 */ 44 45 /* 46 * @test id=AlignVector 47 * @bug 8310190 48 * @summary Test AlignVector with various loop init, stride, scale, invar, etc. 
49 * @modules java.base/jdk.internal.misc 50 * @library /test/lib / 51 * @run driver compiler.loopopts.superword.TestAlignVector AlignVector 52 */ 53 54 /* 55 * @test id=VerifyAlignVector 56 * @bug 8310190 57 * @summary Test AlignVector with various loop init, stride, scale, invar, etc. 58 * @modules java.base/jdk.internal.misc 59 * @library /test/lib / 60 * @run driver compiler.loopopts.superword.TestAlignVector VerifyAlignVector 61 */ 62 63 /* 64 * @test id=NoAlignVector-COH 65 * @bug 8310190 66 * @summary Test AlignVector with various loop init, stride, scale, invar, etc. 67 * @modules java.base/jdk.internal.misc 68 * @library /test/lib / 69 * @run driver compiler.loopopts.superword.TestAlignVector NoAlignVector-COH 70 */ 71 72 /* 73 * @test id=VerifyAlignVector-COH 74 * @bug 8310190 75 * @summary Test AlignVector with various loop init, stride, scale, invar, etc. 76 * @modules java.base/jdk.internal.misc 77 * @library /test/lib / 78 * @run driver compiler.loopopts.superword.TestAlignVector VerifyAlignVector-COH 79 */ 80 81 public class TestAlignVector { 82 static int RANGE = 1024*8; 83 static int RANGE_FINAL = 1024*8; 84 private static final Unsafe UNSAFE = Unsafe.getUnsafe(); 85 private static final Random RANDOM = Utils.getRandomInstance(); 86 87 // Inputs 88 byte[] aB; 89 byte[] bB; 90 byte mB = (byte)31; 91 short[] aS; 92 short[] bS; 93 short mS = (short)0xF0F0; 94 int[] aI; 95 int[] bI; 96 int mI = 0xF0F0F0F0; 97 long[] aL; 98 long[] bL; 99 long mL = 0xF0F0F0F0F0F0F0F0L; 100 101 // List of tests 102 Map<String,TestFunction> tests = new HashMap<String,TestFunction>(); 103 104 // List of gold, the results from the first run before compilation 105 Map<String,Object[]> golds = new HashMap<String,Object[]>(); 106 107 interface TestFunction { 108 Object[] run(); 109 } 110 111 public static void main(String[] args) { 112 TestFramework framework = new TestFramework(TestAlignVector.class); 113 framework.addFlags("--add-modules", "java.base", "--add-exports", 
"java.base/jdk.internal.misc=ALL-UNNAMED", 114 "-XX:+IgnoreUnrecognizedVMOptions", "-XX:LoopUnrollLimit=250"); 115 116 switch (args[0]) { 117 case "NoAlignVector" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:-AlignVector"); } 118 case "AlignVector" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:+AlignVector"); } 119 case "VerifyAlignVector" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:+AlignVector", "-XX:+IgnoreUnrecognizedVMOptions", "-XX:+VerifyAlignVector"); } 120 case "NoAlignVector-COH" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:-AlignVector"); } 121 case "VerifyAlignVector-COH" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:+AlignVector", "-XX:+IgnoreUnrecognizedVMOptions", "-XX:+VerifyAlignVector"); } 122 default -> { throw new RuntimeException("Test argument not recognized: " + args[0]); } 123 } 124 framework.start(); 125 } 126 127 public TestAlignVector() { 128 // Generate input once 129 aB = generateB(); 130 bB = generateB(); 131 aS = generateS(); 132 bS = generateS(); 133 aI = generateI(); 134 bI = generateI(); 135 aL = generateL(); 136 bL = generateL(); 137 138 // Add all tests to list 139 tests.put("test0", () -> { return test0(aB.clone(), bB.clone(), mB); }); 140 tests.put("test1a", () -> { return test1a(aB.clone(), bB.clone(), mB); }); 141 tests.put("test1b", () -> { return test1b(aB.clone(), bB.clone(), mB); }); 142 tests.put("test2", () -> { return test2(aB.clone(), bB.clone(), mB); }); 143 tests.put("test3", () -> { return test3(aB.clone(), bB.clone(), mB); }); 144 tests.put("test4", () -> { return test4(aB.clone(), bB.clone(), mB); }); 145 tests.put("test5", () -> { return test5(aB.clone(), bB.clone(), mB, 0); }); 146 tests.put("test6", () -> { return test6(aB.clone(), bB.clone(), mB); }); 147 
tests.put("test7", () -> { return test7(aS.clone(), bS.clone(), mS); }); 148 tests.put("test8", () -> { return test8(aB.clone(), bB.clone(), mB, 0); }); 149 tests.put("test8", () -> { return test8(aB.clone(), bB.clone(), mB, 1); }); 150 tests.put("test9", () -> { return test9(aB.clone(), bB.clone(), mB); }); 151 152 tests.put("test10a", () -> { return test10a(aB.clone(), bB.clone(), mB); }); 153 tests.put("test10b", () -> { return test10b(aB.clone(), bB.clone(), mB); }); 154 tests.put("test10c", () -> { return test10c(aS.clone(), bS.clone(), mS); }); 155 tests.put("test10d", () -> { return test10d(aS.clone(), bS.clone(), mS); }); 156 tests.put("test10e", () -> { return test10e(aS.clone(), bS.clone(), mS); }); 157 158 tests.put("test11aB", () -> { return test11aB(aB.clone(), bB.clone(), mB); }); 159 tests.put("test11aS", () -> { return test11aS(aS.clone(), bS.clone(), mS); }); 160 tests.put("test11aI", () -> { return test11aI(aI.clone(), bI.clone(), mI); }); 161 tests.put("test11aL", () -> { return test11aL(aL.clone(), bL.clone(), mL); }); 162 163 tests.put("test11bB", () -> { return test11bB(aB.clone(), bB.clone(), mB); }); 164 tests.put("test11bS", () -> { return test11bS(aS.clone(), bS.clone(), mS); }); 165 tests.put("test11bI", () -> { return test11bI(aI.clone(), bI.clone(), mI); }); 166 tests.put("test11bL", () -> { return test11bL(aL.clone(), bL.clone(), mL); }); 167 168 tests.put("test11cB", () -> { return test11cB(aB.clone(), bB.clone(), mB); }); 169 tests.put("test11cS", () -> { return test11cS(aS.clone(), bS.clone(), mS); }); 170 tests.put("test11cI", () -> { return test11cI(aI.clone(), bI.clone(), mI); }); 171 tests.put("test11cL", () -> { return test11cL(aL.clone(), bL.clone(), mL); }); 172 173 tests.put("test11dB", () -> { return test11dB(aB.clone(), bB.clone(), mB, 0); }); 174 tests.put("test11dS", () -> { return test11dS(aS.clone(), bS.clone(), mS, 0); }); 175 tests.put("test11dI", () -> { return test11dI(aI.clone(), bI.clone(), mI, 0); }); 176 
tests.put("test11dL", () -> { return test11dL(aL.clone(), bL.clone(), mL, 0); }); 177 178 tests.put("test12", () -> { return test12(aB.clone(), bB.clone(), mB); }); 179 180 tests.put("test13aIL", () -> { return test13aIL(aI.clone(), aL.clone()); }); 181 tests.put("test13aIB", () -> { return test13aIB(aI.clone(), aB.clone()); }); 182 tests.put("test13aIS", () -> { return test13aIS(aI.clone(), aS.clone()); }); 183 tests.put("test13aBSIL", () -> { return test13aBSIL(aB.clone(), aS.clone(), aI.clone(), aL.clone()); }); 184 185 tests.put("test13bIL", () -> { return test13bIL(aI.clone(), aL.clone()); }); 186 tests.put("test13bIB", () -> { return test13bIB(aI.clone(), aB.clone()); }); 187 tests.put("test13bIS", () -> { return test13bIS(aI.clone(), aS.clone()); }); 188 tests.put("test13bBSIL", () -> { return test13bBSIL(aB.clone(), aS.clone(), aI.clone(), aL.clone()); }); 189 190 tests.put("test14aB", () -> { return test14aB(aB.clone()); }); 191 tests.put("test14bB", () -> { return test14bB(aB.clone()); }); 192 tests.put("test14cB", () -> { return test14cB(aB.clone()); }); 193 tests.put("test14dB", () -> { return test14dB(aB.clone()); }); 194 tests.put("test14eB", () -> { return test14eB(aB.clone()); }); 195 tests.put("test14fB", () -> { return test14fB(aB.clone()); }); 196 197 tests.put("test15aB", () -> { return test15aB(aB.clone()); }); 198 tests.put("test15bB", () -> { return test15bB(aB.clone()); }); 199 tests.put("test15cB", () -> { return test15cB(aB.clone()); }); 200 201 tests.put("test16a", () -> { return test16a(aB.clone(), aS.clone()); }); 202 tests.put("test16b", () -> { return test16b(aB.clone()); }); 203 204 tests.put("test17a", () -> { return test17a(aL.clone()); }); 205 tests.put("test17b", () -> { return test17b(aL.clone()); }); 206 tests.put("test17c", () -> { return test17c(aL.clone()); }); 207 tests.put("test17d", () -> { return test17d(aL.clone()); }); 208 209 tests.put("test18a", () -> { return test18a(aB.clone(), aI.clone()); }); 210 
tests.put("test18b", () -> { return test18b(aB.clone(), aI.clone()); }); 211 212 tests.put("test19", () -> { return test19(aI.clone(), bI.clone()); }); 213 tests.put("test20", () -> { return test20(aB.clone()); }); 214 215 // Compute gold value for all test methods before compilation 216 for (Map.Entry<String,TestFunction> entry : tests.entrySet()) { 217 String name = entry.getKey(); 218 TestFunction test = entry.getValue(); 219 Object[] gold = test.run(); 220 golds.put(name, gold); 221 } 222 } 223 224 @Warmup(100) 225 @Run(test = {"test0", 226 "test1a", 227 "test1b", 228 "test2", 229 "test3", 230 "test4", 231 "test5", 232 "test6", 233 "test7", 234 "test8", 235 "test9", 236 "test10a", 237 "test10b", 238 "test10c", 239 "test10d", 240 "test10e", 241 "test11aB", 242 "test11aS", 243 "test11aI", 244 "test11aL", 245 "test11bB", 246 "test11bS", 247 "test11bI", 248 "test11bL", 249 "test11cB", 250 "test11cS", 251 "test11cI", 252 "test11cL", 253 "test11dB", 254 "test11dS", 255 "test11dI", 256 "test11dL", 257 "test12", 258 "test13aIL", 259 "test13aIB", 260 "test13aIS", 261 "test13aBSIL", 262 "test13bIL", 263 "test13bIB", 264 "test13bIS", 265 "test13bBSIL", 266 "test14aB", 267 "test14bB", 268 "test14cB", 269 "test14dB", 270 "test14eB", 271 "test14fB", 272 "test15aB", 273 "test15bB", 274 "test15cB", 275 "test16a", 276 "test16b", 277 "test17a", 278 "test17b", 279 "test17c", 280 "test17d", 281 "test18a", 282 "test18b", 283 "test19", 284 "test20"}) 285 public void runTests() { 286 for (Map.Entry<String,TestFunction> entry : tests.entrySet()) { 287 String name = entry.getKey(); 288 TestFunction test = entry.getValue(); 289 // Recall gold value from before compilation 290 Object[] gold = golds.get(name); 291 // Compute new result 292 Object[] result = test.run(); 293 // Compare gold and new result 294 verify(name, gold, result); 295 } 296 } 297 298 static byte[] generateB() { 299 byte[] a = new byte[RANGE]; 300 for (int i = 0; i < a.length; i++) { 301 a[i] = (byte)RANDOM.nextInt(); 
302 } 303 return a; 304 } 305 306 static short[] generateS() { 307 short[] a = new short[RANGE]; 308 for (int i = 0; i < a.length; i++) { 309 a[i] = (short)RANDOM.nextInt(); 310 } 311 return a; 312 } 313 314 static int[] generateI() { 315 int[] a = new int[RANGE]; 316 for (int i = 0; i < a.length; i++) { 317 a[i] = RANDOM.nextInt(); 318 } 319 return a; 320 } 321 322 static long[] generateL() { 323 long[] a = new long[RANGE]; 324 for (int i = 0; i < a.length; i++) { 325 a[i] = RANDOM.nextLong(); 326 } 327 return a; 328 } 329 330 static void verify(String name, Object[] gold, Object[] result) { 331 if (gold.length != result.length) { 332 throw new RuntimeException("verify " + name + ": not the same number of outputs: gold.length = " + 333 gold.length + ", result.length = " + result.length); 334 } 335 for (int i = 0; i < gold.length; i++) { 336 Object g = gold[i]; 337 Object r = result[i]; 338 if (g.getClass() != r.getClass() || !g.getClass().isArray() || !r.getClass().isArray()) { 339 throw new RuntimeException("verify " + name + ": must both be array of same type:" + 340 " gold[" + i + "].getClass() = " + g.getClass().getSimpleName() + 341 " result[" + i + "].getClass() = " + r.getClass().getSimpleName()); 342 } 343 if (g == r) { 344 throw new RuntimeException("verify " + name + ": should be two separate arrays (with identical content):" + 345 " gold[" + i + "] == result[" + i + "]"); 346 } 347 if (Array.getLength(g) != Array.getLength(r)) { 348 throw new RuntimeException("verify " + name + ": arrays must have same length:" + 349 " gold[" + i + "].length = " + Array.getLength(g) + 350 " result[" + i + "].length = " + Array.getLength(r)); 351 } 352 Class c = g.getClass().getComponentType(); 353 if (c == byte.class) { 354 verifyB(name, i, (byte[])g, (byte[])r); 355 } else if (c == short.class) { 356 verifyS(name, i, (short[])g, (short[])r); 357 } else if (c == int.class) { 358 verifyI(name, i, (int[])g, (int[])r); 359 } else if (c == long.class) { 360 verifyL(name, i, 
(long[])g, (long[])r); 361 } else { 362 throw new RuntimeException("verify " + name + ": array type not supported for verify:" + 363 " gold[" + i + "].getClass() = " + g.getClass().getSimpleName() + 364 " result[" + i + "].getClass() = " + r.getClass().getSimpleName()); 365 } 366 } 367 } 368 369 static void verifyB(String name, int i, byte[] g, byte[] r) { 370 for (int j = 0; j < g.length; j++) { 371 if (g[j] != r[j]) { 372 throw new RuntimeException("verify " + name + ": arrays must have same content:" + 373 " gold[" + i + "][" + j + "] = " + g[j] + 374 " result[" + i + "][" + j + "] = " + r[j]); 375 } 376 } 377 } 378 379 static void verifyS(String name, int i, short[] g, short[] r) { 380 for (int j = 0; j < g.length; j++) { 381 if (g[j] != r[j]) { 382 throw new RuntimeException("verify " + name + ": arrays must have same content:" + 383 " gold[" + i + "][" + j + "] = " + g[j] + 384 " result[" + i + "][" + j + "] = " + r[j]); 385 } 386 } 387 } 388 389 static void verifyI(String name, int i, int[] g, int[] r) { 390 for (int j = 0; j < g.length; j++) { 391 if (g[j] != r[j]) { 392 throw new RuntimeException("verify " + name + ": arrays must have same content:" + 393 " gold[" + i + "][" + j + "] = " + g[j] + 394 " result[" + i + "][" + j + "] = " + r[j]); 395 } 396 } 397 } 398 399 static void verifyL(String name, int i, long[] g, long[] r) { 400 for (int j = 0; j < g.length; j++) { 401 if (g[j] != r[j]) { 402 throw new RuntimeException("verify " + name + ": arrays must have same content:" + 403 " gold[" + i + "][" + j + "] = " + g[j] + 404 " result[" + i + "][" + j + "] = " + r[j]); 405 } 406 } 407 } 408 409 @Test 410 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0", 411 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0", 412 IRNode.STORE_VECTOR, "> 0"}, 413 applyIf = {"MaxVectorSize", ">=8"}, 414 applyIfPlatform = {"64-bit", "true"}, 415 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) 416 static Object[] test0(byte[] a, byte[] b, byte mask) { 417 
        for (int i = 0; i < RANGE; i+=8) {
            // Safe to vectorize with AlignVector
            b[i+0] = (byte)(a[i+0] & mask); // offset 0, align 0
            b[i+1] = (byte)(a[i+1] & mask);
            b[i+2] = (byte)(a[i+2] & mask);
            b[i+3] = (byte)(a[i+3] & mask);
        }
        return new Object[]{ a, b };
    }

    // 8-byte unrolled body starting at offset 0: vectorizes when the array
    // base offset is 8-byte aligned (i.e. without compact object headers).
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.AND_VB, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        // UNSAFE.ARRAY_BYTE_BASE_OFFSET = 16, but with compact object headers UNSAFE.ARRAY_BYTE_BASE_OFFSET=12.
        // If AlignVector=true, we need the offset to be 8-byte aligned, else the vectors are filtered out.
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
    static Object[] test1a(byte[] a, byte[] b, byte mask) {
        for (int i = 0; i < RANGE; i+=8) {
            b[i+0] = (byte)(a[i+0] & mask); // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 0 + iter*8
            b[i+1] = (byte)(a[i+1] & mask);
            b[i+2] = (byte)(a[i+2] & mask);
            b[i+3] = (byte)(a[i+3] & mask);
            b[i+4] = (byte)(a[i+4] & mask);
            b[i+5] = (byte)(a[i+5] & mask);
            b[i+6] = (byte)(a[i+6] & mask);
            b[i+7] = (byte)(a[i+7] & mask);
        }
        return new Object[]{ a, b };
    }

    // Same shape as test1a but init = 4: the +4 compensates for the 12-byte
    // base offset under compact object headers (12 + 4 = 16, 8-byte aligned).
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.AND_VB, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfOr = {"UseCompactObjectHeaders", "true", "AlignVector", "false"},
        // UNSAFE.ARRAY_BYTE_BASE_OFFSET = 16, but with compact object headers UNSAFE.ARRAY_BYTE_BASE_OFFSET=12.
        // If AlignVector=true, we need the offset to be 8-byte aligned, else the vectors are filtered out.
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
    static Object[] test1b(byte[] a, byte[] b, byte mask) {
        for (int i = 4; i < RANGE-8; i+=8) {
            b[i+0] = (byte)(a[i+0] & mask); // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 4 + iter*8
            b[i+1] = (byte)(a[i+1] & mask);
            b[i+2] = (byte)(a[i+2] & mask);
            b[i+3] = (byte)(a[i+3] & mask);
            b[i+4] = (byte)(a[i+4] & mask);
            b[i+5] = (byte)(a[i+5] & mask);
            b[i+6] = (byte)(a[i+6] & mask);
            b[i+7] = (byte)(a[i+7] & mask);
        }
        return new Object[]{ a, b };
    }

    // Pack at constant offset 3 within an 8-stride: vectorizes only when
    // AlignVector is off; with AlignVector the misaligned pack is rejected.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.AND_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test2(byte[] a, byte[] b, byte mask) {
        for (int i = 0; i < RANGE; i+=8) {
            // Cannot align with AlignVector: 3 + x * 8 % 8 = 3
            b[i+3] = (byte)(a[i+3] & mask); // at alignment 3
            b[i+4] = (byte)(a[i+4] & mask);
            b[i+5] = (byte)(a[i+5] & mask);
            b[i+6] = (byte)(a[i+6] & mask);
        }
        return new Object[]{ a, b };
    }

    // Like test2 but with an extra store at offset 0 competing as best_memref.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.AND_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test3(byte[] a, byte[] b, byte mask) {
        for (int i = 0; i < RANGE; i+=8) {
            // Cannot align with AlignVector: 3 + x * 8 % 8 = 3

            // Problematic for AlignVector
            b[i+0] = (byte)(a[i+0] & mask); // best_memref, align 0

            b[i+3] = (byte)(a[i+3] & mask); // pack at offset 3 bytes
            b[i+4] = (byte)(a[i+4] & mask);
            b[i+5] = (byte)(a[i+5] & mask);
            b[i+6] = (byte)(a[i+6] & mask);
        }
        return new Object[]{ a, b };
    }

    // 16-stride with two packs: a 4-pack at alignment 0 and an 8-pack at
    // alignment 5; with AlignVector only the aligned 4-pack survives.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_8, "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_8, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=16"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_8, "= 0",// unaligned
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_8, "= 0",// unaligned
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">=16"})
    static Object[] test4(byte[] a, byte[] b, byte mask) {
        for (int i = 0; i < RANGE/16; i++) {
            // Problematic for AlignVector
            b[i*16 + 0 ] = (byte)(a[i*16 + 0 ] & mask); // 4 pack, 0 aligned
            b[i*16 + 1 ] = (byte)(a[i*16 + 1 ] & mask);
            b[i*16 + 2 ] = (byte)(a[i*16 + 2 ] & mask);
            b[i*16 + 3 ] = (byte)(a[i*16 + 3 ] & mask);

            b[i*16 + 5 ] = (byte)(a[i*16 + 5 ] & mask); // 8 pack, 5 aligned
            b[i*16 + 6 ] = (byte)(a[i*16 + 6 ] & mask);
            b[i*16 + 7 ] = (byte)(a[i*16 + 7 ] & mask);
            b[i*16 + 8 ] = (byte)(a[i*16 + 8 ] & mask);
            b[i*16 + 9 ] = (byte)(a[i*16 + 9 ] & mask);
            b[i*16 + 10] = (byte)(a[i*16 + 10] & mask);
            b[i*16 + 11] = (byte)(a[i*16 + 11] & mask);
            b[i*16 + 12] = (byte)(a[i*16 + 12] & mask);
        }
        return new Object[]{ a, b };
    }

    // Loop-invariant term in the index: alignment cannot be proven, so
    // AlignVector rejects vectorization.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.AND_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test5(byte[] a, byte[] b, byte mask, int inv) {
        for (int i = 0; i < RANGE; i+=8) {
            // Cannot align with AlignVector because of invariant
            b[i+inv+0] = (byte)(a[i+inv+0] & mask);

            b[i+inv+3] = (byte)(a[i+inv+3] & mask);
            b[i+inv+4] = (byte)(a[i+inv+4] & mask);
            b[i+inv+5] = (byte)(a[i+inv+5] & mask);
            b[i+inv+6] = (byte)(a[i+inv+6] & mask);
        }
        return new Object[]{ a, b };
    }

    // Odd effective offset via i*4 with stride 2: not alignable for AlignVector.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.AND_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test6(byte[] a, byte[] b, byte mask) {
        for (int i = 0;
i < RANGE/8; i+=2) { 603 // Cannot align with AlignVector because offset is odd 604 b[i*4+0] = (byte)(a[i*4+0] & mask); 605 606 b[i*4+3] = (byte)(a[i*4+3] & mask); 607 b[i*4+4] = (byte)(a[i*4+4] & mask); 608 b[i*4+5] = (byte)(a[i*4+5] & mask); 609 b[i*4+6] = (byte)(a[i*4+6] & mask); 610 } 611 return new Object[]{ a, b }; 612 } 613 614 @Test 615 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0", 616 IRNode.AND_VS, IRNode.VECTOR_SIZE_4, "> 0", 617 IRNode.STORE_VECTOR, "> 0"}, 618 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=16"}, 619 applyIfPlatform = {"64-bit", "true"}, 620 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"}) 621 @IR(counts = {IRNode.LOAD_VECTOR_S, "= 0", 622 IRNode.AND_VS, "= 0", 623 IRNode.STORE_VECTOR, "= 0"}, 624 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 625 applyIfPlatform = {"64-bit", "true"}, 626 applyIf = {"AlignVector", "true"}) 627 static Object[] test7(short[] a, short[] b, short mask) { 628 for (int i = 0; i < RANGE/8; i+=2) { 629 // Cannot align with AlignVector because offset is odd 630 b[i*4+0] = (short)(a[i*4+0] & mask); 631 632 b[i*4+3] = (short)(a[i*4+3] & mask); 633 b[i*4+4] = (short)(a[i*4+4] & mask); 634 b[i*4+5] = (short)(a[i*4+5] & mask); 635 b[i*4+6] = (short)(a[i*4+6] & mask); 636 } 637 return new Object[]{ a, b }; 638 } 639 640 @Test 641 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0", 642 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0", 643 IRNode.STORE_VECTOR, "> 0"}, 644 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"}, 645 applyIfPlatform = {"64-bit", "true"}, 646 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) 647 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", 648 IRNode.AND_VB, "= 0", 649 IRNode.STORE_VECTOR, "= 0"}, 650 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 651 applyIfPlatform = {"64-bit", "true"}, 652 applyIf = {"AlignVector", "true"}) 653 static Object[] test8(byte[] a, byte[] b, byte mask, int init) { 654 for (int i 
= init; i < RANGE; i+=8) { 655 // Cannot align with AlignVector because of invariant (variable init becomes invar) 656 b[i+0] = (byte)(a[i+0] & mask); 657 658 b[i+3] = (byte)(a[i+3] & mask); 659 b[i+4] = (byte)(a[i+4] & mask); 660 b[i+5] = (byte)(a[i+5] & mask); 661 b[i+6] = (byte)(a[i+6] & mask); 662 } 663 return new Object[]{ a, b }; 664 } 665 666 @Test 667 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0", 668 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0", 669 IRNode.STORE_VECTOR, "> 0"}, 670 applyIf = {"MaxVectorSize", ">=8"}, 671 applyIfPlatform = {"64-bit", "true"}, 672 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) 673 static Object[] test9(byte[] a, byte[] b, byte mask) { 674 // known non-zero init value does not affect offset, but has implicit effect on iv 675 for (int i = 13; i < RANGE-8; i+=8) { 676 b[i+0] = (byte)(a[i+0] & mask); 677 678 b[i+3] = (byte)(a[i+3] & mask); 679 b[i+4] = (byte)(a[i+4] & mask); 680 b[i+5] = (byte)(a[i+5] & mask); 681 b[i+6] = (byte)(a[i+6] & mask); 682 } 683 return new Object[]{ a, b }; 684 } 685 686 @Test 687 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0", 688 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0", 689 IRNode.STORE_VECTOR, "> 0"}, 690 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 691 applyIfPlatform = {"64-bit", "true"}, 692 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"}) 693 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", 694 IRNode.AND_VB, "= 0", 695 IRNode.STORE_VECTOR, "= 0"}, 696 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 697 applyIfPlatform = {"64-bit", "true"}, 698 applyIf = {"AlignVector", "true"}) 699 static Object[] test10a(byte[] a, byte[] b, byte mask) { 700 // This is not alignable with pre-loop, because of odd init. 
701 for (int i = 3; i < RANGE-8; i+=8) { 702 b[i+0] = (byte)(a[i+0] & mask); 703 b[i+1] = (byte)(a[i+1] & mask); 704 b[i+2] = (byte)(a[i+2] & mask); 705 b[i+3] = (byte)(a[i+3] & mask); 706 } 707 return new Object[]{ a, b }; 708 } 709 710 @Test 711 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0", 712 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0", 713 IRNode.STORE_VECTOR, "> 0"}, 714 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 715 applyIfPlatform = {"64-bit", "true"}, 716 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"}) 717 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", 718 IRNode.AND_VB, "= 0", 719 IRNode.STORE_VECTOR, "= 0"}, 720 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 721 applyIfPlatform = {"64-bit", "true"}, 722 applyIf = {"AlignVector", "true"}) 723 static Object[] test10b(byte[] a, byte[] b, byte mask) { 724 // This is not alignable with pre-loop, because of odd init. 725 // Seems not correctly handled. 726 for (int i = 13; i < RANGE-8; i+=8) { 727 b[i+0] = (byte)(a[i+0] & mask); 728 b[i+1] = (byte)(a[i+1] & mask); 729 b[i+2] = (byte)(a[i+2] & mask); 730 b[i+3] = (byte)(a[i+3] & mask); 731 } 732 return new Object[]{ a, b }; 733 } 734 735 @Test 736 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0", 737 IRNode.AND_VS, IRNode.VECTOR_SIZE_4, "> 0", 738 IRNode.STORE_VECTOR, "> 0"}, 739 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 740 applyIfPlatform = {"64-bit", "true"}, 741 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=16"}) 742 @IR(counts = {IRNode.LOAD_VECTOR_S, "= 0", 743 IRNode.AND_VS, "= 0", 744 IRNode.STORE_VECTOR, "= 0"}, 745 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 746 applyIfPlatform = {"64-bit", "true"}, 747 applyIf = {"AlignVector", "true"}) 748 static Object[] test10c(short[] a, short[] b, short mask) { 749 // This is not alignable with pre-loop, because of odd init. 750 // Seems not correctly handled with MaxVectorSize >= 32. 
751 for (int i = 13; i < RANGE-8; i+=8) { 752 b[i+0] = (short)(a[i+0] & mask); 753 b[i+1] = (short)(a[i+1] & mask); 754 b[i+2] = (short)(a[i+2] & mask); 755 b[i+3] = (short)(a[i+3] & mask); 756 } 757 return new Object[]{ a, b }; 758 } 759 760 @Test 761 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0", 762 IRNode.AND_VS, IRNode.VECTOR_SIZE_4, "> 0", 763 IRNode.STORE_VECTOR, "> 0"}, 764 applyIfAnd = {"MaxVectorSize", ">=16", "UseCompactObjectHeaders", "false"}, 765 // UNSAFE.ARRAY_BYTE_BASE_OFFSET = 16, but with compact object headers UNSAFE.ARRAY_BYTE_BASE_OFFSET=12. 766 // If AlignVector=true, we need the offset to be 8-byte aligned, else the vectors are filtered out. 767 applyIfPlatform = {"64-bit", "true"}, 768 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"}) 769 static Object[] test10d(short[] a, short[] b, short mask) { 770 for (int i = 13; i < RANGE-16; i+=8) { 771 // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2*(3 + 13) + iter*16 772 b[i+0+3] = (short)(a[i+0+3] & mask); 773 b[i+1+3] = (short)(a[i+1+3] & mask); 774 b[i+2+3] = (short)(a[i+2+3] & mask); 775 b[i+3+3] = (short)(a[i+3+3] & mask); 776 } 777 return new Object[]{ a, b }; 778 } 779 780 @Test 781 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0", 782 IRNode.AND_VS, IRNode.VECTOR_SIZE_4, "> 0", 783 IRNode.STORE_VECTOR, "> 0"}, 784 applyIfAnd = {"MaxVectorSize", ">=16", "UseCompactObjectHeaders", "true"}, 785 // UNSAFE.ARRAY_BYTE_BASE_OFFSET = 16, but with compact object headers UNSAFE.ARRAY_BYTE_BASE_OFFSET=12. 786 // If AlignVector=true, we need the offset to be 8-byte aligned, else the vectors are filtered out. 
applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
    static Object[] test10e(short[] a, short[] b, short mask) {
        for (int i = 11; i < RANGE-16; i+=8) {
            // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2*(3 + 11) + iter*16
            b[i+0+3] = (short)(a[i+0+3] & mask);
            b[i+1+3] = (short)(a[i+1+3] & mask);
            b[i+2+3] = (short)(a[i+2+3] & mask);
            b[i+3+3] = (short)(a[i+3+3] & mask);
        }
        return new Object[]{ a, b };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.AND_VB, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test11aB(byte[] a, byte[] b, byte mask) {
        for (int i = 0; i < RANGE; i++) {
            // always alignable
            b[i+0] = (byte)(a[i+0] & mask);
        }
        return new Object[]{ a, b };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.AND_VS, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test11aS(short[] a, short[] b, short mask) {
        for (int i = 0; i < RANGE; i++) {
            // always alignable
            b[i+0] = (short)(a[i+0] & mask);
        }
        return new Object[]{ a, b };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
                  IRNode.AND_VI, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test11aI(int[] a, int[] b, int mask) {
        for (int i = 0; i < RANGE; i++) {
            // always alignable
            b[i+0] = (int)(a[i+0] & mask);
        }
        return new Object[]{ a, b };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
                  IRNode.AND_VL, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test11aL(long[] a, long[] b, long mask) {
        for (int i = 0; i < RANGE; i++) {
            // always alignable
            b[i+0] = (long)(a[i+0] & mask);
        }
        return new Object[]{ a, b };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.AND_VB, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test11bB(byte[] a, byte[] b, byte mask) {
        for (int i = 1; i < RANGE; i++) {
            // always alignable
            b[i+0] = (byte)(a[i+0] & mask);
        }
        return new Object[]{ a, b };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.AND_VS, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test11bS(short[] a, short[] b, short mask) {
        for (int i = 1; i < RANGE; i++) {
            // always alignable
            b[i+0] = (short)(a[i+0] & mask);
        }
        return new Object[]{ a, b };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
                  IRNode.AND_VI, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test11bI(int[] a, int[] b, int mask) {
        for (int i = 1; i < RANGE; i++) {
            // always alignable
            b[i+0] = (int)(a[i+0] & mask);
        }
        return new Object[]{ a, b };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
                  IRNode.AND_VL, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test11bL(long[] a, long[] b, long mask) {
        for (int i = 1; i < RANGE; i++) {
            // always alignable
            b[i+0] = (long)(a[i+0] & mask);
        }
        return new Object[]{ a, b };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.AND_VB, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "false"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.AND_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test11cB(byte[] a, byte[] b, byte mask) {
        for (int i = 1; i < RANGE-1; i++) {
            // 1 byte offset -> not alignable with AlignVector
            b[i+0] = (byte)(a[i+1] & mask);
        }
        return new Object[]{ a, b };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.AND_VS, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "false"})
    @IR(counts = {IRNode.LOAD_VECTOR_S, "= 0",
                  IRNode.AND_VS, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test11cS(short[] a, short[] b, short mask) {
        for (int i = 1; i < RANGE-1; i++) {
            // 2 byte offset -> not alignable with AlignVector
            b[i+0] = (short)(a[i+1] & mask);
        }
        return new Object[]{ a, b };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
                  IRNode.AND_VI, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "false"})
    @IR(counts = {IRNode.LOAD_VECTOR_I, "= 0",
                  IRNode.AND_VI, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test11cI(int[] a, int[] b, int mask) {
        for (int i = 1; i < RANGE-1; i++) {
            // 4 byte offset -> not alignable with AlignVector
            b[i+0] = (int)(a[i+1] & mask);
        }
        return new Object[]{ a, b };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
                  IRNode.AND_VL, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test11cL(long[] a, long[] b, long mask) {
        for (int i = 1; i < RANGE-1; i++) {
            // always alignable (8 byte offset)
            b[i+0] = (long)(a[i+1] & mask);
        }
        return new Object[]{ a, b };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.AND_VB, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test11dB(byte[] a, byte[] b, byte mask, int invar) {
        for (int i = 0; i < RANGE; i++) {
            b[i+0+invar] = (byte)(a[i+0+invar] & mask);
        }
        return new Object[]{ a, b };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.AND_VS, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test11dS(short[] a, short[] b, short mask, int invar) {
        for (int i = 0; i < RANGE; i++) {
            b[i+0+invar] = (short)(a[i+0+invar] & mask);
        }
        return new Object[]{ a, b };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
                  IRNode.AND_VI, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test11dI(int[] a, int[] b, int mask, int invar) {
        for (int i = 0; i < RANGE; i++) {
            b[i+0+invar] = (int)(a[i+0+invar] & mask);
        }
        return new Object[]{ a, b };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
                  IRNode.AND_VL, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test11dL(long[] a, long[] b, long mask, int invar) {
        for (int i = 0; i < RANGE; i++) {
            b[i+0+invar] = (long)(a[i+0+invar] & mask);
        }
        return new Object[]{ a, b };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_byte, 4)", "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE + "min(max_byte, 4)", "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "false"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test12(byte[] a, byte[] b, byte mask) {
        for (int i = 0; i < RANGE/16; i++) {
            // Non-power-of-2 stride. Vectorization of 4 bytes, then 2-bytes gap.
            b[i*6 + 0 ] = (byte)(a[i*6 + 0 ] & mask);
            b[i*6 + 1 ] = (byte)(a[i*6 + 1 ] & mask);
            b[i*6 + 2 ] = (byte)(a[i*6 + 2 ] & mask);
            b[i*6 + 3 ] = (byte)(a[i*6 + 3 ] & mask);
        }
        return new Object[]{ a, b };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
                  IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
                  IRNode.ADD_VI, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
                  IRNode.ADD_VL, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true"})
    // require avx to ensure vectors are larger than what unrolling produces
    static Object[] test13aIL(int[] a, long[] b) {
        for (int i = 0; i < RANGE; i++) {
            a[i]++;
            b[i]++;
        }
        return new Object[]{ a, b };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.LOAD_VECTOR_I, "> 0",
                  IRNode.ADD_VB, "> 0",
                  IRNode.ADD_VI, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
    static Object[] test13aIB(int[] a, byte[] b) {
        for (int i = 0; i < RANGE; i++) {
            // a is int[]:
            // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4*iter
            //     = 16 (or 12 if UseCompactObjectHeaders=true)
            a[i]++;
            // b is byte[]:
            // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 1*iter
            //     = 16 (or 12 if UseCompactObjectHeaders=true)
            b[i]++;
            // For AlignVector, all adr must be 8-byte aligned. Let's see for which iteration this can hold:
            // If UseCompactObjectHeaders=false:
            //   a aligns on iterations: 0, 2, 4, 6, 8, ...
            //   b aligns on iterations: 0, 8, 16, 24, 32, ...
            //   -> Ok, aligns every 8th iteration.
            // If UseCompactObjectHeaders=true:
            //   a aligns on iterations: 1, 3, 5, 7, 9, ...
            //   b aligns on iterations: 4, 12, 20, 28, 36, ...
            //   -> we can never align both vectors!
        }
        return new Object[]{ a, b };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
                  IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.ADD_VI, "> 0",
                  IRNode.ADD_VS, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
    static Object[] test13aIS(int[] a, short[] b) {
        for (int i = 0; i < RANGE; i++) {
            // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4*iter
            //     = 16 (or 12 if UseCompactObjectHeaders=true)
            a[i]++;
            // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2*iter
            //     = 16 (or 12 if UseCompactObjectHeaders=true)
            b[i]++;
            // For AlignVector, all adr must be 8-byte aligned. Let's see for which iteration this can hold:
            // If UseCompactObjectHeaders=false:
            //   a: iter % 2 == 0
            //   b: iter % 4 == 0
            //   -> Ok, aligns every 4th iteration.
            // If UseCompactObjectHeaders=true:
            //   a: iter % 2 = 1
            //   b: iter % 4 = 2
            //   -> we can never align both vectors!
        }
        return new Object[]{ a, b };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.LOAD_VECTOR_I, "> 0",
                  IRNode.LOAD_VECTOR_L, "> 0",
                  IRNode.ADD_VB, "> 0",
                  IRNode.ADD_VS, "> 0",
                  IRNode.ADD_VI, "> 0",
                  IRNode.ADD_VL, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
    static Object[] test13aBSIL(byte[] a, short[] b, int[] c, long[] d) {
        for (int i = 0; i < RANGE; i++) {
            // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 1*iter
            //     = 16 (or 12 if UseCompactObjectHeaders=true)
            a[i]++;
            // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2*iter
            //     = 16 (or 12 if UseCompactObjectHeaders=true)
            b[i]++;
            // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4*iter
            //     = 16 (or 12 if UseCompactObjectHeaders=true)
            c[i]++;
            // adr = base + UNSAFE.ARRAY_LONG_BASE_OFFSET + 8*iter
            //     = 16 (always)
            d[i]++;
            // If AlignVector and UseCompactObjectHeaders, and we want all adr 8-byte aligned:
            //   a: iter % 8 = 4
            //   c: iter % 2 = 1
            //   -> can never align both vectors!
        }
        return new Object[]{ a, b, c, d };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
                  IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
                  IRNode.ADD_VI, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
                  IRNode.ADD_VL, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true"})
    // require avx to ensure vectors are larger than what unrolling produces
    static Object[] test13bIL(int[] a, long[] b) {
        for (int i = 1; i < RANGE; i++) {
            a[i]++;
            b[i]++;
        }
        return new Object[]{ a, b };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.LOAD_VECTOR_I, "> 0",
                  IRNode.ADD_VB, "> 0",
                  IRNode.ADD_VI, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
    static Object[] test13bIB(int[] a, byte[] b) {
        for (int i = 1; i < RANGE; i++) {
            // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4 + 4*iter
            //     = 16 (or 12 if UseCompactObjectHeaders=true)
            a[i]++;
            // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 1 + 1*iter
            //     = 16 (or 12 if UseCompactObjectHeaders=true)
            b[i]++;
            // If AlignVector and UseCompactObjectHeaders, and we want all adr 8-byte aligned:
            //   a: iter % 2 = 0
            //   b: iter % 8 = 3
            //   -> can never align both vectors!
        }
        return new Object[]{ a, b };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
                  IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.ADD_VI, "> 0",
                  IRNode.ADD_VS, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
    static Object[] test13bIS(int[] a, short[] b) {
        for (int i = 1; i < RANGE; i++) {
            // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4 + 4*iter
            //     = 16 (or 12 if UseCompactObjectHeaders=true)
            a[i]++;
            // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2 + 2*iter
            //     = 16 (or 12 if UseCompactObjectHeaders=true)
            b[i]++;
            // If AlignVector and UseCompactObjectHeaders, and we want all adr 8-byte aligned:
            //   a: iter % 2 = 0
            //   b: iter % 4 = 1
            //   -> can never align both vectors!
        }
        return new Object[]{ a, b };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.LOAD_VECTOR_I, "> 0",
                  IRNode.LOAD_VECTOR_L, "> 0",
                  IRNode.ADD_VB, "> 0",
                  IRNode.ADD_VS, "> 0",
                  IRNode.ADD_VI, "> 0",
                  IRNode.ADD_VL, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
    static Object[] test13bBSIL(byte[] a, short[] b, int[] c, long[] d) {
        for (int i = 1; i < RANGE; i++) {
            // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 1 + 1*iter
            //     = 16 (or 12 if UseCompactObjectHeaders=true)
            a[i]++;
            // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2 + 2*iter
            //     = 16 (or 12 if UseCompactObjectHeaders=true)
            b[i]++;
            // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4 + 4*iter
            //     = 16 (or 12 if UseCompactObjectHeaders=true)
            c[i]++;
            // adr = base + UNSAFE.ARRAY_LONG_BASE_OFFSET + 8 + 8*iter
            //     = 16 (always)
            d[i]++;
            // If AlignVector and UseCompactObjectHeaders, and we want all adr 8-byte aligned:
            //   a: iter % 8 = 3
            //   c: iter % 2 = 0
            //   -> can never align both vectors!
        }
        return new Object[]{ a, b, c, d };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.ADD_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "false"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.ADD_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test14aB(byte[] a) {
        // non-power-of-2 stride
        for (int i = 0; i < RANGE-20; i+=9) {
            // Since the stride is shorter than the vector length, there will be always
            // partial overlap of loads with previous stores, this leads to failure in
            // store-to-load-forwarding -> vectorization not profitable.
            a[i+0]++;
            a[i+1]++;
            a[i+2]++;
            a[i+3]++;
            a[i+4]++;
            a[i+5]++;
            a[i+6]++;
            a[i+7]++;
            a[i+8]++;
            a[i+9]++;
            a[i+10]++;
            a[i+11]++;
            a[i+12]++;
            a[i+13]++;
            a[i+14]++;
            a[i+15]++;
        }
        return new Object[]{ a };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.ADD_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "false"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.ADD_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test14bB(byte[] a) {
        // non-power-of-2 stride
        for (int i = 0; i < RANGE-20; i+=3) {
            // Since the stride is shorter than the vector length, there will be always
            // partial overlap of loads with previous stores, this leads to failure in
            // store-to-load-forwarding -> vectorization not profitable.
            a[i+0]++;
            a[i+1]++;
            a[i+2]++;
            a[i+3]++;
            a[i+4]++;
            a[i+5]++;
            a[i+6]++;
            a[i+7]++;
            a[i+8]++;
            a[i+9]++;
            a[i+10]++;
            a[i+11]++;
            a[i+12]++;
            a[i+13]++;
            a[i+14]++;
            a[i+15]++;
        }
        return new Object[]{ a };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.ADD_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "false"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.ADD_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test14cB(byte[] a) {
        // non-power-of-2 stride
        for (int i = 0; i < RANGE-20; i+=5) {
            // Since the stride is shorter than the vector length, there will be always
            // partial overlap of loads with previous stores, this leads to failure in
            // store-to-load-forwarding -> vectorization not profitable.
            a[i+0]++;
            a[i+1]++;
            a[i+2]++;
            a[i+3]++;
            a[i+4]++;
            a[i+5]++;
            a[i+6]++;
            a[i+7]++;
            a[i+8]++;
            a[i+9]++;
            a[i+10]++;
            a[i+11]++;
            a[i+12]++;
            a[i+13]++;
            a[i+14]++;
            a[i+15]++;
        }
        return new Object[]{ a };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_byte, 8)", "> 0",
                  IRNode.ADD_VB, IRNode.VECTOR_SIZE + "min(max_byte, 8)", "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "false"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.ADD_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test14dB(byte[] a) {
        // non-power-of-2 stride
        for (int i = 0; i < RANGE-20; i+=9) {
            a[i+0]++;
            a[i+1]++;
            a[i+2]++;
            a[i+3]++;
            a[i+4]++;
            a[i+5]++;
            a[i+6]++;
            a[i+7]++;
        }
        return new Object[]{ a };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_byte, 8)", "> 0",
                  IRNode.ADD_VB, IRNode.VECTOR_SIZE + "min(max_byte, 8)", "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "false"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.ADD_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test14eB(byte[] a) {
        // non-power-of-2 stride
        for (int i = 0; i < RANGE-32; i+=11) {
            a[i+0]++;
            a[i+1]++;
            a[i+2]++;
            a[i+3]++;
            a[i+4]++;
            a[i+5]++;
            a[i+6]++;
            a[i+7]++;
        }
        return new Object[]{ a };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_byte, 8)", "> 0",
                  IRNode.ADD_VB, IRNode.VECTOR_SIZE + "min(max_byte, 8)", "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "false"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.ADD_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test14fB(byte[] a) {
        // non-power-of-2 stride
        for (int i = 0; i < RANGE-40; i+=12) {
            a[i+0]++;
            a[i+1]++;
            a[i+2]++;
            a[i+3]++;
            a[i+4]++;
            a[i+5]++;
            a[i+6]++;
            a[i+7]++;
        }
        return new Object[]{ a };
    }

    @Test
    // IR rules difficult because of modulo wrapping with offset after peeling.
    static Object[] test15aB(byte[] a) {
        // non-power-of-2 scale
        for (int i = 0; i < RANGE/64-20; i++) {
            a[53*i+0]++;
            a[53*i+1]++;
            a[53*i+2]++;
            a[53*i+3]++;
            a[53*i+4]++;
            a[53*i+5]++;
            a[53*i+6]++;
            a[53*i+7]++;
            a[53*i+8]++;
            a[53*i+9]++;
            a[53*i+10]++;
            a[53*i+11]++;
            a[53*i+12]++;
            a[53*i+13]++;
            a[53*i+14]++;
            a[53*i+15]++;
        }
        return new Object[]{ a };
    }

    @Test
    // IR rules difficult because of modulo wrapping with offset after peeling.
    static Object[] test15bB(byte[] a) {
        // non-power-of-2 scale
        for (int i = 0; i < RANGE/64-20; i++) {
            a[25*i+0]++;
            a[25*i+1]++;
            a[25*i+2]++;
            a[25*i+3]++;
            a[25*i+4]++;
            a[25*i+5]++;
            a[25*i+6]++;
            a[25*i+7]++;
            a[25*i+8]++;
            a[25*i+9]++;
            a[25*i+10]++;
            a[25*i+11]++;
            a[25*i+12]++;
            a[25*i+13]++;
            a[25*i+14]++;
            a[25*i+15]++;
        }
        return new Object[]{ a };
    }

    @Test
    // IR rules difficult because of modulo wrapping with offset after peeling.
    static Object[] test15cB(byte[] a) {
        // non-power-of-2 scale
        for (int i = 0; i < RANGE/64-20; i++) {
            a[19*i+0]++;
            a[19*i+1]++;
            a[19*i+2]++;
            a[19*i+3]++;
            a[19*i+4]++;
            a[19*i+5]++;
            a[19*i+6]++;
            a[19*i+7]++;
            a[19*i+8]++;
            a[19*i+9]++;
            a[19*i+10]++;
            a[19*i+11]++;
            a[19*i+12]++;
            a[19*i+13]++;
            a[19*i+14]++;
            a[19*i+15]++;
        }
        return new Object[]{ a };
    }

    @Test
    static Object[] test16a(byte[] a, short[] b) {
        // infinite loop issues
        for (int i = 0; i < RANGE/2-20; i++) {
            a[2*i+0]++;
            a[2*i+1]++;
            a[2*i+2]++;
            a[2*i+3]++;
            a[2*i+4]++;
            a[2*i+5]++;
            a[2*i+6]++;
            a[2*i+7]++;
            a[2*i+8]++;
            a[2*i+9]++;
            a[2*i+10]++;
            a[2*i+11]++;
            a[2*i+12]++;
            a[2*i+13]++;
            a[2*i+14]++;

            b[2*i+0]++;
            b[2*i+1]++;
            b[2*i+2]++;
            b[2*i+3]++;
        }
        return new Object[]{ a, b };
    }

    @Test
    static Object[] test16b(byte[] a) {
        // infinite loop issues
        for (int i = 0; i < RANGE/2-20; i++) {
            a[2*i+0]++;
            a[2*i+1]++;
            a[2*i+2]++;
            a[2*i+3]++;
            a[2*i+4]++;
            a[2*i+5]++;
            a[2*i+6]++;
            a[2*i+7]++;
            a[2*i+8]++;
            a[2*i+9]++;
            a[2*i+10]++;
            a[2*i+11]++;
            a[2*i+12]++;
            a[2*i+13]++;
            a[2*i+14]++;
        }
        return new Object[]{ a };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
                  IRNode.ADD_VL, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test17a(long[] a) {
        // Unsafe: vectorizes with profiling (not xcomp)
        for (int i = 0; i < RANGE; i++) {
            long adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8L * i;
            long v = UNSAFE.getLongUnaligned(a, adr);
            UNSAFE.putLongUnaligned(a, adr, v + 1);
        }
        return new Object[]{ a };
    }

    @Test
    // Difficult to write good IR rule. Modulo calculus overflow can create non-power-of-2 packs.
    static Object[] test17b(long[] a) {
        // Not alignable
        for (int i = 0; i < RANGE-1; i++) {
            long adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8L * i + 1;
            long v = UNSAFE.getLongUnaligned(a, adr);
            UNSAFE.putLongUnaligned(a, adr, v + 1);
        }
        return new Object[]{ a };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.ADD_VL, IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIf = {"MaxVectorSize", ">=32"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test17c(long[] a) {
        // Unsafe: aligned vectorizes
        for (int i = 0; i < RANGE-1; i+=4) {
            long adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8L * i;
            long v0 = UNSAFE.getLongUnaligned(a, adr + 0);
            long v1 = UNSAFE.getLongUnaligned(a, adr + 8);
            UNSAFE.putLongUnaligned(a, adr + 0, v0 + 1);
            UNSAFE.putLongUnaligned(a, adr + 8, v1 + 1);
        }
        return new Object[]{ a };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.ADD_VL, IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"avx512", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=64"})
    // Ensure vector width is large enough to fit 64 byte for longs:
    // The offsets are: 25, 33, 57, 65
    // In modulo 32:    25,  1, 25,  1 -> does not vectorize
    // In modulo 64:    25, 33, 57,  1 -> at least first pair vectorizes
    // This problem is because we compute modulo vector width in memory_alignment.
    @IR(counts = {IRNode.LOAD_VECTOR_L, "= 0",
                  IRNode.ADD_VL, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test17d(long[] a) {
        // Not alignable
        for (int i = 0; i < RANGE-1; i+=4) {
            long adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8L * i + 1;
            long v0 = UNSAFE.getLongUnaligned(a, adr + 0);
            long v1 = UNSAFE.getLongUnaligned(a, adr + 8);
            UNSAFE.putLongUnaligned(a, adr + 0, v0 + 1);
            UNSAFE.putLongUnaligned(a, adr + 8, v1 + 1);
        }
        return new Object[]{ a };
    }

    @Test
    static Object[] test18a(byte[] a, int[] b) {
        // scale = 0 --> no iv
        for (int i = 0; i < RANGE; i++) {
            a[0] = 1;
            b[i] = 2;
            a[1] = 1;
        }
        return new Object[]{ a, b };
    }

    @Test
    static Object[] test18b(byte[] a, int[] b) {
        // scale = 0 --> no iv
        for (int i = 0; i < RANGE; i++) {
            a[1] = 1;
            b[i] = 2;
            a[2] = 1;
        }
        return new Object[]{ a, b };
    }

    @Test
    static Object[] test19(int[] a, int[] b) {
        // Loop counts down, but the accesses a[RANGE_FINAL - i] run forward.
        for (int i = 5000; i > 0; i--) {
            a[RANGE_FINAL - i] = b[RANGE_FINAL - i];
        }
        return new Object[]{ a, b };
    }

    @Test
    static Object[] test20(byte[] a) {
        // Example where it is easy to pass alignment check,
        // but used to fail the alignment calculation
        for (int i = 1; i < RANGE/2-50; i++) {
            a[2*i+0+30]++;
            a[2*i+1+30]++;
            a[2*i+2+30]++;
            a[2*i+3+30]++;
        }
        return new Object[]{ a };
    }
}