1 /* 2 * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 */ 23 24 package compiler.loopopts.superword; 25 26 import compiler.lib.ir_framework.*; 27 import jdk.test.lib.Utils; 28 import jdk.test.whitebox.WhiteBox; 29 import jdk.internal.misc.Unsafe; 30 import java.lang.reflect.Array; 31 import java.util.Map; 32 import java.util.HashMap; 33 import java.util.Random; 34 import java.nio.ByteOrder; 35 36 /* 37 * @test id=NoAlignVector 38 * @bug 8310190 39 * @summary Test AlignVector with various loop init, stride, scale, invar, etc. 40 * @modules java.base/jdk.internal.misc 41 * @library /test/lib / 42 * @run driver compiler.loopopts.superword.TestAlignVector NoAlignVector 43 */ 44 45 /* 46 * @test id=AlignVector 47 * @bug 8310190 48 * @summary Test AlignVector with various loop init, stride, scale, invar, etc. 
49 * @modules java.base/jdk.internal.misc 50 * @library /test/lib / 51 * @run driver compiler.loopopts.superword.TestAlignVector AlignVector 52 */ 53 54 /* 55 * @test id=VerifyAlignVector 56 * @bug 8310190 57 * @summary Test AlignVector with various loop init, stride, scale, invar, etc. 58 * @modules java.base/jdk.internal.misc 59 * @library /test/lib / 60 * @run driver compiler.loopopts.superword.TestAlignVector VerifyAlignVector 61 */ 62 63 /* 64 * @test id=NoAlignVector-COH 65 * @bug 8310190 66 * @summary Test AlignVector with various loop init, stride, scale, invar, etc. 67 * @modules java.base/jdk.internal.misc 68 * @library /test/lib / 69 * @run driver compiler.loopopts.superword.TestAlignVector NoAlignVector-COH 70 */ 71 72 /* 73 * @test id=VerifyAlignVector-COH 74 * @bug 8310190 75 * @summary Test AlignVector with various loop init, stride, scale, invar, etc. 76 * @modules java.base/jdk.internal.misc 77 * @library /test/lib / 78 * @run driver compiler.loopopts.superword.TestAlignVector VerifyAlignVector-COH 79 */ 80 81 public class TestAlignVector { 82 static int RANGE = 1024*8; 83 static int RANGE_FINAL = 1024*8; 84 private static final Unsafe UNSAFE = Unsafe.getUnsafe(); 85 private static final Random RANDOM = Utils.getRandomInstance(); 86 87 // Inputs 88 byte[] aB; 89 byte[] bB; 90 byte mB = (byte)31; 91 short[] aS; 92 short[] bS; 93 short mS = (short)0xF0F0; 94 int[] aI; 95 int[] bI; 96 int mI = 0xF0F0F0F0; 97 long[] aL; 98 long[] bL; 99 long mL = 0xF0F0F0F0F0F0F0F0L; 100 101 // List of tests 102 Map<String,TestFunction> tests = new HashMap<String,TestFunction>(); 103 104 // List of gold, the results from the first run before compilation 105 Map<String,Object[]> golds = new HashMap<String,Object[]>(); 106 107 interface TestFunction { 108 Object[] run(); 109 } 110 111 public static void main(String[] args) { 112 TestFramework framework = new TestFramework(TestAlignVector.class); 113 framework.addFlags("--add-modules", "java.base", "--add-exports", 
"java.base/jdk.internal.misc=ALL-UNNAMED", 114 "-XX:+IgnoreUnrecognizedVMOptions", "-XX:LoopUnrollLimit=250"); 115 116 switch (args[0]) { 117 case "NoAlignVector" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:-AlignVector"); } 118 case "AlignVector" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:+AlignVector"); } 119 case "VerifyAlignVector" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:+AlignVector", "-XX:+IgnoreUnrecognizedVMOptions", "-XX:+VerifyAlignVector"); } 120 case "NoAlignVector-COH" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:-AlignVector"); } 121 case "VerifyAlignVector-COH" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:+AlignVector", "-XX:+IgnoreUnrecognizedVMOptions", "-XX:+VerifyAlignVector"); } 122 default -> { throw new RuntimeException("Test argument not recognized: " + args[0]); } 123 } 124 framework.start(); 125 } 126 127 public TestAlignVector() { 128 // Generate input once 129 aB = generateB(); 130 bB = generateB(); 131 aS = generateS(); 132 bS = generateS(); 133 aI = generateI(); 134 bI = generateI(); 135 aL = generateL(); 136 bL = generateL(); 137 138 // Add all tests to list 139 tests.put("test0", () -> { return test0(aB.clone(), bB.clone(), mB); }); 140 tests.put("test1", () -> { return test1(aB.clone(), bB.clone(), mB); }); 141 tests.put("test2", () -> { return test2(aB.clone(), bB.clone(), mB); }); 142 tests.put("test3", () -> { return test3(aB.clone(), bB.clone(), mB); }); 143 tests.put("test4", () -> { return test4(aB.clone(), bB.clone(), mB); }); 144 tests.put("test5", () -> { return test5(aB.clone(), bB.clone(), mB, 0); }); 145 tests.put("test6", () -> { return test6(aB.clone(), bB.clone(), mB); }); 146 tests.put("test7", () -> { return test7(aS.clone(), bS.clone(), mS); }); 147 
// Every registration needs a unique key: Map.put replaces an existing mapping, so
        // registering both test8 variants as "test8" would silently drop the init = 0 run.
        // The keys are only used for the gold lookup and in verification failure messages.
        tests.put("test8_init0", () -> { return test8(aB.clone(), bB.clone(), mB, 0); });
        tests.put("test8_init1", () -> { return test8(aB.clone(), bB.clone(), mB, 1); });
        tests.put("test9", () -> { return test9(aB.clone(), bB.clone(), mB); });

        tests.put("test10a", () -> { return test10a(aB.clone(), bB.clone(), mB); });
        tests.put("test10b", () -> { return test10b(aB.clone(), bB.clone(), mB); });
        tests.put("test10c", () -> { return test10c(aS.clone(), bS.clone(), mS); });
        tests.put("test10d", () -> { return test10d(aS.clone(), bS.clone(), mS); });

        tests.put("test11aB", () -> { return test11aB(aB.clone(), bB.clone(), mB); });
        tests.put("test11aS", () -> { return test11aS(aS.clone(), bS.clone(), mS); });
        tests.put("test11aI", () -> { return test11aI(aI.clone(), bI.clone(), mI); });
        tests.put("test11aL", () -> { return test11aL(aL.clone(), bL.clone(), mL); });

        tests.put("test11bB", () -> { return test11bB(aB.clone(), bB.clone(), mB); });
        tests.put("test11bS", () -> { return test11bS(aS.clone(), bS.clone(), mS); });
        tests.put("test11bI", () -> { return test11bI(aI.clone(), bI.clone(), mI); });
        tests.put("test11bL", () -> { return test11bL(aL.clone(), bL.clone(), mL); });

        tests.put("test11cB", () -> { return test11cB(aB.clone(), bB.clone(), mB); });
        tests.put("test11cS", () -> { return test11cS(aS.clone(), bS.clone(), mS); });
        tests.put("test11cI", () -> { return test11cI(aI.clone(), bI.clone(), mI); });
        tests.put("test11cL", () -> { return test11cL(aL.clone(), bL.clone(), mL); });

        // The invariant offset is passed as 0 so that all accesses stay in bounds.
        tests.put("test11dB", () -> { return test11dB(aB.clone(), bB.clone(), mB, 0); });
        tests.put("test11dS", () -> { return test11dS(aS.clone(), bS.clone(), mS, 0); });
        tests.put("test11dI", () -> { return test11dI(aI.clone(), bI.clone(), mI, 0); });
        tests.put("test11dL", () -> { return test11dL(aL.clone(), bL.clone(), mL, 0); });

        tests.put("test12", () -> { return test12(aB.clone(), bB.clone(), mB); });
tests.put("test13aIL", () -> { return test13aIL(aI.clone(), aL.clone()); });
        tests.put("test13aIB", () -> { return test13aIB(aI.clone(), aB.clone()); });
        tests.put("test13aIS", () -> { return test13aIS(aI.clone(), aS.clone()); });
        tests.put("test13aBSIL", () -> { return test13aBSIL(aB.clone(), aS.clone(), aI.clone(), aL.clone()); });

        tests.put("test13bIL", () -> { return test13bIL(aI.clone(), aL.clone()); });
        tests.put("test13bIB", () -> { return test13bIB(aI.clone(), aB.clone()); });
        tests.put("test13bIS", () -> { return test13bIS(aI.clone(), aS.clone()); });
        tests.put("test13bBSIL", () -> { return test13bBSIL(aB.clone(), aS.clone(), aI.clone(), aL.clone()); });

        tests.put("test14aB", () -> { return test14aB(aB.clone()); });
        tests.put("test14bB", () -> { return test14bB(aB.clone()); });
        tests.put("test14cB", () -> { return test14cB(aB.clone()); });
        tests.put("test14dB", () -> { return test14dB(aB.clone()); });
        tests.put("test14eB", () -> { return test14eB(aB.clone()); });
        tests.put("test14fB", () -> { return test14fB(aB.clone()); });

        tests.put("test15aB", () -> { return test15aB(aB.clone()); });
        tests.put("test15bB", () -> { return test15bB(aB.clone()); });
        tests.put("test15cB", () -> { return test15cB(aB.clone()); });

        tests.put("test16a", () -> { return test16a(aB.clone(), aS.clone()); });
        tests.put("test16b", () -> { return test16b(aB.clone()); });

        tests.put("test17a", () -> { return test17a(aL.clone()); });
        tests.put("test17b", () -> { return test17b(aL.clone()); });
        tests.put("test17c", () -> { return test17c(aL.clone()); });
        tests.put("test17d", () -> { return test17d(aL.clone()); });

        tests.put("test18a", () -> { return test18a(aB.clone(), aI.clone()); });
        tests.put("test18b", () -> { return test18b(aB.clone(), aI.clone()); });

        tests.put("test19", () -> { return test19(aI.clone(), bI.clone()); });
        tests.put("test20", () -> { return test20(aB.clone()); });

        // Run every registered test once now and keep its output as the gold value
        // (this first run happens before compilation).
        tests.forEach((name, test) -> golds.put(name, test.run()));
    }

    // Re-runs every registered test and checks its output against the gold value
    // recorded in the constructor.
    @Warmup(100)
    @Run(test = {"test0",
                 "test1",
                 "test2",
                 "test3",
                 "test4",
                 "test5",
                 "test6",
                 "test7",
                 "test8",
                 "test9",
                 "test10a",
                 "test10b",
                 "test10c",
                 "test10d",
                 "test11aB",
                 "test11aS",
                 "test11aI",
                 "test11aL",
                 "test11bB",
                 "test11bS",
                 "test11bI",
                 "test11bL",
                 "test11cB",
                 "test11cS",
                 "test11cI",
                 "test11cL",
                 "test11dB",
                 "test11dS",
                 "test11dI",
                 "test11dL",
                 "test12",
                 "test13aIL",
                 "test13aIB",
                 "test13aIS",
                 "test13aBSIL",
                 "test13bIL",
                 "test13bIB",
                 "test13bIS",
                 "test13bBSIL",
                 "test14aB",
                 "test14bB",
                 "test14cB",
                 "test14dB",
                 "test14eB",
                 "test14fB",
                 "test15aB",
                 "test15bB",
                 "test15cB",
                 "test16a",
                 "test16b",
                 "test17a",
                 "test17b",
                 "test17c",
                 "test17d",
                 "test18a",
                 "test18b",
                 "test19",
                 "test20"})
    public void runTests() {
        for (String testName : tests.keySet()) {
            // Gold value first, then the fresh result, then the comparison.
            Object[] expected = golds.get(testName);
            Object[] actual = tests.get(testName).run();
            verify(testName, expected, actual);
        }
    }

    // Returns a new byte array of length RANGE filled with random values.
    static byte[] generateB() {
        final byte[] values = new byte[RANGE];
        int idx = 0;
        while (idx < values.length) {
            values[idx] = (byte)RANDOM.nextInt();
            idx++;
        }
        return values;
    }

    // Returns a new short array of length RANGE filled with random values.
    static short[] generateS() {
        final short[] values = new short[RANGE];
        int idx = 0;
        while (idx < values.length) {
            values[idx] = (short)RANDOM.nextInt();
            idx++;
        }
        return values;
    }

static int[] generateI() { 311 int[] a = new int[RANGE]; 312 for (int i = 0; i < a.length; i++) { 313 a[i] = RANDOM.nextInt(); 314 } 315 return a; 316 } 317 318 static long[] generateL() { 319 long[] a = new long[RANGE]; 320 for (int i = 0; i < a.length; i++) { 321 a[i] = RANDOM.nextLong(); 322 } 323 return a; 324 } 325 326 static void verify(String name, Object[] gold, Object[] result) { 327 if (gold.length != result.length) { 328 throw new RuntimeException("verify " + name + ": not the same number of outputs: gold.length = " + 329 gold.length + ", result.length = " + result.length); 330 } 331 for (int i = 0; i < gold.length; i++) { 332 Object g = gold[i]; 333 Object r = result[i]; 334 if (g.getClass() != r.getClass() || !g.getClass().isArray() || !r.getClass().isArray()) { 335 throw new RuntimeException("verify " + name + ": must both be array of same type:" + 336 " gold[" + i + "].getClass() = " + g.getClass().getSimpleName() + 337 " result[" + i + "].getClass() = " + r.getClass().getSimpleName()); 338 } 339 if (g == r) { 340 throw new RuntimeException("verify " + name + ": should be two separate arrays (with identical content):" + 341 " gold[" + i + "] == result[" + i + "]"); 342 } 343 if (Array.getLength(g) != Array.getLength(r)) { 344 throw new RuntimeException("verify " + name + ": arrays must have same length:" + 345 " gold[" + i + "].length = " + Array.getLength(g) + 346 " result[" + i + "].length = " + Array.getLength(r)); 347 } 348 Class c = g.getClass().getComponentType(); 349 if (c == byte.class) { 350 verifyB(name, i, (byte[])g, (byte[])r); 351 } else if (c == short.class) { 352 verifyS(name, i, (short[])g, (short[])r); 353 } else if (c == int.class) { 354 verifyI(name, i, (int[])g, (int[])r); 355 } else if (c == long.class) { 356 verifyL(name, i, (long[])g, (long[])r); 357 } else { 358 throw new RuntimeException("verify " + name + ": array type not supported for verify:" + 359 " gold[" + i + "].getClass() = " + g.getClass().getSimpleName() + 360 " 
result[" + i + "].getClass() = " + r.getClass().getSimpleName()); 361 } 362 } 363 } 364 365 static void verifyB(String name, int i, byte[] g, byte[] r) { 366 for (int j = 0; j < g.length; j++) { 367 if (g[j] != r[j]) { 368 throw new RuntimeException("verify " + name + ": arrays must have same content:" + 369 " gold[" + i + "][" + j + "] = " + g[j] + 370 " result[" + i + "][" + j + "] = " + r[j]); 371 } 372 } 373 } 374 375 static void verifyS(String name, int i, short[] g, short[] r) { 376 for (int j = 0; j < g.length; j++) { 377 if (g[j] != r[j]) { 378 throw new RuntimeException("verify " + name + ": arrays must have same content:" + 379 " gold[" + i + "][" + j + "] = " + g[j] + 380 " result[" + i + "][" + j + "] = " + r[j]); 381 } 382 } 383 } 384 385 static void verifyI(String name, int i, int[] g, int[] r) { 386 for (int j = 0; j < g.length; j++) { 387 if (g[j] != r[j]) { 388 throw new RuntimeException("verify " + name + ": arrays must have same content:" + 389 " gold[" + i + "][" + j + "] = " + g[j] + 390 " result[" + i + "][" + j + "] = " + r[j]); 391 } 392 } 393 } 394 395 static void verifyL(String name, int i, long[] g, long[] r) { 396 for (int j = 0; j < g.length; j++) { 397 if (g[j] != r[j]) { 398 throw new RuntimeException("verify " + name + ": arrays must have same content:" + 399 " gold[" + i + "][" + j + "] = " + g[j] + 400 " result[" + i + "][" + j + "] = " + r[j]); 401 } 402 } 403 } 404 405 @Test 406 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0", 407 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0", 408 IRNode.STORE_VECTOR, "> 0"}, 409 applyIf = {"MaxVectorSize", ">=8"}, 410 applyIfPlatform = {"64-bit", "true"}, 411 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) 412 static Object[] test0(byte[] a, byte[] b, byte mask) { 413 for (int i = 0; i < RANGE; i+=8) { 414 // Safe to vectorize with AlignVector 415 b[i+0] = (byte)(a[i+0] & mask); // offset 0, align 0 416 b[i+1] = (byte)(a[i+1] & mask); 417 b[i+2] = (byte)(a[i+2] & mask); 
b[i+3] = (byte)(a[i+3] & mask);
        }
        return new Object[]{ a, b };
    }

    // test1: per stride of 8 bytes, ANDs all 8 bytes i+0..i+7 of a with mask into b.
    // The @IR rule has no AlignVector or MaxVectorSize condition: byte vector
    // load/and/store nodes are expected on 64-bit with avx2 or asimd.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.AND_VB, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
    static Object[] test1(byte[] a, byte[] b, byte mask) {
        for (int i = 0; i < RANGE; i+=8) {
            b[i+0] = (byte)(a[i+0] & mask); // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 0 + iter*8
            b[i+1] = (byte)(a[i+1] & mask);
            b[i+2] = (byte)(a[i+2] & mask);
            b[i+3] = (byte)(a[i+3] & mask);
            b[i+4] = (byte)(a[i+4] & mask);
            b[i+5] = (byte)(a[i+5] & mask);
            b[i+6] = (byte)(a[i+6] & mask);
            b[i+7] = (byte)(a[i+7] & mask);
        }
        return new Object[]{ a, b };
    }

    // test2: per stride of 8 bytes, ANDs bytes i+3..i+6 of a with mask into b.
    // First @IR rule (AlignVector=false, MaxVectorSize >= 8): 4-element byte vectors expected.
    // Second @IR rule (AlignVector=true): no byte vector load/and and no vector store expected.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.AND_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test2(byte[] a, byte[] b, byte mask) {
        for (int i = 0; i < RANGE; i+=8) {
            // Cannot align with AlignVector: 3 + x * 8 % 8 = 3
            b[i+3] = (byte)(a[i+3] & mask); // at alignment 3
            b[i+4] = (byte)(a[i+4] & mask);
            b[i+5] = (byte)(a[i+5] & mask);
            b[i+6] = (byte)(a[i+6] & mask);
        }
        return new Object[]{ a, b };
    }

    // test3: like test2 (bytes i+3..i+6 per stride of 8), plus a single access at i+0.
    // The two @IR rules express the same expectations as for test2.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
        applyIfPlatform =
{"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.AND_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test3(byte[] a, byte[] b, byte mask) {
        for (int i = 0; i < RANGE; i+=8) {
            // Cannot align with AlignVector: 3 + x * 8 % 8 = 3

            // Problematic for AlignVector
            b[i+0] = (byte)(a[i+0] & mask); // best_memref, align 0

            b[i+3] = (byte)(a[i+3] & mask); // pack at offset 3 bytes
            b[i+4] = (byte)(a[i+4] & mask);
            b[i+5] = (byte)(a[i+5] & mask);
            b[i+6] = (byte)(a[i+6] & mask);
        }
        return new Object[]{ a, b };
    }

    // test4: per stride of 16 bytes, ANDs a group of 4 bytes (offsets 0..3) and a
    // group of 8 bytes (offsets 5..12) of a with mask into b.
    // Both @IR rules require MaxVectorSize >= 16. With AlignVector=false, 4- and
    // 8-element byte vectors are expected. With AlignVector=true, only the
    // 4-element ones are expected and the 8-element counts must be zero.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_8, "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_8, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=16"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_8, "= 0",// unaligned
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_8, "= 0",// unaligned
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">=16"})
    static Object[] test4(byte[] a, byte[] b, byte mask) {
        for (int i = 0; i < RANGE/16; i++) {
            // Problematic for AlignVector
            b[i*16 + 0 ] = (byte)(a[i*16 + 0 ] & mask); // 4 pack, 0 aligned
            b[i*16 + 1 ] = (byte)(a[i*16 + 1 ] & mask);
            b[i*16 + 2 ] = (byte)(a[i*16 + 2 ] & mask);
b[i*16 + 3 ] = (byte)(a[i*16 + 3 ] & mask);

            b[i*16 + 5 ] = (byte)(a[i*16 + 5 ] & mask); // 8 pack, 5 aligned
            b[i*16 + 6 ] = (byte)(a[i*16 + 6 ] & mask);
            b[i*16 + 7 ] = (byte)(a[i*16 + 7 ] & mask);
            b[i*16 + 8 ] = (byte)(a[i*16 + 8 ] & mask);
            b[i*16 + 9 ] = (byte)(a[i*16 + 9 ] & mask);
            b[i*16 + 10] = (byte)(a[i*16 + 10] & mask);
            b[i*16 + 11] = (byte)(a[i*16 + 11] & mask);
            b[i*16 + 12] = (byte)(a[i*16 + 12] & mask);
        }
        return new Object[]{ a, b };
    }

    // test5: same access pattern as test3 (i+0 and i+3..i+6 per stride of 8), with
    // the loop-invariant offset inv added to every index.
    // AlignVector=false and MaxVectorSize >= 8: 4-element byte vectors expected.
    // AlignVector=true: no byte vector load/and and no vector store expected.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.AND_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test5(byte[] a, byte[] b, byte mask, int inv) {
        for (int i = 0; i < RANGE; i+=8) {
            // Cannot align with AlignVector because of invariant
            b[i+inv+0] = (byte)(a[i+inv+0] & mask);

            b[i+inv+3] = (byte)(a[i+inv+3] & mask);
            b[i+inv+4] = (byte)(a[i+inv+4] & mask);
            b[i+inv+5] = (byte)(a[i+inv+5] & mask);
            b[i+inv+6] = (byte)(a[i+inv+6] & mask);
        }
        return new Object[]{ a, b };
    }

    // test6: byte accesses at i*4+0 and i*4+3..i*4+6, with i advancing by 2
    // (i.e. 8 bytes per iteration). Same two @IR expectations as test5.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.AND_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test6(byte[] a, byte[] b, byte mask) {
        for (int i = 0; i < RANGE/8; i+=2) {
            // Cannot align with AlignVector because offset is odd
            b[i*4+0] = (byte)(a[i*4+0] & mask);

            b[i*4+3] = (byte)(a[i*4+3] & mask);
            b[i*4+4] = (byte)(a[i*4+4] & mask);
            b[i*4+5] = (byte)(a[i*4+5] & mask);
            b[i*4+6] = (byte)(a[i*4+6] & mask);
        }
        return new Object[]{ a, b };
    }

    // test7: short version of test6 (same index pattern on short arrays).
    // The positive rule requires AlignVector=false, MaxVectorSize >= 16 and avx2 or
    // asimd. The negative rule applies with AlignVector=true on sse4.1 or asimd.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VS, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=16"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
    @IR(counts = {IRNode.LOAD_VECTOR_S, "= 0",
                  IRNode.AND_VS, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test7(short[] a, short[] b, short mask) {
        for (int i = 0; i < RANGE/8; i+=2) {
            // Cannot align with AlignVector because offset is odd
            b[i*4+0] = (short)(a[i*4+0] & mask);

            b[i*4+3] = (short)(a[i*4+3] & mask);
            b[i*4+4] = (short)(a[i*4+4] & mask);
            b[i*4+5] = (short)(a[i*4+5] & mask);
            b[i*4+6] = (short)(a[i*4+6] & mask);
        }
        return new Object[]{ a, b };
    }

    // test8: same access pattern as test3, but the loop starts at the parameter init
    // instead of a constant. Same two @IR expectations as test5.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.AND_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test8(byte[] a, byte[] b, byte mask, int init) {
        for (int i = init; i < RANGE; i+=8) {
            // Cannot align with AlignVector because of invariant (variable init becomes invar)
            b[i+0] = (byte)(a[i+0] & mask);

            b[i+3] = (byte)(a[i+3] & mask);
            b[i+4] = (byte)(a[i+4] & mask);
            b[i+5] = (byte)(a[i+5] & mask);
            b[i+6] = (byte)(a[i+6] & mask);
        }
        return new Object[]{ a, b };
    }

    // test9: same access pattern as test3, but with the constant loop start 13
    // (so the 4-byte group starts at index 13+3 = 16 in the first iteration).
    // The single @IR rule has no AlignVector condition: 4-element byte vectors are
    // expected whenever MaxVectorSize >= 8.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIf = {"MaxVectorSize", ">=8"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test9(byte[] a, byte[] b, byte mask) {
        // known non-zero init value does not affect offset, but has implicit effect on iv
        for (int i = 13; i < RANGE-8; i+=8) {
            b[i+0] = (byte)(a[i+0] & mask);

            b[i+3] = (byte)(a[i+3] & mask);
            b[i+4] = (byte)(a[i+4] & mask);
            b[i+5] = (byte)(a[i+5] & mask);
            b[i+6] = (byte)(a[i+6] & mask);
        }
        return new Object[]{ a, b };
    }

    // test10a: bytes i+0..i+3 per stride of 8, with the constant loop start 3.
    // AlignVector=false and MaxVectorSize >= 8: 4-element byte vectors expected.
    // AlignVector=true: no byte vector load/and and no vector store expected.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.AND_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test10a(byte[] a, byte[] b, byte mask) {
        // This is not alignable with pre-loop, because of odd init.
for (int i = 3; i < RANGE-8; i+=8) {
            b[i+0] = (byte)(a[i+0] & mask);
            b[i+1] = (byte)(a[i+1] & mask);
            b[i+2] = (byte)(a[i+2] & mask);
            b[i+3] = (byte)(a[i+3] & mask);
        }
        return new Object[]{ a, b };
    }

    // test10b: bytes i+0..i+3 per stride of 8, with the constant loop start 13.
    // AlignVector=false and MaxVectorSize >= 8: 4-element byte vectors expected.
    // AlignVector=true: no byte vector load/and and no vector store expected.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.AND_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test10b(byte[] a, byte[] b, byte mask) {
        // This is not alignable with pre-loop, because of odd init.
        // Seems not correctly handled.
        for (int i = 13; i < RANGE-8; i+=8) {
            b[i+0] = (byte)(a[i+0] & mask);
            b[i+1] = (byte)(a[i+1] & mask);
            b[i+2] = (byte)(a[i+2] & mask);
            b[i+3] = (byte)(a[i+3] & mask);
        }
        return new Object[]{ a, b };
    }

    // test10c: short version of test10b (shorts i+0..i+3 per stride of 8 elements,
    // constant loop start 13). The positive rule requires AlignVector=false and
    // MaxVectorSize >= 16. With AlignVector=true no short vector load/and and no
    // vector store is expected.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VS, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=16"})
    @IR(counts = {IRNode.LOAD_VECTOR_S, "= 0",
                  IRNode.AND_VS, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test10c(short[] a, short[] b, short mask) {
        // This is not alignable with pre-loop, because of odd init.
        // Seems not correctly handled with MaxVectorSize >= 32.
for (int i = 13; i < RANGE-8; i+=8) {
            b[i+0] = (short)(a[i+0] & mask);
            b[i+1] = (short)(a[i+1] & mask);
            b[i+2] = (short)(a[i+2] & mask);
            b[i+3] = (short)(a[i+3] & mask);
        }
        return new Object[]{ a, b };
    }

    // test10d: like test10c, but every index carries an extra constant offset of 3,
    // so the first group starts at element 13+3 = 16.
    // The single @IR rule has no AlignVector condition: 4-element short vectors are
    // expected whenever MaxVectorSize >= 16 on 64-bit with avx2 or asimd.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VS, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIf = {"MaxVectorSize", ">=16"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
    static Object[] test10d(short[] a, short[] b, short mask) {
        for (int i = 13; i < RANGE-16; i+=8) {
            // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2*(3 + 13) + iter*16
            b[i+0+3] = (short)(a[i+0+3] & mask);
            b[i+1+3] = (short)(a[i+1+3] & mask);
            b[i+2+3] = (short)(a[i+2+3] & mask);
            b[i+3+3] = (short)(a[i+3+3] & mask);
        }
        return new Object[]{ a, b };
    }

    // test11a{B,S,I,L}: plain element-wise b[i] = a[i] & mask over the whole array,
    // loop start 0, one variant per element type. Each has a single @IR rule without
    // an AlignVector condition that expects vector load/and/store nodes.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.AND_VB, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test11aB(byte[] a, byte[] b, byte mask) {
        for (int i = 0; i < RANGE; i++) {
            // always alignable
            b[i+0] = (byte)(a[i+0] & mask);
        }
        return new Object[]{ a, b };
    }

    // Short variant of test11aB.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.AND_VS, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test11aS(short[] a, short[] b, short mask) {
        for (int i = 0; i < RANGE; i++) {
            // always alignable
            b[i+0] = (short)(a[i+0] & mask);
        }
        return new Object[]{ a, b };
    }

    // Int variant of test11aB.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
                  IRNode.AND_VI, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test11aI(int[] a, int[] b, int mask) {
        for (int i = 0; i < RANGE; i++) {
            // always alignable
            b[i+0] = (int)(a[i+0] & mask);
        }
        return new Object[]{ a, b };
    }

    // Long variant of test11aB.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
                  IRNode.AND_VL, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test11aL(long[] a, long[] b, long mask) {
        for (int i = 0; i < RANGE; i++) {
            // always alignable
            b[i+0] = (long)(a[i+0] & mask);
        }
        return new Object[]{ a, b };
    }

    // test11b{B,S,I,L}: same as test11a*, but the loop starts at 1 instead of 0.
    // The @IR rules are unchanged (vectorization expected, no AlignVector condition).
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.AND_VB, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test11bB(byte[] a, byte[] b, byte mask) {
        for (int i = 1; i < RANGE; i++) {
            // always alignable
            b[i+0] = (byte)(a[i+0] & mask);
        }
        return new Object[]{ a, b };
    }

    // Short variant of test11bB.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.AND_VS, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test11bS(short[] a, short[] b, short mask) {
        for (int i = 1; i < RANGE; i++) {
            // always alignable
            b[i+0] = (short)(a[i+0] & mask);
        }
        return new Object[]{ a, b };
    }

    // Int variant of test11bB.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
                  IRNode.AND_VI, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test11bI(int[] a, int[] b, int mask) {
        for (int i = 1; i < RANGE; i++) {
            // always alignable
            b[i+0] = (int)(a[i+0] & mask);
        }
        return new Object[]{ a, b };
    }

    // Long variant of test11bB.
    @Test
    @IR(counts =
{IRNode.LOAD_VECTOR_L, "> 0",
                  IRNode.AND_VL, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test11bL(long[] a, long[] b, long mask) {
        for (int i = 1; i < RANGE; i++) {
            // always alignable
            b[i+0] = (long)(a[i+0] & mask);
        }
        return new Object[]{ a, b };
    }

    // test11c{B,S,I}: element-wise b[i] = a[i+1] & mask, i.e. the load is shifted by
    // one element relative to the store.
    // AlignVector=false: vector load/and/store nodes expected.
    // AlignVector=true: none expected.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.AND_VB, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "false"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.AND_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test11cB(byte[] a, byte[] b, byte mask) {
        for (int i = 1; i < RANGE-1; i++) {
            // 1 byte offset -> not alignable with AlignVector
            b[i+0] = (byte)(a[i+1] & mask);
        }
        return new Object[]{ a, b };
    }

    // Short variant of test11cB (load shifted by one short, i.e. 2 bytes).
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.AND_VS, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "false"})
    @IR(counts = {IRNode.LOAD_VECTOR_S, "= 0",
                  IRNode.AND_VS, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test11cS(short[] a, short[] b, short mask) {
        for (int i = 1; i < RANGE-1; i++) {
            // 2 byte offset -> not alignable with AlignVector
            b[i+0] = (short)(a[i+1] & mask);
        }
        return new Object[]{ a, b };
    }

    // Int variant of test11cB (load shifted by one int, i.e. 4 bytes).
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
                  IRNode.AND_VI, "> 0",
IRNode.STORE_VECTOR, "> 0"}, 906 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 907 applyIfPlatform = {"64-bit", "true"}, 908 applyIf = {"AlignVector", "false"}) 909 @IR(counts = {IRNode.LOAD_VECTOR_I, "= 0", 910 IRNode.AND_VI, "= 0", 911 IRNode.STORE_VECTOR, "= 0"}, 912 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 913 applyIfPlatform = {"64-bit", "true"}, 914 applyIf = {"AlignVector", "true"}) 915 static Object[] test11cI(int[] a, int[] b, int mask) { 916 for (int i = 1; i < RANGE-1; i++) { 917 // 4 byte offset -> not alignable with AlignVector 918 b[i+0] = (int)(a[i+1] & mask); 919 } 920 return new Object[]{ a, b }; 921 } 922 923 @Test 924 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", 925 IRNode.AND_VL, "> 0", 926 IRNode.STORE_VECTOR, "> 0"}, 927 applyIfPlatform = {"64-bit", "true"}, 928 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) 929 static Object[] test11cL(long[] a, long[] b, long mask) { 930 for (int i = 1; i < RANGE-1; i++) { 931 // always alignable (8 byte offset) 932 b[i+0] = (long)(a[i+1] & mask); 933 } 934 return new Object[]{ a, b }; 935 } 936 937 @Test 938 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", 939 IRNode.AND_VB, "> 0", 940 IRNode.STORE_VECTOR, "> 0"}, 941 applyIfPlatform = {"64-bit", "true"}, 942 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) 943 static Object[] test11dB(byte[] a, byte[] b, byte mask, int invar) { 944 for (int i = 0; i < RANGE; i++) { 945 b[i+0+invar] = (byte)(a[i+0+invar] & mask); 946 } 947 return new Object[]{ a, b }; 948 } 949 950 @Test 951 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", 952 IRNode.AND_VS, "> 0", 953 IRNode.STORE_VECTOR, "> 0"}, 954 applyIfPlatform = {"64-bit", "true"}, 955 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) 956 static Object[] test11dS(short[] a, short[] b, short mask, int invar) { 957 for (int i = 0; i < RANGE; i++) { 958 b[i+0+invar] = (short)(a[i+0+invar] & mask); 959 } 960 return new Object[]{ a, b }; 961 } 962 963 @Test 964 @IR(counts = 
{IRNode.LOAD_VECTOR_I, "> 0", 965 IRNode.AND_VI, "> 0", 966 IRNode.STORE_VECTOR, "> 0"}, 967 applyIfPlatform = {"64-bit", "true"}, 968 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) 969 static Object[] test11dI(int[] a, int[] b, int mask, int invar) { 970 for (int i = 0; i < RANGE; i++) { 971 b[i+0+invar] = (int)(a[i+0+invar] & mask); 972 } 973 return new Object[]{ a, b }; 974 } 975 976 @Test 977 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", 978 IRNode.AND_VL, "> 0", 979 IRNode.STORE_VECTOR, "> 0"}, 980 applyIfPlatform = {"64-bit", "true"}, 981 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) 982 static Object[] test11dL(long[] a, long[] b, long mask, int invar) { 983 for (int i = 0; i < RANGE; i++) { 984 b[i+0+invar] = (long)(a[i+0+invar] & mask); 985 } 986 return new Object[]{ a, b }; 987 } 988 989 @Test 990 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", 991 IRNode.AND_VB, "= 0", 992 IRNode.STORE_VECTOR, "= 0"}, 993 applyIfPlatform = {"64-bit", "true"}, 994 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) 995 static Object[] test12(byte[] a, byte[] b, byte mask) { 996 for (int i = 0; i < RANGE/16; i++) { 997 // Currently does not vectorize at all 998 b[i*6 + 0 ] = (byte)(a[i*6 + 0 ] & mask); 999 b[i*6 + 1 ] = (byte)(a[i*6 + 1 ] & mask); 1000 b[i*6 + 2 ] = (byte)(a[i*6 + 2 ] & mask); 1001 b[i*6 + 3 ] = (byte)(a[i*6 + 3 ] & mask); 1002 } 1003 return new Object[]{ a, b }; 1004 } 1005 1006 @Test 1007 @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0", 1008 IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0", 1009 IRNode.ADD_VI, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0", 1010 IRNode.ADD_VL, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0", 1011 IRNode.STORE_VECTOR, "> 0"}, 1012 applyIfPlatform = {"64-bit", "true"}, 1013 applyIfCPUFeatureOr = {"avx2", "true"}) 1014 // require avx to ensure vectors are larger than what unrolling produces 1015 static Object[] 
test13aIL(int[] a, long[] b) { 1016 for (int i = 0; i < RANGE; i++) { 1017 a[i]++; 1018 b[i]++; 1019 } 1020 return new Object[]{ a, b }; 1021 } 1022 1023 @Test 1024 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", 1025 IRNode.LOAD_VECTOR_I, "> 0", 1026 IRNode.ADD_VB, "> 0", 1027 IRNode.ADD_VI, "> 0", 1028 IRNode.STORE_VECTOR, "> 0"}, 1029 applyIfPlatform = {"64-bit", "true"}, 1030 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"}) 1031 static Object[] test13aIB(int[] a, byte[] b) { 1032 for (int i = 0; i < RANGE; i++) { 1033 a[i]++; 1034 b[i]++; 1035 } 1036 return new Object[]{ a, b }; 1037 } 1038 1039 @Test 1040 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", 1041 IRNode.LOAD_VECTOR_S, "> 0", 1042 IRNode.ADD_VI, "> 0", 1043 IRNode.ADD_VS, "> 0", 1044 IRNode.STORE_VECTOR, "> 0"}, 1045 applyIfPlatform = {"64-bit", "true"}, 1046 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"}) 1047 static Object[] test13aIS(int[] a, short[] b) { 1048 for (int i = 0; i < RANGE; i++) { 1049 a[i]++; 1050 b[i]++; 1051 } 1052 return new Object[]{ a, b }; 1053 } 1054 1055 @Test 1056 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", 1057 IRNode.LOAD_VECTOR_S, "> 0", 1058 IRNode.LOAD_VECTOR_I, "> 0", 1059 IRNode.LOAD_VECTOR_L, "> 0", 1060 IRNode.ADD_VB, "> 0", 1061 IRNode.ADD_VS, "> 0", 1062 IRNode.ADD_VI, "> 0", 1063 IRNode.ADD_VL, "> 0", 1064 IRNode.STORE_VECTOR, "> 0"}, 1065 applyIfPlatform = {"64-bit", "true"}, 1066 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"}) 1067 static Object[] test13aBSIL(byte[] a, short[] b, int[] c, long[] d) { 1068 for (int i = 0; i < RANGE; i++) { 1069 a[i]++; 1070 b[i]++; 1071 c[i]++; 1072 d[i]++; 1073 } 1074 return new Object[]{ a, b, c, d }; 1075 } 1076 1077 @Test 1078 @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0", 1079 IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0", 1080 IRNode.ADD_VI, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0", 1081 IRNode.ADD_VL, IRNode.VECTOR_SIZE + 
"min(max_int, max_long)", "> 0", 1082 IRNode.STORE_VECTOR, "> 0"}, 1083 applyIfPlatform = {"64-bit", "true"}, 1084 applyIfCPUFeatureOr = {"avx2", "true"}) 1085 // require avx to ensure vectors are larger than what unrolling produces 1086 static Object[] test13bIL(int[] a, long[] b) { 1087 for (int i = 1; i < RANGE; i++) { 1088 a[i]++; 1089 b[i]++; 1090 } 1091 return new Object[]{ a, b }; 1092 } 1093 1094 @Test 1095 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", 1096 IRNode.LOAD_VECTOR_I, "> 0", 1097 IRNode.ADD_VB, "> 0", 1098 IRNode.ADD_VI, "> 0", 1099 IRNode.STORE_VECTOR, "> 0"}, 1100 applyIfPlatform = {"64-bit", "true"}, 1101 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"}) 1102 static Object[] test13bIB(int[] a, byte[] b) { 1103 for (int i = 1; i < RANGE; i++) { 1104 a[i]++; 1105 b[i]++; 1106 } 1107 return new Object[]{ a, b }; 1108 } 1109 1110 @Test 1111 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", 1112 IRNode.LOAD_VECTOR_S, "> 0", 1113 IRNode.ADD_VI, "> 0", 1114 IRNode.ADD_VS, "> 0", 1115 IRNode.STORE_VECTOR, "> 0"}, 1116 applyIfPlatform = {"64-bit", "true"}, 1117 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"}) 1118 static Object[] test13bIS(int[] a, short[] b) { 1119 for (int i = 1; i < RANGE; i++) { 1120 a[i]++; 1121 b[i]++; 1122 } 1123 return new Object[]{ a, b }; 1124 } 1125 1126 @Test 1127 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", 1128 IRNode.LOAD_VECTOR_S, "> 0", 1129 IRNode.LOAD_VECTOR_I, "> 0", 1130 IRNode.LOAD_VECTOR_L, "> 0", 1131 IRNode.ADD_VB, "> 0", 1132 IRNode.ADD_VS, "> 0", 1133 IRNode.ADD_VI, "> 0", 1134 IRNode.ADD_VL, "> 0", 1135 IRNode.STORE_VECTOR, "> 0"}, 1136 applyIfPlatform = {"64-bit", "true"}, 1137 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"}) 1138 static Object[] test13bBSIL(byte[] a, short[] b, int[] c, long[] d) { 1139 for (int i = 1; i < RANGE; i++) { 1140 a[i]++; 1141 b[i]++; 1142 c[i]++; 1143 d[i]++; 1144 } 1145 return new Object[]{ a, b, c, d }; 1146 } 1147 1148 @Test 1149 @IR(counts = 
{IRNode.LOAD_VECTOR_B, "= 0", 1150 IRNode.ADD_VB, "= 0", 1151 IRNode.STORE_VECTOR, "= 0"}, 1152 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 1153 applyIfPlatform = {"64-bit", "true"}, 1154 applyIf = {"AlignVector", "false"}) 1155 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", 1156 IRNode.ADD_VB, "= 0", 1157 IRNode.STORE_VECTOR, "= 0"}, 1158 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 1159 applyIfPlatform = {"64-bit", "true"}, 1160 applyIf = {"AlignVector", "true"}) 1161 static Object[] test14aB(byte[] a) { 1162 // non-power-of-2 stride 1163 for (int i = 0; i < RANGE-20; i+=9) { 1164 // Since the stride is shorter than the vector length, there will be always 1165 // partial overlap of loads with previous stores, this leads to failure in 1166 // store-to-load-forwarding -> vectorization not profitable. 1167 a[i+0]++; 1168 a[i+1]++; 1169 a[i+2]++; 1170 a[i+3]++; 1171 a[i+4]++; 1172 a[i+5]++; 1173 a[i+6]++; 1174 a[i+7]++; 1175 a[i+8]++; 1176 a[i+9]++; 1177 a[i+10]++; 1178 a[i+11]++; 1179 a[i+12]++; 1180 a[i+13]++; 1181 a[i+14]++; 1182 a[i+15]++; 1183 } 1184 return new Object[]{ a }; 1185 } 1186 1187 @Test 1188 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", 1189 IRNode.ADD_VB, "= 0", 1190 IRNode.STORE_VECTOR, "= 0"}, 1191 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 1192 applyIfPlatform = {"64-bit", "true"}, 1193 applyIf = {"AlignVector", "false"}) 1194 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", 1195 IRNode.ADD_VB, "= 0", 1196 IRNode.STORE_VECTOR, "= 0"}, 1197 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 1198 applyIfPlatform = {"64-bit", "true"}, 1199 applyIf = {"AlignVector", "true"}) 1200 static Object[] test14bB(byte[] a) { 1201 // non-power-of-2 stride 1202 for (int i = 0; i < RANGE-20; i+=3) { 1203 // Since the stride is shorter than the vector length, there will be always 1204 // partial overlap of loads with previous stores, this leads to failure in 1205 // store-to-load-forwarding -> vectorization not profitable. 
1206 a[i+0]++; 1207 a[i+1]++; 1208 a[i+2]++; 1209 a[i+3]++; 1210 a[i+4]++; 1211 a[i+5]++; 1212 a[i+6]++; 1213 a[i+7]++; 1214 a[i+8]++; 1215 a[i+9]++; 1216 a[i+10]++; 1217 a[i+11]++; 1218 a[i+12]++; 1219 a[i+13]++; 1220 a[i+14]++; 1221 a[i+15]++; 1222 } 1223 return new Object[]{ a }; 1224 } 1225 1226 @Test 1227 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", 1228 IRNode.ADD_VB, "= 0", 1229 IRNode.STORE_VECTOR, "= 0"}, 1230 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 1231 applyIfPlatform = {"64-bit", "true"}, 1232 applyIf = {"AlignVector", "false"}) 1233 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", 1234 IRNode.ADD_VB, "= 0", 1235 IRNode.STORE_VECTOR, "= 0"}, 1236 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 1237 applyIfPlatform = {"64-bit", "true"}, 1238 applyIf = {"AlignVector", "true"}) 1239 static Object[] test14cB(byte[] a) { 1240 // non-power-of-2 stride 1241 for (int i = 0; i < RANGE-20; i+=5) { 1242 // Since the stride is shorter than the vector length, there will be always 1243 // partial overlap of loads with previous stores, this leads to failure in 1244 // store-to-load-forwarding -> vectorization not profitable. 
1245 a[i+0]++; 1246 a[i+1]++; 1247 a[i+2]++; 1248 a[i+3]++; 1249 a[i+4]++; 1250 a[i+5]++; 1251 a[i+6]++; 1252 a[i+7]++; 1253 a[i+8]++; 1254 a[i+9]++; 1255 a[i+10]++; 1256 a[i+11]++; 1257 a[i+12]++; 1258 a[i+13]++; 1259 a[i+14]++; 1260 a[i+15]++; 1261 } 1262 return new Object[]{ a }; 1263 } 1264 1265 @Test 1266 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_byte, 8)", "> 0", 1267 IRNode.ADD_VB, IRNode.VECTOR_SIZE + "min(max_byte, 8)", "> 0", 1268 IRNode.STORE_VECTOR, "> 0"}, 1269 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 1270 applyIfPlatform = {"64-bit", "true"}, 1271 applyIf = {"AlignVector", "false"}) 1272 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", 1273 IRNode.ADD_VB, "= 0", 1274 IRNode.STORE_VECTOR, "= 0"}, 1275 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 1276 applyIfPlatform = {"64-bit", "true"}, 1277 applyIf = {"AlignVector", "true"}) 1278 static Object[] test14dB(byte[] a) { 1279 // non-power-of-2 stride 1280 for (int i = 0; i < RANGE-20; i+=9) { 1281 a[i+0]++; 1282 a[i+1]++; 1283 a[i+2]++; 1284 a[i+3]++; 1285 a[i+4]++; 1286 a[i+5]++; 1287 a[i+6]++; 1288 a[i+7]++; 1289 } 1290 return new Object[]{ a }; 1291 } 1292 1293 @Test 1294 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_byte, 8)", "> 0", 1295 IRNode.ADD_VB, IRNode.VECTOR_SIZE + "min(max_byte, 8)", "> 0", 1296 IRNode.STORE_VECTOR, "> 0"}, 1297 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 1298 applyIfPlatform = {"64-bit", "true"}, 1299 applyIf = {"AlignVector", "false"}) 1300 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", 1301 IRNode.ADD_VB, "= 0", 1302 IRNode.STORE_VECTOR, "= 0"}, 1303 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 1304 applyIfPlatform = {"64-bit", "true"}, 1305 applyIf = {"AlignVector", "true"}) 1306 static Object[] test14eB(byte[] a) { 1307 // non-power-of-2 stride 1308 for (int i = 0; i < RANGE-32; i+=11) { 1309 a[i+0]++; 1310 a[i+1]++; 1311 a[i+2]++; 1312 a[i+3]++; 1313 a[i+4]++; 1314 a[i+5]++; 
1315 a[i+6]++; 1316 a[i+7]++; 1317 } 1318 return new Object[]{ a }; 1319 } 1320 1321 @Test 1322 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_byte, 8)", "> 0", 1323 IRNode.ADD_VB, IRNode.VECTOR_SIZE + "min(max_byte, 8)", "> 0", 1324 IRNode.STORE_VECTOR, "> 0"}, 1325 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 1326 applyIfPlatform = {"64-bit", "true"}, 1327 applyIf = {"AlignVector", "false"}) 1328 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", 1329 IRNode.ADD_VB, "= 0", 1330 IRNode.STORE_VECTOR, "= 0"}, 1331 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 1332 applyIfPlatform = {"64-bit", "true"}, 1333 applyIf = {"AlignVector", "true"}) 1334 static Object[] test14fB(byte[] a) { 1335 // non-power-of-2 stride 1336 for (int i = 0; i < RANGE-40; i+=12) { 1337 a[i+0]++; 1338 a[i+1]++; 1339 a[i+2]++; 1340 a[i+3]++; 1341 a[i+4]++; 1342 a[i+5]++; 1343 a[i+6]++; 1344 a[i+7]++; 1345 } 1346 return new Object[]{ a }; 1347 } 1348 1349 @Test 1350 // IR rules difficult because of modulo wrapping with offset after peeling. 1351 static Object[] test15aB(byte[] a) { 1352 // non-power-of-2 scale 1353 for (int i = 0; i < RANGE/64-20; i++) { 1354 a[53*i+0]++; 1355 a[53*i+1]++; 1356 a[53*i+2]++; 1357 a[53*i+3]++; 1358 a[53*i+4]++; 1359 a[53*i+5]++; 1360 a[53*i+6]++; 1361 a[53*i+7]++; 1362 a[53*i+8]++; 1363 a[53*i+9]++; 1364 a[53*i+10]++; 1365 a[53*i+11]++; 1366 a[53*i+12]++; 1367 a[53*i+13]++; 1368 a[53*i+14]++; 1369 a[53*i+15]++; 1370 } 1371 return new Object[]{ a }; 1372 } 1373 1374 @Test 1375 // IR rules difficult because of modulo wrapping with offset after peeling. 
1376 static Object[] test15bB(byte[] a) { 1377 // non-power-of-2 scale 1378 for (int i = 0; i < RANGE/64-20; i++) { 1379 a[25*i+0]++; 1380 a[25*i+1]++; 1381 a[25*i+2]++; 1382 a[25*i+3]++; 1383 a[25*i+4]++; 1384 a[25*i+5]++; 1385 a[25*i+6]++; 1386 a[25*i+7]++; 1387 a[25*i+8]++; 1388 a[25*i+9]++; 1389 a[25*i+10]++; 1390 a[25*i+11]++; 1391 a[25*i+12]++; 1392 a[25*i+13]++; 1393 a[25*i+14]++; 1394 a[25*i+15]++; 1395 } 1396 return new Object[]{ a }; 1397 } 1398 1399 @Test 1400 // IR rules difficult because of modulo wrapping with offset after peeling. 1401 static Object[] test15cB(byte[] a) { 1402 // non-power-of-2 scale 1403 for (int i = 0; i < RANGE/64-20; i++) { 1404 a[19*i+0]++; 1405 a[19*i+1]++; 1406 a[19*i+2]++; 1407 a[19*i+3]++; 1408 a[19*i+4]++; 1409 a[19*i+5]++; 1410 a[19*i+6]++; 1411 a[19*i+7]++; 1412 a[19*i+8]++; 1413 a[19*i+9]++; 1414 a[19*i+10]++; 1415 a[19*i+11]++; 1416 a[19*i+12]++; 1417 a[19*i+13]++; 1418 a[19*i+14]++; 1419 a[19*i+15]++; 1420 } 1421 return new Object[]{ a }; 1422 } 1423 1424 @Test 1425 static Object[] test16a(byte[] a, short[] b) { 1426 // infinite loop issues 1427 for (int i = 0; i < RANGE/2-20; i++) { 1428 a[2*i+0]++; 1429 a[2*i+1]++; 1430 a[2*i+2]++; 1431 a[2*i+3]++; 1432 a[2*i+4]++; 1433 a[2*i+5]++; 1434 a[2*i+6]++; 1435 a[2*i+7]++; 1436 a[2*i+8]++; 1437 a[2*i+9]++; 1438 a[2*i+10]++; 1439 a[2*i+11]++; 1440 a[2*i+12]++; 1441 a[2*i+13]++; 1442 a[2*i+14]++; 1443 1444 b[2*i+0]++; 1445 b[2*i+1]++; 1446 b[2*i+2]++; 1447 b[2*i+3]++; 1448 } 1449 return new Object[]{ a, b }; 1450 } 1451 1452 @Test 1453 static Object[] test16b(byte[] a) { 1454 // infinite loop issues 1455 for (int i = 0; i < RANGE/2-20; i++) { 1456 a[2*i+0]++; 1457 a[2*i+1]++; 1458 a[2*i+2]++; 1459 a[2*i+3]++; 1460 a[2*i+4]++; 1461 a[2*i+5]++; 1462 a[2*i+6]++; 1463 a[2*i+7]++; 1464 a[2*i+8]++; 1465 a[2*i+9]++; 1466 a[2*i+10]++; 1467 a[2*i+11]++; 1468 a[2*i+12]++; 1469 a[2*i+13]++; 1470 a[2*i+14]++; 1471 } 1472 return new Object[]{ a }; 1473 } 1474 1475 @Test 1476 @IR(counts = 
{IRNode.LOAD_VECTOR_L, "> 0", 1477 IRNode.ADD_VL, "> 0", 1478 IRNode.STORE_VECTOR, "> 0"}, 1479 applyIfPlatform = {"64-bit", "true"}, 1480 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) 1481 static Object[] test17a(long[] a) { 1482 // Unsafe: vectorizes with profiling (not xcomp) 1483 for (int i = 0; i < RANGE; i++) { 1484 long adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8L * i; 1485 long v = UNSAFE.getLongUnaligned(a, adr); 1486 UNSAFE.putLongUnaligned(a, adr, v + 1); 1487 } 1488 return new Object[]{ a }; 1489 } 1490 1491 @Test 1492 // Difficult to write good IR rule. Modulo calculus overflow can create non-power-of-2 packs. 1493 static Object[] test17b(long[] a) { 1494 // Not alignable 1495 for (int i = 0; i < RANGE-1; i++) { 1496 long adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8L * i + 1; 1497 long v = UNSAFE.getLongUnaligned(a, adr); 1498 UNSAFE.putLongUnaligned(a, adr, v + 1); 1499 } 1500 return new Object[]{ a }; 1501 } 1502 1503 @Test 1504 @IR(counts = {IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE_2, "> 0", 1505 IRNode.ADD_VL, IRNode.VECTOR_SIZE_2, "> 0", 1506 IRNode.STORE_VECTOR, "> 0"}, 1507 applyIf = {"MaxVectorSize", ">=32"}, 1508 applyIfPlatform = {"64-bit", "true"}, 1509 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) 1510 static Object[] test17c(long[] a) { 1511 // Unsafe: aligned vectorizes 1512 for (int i = 0; i < RANGE-1; i+=4) { 1513 long adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8L * i; 1514 long v0 = UNSAFE.getLongUnaligned(a, adr + 0); 1515 long v1 = UNSAFE.getLongUnaligned(a, adr + 8); 1516 UNSAFE.putLongUnaligned(a, adr + 0, v0 + 1); 1517 UNSAFE.putLongUnaligned(a, adr + 8, v1 + 1); 1518 } 1519 return new Object[]{ a }; 1520 } 1521 1522 @Test 1523 @IR(counts = {IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE_2, "> 0", 1524 IRNode.ADD_VL, IRNode.VECTOR_SIZE_2, "> 0", 1525 IRNode.STORE_VECTOR, "> 0"}, 1526 applyIfCPUFeatureOr = {"avx512", "true", "asimd", "true"}, 1527 applyIfPlatform = {"64-bit", "true"}, 1528 applyIfAnd = {"AlignVector", 
"false", "MaxVectorSize", ">=64"}) 1529 // Ensure vector width is large enough to fit 64 byte for longs: 1530 // The offsets are: 25, 33, 57, 65 1531 // In modulo 32: 25, 1, 25, 1 -> does not vectorize 1532 // In modulo 64: 25, 33, 57, 1 -> at least first pair vectorizes 1533 // This problem is because we compute modulo vector width in memory_alignment. 1534 @IR(counts = {IRNode.LOAD_VECTOR_L, "= 0", 1535 IRNode.ADD_VL, "= 0", 1536 IRNode.STORE_VECTOR, "= 0"}, 1537 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 1538 applyIfPlatform = {"64-bit", "true"}, 1539 applyIf = {"AlignVector", "true"}) 1540 static Object[] test17d(long[] a) { 1541 // Not alignable 1542 for (int i = 0; i < RANGE-1; i+=4) { 1543 long adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8L * i + 1; 1544 long v0 = UNSAFE.getLongUnaligned(a, adr + 0); 1545 long v1 = UNSAFE.getLongUnaligned(a, adr + 8); 1546 UNSAFE.putLongUnaligned(a, adr + 0, v0 + 1); 1547 UNSAFE.putLongUnaligned(a, adr + 8, v1 + 1); 1548 } 1549 return new Object[]{ a }; 1550 } 1551 1552 @Test 1553 static Object[] test18a(byte[] a, int[] b) { 1554 // scale = 0 --> no iv 1555 for (int i = 0; i < RANGE; i++) { 1556 a[0] = 1; 1557 b[i] = 2; 1558 a[1] = 1; 1559 } 1560 return new Object[]{ a, b }; 1561 } 1562 1563 @Test 1564 static Object[] test18b(byte[] a, int[] b) { 1565 // scale = 0 --> no iv 1566 for (int i = 0; i < RANGE; i++) { 1567 a[1] = 1; 1568 b[i] = 2; 1569 a[2] = 1; 1570 } 1571 return new Object[]{ a, b }; 1572 } 1573 1574 @Test 1575 static Object[] test19(int[] a, int[] b) { 1576 for (int i = 5000; i > 0; i--) { 1577 a[RANGE_FINAL - i] = b[RANGE_FINAL - i]; 1578 } 1579 return new Object[]{ a, b }; 1580 } 1581 1582 @Test 1583 static Object[] test20(byte[] a) { 1584 // Example where it is easy to pass alignment check, 1585 // but used to fail the alignment calculation 1586 for (int i = 1; i < RANGE/2-50; i++) { 1587 a[2*i+0+30]++; 1588 a[2*i+1+30]++; 1589 a[2*i+2+30]++; 1590 a[2*i+3+30]++; 1591 } 1592 return new Object[]{ a 
}; 1593 } 1594 }