/*
 * Copyright (c) 2022, 2023, Arm Limited. All rights reserved.
 * Copyright (c) 2023, 2024, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

/*
 * @test
 * @summary Vectorization test on combined operations
 * @library /test/lib /
 *
 * @build jdk.test.whitebox.WhiteBox
 *        compiler.vectorization.runner.VectorizationTestRunner
 *
 * @requires vm.compiler2.enabled
 *
 * @run driver jdk.test.lib.helpers.ClassFileInstaller jdk.test.whitebox.WhiteBox
 *
 * @run main/othervm -Xbootclasspath/a:.
 *                   -XX:+UnlockDiagnosticVMOptions
 *                   -XX:+WhiteBoxAPI
 *                   compiler.vectorization.runner.LoopCombinedOpTest nCOH_nAV
 *
 * @run main/othervm -Xbootclasspath/a:.
 *                   -XX:+UnlockDiagnosticVMOptions
 *                   -XX:+WhiteBoxAPI
 *                   compiler.vectorization.runner.LoopCombinedOpTest nCOH_yAV
 *
 * @run main/othervm -Xbootclasspath/a:.
 *                   -XX:+UnlockDiagnosticVMOptions
 *                   -XX:+WhiteBoxAPI
 *                   compiler.vectorization.runner.LoopCombinedOpTest yCOH_nAV
 *
 * @run main/othervm -Xbootclasspath/a:.
 *                   -XX:+UnlockDiagnosticVMOptions
 *                   -XX:+WhiteBoxAPI
 *                   compiler.vectorization.runner.LoopCombinedOpTest yCOH_yAV
 */

package compiler.vectorization.runner;

import compiler.lib.ir_framework.*;

import java.util.Random;

/**
 * Loops that combine several operations, constants, loop invariants and element
 * types, each annotated with the vector IR nodes expected after compilation.
 *
 * <p>The test is launched four times (see the {@code @run} tags above), once for
 * every combination of {@code UseCompactObjectHeaders} and {@code AlignVector};
 * {@link #testVMFlags(String[])} maps the run argument to the flags.
 *
 * <p>The loop bodies are the test input: their exact shape (operand order,
 * operator precedence, seemingly unused values) is deliberate and must not be
 * "cleaned up".
 *
 * <p>NOTE(review): the loops that access both {@code short[]} and {@code int[]}
 * carry {@code applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"}}.
 * This assumes that with compact object headers both array kinds start at a
 * base offset that is 4- but not 8-byte aligned, so that no single loop index
 * aligns a 2-byte and a 4-byte access stream at the same time and the
 * "both vectorize" expectation cannot hold under {@code AlignVector}. Confirm
 * against JDK-8340010 before relying on this explanation.
 */
public class LoopCombinedOpTest extends VectorizationTestRunner {

    /**
     * Translates the run-configuration name given on the command line into the
     * flags for the test VM.
     *
     * @param args the arguments of the {@code @run} line; {@code args[0]} must be
     *             one of {@code nCOH_nAV}, {@code nCOH_yAV}, {@code yCOH_nAV},
     *             {@code yCOH_yAV}
     * @return the VM flags for the selected configuration
     * @throws RuntimeException if the argument is missing or not recognized
     */
    // We must pass the flags directly to the test-VM, and not the driver vm in the @run above.
    @Override
    protected String[] testVMFlags(String[] args) {
        if (args == null || args.length == 0) {
            // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
            throw new RuntimeException("Test argument missing: expected one of nCOH_nAV, nCOH_yAV, yCOH_nAV, yCOH_yAV");
        }
        return switch (args[0]) {
            case "nCOH_nAV" -> new String[]{"-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:-AlignVector"};
            case "nCOH_yAV" -> new String[]{"-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:+AlignVector"};
            case "yCOH_nAV" -> new String[]{"-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:-AlignVector"};
            case "yCOH_yAV" -> new String[]{"-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:+AlignVector"};
            default -> { throw new RuntimeException("Test argument not recognized: " + args[0]); }
        };
    }

    // Length of every input and result array.
    private static final int SIZE = 543;

    private int[] a;
    private int[] b;
    private int[] c;
    private int[] d;
    private long[] l1;
    private long[] l2;
    private short[] s1;
    private short[] s2;
    // Value read inside the loops but never written there.
    private int intInv;

    /**
     * Fills the input arrays with index-dependent values and draws the loop
     * invariant from a fixed-seed generator, so every run sees the same data.
     */
    public LoopCombinedOpTest() {
        a = new int[SIZE];
        b = new int[SIZE];
        c = new int[SIZE];
        d = new int[SIZE];
        l1 = new long[SIZE];
        l2 = new long[SIZE];
        s1 = new short[SIZE];
        s2 = new short[SIZE];
        for (int i = 0; i < SIZE; i++) {
            a[i] = -654321 * i;
            b[i] = 123456 * i;
            c[i] = -998877 * i;
            d[i] = 778899 * i;
            l1[i] = 5000000000L * i;
            l2[i] = -600000000L * i;
            s1[i] = (short) (3 * i);
            s2[i] = (short) (-2 * i);
        }
        Random ran = new Random(999);
        intInv = ran.nextInt();
    }

    /** Adds an int constant to every element. */
    @Test
    @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
        counts = {IRNode.STORE_VECTOR, ">0",
                  IRNode.LOAD_VECTOR_I, "> 0"})
    public int[] opWithConstant() {
        int[] res = new int[SIZE];
        for (int i = 0; i < SIZE; i++) {
            res[i] = a[i] + 1234567890;
        }
        return res;
    }

    /** Multiplies every element by the loop-invariant field {@code intInv}. */
    @Test
    @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"},
        counts = {IRNode.STORE_VECTOR, ">0",
                  IRNode.LOAD_VECTOR_I, "> 0"})
    public int[] opWithLoopInvariant() {
        int[] res = new int[SIZE];
        for (int i = 0; i < SIZE; i++) {
            res[i] = b[i] * intInv;
        }
        return res;
    }

    /** Multiplies every element by an expression built from a constant and the invariant. */
    @Test
    @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"},
        counts = {IRNode.STORE_VECTOR, ">0",
                  IRNode.LOAD_VECTOR_I, "> 0"})
    public int[] opWithConstantAndLoopInvariant() {
        int[] res = new int[SIZE];
        for (int i = 0; i < SIZE; i++) {
            res[i] = c[i] * (intInv & 0xfff);
        }
        return res;
    }

    /** Combines four input arrays with AND and ADD in a single expression. */
    @Test
    @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
        counts = {IRNode.STORE_VECTOR, ">0",
                  IRNode.LOAD_VECTOR_I, "> 0"})
    public int[] multipleOps() {
        int[] res = new int[SIZE];
        for (int i = 0; i < SIZE; i++) {
            // '+' binds tighter than '&': this is a[i] & (b[i] + c[i]) & d[i].
            res[i] = a[i] & b[i] + c[i] & d[i];
        }
        return res;
    }

    /** Mixes multiply, add, subtract and AND with several different constants. */
    @Test
    @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"},
        counts = {IRNode.STORE_VECTOR, ">0",
                  IRNode.LOAD_VECTOR_I, "> 0"})
    public int[] multipleOpsWithMultipleConstants() {
        int[] res = new int[SIZE];
        for (int i = 0; i < SIZE; i++) {
            res[i] = a[i] * 12345678 + 87654321 + b[i] & 0xffff - c[i] * d[i] * 2;
        }
        return res;
    }

    /** Stores to three result arrays, the third computed from the first two. */
    @Test
    @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"},
        counts = {IRNode.STORE_VECTOR, ">0",
                  IRNode.LOAD_VECTOR_I, "> 0"})
    // With sse2, the MulI does not vectorize. This means we have vectorized stores
    // to res1, but scalar loads from res1. The store-to-load-forwarding failure
    // detection catches this and rejects vectorization.
    public int[] multipleStores() {
        int[] res1 = new int[SIZE];
        int[] res2 = new int[SIZE];
        int[] res3 = new int[SIZE];
        for (int i = 0; i < SIZE; i++) {
            res1[i] = a[i] & b[i];
            res2[i] = c[i] | d[i];
            res3[i] = res1[i] * res2[i];
        }
        return res3;
    }

    /** Like {@link #multipleStores()}, with products stored first and then summed. */
    @Test
    @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"},
        counts = {IRNode.STORE_VECTOR, ">0",
                  IRNode.LOAD_VECTOR_I, "> 0"})
    public int[] multipleStoresWithCommonSubExpression() {
        int[] res1 = new int[SIZE];
        int[] res2 = new int[SIZE];
        int[] res3 = new int[SIZE];
        for (int i = 0; i < SIZE; i++) {
            res1[i] = a[i] * b[i];
            res2[i] = c[i] * d[i];
            res3[i] = res1[i] + res2[i];
        }
        return res3;
    }

    /** One short addition and one int addition in the same loop. */
    @Test
    @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
        // NOTE(review): short[] and int[] presumably cannot both be aligned with
        // compact headers + AlignVector (see class comment) -- confirm.
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        counts = {IRNode.STORE_VECTOR, ">0",
                  IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.LOAD_VECTOR_I, "> 0"})
    public int[] multipleOpsWith2DifferentTypes() {
        short[] res1 = new short[SIZE];
        int[] res2 = new int[SIZE];
        for (int i = 0; i < SIZE; i++) {
            res1[i] = (short) (s1[i] + s2[i]);
            res2[i] = a[i] + b[i];
        }
        return res2;
    }

    /** Short, int and long additions in the same loop. */
    @Test
    @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
        // NOTE(review): the loop also contains short[] accesses; with compact
        // headers + AlignVector they presumably conflict with the int[] alignment
        // (see class comment) -- confirm.
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        counts = {IRNode.STORE_VECTOR, ">0",
                  IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_ANY, "> 0",
                  IRNode.LOAD_VECTOR_L, "> 0"})
    public long[] multipleOpsWith3DifferentTypes() {
        short[] res1 = new short[SIZE];
        int[] res2 = new int[SIZE];
        long[] res3 = new long[SIZE];
        for (int i = 0; i < SIZE; i++) {
            res1[i] = (short) (s1[i] + s2[i]);
            res2[i] = a[i] + b[i];
            res3[i] = l1[i] + l2[i];
        }
        return res3;
    }

    /** Short and long additions in the same loop (no int accesses in between). */
    @Test
    @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
        counts = {IRNode.STORE_VECTOR, ">0",
                  IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_ANY, "> 0",
                  IRNode.LOAD_VECTOR_L, "> 0"})
    public long[] multipleOpsWith2NonAdjacentTypes() {
        short[] res1 = new short[SIZE];
        long[] res2 = new long[SIZE];
        for (int i = 0; i < SIZE; i++) {
            res1[i] = (short) (s1[i] + s2[i]);
            res2[i] = l1[i] + l2[i];
        }
        return res2;
    }

    /** One short addition plus an int addition with a constant. */
    @Test
    @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
        // NOTE(review): short[] and int[] presumably cannot both be aligned with
        // compact headers + AlignVector (see class comment) -- confirm.
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        counts = {IRNode.STORE_VECTOR, ">0",
                  IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.LOAD_VECTOR_I, "> 0"})
    public int[] multipleOpsWith2DifferentTypesAndConstant() {
        short[] res1 = new short[SIZE];
        int[] res2 = new int[SIZE];
        for (int i = 0; i < SIZE; i++) {
            res1[i] = (short) (s1[i] + s2[i]);
            res2[i] = a[i] + 88888888;
        }
        return res2;
    }

    /** One short addition plus an int multiplication by the loop invariant. */
    @Test
    @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"},
        // NOTE(review): short[] and int[] presumably cannot both be aligned with
        // compact headers + AlignVector (see class comment) -- confirm.
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        counts = {IRNode.STORE_VECTOR, ">0",
                  IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.LOAD_VECTOR_I, "> 0"})
    public int[] multipleOpsWith2DifferentTypesAndInvariant() {
        short[] res1 = new short[SIZE];
        int[] res2 = new int[SIZE];
        for (int i = 0; i < SIZE; i++) {
            res1[i] = (short) (s1[i] + s2[i]);
            res2[i] = a[i] * intInv;
        }
        return res2;
    }

    /** One short addition plus a larger int expression using the invariant. */
    @Test
    @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"},
        // NOTE(review): short[] and int[] presumably cannot both be aligned with
        // compact headers + AlignVector (see class comment) -- confirm.
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        counts = {IRNode.STORE_VECTOR, ">0",
                  IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.LOAD_VECTOR_I, "> 0"})
    public int[] multipleOpsWith2DifferentTypesAndComplexExpression() {
        short[] res1 = new short[SIZE];
        int[] res2 = new int[SIZE];
        for (int i = 0; i < SIZE; i++) {
            res1[i] = (short) (s1[i] + s2[i]);
            res2[i] = a[i] * (b[i] + intInv * c[i] & 0xfffffa);
        }
        return res2;
    }

    /**
     * Decrements an int array and increments a short array in a {@code while}
     * loop that starts at index 1 (pre-increment in the condition).
     */
    @Test
    @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse3", "true"},
        // NOTE(review): short[] and int[] presumably cannot both be aligned with
        // compact headers + AlignVector (see class comment) -- confirm.
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        counts = {IRNode.STORE_VECTOR, ">0",
                  IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.LOAD_VECTOR_I, "> 0"})
    public int[] multipleOpsWith2DifferentTypesAndSharedOp() {
        int i = 0, sum = 0;
        int[] res1 = new int[SIZE];
        short[] res2 = new short[SIZE];
        while (++i < SIZE) {
            // 'sum' is accumulated but never returned; kept as part of the loop shape.
            sum += (res1[i]--);
            res2[i]++;
        }
        return res1;
    }

    /** Fills the result with {@code index + 1}; no IR expectation is checked. */
    @Test
    // POPULATE_INDEX seems to mess with vectorization, see JDK-8332878.
    public int[] fillIndexPlusStride() {
        int[] res = new int[SIZE];
        for (int i = 0; i < SIZE; i++) {
            res[i] = i + 1;
        }
        return res;
    }

    /** Adds the loop index to every element; no IR expectation is checked. */
    @Test
    // POPULATE_INDEX seems to mess with vectorization, see JDK-8332878.
    public int[] addArrayWithIndex() {
        int[] res = new int[SIZE];
        for (int i = 0; i < SIZE; i++) {
            res[i] = a[i] + i;
        }
        return res;
    }

    /** Stores {@code i * i + i} narrowed to short; no IR expectation is checked. */
    @Test
    // POPULATE_INDEX seems to mess with vectorization, see JDK-8332878.
    public short[] multiplyAddShortIndex() {
        short[] res = new short[SIZE];
        for (int i = 0; i < SIZE; i++) {
            res[i] = (short) (i * i + i);
        }
        return res;
    }

    /** Multiplies by index plus constant plus invariant; no IR expectation is checked. */
    @Test
    // POPULATE_INDEX seems to mess with vectorization, see JDK-8332878.
    public int[] multiplyBySumOfIndexAndInvariant() {
        int[] res = new int[SIZE];
        for (int i = 0; i < SIZE; i++) {
            res[i] = a[i] * (i + 10 + intInv);
        }
        return res;
    }

    /** Stride-2 loop whose body handles two adjacent elements by hand. */
    @Test
    @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"},
        counts = {IRNode.STORE_VECTOR, ">0"})
    public int[] manuallyUnrolledStride2() {
        int[] res = new int[SIZE];
        for (int i = 0; i < SIZE - 1; i += 2) {
            res[i] = a[i] * b[i];
            res[i + 1] = a[i + 1] * b[i + 1];
        }
        return res;
    }

    /**
     * Element-wise product over the first half of the arrays, next to a scalar
     * recurrence on {@code k} that depends on its own previous value.
     *
     * @return the final value of {@code k}
     */
    @Test
    @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"},
        counts = {IRNode.STORE_VECTOR, ">0",
                  IRNode.LOAD_VECTOR_I, "> 0"})
    public int partialVectorizableLoop() {
        int[] res = new int[SIZE];
        int k = 9;
        for (int i = 0; i < SIZE / 2; i++) {
            res[i] = a[i] * b[i];
            k = 3 * k + 1;
        }
        return k;
    }
}