1 /* 2 * Copyright (c) 2023, 2024, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 */ 23 24 /** 25 * @test 26 * @bug 8310886 8325252 8320622 27 * @summary Test MulAddS2I vectorization. 28 * @library /test/lib / 29 * @run driver compiler.loopopts.superword.TestMulAddS2I 30 */ 31 32 package compiler.loopopts.superword; 33 34 import compiler.lib.ir_framework.*; 35 import jdk.test.lib.Asserts; 36 import jdk.test.lib.Platform; 37 38 public class TestMulAddS2I { 39 static final int RANGE = 1024*16; 40 static final int ITER = RANGE/2 - 1; 41 42 static short[] sArr1 = new short[RANGE]; 43 static short[] sArr2 = new short[RANGE]; 44 static final int[] GOLDEN_A; 45 static final int[] GOLDEN_B; 46 static final int[] GOLDEN_C; 47 static final int[] GOLDEN_D; 48 static final int[] GOLDEN_E; 49 static final int[] GOLDEN_F; 50 static final int[] GOLDEN_G; 51 static final int[] GOLDEN_H; 52 static final int[] GOLDEN_I; 53 static final int[] GOLDEN_J; 54 static final int[] GOLDEN_K; 55 static final int[] GOLDEN_L; 56 static final int[] GOLDEN_M; 57 58 static { 59 for (int i = 0; i < RANGE; i++) { 60 sArr1[i] = (short)(AbstractInfo.getRandom().nextInt()); 61 sArr2[i] = (short)(AbstractInfo.getRandom().nextInt()); 62 } 63 GOLDEN_A = testa(); 64 GOLDEN_B = testb(); 65 GOLDEN_C = testc(new int[ITER]); 66 GOLDEN_D = testd(new int[ITER]); 67 GOLDEN_E = teste(new int[ITER]); 68 GOLDEN_F = testf(new int[ITER]); 69 GOLDEN_G = testg(new int[ITER]); 70 GOLDEN_H = testh(new int[ITER]); 71 GOLDEN_I = testi(new int[ITER]); 72 GOLDEN_J = testj(new int[ITER]); 73 GOLDEN_K = testk(new int[ITER]); 74 GOLDEN_L = testl(new int[ITER]); 75 GOLDEN_M = testm(new int[ITER]); 76 } 77 78 79 public static void main(String[] args) { 80 TestFramework.runWithFlags("-XX:+IgnoreUnrecognizedVMOptions", "-XX:-AlignVector", "-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaers"); 81 TestFramework.runWithFlags("-XX:+IgnoreUnrecognizedVMOptions", "-XX:+AlignVector", "-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaers"); 82 TestFramework.runWithFlags("-XX:+IgnoreUnrecognizedVMOptions", "-XX:-AlignVector", "-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaers"); 83 TestFramework.runWithFlags("-XX:+IgnoreUnrecognizedVMOptions", "-XX:+AlignVector", "-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaers"); 84 } 85 86 @Run(test = {"testa", "testb", "testc", "testd", "teste", "testf", "testg", "testh", 87 "testi", "testj", "testk", "testl", "testm"}) 88 @Warmup(0) 89 public static void run() { 90 compare(testa(), GOLDEN_A, "testa"); 91 compare(testb(), GOLDEN_B, "testb"); 92 compare(testc(new int[ITER]), GOLDEN_C, "testc"); 93 compare(testd(new int[ITER]), GOLDEN_D, "testd"); 94 compare(teste(new int[ITER]), GOLDEN_E, "teste"); 95 compare(testf(new int[ITER]), GOLDEN_F, "testf"); 96 compare(testg(new int[ITER]), GOLDEN_G, "testg"); 97 compare(testh(new int[ITER]), GOLDEN_H, "testh"); 98 compare(testi(new int[ITER]), GOLDEN_I, "testi"); 99 compare(testj(new int[ITER]), GOLDEN_J, "testj"); 100 compare(testk(new int[ITER]), GOLDEN_K, "testk"); 101 compare(testl(new int[ITER]), GOLDEN_L, "testl"); 102 compare(testm(new int[ITER]), GOLDEN_M, "testm"); 103 } 104 105 public static void compare(int[] out, int[] golden, String name) { 106 for (int i = 0; i < ITER; i++) { 107 Asserts.assertEQ(out[i], golden[i], "wrong result for '" + name + "' out[" + i + "]"); 108 } 109 } 110 111 @Test 112 @IR(applyIfCPUFeature = {"sse2", "true"}, 113 applyIfPlatform = {"64-bit", "true"}, 114 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"}) 115 @IR(applyIfCPUFeature = {"asimd", "true"}, 116 applyIf = {"MaxVectorSize", "16"}, // AD file requires vector_length = 16 117 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"}) 118 @IR(applyIfCPUFeature = {"avx512_vnni", "true"}, 119 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI_VNNI, "> 0"}) 120 public static int[] testa() { 121 int[] out = new int[ITER]; 122 int[] out2 = new int[ITER]; 123 for (int i = 0; i < ITER; i++) { 124 out[i] += ((sArr1[2*i] * sArr1[2*i]) + (sArr1[2*i+1] * sArr1[2*i+1])); 125 out2[i] += out[i]; 126 } 127 return out; 128 } 129 130 @Test 131 @IR(applyIfCPUFeature = {"sse2", "true"}, 132 applyIfPlatform = {"64-bit", "true"}, 133 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"}) 134 @IR(applyIfCPUFeature = {"asimd", "true"}, 135 applyIf = {"MaxVectorSize", "16"}, // AD file requires vector_length = 16 136 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"}) 137 @IR(applyIfCPUFeature = {"avx512_vnni", "true"}, 138 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI_VNNI, "> 0"}) 139 public static int[] testb() { 140 int[] out = new int[ITER]; 141 int[] out2 = new int[ITER]; 142 for (int i = 0; i < ITER; i++) { 143 out[i] += ((sArr1[2*i] * sArr2[2*i]) + (sArr1[2*i+1] * sArr2[2*i+1])); 144 out2[i] += out[i]; 145 } 146 return out; 147 } 148 149 @Test 150 @IR(applyIfCPUFeature = {"sse2", "true"}, 151 applyIfPlatform = {"64-bit", "true"}, 152 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"}) 153 @IR(applyIfCPUFeature = {"asimd", "true"}, 154 applyIf = {"MaxVectorSize", "16"}, // AD file requires vector_length = 16 155 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"}) 156 @IR(applyIfCPUFeature = {"avx512_vnni", "true"}, 157 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI_VNNI, "> 0"}) 158 public static int[] testc(int[] out) { 159 for (int i = 0; i < ITER; i++) { 160 out[i] += ((sArr1[2*i] * sArr2[2*i]) + (sArr1[2*i+1] * sArr2[2*i+1])); 161 } 162 return out; 163 } 164 165 @Test 166 @IR(applyIfCPUFeature = {"sse2", "true"}, 167 applyIfPlatform = {"64-bit", "true"}, 168 applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false" }, 169 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"}) 170 @IR(applyIfCPUFeature = {"asimd", "true"}, 171 applyIfAnd = {"MaxVectorSize", "16", "UseCompactObjectHeaders", "false"}, // AD file requires vector_length = 16 172 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"}) 173 @IR(applyIfCPUFeature = {"avx512_vnni", "true"}, 174 applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false" }, 175 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI_VNNI, "> 0"}) 176 public static int[] testd(int[] out) { 177 for (int i = 0; i < ITER-2; i+=2) { 178 // Unrolled, with the same structure. 179 out[i+0] += ((sArr1[2*i+0] * sArr2[2*i+0]) + (sArr1[2*i+1] * sArr2[2*i+1])); 180 out[i+1] += ((sArr1[2*i+2] * sArr2[2*i+2]) + (sArr1[2*i+3] * sArr2[2*i+3])); 181 // Hand-unrolling can mess with AlignVector and UseCompactObjectHeaders. 182 // We need all addresses 8-byte aligned. 183 // 184 // out: 185 // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 8*iter 186 // = 16 (or 12 if UseCompactObjectHeaders=true) 187 // -> never aligned! 188 } 189 return out; 190 } 191 192 @Test 193 @IR(applyIfCPUFeature = {"sse2", "true"}, 194 applyIfPlatform = {"64-bit", "true"}, 195 applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false" }, 196 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"}) 197 @IR(applyIfCPUFeature = {"asimd", "true"}, 198 applyIfAnd = {"MaxVectorSize", "16", "UseCompactObjectHeaders", "false" }, // AD file requires vector_length = 16 199 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"}) 200 @IR(applyIfCPUFeature = {"avx512_vnni", "true"}, 201 applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false" }, 202 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI_VNNI, "> 0"}) 203 public static int[] teste(int[] out) { 204 for (int i = 0; i < ITER-2; i+=2) { 205 // Unrolled, with some swaps. 206 out[i+0] += ((sArr1[2*i+0] * sArr2[2*i+0]) + (sArr1[2*i+1] * sArr2[2*i+1])); 207 out[i+1] += ((sArr2[2*i+2] * sArr1[2*i+2]) + (sArr1[2*i+3] * sArr2[2*i+3])); // swap(1 2) 208 // Hand-unrolling can mess with AlignVector and UseCompactObjectHeaders. 209 // We need all addresses 8-byte aligned. 210 // 211 // out: 212 // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 8*iter 213 // = 16 (or 12 if UseCompactObjectHeaders=true) 214 // -> never aligned! 215 } 216 return out; 217 } 218 219 @Test 220 @IR(applyIfCPUFeature = {"sse2", "true"}, 221 applyIfPlatform = {"64-bit", "true"}, 222 applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false" }, 223 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"}) 224 @IR(applyIfCPUFeature = {"asimd", "true"}, 225 applyIfAnd = {"MaxVectorSize", "16", "UseCompactObjectHeaders", "false" }, // AD file requires vector_length = 16 226 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"}) 227 @IR(applyIfCPUFeature = {"avx512_vnni", "true"}, 228 applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false" }, 229 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI_VNNI, "> 0"}) 230 public static int[] testf(int[] out) { 231 for (int i = 0; i < ITER-2; i+=2) { 232 // Unrolled, with some swaps. 233 out[i+0] += ((sArr1[2*i+0] * sArr2[2*i+0]) + (sArr1[2*i+1] * sArr2[2*i+1])); 234 out[i+1] += ((sArr2[2*i+2] * sArr1[2*i+2]) + (sArr2[2*i+3] * sArr1[2*i+3])); // swap(1 2), swap(3 4) 235 // Hand-unrolling can mess with AlignVector and UseCompactObjectHeaders. 236 // We need all addresses 8-byte aligned. 237 // 238 // out: 239 // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 8*iter 240 // = 16 (or 12 if UseCompactObjectHeaders=true) 241 // -> never aligned! 242 } 243 return out; 244 } 245 246 @Test 247 @IR(applyIfCPUFeature = {"sse2", "true"}, 248 applyIfPlatform = {"64-bit", "true"}, 249 applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false" }, 250 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"}) 251 @IR(applyIfCPUFeature = {"asimd", "true"}, 252 applyIfAnd = {"MaxVectorSize", "16", "UseCompactObjectHeaders", "false" }, // AD file requires vector_length = 16 253 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"}) 254 @IR(applyIfCPUFeature = {"avx512_vnni", "true"}, 255 applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false" }, 256 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI_VNNI, "> 0"}) 257 public static int[] testg(int[] out) { 258 for (int i = 0; i < ITER-2; i+=2) { 259 // Unrolled, with some swaps. 260 out[i+0] += ((sArr1[2*i+0] * sArr2[2*i+0]) + (sArr1[2*i+1] * sArr2[2*i+1])); 261 out[i+1] += ((sArr1[2*i+3] * sArr2[2*i+3]) + (sArr1[2*i+2] * sArr2[2*i+2])); // swap(1 3), swap(2 4) 262 // Hand-unrolling can mess with AlignVector and UseCompactObjectHeaders. 263 // We need all addresses 8-byte aligned. 264 // 265 // out: 266 // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 8*iter 267 // = 16 (or 12 if UseCompactObjectHeaders=true) 268 // -> never aligned! 269 } 270 return out; 271 } 272 273 @Test 274 @IR(applyIfCPUFeature = {"sse2", "true"}, 275 applyIfPlatform = {"64-bit", "true"}, 276 applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false" }, 277 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"}) 278 @IR(applyIfCPUFeature = {"asimd", "true"}, 279 applyIfAnd = {"MaxVectorSize", "16", "UseCompactObjectHeaders", "false" }, // AD file requires vector_length = 16 280 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"}) 281 @IR(applyIfCPUFeature = {"avx512_vnni", "true"}, 282 applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false" }, 283 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI_VNNI, "> 0"}) 284 public static int[] testh(int[] out) { 285 for (int i = 0; i < ITER-2; i+=2) { 286 // Unrolled, with some swaps. 287 out[i+0] += ((sArr1[2*i+0] * sArr2[2*i+0]) + (sArr1[2*i+1] * sArr2[2*i+1])); 288 out[i+1] += ((sArr2[2*i+3] * sArr1[2*i+3]) + (sArr2[2*i+2] * sArr1[2*i+2])); // swap(1 4), swap(2 3) 289 // Hand-unrolling can mess with AlignVector and UseCompactObjectHeaders. 290 // We need all addresses 8-byte aligned. 291 // 292 // out: 293 // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 8*iter 294 // = 16 (or 12 if UseCompactObjectHeaders=true) 295 // -> never aligned! 296 } 297 return out; 298 } 299 300 @Test 301 @IR(counts = {IRNode.MUL_ADD_S2I, "> 0"}, 302 applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"}) 303 @IR(counts = {IRNode.MUL_ADD_VS2VI, "= 0"}) 304 public static int[] testi(int[] out) { 305 for (int i = 0; i < ITER-2; i+=2) { 306 // Unrolled, with some swaps that prevent vectorization. 307 out[i+0] += ((sArr1[2*i+0] * sArr2[2*i+0]) + (sArr1[2*i+1] * sArr2[2*i+1])); // ok 308 out[i+1] += ((sArr1[2*i+2] * sArr2[2*i+3]) + (sArr1[2*i+3] * sArr2[2*i+2])); // bad 309 } 310 return out; 311 } 312 313 @Test 314 @IR(counts = {IRNode.MUL_ADD_S2I, "> 0"}, 315 applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"}) 316 @IR(counts = {IRNode.MUL_ADD_VS2VI, "= 0"}) 317 public static int[] testj(int[] out) { 318 for (int i = 0; i < ITER-2; i+=2) { 319 // Unrolled, with some swaps that prevent vectorization. 320 out[i+0] += ((sArr1[2*i+0] * sArr2[2*i+1]) + (sArr1[2*i+1] * sArr2[2*i+0])); // bad 321 out[i+1] += ((sArr1[2*i+2] * sArr2[2*i+3]) + (sArr1[2*i+3] * sArr2[2*i+2])); // bad 322 } 323 return out; 324 } 325 326 @Test 327 @IR(counts = {IRNode.MUL_ADD_S2I, "> 0"}, 328 applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"}) 329 @IR(counts = {IRNode.MUL_ADD_VS2VI, "= 0"}) 330 public static int[] testk(int[] out) { 331 for (int i = 0; i < ITER-2; i+=2) { 332 // Unrolled, with some swaps that prevent vectorization. 333 out[i+0] += ((sArr1[2*i+0] * sArr2[2*i+1]) + (sArr1[2*i+1] * sArr2[2*i+0])); // bad 334 out[i+1] += ((sArr1[2*i+2] * sArr2[2*i+2]) + (sArr1[2*i+3] * sArr2[2*i+3])); // ok 335 } 336 return out; 337 } 338 339 @Test 340 @IR(counts = {IRNode.MUL_ADD_S2I, "> 0"}, 341 applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"}) 342 @IR(counts = {IRNode.MUL_ADD_VS2VI, "= 0"}) 343 public static int[] testl(int[] out) { 344 for (int i = 0; i < ITER-2; i+=2) { 345 // Unrolled, with some swaps that prevent vectorization. 346 out[i+0] += ((sArr1[2*i+1] * sArr2[2*i+1]) + (sArr1[2*i+0] * sArr2[2*i+0])); // ok 347 out[i+1] += ((sArr1[2*i+2] * sArr2[2*i+3]) + (sArr1[2*i+3] * sArr2[2*i+2])); // bad 348 } 349 return out; 350 } 351 352 @Test 353 @IR(counts = {IRNode.MUL_ADD_S2I, "> 0"}, 354 applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"}) 355 @IR(counts = {IRNode.MUL_ADD_VS2VI, "= 0"}) 356 public static int[] testm(int[] out) { 357 for (int i = 0; i < ITER-4; i+=4) { 358 // Unrolled, with some swaps that prevent vectorization. 359 out[i+0] += ((sArr1[2*i+0] * sArr2[2*i+1]) + (sArr1[2*i+1] * sArr2[2*i+0])); // bad 360 out[i+1] += ((sArr1[2*i+2] * sArr2[2*i+2]) + (sArr1[2*i+3] * sArr2[2*i+3])); // ok 361 // 2-element gap 362 } 363 return out; 364 } 365 }