1 /* 2 * Copyright (c) 2023, 2024, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 */ 23 24 /** 25 * @test 26 * @bug 8310886 8325252 8320622 27 * @summary Test MulAddS2I vectorization. 28 * @library /test/lib / 29 * @run driver compiler.loopopts.superword.TestMulAddS2I 30 */ 31 32 package compiler.loopopts.superword; 33 34 import compiler.lib.ir_framework.*; 35 import jdk.test.lib.Asserts; 36 import jdk.test.lib.Platform; 37 38 public class TestMulAddS2I { 39 static final int RANGE = 1024*16; 40 static final int ITER = RANGE/2 - 1; 41 42 static short[] sArr1 = new short[RANGE]; 43 static short[] sArr2 = new short[RANGE]; 44 static final int[] GOLDEN_A; 45 static final int[] GOLDEN_B; 46 static final int[] GOLDEN_C; 47 static final int[] GOLDEN_D; 48 static final int[] GOLDEN_E; 49 static final int[] GOLDEN_F; 50 static final int[] GOLDEN_G; 51 static final int[] GOLDEN_H; 52 static final int[] GOLDEN_I; 53 static final int[] GOLDEN_J; 54 static final int[] GOLDEN_K; 55 static final int[] GOLDEN_L; 56 57 static { 58 for (int i = 0; i < RANGE; i++) { 59 sArr1[i] = (short)(AbstractInfo.getRandom().nextInt()); 60 sArr2[i] = (short)(AbstractInfo.getRandom().nextInt()); 61 } 62 GOLDEN_A = testa(); 63 GOLDEN_B = testb(); 64 GOLDEN_C = testc(new int[ITER]); 65 GOLDEN_D = testd(new int[ITER]); 66 GOLDEN_E = teste(new int[ITER]); 67 GOLDEN_F = testf(new int[ITER]); 68 GOLDEN_G = testg(new int[ITER]); 69 GOLDEN_H = testh(new int[ITER]); 70 GOLDEN_I = testi(new int[ITER]); 71 GOLDEN_J = testj(new int[ITER]); 72 GOLDEN_K = testk(new int[ITER]); 73 GOLDEN_L = testl(new int[ITER]); 74 } 75 76 77 public static void main(String[] args) { 78 TestFramework.runWithFlags("-XX:+IgnoreUnrecognizedVMOptions", "-XX:+AlignVector"); 79 TestFramework.runWithFlags("-XX:+IgnoreUnrecognizedVMOptions", "-XX:-AlignVector"); 80 } 81 82 @Run(test = {"testa", "testb", "testc", "testd", "teste", "testf", "testg", "testh", 83 "testi", "testj", "testk", "testl"}) 84 @Warmup(0) 85 public static void run() { 86 compare(testa(), GOLDEN_A, "testa"); 87 compare(testb(), GOLDEN_B, "testb"); 88 compare(testc(new int[ITER]), GOLDEN_C, "testc"); 89 compare(testd(new int[ITER]), GOLDEN_D, "testd"); 90 compare(teste(new int[ITER]), GOLDEN_E, "teste"); 91 compare(testf(new int[ITER]), GOLDEN_F, "testf"); 92 compare(testg(new int[ITER]), GOLDEN_G, "testg"); 93 compare(testh(new int[ITER]), GOLDEN_H, "testh"); 94 compare(testi(new int[ITER]), GOLDEN_I, "testi"); 95 compare(testj(new int[ITER]), GOLDEN_J, "testj"); 96 compare(testk(new int[ITER]), GOLDEN_K, "testk"); 97 compare(testl(new int[ITER]), GOLDEN_L, "testl"); 98 } 99 100 public static void compare(int[] out, int[] golden, String name) { 101 for (int i = 0; i < ITER; i++) { 102 Asserts.assertEQ(out[i], golden[i], "wrong result for '" + name + "' out[" + i + "]"); 103 } 104 } 105 106 @Test 107 @IR(applyIfCPUFeature = {"sse2", "true"}, 108 applyIfPlatform = {"64-bit", "true"}, 109 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"}) 110 @IR(applyIfCPUFeature = {"asimd", "true"}, 111 applyIf = {"MaxVectorSize", "16"}, // AD file requires vector_length = 16 112 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"}) 113 @IR(applyIfCPUFeature = {"avx512_vnni", "true"}, 114 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI_VNNI, "> 0"}) 115 public static int[] testa() { 116 int[] out = new int[ITER]; 117 int[] out2 = new int[ITER]; 118 for (int i = 0; i < ITER; i++) { 119 out[i] += ((sArr1[2*i] * sArr1[2*i]) + (sArr1[2*i+1] * sArr1[2*i+1])); 120 out2[i] += out[i]; 121 } 122 return out; 123 } 124 125 @Test 126 @IR(applyIfCPUFeature = {"sse2", "true"}, 127 applyIfPlatform = {"64-bit", "true"}, 128 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"}) 129 @IR(applyIfCPUFeature = {"asimd", "true"}, 130 applyIf = {"MaxVectorSize", "16"}, // AD file requires vector_length = 16 131 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"}) 132 @IR(applyIfCPUFeature = {"avx512_vnni", "true"}, 133 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI_VNNI, "> 0"}) 134 public static int[] testb() { 135 int[] out = new int[ITER]; 136 int[] out2 = new int[ITER]; 137 for (int i = 0; i < ITER; i++) { 138 out[i] += ((sArr1[2*i] * sArr2[2*i]) + (sArr1[2*i+1] * sArr2[2*i+1])); 139 out2[i] += out[i]; 140 } 141 return out; 142 } 143 144 @Test 145 @IR(applyIfCPUFeature = {"sse2", "true"}, 146 applyIfPlatform = {"64-bit", "true"}, 147 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"}) 148 @IR(applyIfCPUFeature = {"asimd", "true"}, 149 applyIf = {"MaxVectorSize", "16"}, // AD file requires vector_length = 16 150 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"}) 151 @IR(applyIfCPUFeature = {"avx512_vnni", "true"}, 152 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI_VNNI, "> 0"}) 153 public static int[] testc(int[] out) { 154 for (int i = 0; i < ITER; i++) { 155 out[i] += ((sArr1[2*i] * sArr2[2*i]) + (sArr1[2*i+1] * sArr2[2*i+1])); 156 } 157 return out; 158 } 159 160 @Test 161 @IR(applyIfCPUFeature = {"sse2", "true"}, 162 applyIfPlatform = {"64-bit", "true"}, 163 applyIf = { "UseCompactObjectHeaders", "false" }, 164 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"}) 165 @IR(applyIfCPUFeature = {"asimd", "true"}, 166 applyIfAnd = {"MaxVectorSize", "16", "UseCompactObjectHeaders", "false" }, // AD file requires vector_length = 16 167 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"}) 168 @IR(applyIfCPUFeature = {"avx512_vnni", "true"}, 169 applyIf = { "UseCompactObjectHeaders", "false" }, 170 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI_VNNI, "> 0"}) 171 public static int[] testd(int[] out) { 172 for (int i = 0; i < ITER-2; i+=2) { 173 // Unrolled, with the same structure. 174 out[i+0] += ((sArr1[2*i+0] * sArr2[2*i+0]) + (sArr1[2*i+1] * sArr2[2*i+1])); 175 out[i+1] += ((sArr1[2*i+2] * sArr2[2*i+2]) + (sArr1[2*i+3] * sArr2[2*i+3])); 176 } 177 return out; 178 } 179 180 @Test 181 @IR(applyIfCPUFeature = {"sse2", "true"}, 182 applyIfPlatform = {"64-bit", "true"}, 183 applyIf = { "UseCompactObjectHeaders", "false" }, 184 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"}) 185 @IR(applyIfCPUFeature = {"asimd", "true"}, 186 applyIfAnd = {"MaxVectorSize", "16", "UseCompactObjectHeaders", "false" }, // AD file requires vector_length = 16 187 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"}) 188 @IR(applyIfCPUFeature = {"avx512_vnni", "true"}, 189 applyIf = { "UseCompactObjectHeaders", "false" }, 190 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI_VNNI, "> 0"}) 191 public static int[] teste(int[] out) { 192 for (int i = 0; i < ITER-2; i+=2) { 193 // Unrolled, with some swaps. 194 out[i+0] += ((sArr1[2*i+0] * sArr2[2*i+0]) + (sArr1[2*i+1] * sArr2[2*i+1])); 195 out[i+1] += ((sArr2[2*i+2] * sArr1[2*i+2]) + (sArr1[2*i+3] * sArr2[2*i+3])); // swap(1 2) 196 } 197 return out; 198 } 199 200 @Test 201 @IR(applyIfCPUFeature = {"sse2", "true"}, 202 applyIfPlatform = {"64-bit", "true"}, 203 applyIf = { "UseCompactObjectHeaders", "false" }, 204 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"}) 205 @IR(applyIfCPUFeature = {"asimd", "true"}, 206 applyIfAnd = {"MaxVectorSize", "16", "UseCompactObjectHeaders", "false" }, // AD file requires vector_length = 16 207 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"}) 208 @IR(applyIfCPUFeature = {"avx512_vnni", "true"}, 209 applyIf = { "UseCompactObjectHeaders", "false" }, 210 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI_VNNI, "> 0"}) 211 public static int[] testf(int[] out) { 212 for (int i = 0; i < ITER-2; i+=2) { 213 // Unrolled, with some swaps. 214 out[i+0] += ((sArr1[2*i+0] * sArr2[2*i+0]) + (sArr1[2*i+1] * sArr2[2*i+1])); 215 out[i+1] += ((sArr2[2*i+2] * sArr1[2*i+2]) + (sArr2[2*i+3] * sArr1[2*i+3])); // swap(1 2), swap(3 4) 216 } 217 return out; 218 } 219 220 @Test 221 @IR(applyIfCPUFeature = {"sse2", "true"}, 222 applyIfPlatform = {"64-bit", "true"}, 223 applyIf = { "UseCompactObjectHeaders", "false" }, 224 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"}) 225 @IR(applyIfCPUFeature = {"asimd", "true"}, 226 applyIfAnd = {"MaxVectorSize", "16", "UseCompactObjectHeaders", "false" }, // AD file requires vector_length = 16 227 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"}) 228 @IR(applyIfCPUFeature = {"avx512_vnni", "true"}, 229 applyIf = { "UseCompactObjectHeaders", "false" }, 230 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI_VNNI, "> 0"}) 231 public static int[] testg(int[] out) { 232 for (int i = 0; i < ITER-2; i+=2) { 233 // Unrolled, with some swaps. 234 out[i+0] += ((sArr1[2*i+0] * sArr2[2*i+0]) + (sArr1[2*i+1] * sArr2[2*i+1])); 235 out[i+1] += ((sArr1[2*i+3] * sArr2[2*i+3]) + (sArr1[2*i+2] * sArr2[2*i+2])); // swap(1 3), swap(2 4) 236 } 237 return out; 238 } 239 240 @Test 241 @IR(applyIfCPUFeature = {"sse2", "true"}, 242 applyIfPlatform = {"64-bit", "true"}, 243 applyIf = { "UseCompactObjectHeaders", "false" }, 244 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"}) 245 @IR(applyIfCPUFeature = {"asimd", "true"}, 246 applyIfAnd = {"MaxVectorSize", "16", "UseCompactObjectHeaders", "false" }, // AD file requires vector_length = 16 247 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"}) 248 @IR(applyIfCPUFeature = {"avx512_vnni", "true"}, 249 applyIf = { "UseCompactObjectHeaders", "false" }, 250 counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI_VNNI, "> 0"}) 251 public static int[] testh(int[] out) { 252 for (int i = 0; i < ITER-2; i+=2) { 253 // Unrolled, with some swaps. 254 out[i+0] += ((sArr1[2*i+0] * sArr2[2*i+0]) + (sArr1[2*i+1] * sArr2[2*i+1])); 255 out[i+1] += ((sArr2[2*i+3] * sArr1[2*i+3]) + (sArr2[2*i+2] * sArr1[2*i+2])); // swap(1 4), swap(2 3) 256 } 257 return out; 258 } 259 260 @Test 261 @IR(counts = {IRNode.MUL_ADD_S2I, "> 0"}, 262 applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"}) 263 @IR(counts = {IRNode.MUL_ADD_VS2VI, "= 0"}) 264 public static int[] testi(int[] out) { 265 for (int i = 0; i < ITER-2; i+=2) { 266 // Unrolled, with some swaps that prevent vectorization. 267 out[i+0] += ((sArr1[2*i+0] * sArr2[2*i+0]) + (sArr1[2*i+1] * sArr2[2*i+1])); // ok 268 out[i+1] += ((sArr1[2*i+2] * sArr2[2*i+3]) + (sArr1[2*i+3] * sArr2[2*i+2])); // bad 269 } 270 return out; 271 } 272 273 @Test 274 @IR(counts = {IRNode.MUL_ADD_S2I, "> 0"}, 275 applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"}) 276 @IR(counts = {IRNode.MUL_ADD_VS2VI, "= 0"}) 277 public static int[] testj(int[] out) { 278 for (int i = 0; i < ITER-2; i+=2) { 279 // Unrolled, with some swaps that prevent vectorization. 280 out[i+0] += ((sArr1[2*i+0] * sArr2[2*i+1]) + (sArr1[2*i+1] * sArr2[2*i+0])); // bad 281 out[i+1] += ((sArr1[2*i+2] * sArr2[2*i+3]) + (sArr1[2*i+3] * sArr2[2*i+2])); // bad 282 } 283 return out; 284 } 285 286 @Test 287 @IR(counts = {IRNode.MUL_ADD_S2I, "> 0"}, 288 applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"}) 289 @IR(counts = {IRNode.MUL_ADD_VS2VI, "= 0"}) 290 public static int[] testk(int[] out) { 291 for (int i = 0; i < ITER-2; i+=2) { 292 // Unrolled, with some swaps that prevent vectorization. 293 out[i+0] += ((sArr1[2*i+0] * sArr2[2*i+1]) + (sArr1[2*i+1] * sArr2[2*i+0])); // bad 294 out[i+1] += ((sArr1[2*i+2] * sArr2[2*i+2]) + (sArr1[2*i+3] * sArr2[2*i+3])); // ok 295 } 296 return out; 297 } 298 299 @Test 300 @IR(counts = {IRNode.MUL_ADD_S2I, "> 0"}, 301 applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"}) 302 @IR(counts = {IRNode.MUL_ADD_VS2VI, "= 0"}) 303 public static int[] testl(int[] out) { 304 for (int i = 0; i < ITER-2; i+=2) { 305 // Unrolled, with some swaps that prevent vectorization. 306 out[i+0] += ((sArr1[2*i+1] * sArr2[2*i+1]) + (sArr1[2*i+0] * sArr2[2*i+0])); // ok 307 out[i+1] += ((sArr1[2*i+2] * sArr2[2*i+3]) + (sArr1[2*i+3] * sArr2[2*i+2])); // bad 308 } 309 return out; 310 } 311 312 }