121 case "NoAlignVector-COH" -> { framework.addFlags("-XX:+UseCompactObjectHeaders", "-XX:-AlignVector"); }
122 case "VerifyAlignVector-COH" -> { framework.addFlags("-XX:+UseCompactObjectHeaders", "-XX:+AlignVector", "-XX:+IgnoreUnrecognizedVMOptions", "-XX:+VerifyAlignVector"); }
123 default -> { throw new RuntimeException("Test argument not recognized: " + args[0]); }
124 }
125 framework.start();
126 }
127
128 public TestAlignVector() {
129 // Generate input once
130 aB = generateB();
131 bB = generateB();
132 aS = generateS();
133 bS = generateS();
134 aI = generateI();
135 bI = generateI();
136 aL = generateL();
137 bL = generateL();
138
139 // Add all tests to list
140 tests.put("test0", () -> { return test0(aB.clone(), bB.clone(), mB); });
141 tests.put("test1a", () -> { return test1a(aB.clone(), bB.clone(), mB); });
142 tests.put("test1b", () -> { return test1b(aB.clone(), bB.clone(), mB); });
143 tests.put("test2", () -> { return test2(aB.clone(), bB.clone(), mB); });
144 tests.put("test3", () -> { return test3(aB.clone(), bB.clone(), mB); });
145 tests.put("test4", () -> { return test4(aB.clone(), bB.clone(), mB); });
146 tests.put("test5", () -> { return test5(aB.clone(), bB.clone(), mB, 0); });
147 tests.put("test6", () -> { return test6(aB.clone(), bB.clone(), mB); });
148 tests.put("test7", () -> { return test7(aS.clone(), bS.clone(), mS); });
// FIX: the map previously had two puts under the same key "test8" (arg 0 and
// arg 1). Map.put overwrites, so the (mB, 0) entry was dead code: it never ran
// and its gold value was never stored. Keep only the surviving (mB, 1) entry,
// which preserves the effective behavior. If both variants are meant to run,
// they need distinct keys plus matching @Run/@Check entries.
150 tests.put("test8", () -> { return test8(aB.clone(), bB.clone(), mB, 1); });
151 tests.put("test9", () -> { return test9(aB.clone(), bB.clone(), mB); });
152
153 tests.put("test10a", () -> { return test10a(aB.clone(), bB.clone(), mB); });
154 tests.put("test10b", () -> { return test10b(aB.clone(), bB.clone(), mB); });
155 tests.put("test10c", () -> { return test10c(aS.clone(), bS.clone(), mS); });
156 tests.put("test10d", () -> { return test10d(aS.clone(), bS.clone(), mS); });
157 tests.put("test10e", () -> { return test10e(aS.clone(), bS.clone(), mS); });
158
159 tests.put("test11aB", () -> { return test11aB(aB.clone(), bB.clone(), mB); });
160 tests.put("test11aS", () -> { return test11aS(aS.clone(), bS.clone(), mS); });
161 tests.put("test11aI", () -> { return test11aI(aI.clone(), bI.clone(), mI); });
162 tests.put("test11aL", () -> { return test11aL(aL.clone(), bL.clone(), mL); });
163
164 tests.put("test11bB", () -> { return test11bB(aB.clone(), bB.clone(), mB); });
165 tests.put("test11bS", () -> { return test11bS(aS.clone(), bS.clone(), mS); });
166 tests.put("test11bI", () -> { return test11bI(aI.clone(), bI.clone(), mI); });
167 tests.put("test11bL", () -> { return test11bL(aL.clone(), bL.clone(), mL); });
168
169 tests.put("test11cB", () -> { return test11cB(aB.clone(), bB.clone(), mB); });
170 tests.put("test11cS", () -> { return test11cS(aS.clone(), bS.clone(), mS); });
171 tests.put("test11cI", () -> { return test11cI(aI.clone(), bI.clone(), mI); });
172 tests.put("test11cL", () -> { return test11cL(aL.clone(), bL.clone(), mL); });
173
174 tests.put("test11dB", () -> { return test11dB(aB.clone(), bB.clone(), mB, 0); });
175 tests.put("test11dS", () -> { return test11dS(aS.clone(), bS.clone(), mS, 0); });
176 tests.put("test11dI", () -> { return test11dI(aI.clone(), bI.clone(), mI, 0); });
177 tests.put("test11dL", () -> { return test11dL(aL.clone(), bL.clone(), mL, 0); });
207 tests.put("test17c", () -> { return test17c(aL.clone()); });
208 tests.put("test17d", () -> { return test17d(aL.clone()); });
209
210 tests.put("test18a", () -> { return test18a(aB.clone(), aI.clone()); });
211 tests.put("test18b", () -> { return test18b(aB.clone(), aI.clone()); });
212
213 tests.put("test19", () -> { return test19(aI.clone(), bI.clone()); });
214 tests.put("test20", () -> { return test20(aB.clone()); });
215
216 // Compute gold value for all test methods before compilation
217 for (Map.Entry<String,TestFunction> entry : tests.entrySet()) {
218 String name = entry.getKey();
219 TestFunction test = entry.getValue();
220 Object[] gold = test.run();
221 golds.put(name, gold);
222 }
223 }
224
225 @Warmup(100)
226 @Run(test = {"test0",
227 "test1a",
228 "test1b",
229 "test2",
230 "test3",
231 "test4",
232 "test5",
233 "test6",
234 "test7",
235 "test8",
236 "test9",
237 "test10a",
238 "test10b",
239 "test10c",
240 "test10d",
241 "test10e",
242 "test11aB",
243 "test11aS",
244 "test11aI",
245 "test11aL",
246 "test11bB",
247 "test11bS",
248 "test11bI",
249 "test11bL",
250 "test11cB",
251 "test11cS",
252 "test11cI",
253 "test11cL",
254 "test11dB",
255 "test11dS",
256 "test11dI",
257 "test11dL",
258 "test12",
259 "test13aIL",
260 "test13aIB",
261 "test13aIS",
412 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
413 IRNode.STORE_VECTOR, "> 0"},
414 applyIf = {"MaxVectorSize", ">=8"},
415 applyIfPlatform = {"64-bit", "true"},
416 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
417 static Object[] test0(byte[] a, byte[] b, byte mask) {
// Masks 4 consecutive bytes per 8-byte stride. The 4-byte pack starts at
// element offset 0, so it remains alignable and the IR rule above expects
// VECTOR_SIZE_4 load/and/store vectors whenever MaxVectorSize >= 8.
418 for (int i = 0; i < RANGE; i+=8) {
419 // Safe to vectorize with AlignVector
420 b[i+0] = (byte)(a[i+0] & mask); // offset 0, align 0
421 b[i+1] = (byte)(a[i+1] & mask);
422 b[i+2] = (byte)(a[i+2] & mask);
423 b[i+3] = (byte)(a[i+3] & mask);
424 }
425 return new Object[]{ a, b };
426 }
427
428 @Test
429 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
430 IRNode.AND_VB, "> 0",
431 IRNode.STORE_VECTOR, "> 0"},
432 applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
433 // UNSAFE.ARRAY_BYTE_BASE_OFFSET = 16, but with compact object headers UNSAFE.ARRAY_BYTE_BASE_OFFSET=12.
434 // If AlignVector=true, we need the offset to be 8-byte aligned, else the vectors are filtered out.
435 applyIfPlatform = {"64-bit", "true"},
436 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
437 static Object[] test1a(byte[] a, byte[] b, byte mask) {
// 8-byte packs at array offset 0: adr = base + 16 (or 12 with COH) + 8*iter.
// With compact object headers the base offset 12 is not 8-byte aligned, so
// AlignVector rejects the packs — hence the applyIfOr above only expects
// vectors when COH is off or AlignVector is off.
438 for (int i = 0; i < RANGE; i+=8) {
439 b[i+0] = (byte)(a[i+0] & mask); // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 0 + iter*8
440 b[i+1] = (byte)(a[i+1] & mask);
441 b[i+2] = (byte)(a[i+2] & mask);
442 b[i+3] = (byte)(a[i+3] & mask);
443 b[i+4] = (byte)(a[i+4] & mask);
444 b[i+5] = (byte)(a[i+5] & mask);
445 b[i+6] = (byte)(a[i+6] & mask);
446 b[i+7] = (byte)(a[i+7] & mask);
447 }
448 return new Object[]{ a, b };
449 }
450
451 @Test
452 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
453 IRNode.AND_VB, "> 0",
454 IRNode.STORE_VECTOR, "> 0"},
455 applyIfOr = {"UseCompactObjectHeaders", "true", "AlignVector", "false"},
456 // UNSAFE.ARRAY_BYTE_BASE_OFFSET = 16, but with compact object headers UNSAFE.ARRAY_BYTE_BASE_OFFSET=12.
457 // If AlignVector=true, we need the offset to be 8-byte aligned, else the vectors are filtered out.
458 applyIfPlatform = {"64-bit", "true"},
459 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
460 static Object[] test1b(byte[] a, byte[] b, byte mask) {
// Mirror of test1a at offset 4: adr = base + 16 (or 12 with COH) + 4 + 8*iter.
// Here COH's base 12 + 4 = 16 IS 8-byte aligned, so the rule above expects
// vectors when COH is on or AlignVector is off.
461 for (int i = 4; i < RANGE-8; i+=8) {
462 b[i+0] = (byte)(a[i+0] & mask); // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 4 + iter*8
463 b[i+1] = (byte)(a[i+1] & mask);
464 b[i+2] = (byte)(a[i+2] & mask);
465 b[i+3] = (byte)(a[i+3] & mask);
466 b[i+4] = (byte)(a[i+4] & mask);
467 b[i+5] = (byte)(a[i+5] & mask);
468 b[i+6] = (byte)(a[i+6] & mask);
469 b[i+7] = (byte)(a[i+7] & mask);
470 }
471 return new Object[]{ a, b };
472 }
473
474 @Test
475 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
476 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
477 IRNode.STORE_VECTOR, "> 0"},
478 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
479 applyIfPlatform = {"64-bit", "true"},
480 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
481 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
482 IRNode.AND_VB, "= 0",
483 IRNode.STORE_VECTOR, "= 0"},
484 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
485 applyIfPlatform = {"64-bit", "true"},
486 applyIf = {"AlignVector", "true"})
487 static Object[] test2(byte[] a, byte[] b, byte mask) {
488 for (int i = 0; i < RANGE; i+=8) {
489 // Cannot align with AlignVector: 3 + x * 8 % 8 = 3
490 b[i+3] = (byte)(a[i+3] & mask); // at alignment 3
491 b[i+4] = (byte)(a[i+4] & mask);
492 b[i+5] = (byte)(a[i+5] & mask);
493 b[i+6] = (byte)(a[i+6] & mask);
745 IRNode.STORE_VECTOR, "= 0"},
746 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
747 applyIfPlatform = {"64-bit", "true"},
748 applyIf = {"AlignVector", "true"})
749 static Object[] test10c(short[] a, short[] b, short mask) {
750 // This is not alignable with pre-loop, because of odd init.
751 // Seems not correctly handled with MaxVectorSize >= 32.
752 for (int i = 13; i < RANGE-8; i+=8) {
753 b[i+0] = (short)(a[i+0] & mask);
754 b[i+1] = (short)(a[i+1] & mask);
755 b[i+2] = (short)(a[i+2] & mask);
756 b[i+3] = (short)(a[i+3] & mask);
757 }
758 return new Object[]{ a, b };
759 }
760
761 @Test
762 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
763 IRNode.AND_VS, IRNode.VECTOR_SIZE_4, "> 0",
764 IRNode.STORE_VECTOR, "> 0"},
765 applyIfAnd = {"MaxVectorSize", ">=16", "UseCompactObjectHeaders", "false"},
766 // UNSAFE.ARRAY_BYTE_BASE_OFFSET = 16, but with compact object headers UNSAFE.ARRAY_BYTE_BASE_OFFSET=12.
767 // If AlignVector=true, we need the offset to be 8-byte aligned, else the vectors are filtered out.
768 applyIfPlatform = {"64-bit", "true"},
769 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
770 static Object[] test10d(short[] a, short[] b, short mask) {
// Pack starts at short index 16 (init 13 + offset 3), i.e. 32 bytes past the
// base. Without COH the 16-byte base keeps adr 8-byte aligned; with COH
// (base 12) it is not — hence applyIfAnd requires UseCompactObjectHeaders=false.
771 for (int i = 13; i < RANGE-16; i+=8) {
772 // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2*(3 + 13) + iter*16
773 b[i+0+3] = (short)(a[i+0+3] & mask);
774 b[i+1+3] = (short)(a[i+1+3] & mask);
775 b[i+2+3] = (short)(a[i+2+3] & mask);
776 b[i+3+3] = (short)(a[i+3+3] & mask);
777 }
778 return new Object[]{ a, b };
779 }
780
781 @Test
782 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
783 IRNode.AND_VS, IRNode.VECTOR_SIZE_4, "> 0",
784 IRNode.STORE_VECTOR, "> 0"},
785 applyIfAnd = {"MaxVectorSize", ">=16", "UseCompactObjectHeaders", "true"},
786 // UNSAFE.ARRAY_BYTE_BASE_OFFSET = 16, but with compact object headers UNSAFE.ARRAY_BYTE_BASE_OFFSET=12.
787 // If AlignVector=true, we need the offset to be 8-byte aligned, else the vectors are filtered out.
788 applyIfPlatform = {"64-bit", "true"},
789 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
790 static Object[] test10e(short[] a, short[] b, short mask) {
// Mirror of test10d: pack starts at short index 14 (init 11 + offset 3), i.e.
// 28 bytes past the base. With COH: 12 + 28 = 40 is 8-byte aligned — hence
// applyIfAnd requires UseCompactObjectHeaders=true.
791 for (int i = 11; i < RANGE-16; i+=8) {
792 // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2*(3 + 11) + iter*16
793 b[i+0+3] = (short)(a[i+0+3] & mask);
794 b[i+1+3] = (short)(a[i+1+3] & mask);
795 b[i+2+3] = (short)(a[i+2+3] & mask);
796 b[i+3+3] = (short)(a[i+3+3] & mask);
797 }
798 return new Object[]{ a, b };
799 }
800
801 @Test
802 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
803 IRNode.AND_VB, "> 0",
804 IRNode.STORE_VECTOR, "> 0"},
805 applyIfPlatform = {"64-bit", "true"},
806 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
807 static Object[] test11aB(byte[] a, byte[] b, byte mask) {
// Stride-1 masked copy: the pre-loop can always reach an aligned address, so
// vectors are expected unconditionally on the listed CPU features.
808 for (int i = 0; i < RANGE; i++) {
809 // always alignable
810 b[i+0] = (byte)(a[i+0] & mask);
811 }
812 return new Object[]{ a, b };
813 }
814
815 @Test
816 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
817 IRNode.AND_VS, "> 0",
818 IRNode.STORE_VECTOR, "> 0"},
819 applyIfPlatform = {"64-bit", "true"},
820 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
1071 IRNode.ADD_VI, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1072 IRNode.ADD_VL, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1073 IRNode.STORE_VECTOR, "> 0"},
1074 applyIfPlatform = {"riscv64", "true"},
1075 applyIfCPUFeature = {"rvv", "true"},
1076 applyIf = {"MaxVectorSize", ">=32"})
1077 static Object[] test13aIL(int[] a, long[] b) {
// Increments an int and a long array in lockstep; the (partially visible)
// IR rule above checks VECTOR_SIZE min(max_int, max_long) vectors.
1078 for (int i = 0; i < RANGE; i++) {
1079 a[i]++;
1080 b[i]++;
1081 }
1082 return new Object[]{ a, b };
1083 }
1084
1085 @Test
1086 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
1087 IRNode.LOAD_VECTOR_I, "> 0",
1088 IRNode.ADD_VB, "> 0",
1089 IRNode.ADD_VI, "> 0",
1090 IRNode.STORE_VECTOR, "> 0"},
1091 applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
1092 applyIfPlatform = {"64-bit", "true"},
1093 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
1094 static Object[] test13aIB(int[] a, byte[] b) {
1095 for (int i = 0; i < RANGE; i++) {
1096 // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4*iter
1097 // = 16 (or 12 if UseCompactObjectHeaders=true)
1098 a[i]++;
1099 // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 1*iter
1100 // = 16 (or 12 if UseCompactObjectHeaders=true)
1101 b[i]++;
1102 // For AlignVector, all adr must be 8-byte aligned. Let's see for which iteration this can hold:
1103 // If UseCompactObjectHeaders=false, iterations where adr is 8-byte aligned:
1104 // a: 0, 2, 4, 6, 8, ...
1105 // b: 0, 8, 16, 24, 32, ...
1106 // -> Ok, aligns every 8th iteration.
1107 // If UseCompactObjectHeaders=true, iterations where adr is 8-byte aligned:
1108 // a: 1, 3, 5, 7, 9, ...
1109 // b: 4, 12, 20, 28, 36, ...
1110 // -> we can never align both vectors!
1111 }
1112 return new Object[]{ a, b };
1113 }
1114
1115 @Test
1116 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
1117 IRNode.LOAD_VECTOR_S, "> 0",
1118 IRNode.ADD_VI, "> 0",
1119 IRNode.ADD_VS, "> 0",
1120 IRNode.STORE_VECTOR, "> 0"},
1121 applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
1122 applyIfPlatform = {"64-bit", "true"},
1123 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
1124 static Object[] test13aIS(int[] a, short[] b) {
1125 for (int i = 0; i < RANGE; i++) {
1126 // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4*iter
1127 // = 16 (or 12 if UseCompactObjectHeaders=true)
1128 a[i]++;
1129 // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2*iter
1130 // = 16 (or 12 if UseCompactObjectHeaders=true)
1131 b[i]++;
1132 // For AlignVector, all adr must be 8-byte aligned. Let's see for which iteration this can hold:
1133 // If UseCompactObjectHeaders=false:
1134 // a: iter % 2 == 0
1135 // b: iter % 4 == 0
1136 // -> Ok, aligns every 4th iteration.
1137 // If UseCompactObjectHeaders=true:
1138 // a: iter % 2 = 1
1139 // b: iter % 4 = 2
1140 // -> we can never align both vectors!
1141 }
1142 return new Object[]{ a, b };
1143 }
1144
1145 @Test
1146 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
1147 IRNode.LOAD_VECTOR_S, "> 0",
1148 IRNode.LOAD_VECTOR_I, "> 0",
1149 IRNode.LOAD_VECTOR_L, "> 0",
1150 IRNode.ADD_VB, "> 0",
1151 IRNode.ADD_VS, "> 0",
1152 IRNode.ADD_VI, "> 0",
1153 IRNode.ADD_VL, "> 0",
1154 IRNode.STORE_VECTOR, "> 0"},
1155 applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
1156 applyIfPlatform = {"64-bit", "true"},
1157 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
1158 static Object[] test13aBSIL(byte[] a, short[] b, int[] c, long[] d) {
// Lockstep increment of byte/short/int/long arrays; with COH + AlignVector
// the byte and int streams can never be mutually 8-byte aligned (see below).
1159 for (int i = 0; i < RANGE; i++) {
1160 // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 1*iter
1161 // = 16 (or 12 if UseCompactObjectHeaders=true)
1162 a[i]++;
1163 // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2*iter
1164 // = 16 (or 12 if UseCompactObjectHeaders=true)
1165 b[i]++;
1166 // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4*iter
1167 // = 16 (or 12 if UseCompactObjectHeaders=true)
1168 c[i]++;
1169 // adr = base + UNSAFE.ARRAY_LONG_BASE_OFFSET + 8*iter
1170 // = 16 (always)
1171 d[i]++;
1172 // If AlignVector and UseCompactObjectHeaders, and we want all adr 8-byte aligned:
1173 // a: iter % 8 = 4
1174 // c: iter % 2 = 1
1175 // -> can never align both vectors!
1176 }
1177 return new Object[]{ a, b, c, d };
1178 }
1179
1180 @Test
1181 @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1182 IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1183 IRNode.ADD_VI, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1184 IRNode.ADD_VL, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1185 IRNode.STORE_VECTOR, "> 0"},
1186 applyIfPlatform = {"64-bit", "true"},
1187 applyIfCPUFeature = {"avx2", "true"})
1188 // require avx to ensure vectors are larger than what unrolling produces
1189 @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1190 IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1191 IRNode.ADD_VI, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1192 IRNode.ADD_VL, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1193 IRNode.STORE_VECTOR, "> 0"},
1194 applyIfPlatform = {"riscv64", "true"},
1195 applyIfCPUFeature = {"rvv", "true"},
1196 applyIf = {"MaxVectorSize", ">=32"})
1197 static Object[] test13bIL(int[] a, long[] b) {
// Same as test13aIL but with init i = 1, i.e. a one-element invariant offset.
1198 for (int i = 1; i < RANGE; i++) {
1199 a[i]++;
1200 b[i]++;
1201 }
1202 return new Object[]{ a, b };
1203 }
1204
1205 @Test
1206 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
1207 IRNode.LOAD_VECTOR_I, "> 0",
1208 IRNode.ADD_VB, "> 0",
1209 IRNode.ADD_VI, "> 0",
1210 IRNode.STORE_VECTOR, "> 0"},
1211 applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
1212 applyIfPlatform = {"64-bit", "true"},
1213 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
1214 static Object[] test13bIB(int[] a, byte[] b) {
// int/byte lockstep with init i = 1; the notes below show why AlignVector +
// COH can never align both streams, matching the applyIfOr above.
1215 for (int i = 1; i < RANGE; i++) {
1216 // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4 + 4*iter
1217 // = 16 (or 12 if UseCompactObjectHeaders=true)
1218 a[i]++;
1219 // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 1 + 1*iter
1220 // = 16 (or 12 if UseCompactObjectHeaders=true)
1221 b[i]++;
1222 // If AlignVector and UseCompactObjectHeaders, and we want all adr 8-byte aligned:
1223 // a: iter % 2 = 0
1224 // b: iter % 8 = 3
1225 // -> can never align both vectors!
1226 }
1227 return new Object[]{ a, b };
1228 }
1229
1230 @Test
1231 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
1232 IRNode.LOAD_VECTOR_S, "> 0",
1233 IRNode.ADD_VI, "> 0",
1234 IRNode.ADD_VS, "> 0",
1235 IRNode.STORE_VECTOR, "> 0"},
1236 applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
1237 applyIfPlatform = {"64-bit", "true"},
1238 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
1239 static Object[] test13bIS(int[] a, short[] b) {
// int/short lockstep with init i = 1.
1240 for (int i = 1; i < RANGE; i++) {
1241 // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4 + 4*iter
1242 // = 16 (or 12 if UseCompactObjectHeaders=true)
1243 a[i]++;
1244 // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2 + 2*iter
1245 // = 16 (or 12 if UseCompactObjectHeaders=true)
1246 b[i]++;
1247 // If AlignVector and UseCompactObjectHeaders, and we want all adr 8-byte aligned:
1248 // a: iter % 2 = 0
1249 // b: iter % 4 = 1
1250 // -> can never align both vectors!
1251 }
1252 return new Object[]{ a, b };
1253 }
1254
1255 @Test
1256 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
1257 IRNode.LOAD_VECTOR_S, "> 0",
1258 IRNode.LOAD_VECTOR_I, "> 0",
1259 IRNode.LOAD_VECTOR_L, "> 0",
1260 IRNode.ADD_VB, "> 0",
1261 IRNode.ADD_VS, "> 0",
1262 IRNode.ADD_VI, "> 0",
1263 IRNode.ADD_VL, "> 0",
1264 IRNode.STORE_VECTOR, "> 0"},
1265 applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
1266 applyIfPlatform = {"64-bit", "true"},
1267 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
1268 static Object[] test13bBSIL(byte[] a, short[] b, int[] c, long[] d) {
// byte/short/int/long lockstep with init i = 1.
1269 for (int i = 1; i < RANGE; i++) {
1270 // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 1 + 1*iter
1271 // = 16 (or 12 if UseCompactObjectHeaders=true)
1272 a[i]++;
1273 // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2 + 2*iter
1274 // = 16 (or 12 if UseCompactObjectHeaders=true)
1275 b[i]++;
1276 // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4 + 4*iter
1277 // = 16 (or 12 if UseCompactObjectHeaders=true)
1278 c[i]++;
1279 // adr = base + UNSAFE.ARRAY_LONG_BASE_OFFSET + 8 + 8*iter
1280 // = 16 (always)
1281 d[i]++;
1282 // If AlignVector and UseCompactObjectHeaders, and we want all adr 8-byte aligned:
1283 // a: iter % 8 = 3
1284 // c: iter % 2 = 0
1285 // -> can never align both vectors!
1286 }
1287 return new Object[]{ a, b, c, d };
1288 }
1289
1290 @Test
1291 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
1292 IRNode.ADD_VB, "= 0",
1293 IRNode.STORE_VECTOR, "= 0"},
1294 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
1295 applyIfPlatform = {"64-bit", "true"},
1296 applyIf = {"AlignVector", "false"})
1297 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
1298 IRNode.ADD_VB, "= 0",
1299 IRNode.STORE_VECTOR, "= 0"},
1300 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
1301 applyIfPlatform = {"64-bit", "true"},
1302 applyIf = {"AlignVector", "true"})
1303 static Object[] test14aB(byte[] a) {
1304 // non-power-of-2 stride
1305 for (int i = 0; i < RANGE-20; i+=9) {
|
121 case "NoAlignVector-COH" -> { framework.addFlags("-XX:+UseCompactObjectHeaders", "-XX:-AlignVector"); }
122 case "VerifyAlignVector-COH" -> { framework.addFlags("-XX:+UseCompactObjectHeaders", "-XX:+AlignVector", "-XX:+IgnoreUnrecognizedVMOptions", "-XX:+VerifyAlignVector"); }
123 default -> { throw new RuntimeException("Test argument not recognized: " + args[0]); }
124 }
125 framework.start();
126 }
127
128 public TestAlignVector() {
129 // Generate input once
130 aB = generateB();
131 bB = generateB();
132 aS = generateS();
133 bS = generateS();
134 aI = generateI();
135 bI = generateI();
136 aL = generateL();
137 bL = generateL();
138
139 // Add all tests to list
140 tests.put("test0", () -> { return test0(aB.clone(), bB.clone(), mB); });
141 tests.put("test1", () -> { return test1(aB.clone(), bB.clone(), mB); });
142 tests.put("test2", () -> { return test2(aB.clone(), bB.clone(), mB); });
143 tests.put("test3", () -> { return test3(aB.clone(), bB.clone(), mB); });
144 tests.put("test4", () -> { return test4(aB.clone(), bB.clone(), mB); });
145 tests.put("test5", () -> { return test5(aB.clone(), bB.clone(), mB, 0); });
146 tests.put("test6", () -> { return test6(aB.clone(), bB.clone(), mB); });
147 tests.put("test7", () -> { return test7(aS.clone(), bS.clone(), mS); });
// FIX: two puts used the same key "test8" (arg 0 and arg 1); Map.put
// overwrites, so the (mB, 0) entry was dead code — never run, gold never
// stored. Keep only the surviving (mB, 1) entry (behavior-identical). If both
// variants should run they need distinct keys plus matching @Run/@Check entries.
149 tests.put("test8", () -> { return test8(aB.clone(), bB.clone(), mB, 1); });
150 tests.put("test9", () -> { return test9(aB.clone(), bB.clone(), mB); });
151
152 tests.put("test10a", () -> { return test10a(aB.clone(), bB.clone(), mB); });
153 tests.put("test10b", () -> { return test10b(aB.clone(), bB.clone(), mB); });
154 tests.put("test10c", () -> { return test10c(aS.clone(), bS.clone(), mS); });
155 tests.put("test10d", () -> { return test10d(aS.clone(), bS.clone(), mS); });
156
157 tests.put("test11aB", () -> { return test11aB(aB.clone(), bB.clone(), mB); });
158 tests.put("test11aS", () -> { return test11aS(aS.clone(), bS.clone(), mS); });
159 tests.put("test11aI", () -> { return test11aI(aI.clone(), bI.clone(), mI); });
160 tests.put("test11aL", () -> { return test11aL(aL.clone(), bL.clone(), mL); });
161
162 tests.put("test11bB", () -> { return test11bB(aB.clone(), bB.clone(), mB); });
163 tests.put("test11bS", () -> { return test11bS(aS.clone(), bS.clone(), mS); });
164 tests.put("test11bI", () -> { return test11bI(aI.clone(), bI.clone(), mI); });
165 tests.put("test11bL", () -> { return test11bL(aL.clone(), bL.clone(), mL); });
166
167 tests.put("test11cB", () -> { return test11cB(aB.clone(), bB.clone(), mB); });
168 tests.put("test11cS", () -> { return test11cS(aS.clone(), bS.clone(), mS); });
169 tests.put("test11cI", () -> { return test11cI(aI.clone(), bI.clone(), mI); });
170 tests.put("test11cL", () -> { return test11cL(aL.clone(), bL.clone(), mL); });
171
172 tests.put("test11dB", () -> { return test11dB(aB.clone(), bB.clone(), mB, 0); });
173 tests.put("test11dS", () -> { return test11dS(aS.clone(), bS.clone(), mS, 0); });
174 tests.put("test11dI", () -> { return test11dI(aI.clone(), bI.clone(), mI, 0); });
175 tests.put("test11dL", () -> { return test11dL(aL.clone(), bL.clone(), mL, 0); });
205 tests.put("test17c", () -> { return test17c(aL.clone()); });
206 tests.put("test17d", () -> { return test17d(aL.clone()); });
207
208 tests.put("test18a", () -> { return test18a(aB.clone(), aI.clone()); });
209 tests.put("test18b", () -> { return test18b(aB.clone(), aI.clone()); });
210
211 tests.put("test19", () -> { return test19(aI.clone(), bI.clone()); });
212 tests.put("test20", () -> { return test20(aB.clone()); });
213
214 // Compute gold value for all test methods before compilation
215 for (Map.Entry<String,TestFunction> entry : tests.entrySet()) {
216 String name = entry.getKey();
217 TestFunction test = entry.getValue();
218 Object[] gold = test.run();
219 golds.put(name, gold);
220 }
221 }
222
223 @Warmup(100)
224 @Run(test = {"test0",
225 "test1",
226 "test2",
227 "test3",
228 "test4",
229 "test5",
230 "test6",
231 "test7",
232 "test8",
233 "test9",
234 "test10a",
235 "test10b",
236 "test10c",
237 "test10d",
238 "test11aB",
239 "test11aS",
240 "test11aI",
241 "test11aL",
242 "test11bB",
243 "test11bS",
244 "test11bI",
245 "test11bL",
246 "test11cB",
247 "test11cS",
248 "test11cI",
249 "test11cL",
250 "test11dB",
251 "test11dS",
252 "test11dI",
253 "test11dL",
254 "test12",
255 "test13aIL",
256 "test13aIB",
257 "test13aIS",
408 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
409 IRNode.STORE_VECTOR, "> 0"},
410 applyIf = {"MaxVectorSize", ">=8"},
411 applyIfPlatform = {"64-bit", "true"},
412 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
413 static Object[] test0(byte[] a, byte[] b, byte mask) {
// Masks 4 consecutive bytes per 8-byte stride; the 4-byte pack starts at
// element offset 0, so the IR rule above expects VECTOR_SIZE_4 vectors
// whenever MaxVectorSize >= 8.
414 for (int i = 0; i < RANGE; i+=8) {
415 // Safe to vectorize with AlignVector
416 b[i+0] = (byte)(a[i+0] & mask); // offset 0, align 0
417 b[i+1] = (byte)(a[i+1] & mask);
418 b[i+2] = (byte)(a[i+2] & mask);
419 b[i+3] = (byte)(a[i+3] & mask);
420 }
421 return new Object[]{ a, b };
422 }
423
424 @Test
425 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
426 IRNode.AND_VB, "> 0",
427 IRNode.STORE_VECTOR, "> 0"},
428 applyIfPlatform = {"64-bit", "true"},
429 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
430 static Object[] test1(byte[] a, byte[] b, byte mask) {
// Full 8-byte packs starting at element offset 0; the rule above expects
// vectors unconditionally on the listed CPU features.
431 for (int i = 0; i < RANGE; i+=8) {
432 b[i+0] = (byte)(a[i+0] & mask); // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 0 + iter*8
433 b[i+1] = (byte)(a[i+1] & mask);
434 b[i+2] = (byte)(a[i+2] & mask);
435 b[i+3] = (byte)(a[i+3] & mask);
436 b[i+4] = (byte)(a[i+4] & mask);
437 b[i+5] = (byte)(a[i+5] & mask);
438 b[i+6] = (byte)(a[i+6] & mask);
439 b[i+7] = (byte)(a[i+7] & mask);
440 }
441 return new Object[]{ a, b };
442 }
443
444 @Test
445 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
446 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
447 IRNode.STORE_VECTOR, "> 0"},
448 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
449 applyIfPlatform = {"64-bit", "true"},
450 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
451 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
452 IRNode.AND_VB, "= 0",
453 IRNode.STORE_VECTOR, "= 0"},
454 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
455 applyIfPlatform = {"64-bit", "true"},
456 applyIf = {"AlignVector", "true"})
457 static Object[] test2(byte[] a, byte[] b, byte mask) {
458 for (int i = 0; i < RANGE; i+=8) {
459 // Cannot align with AlignVector: 3 + x * 8 % 8 = 3
460 b[i+3] = (byte)(a[i+3] & mask); // at alignment 3
461 b[i+4] = (byte)(a[i+4] & mask);
462 b[i+5] = (byte)(a[i+5] & mask);
463 b[i+6] = (byte)(a[i+6] & mask);
715 IRNode.STORE_VECTOR, "= 0"},
716 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
717 applyIfPlatform = {"64-bit", "true"},
718 applyIf = {"AlignVector", "true"})
719 static Object[] test10c(short[] a, short[] b, short mask) {
720 // This is not alignable with pre-loop, because of odd init.
721 // Seems not correctly handled with MaxVectorSize >= 32.
722 for (int i = 13; i < RANGE-8; i+=8) {
723 b[i+0] = (short)(a[i+0] & mask);
724 b[i+1] = (short)(a[i+1] & mask);
725 b[i+2] = (short)(a[i+2] & mask);
726 b[i+3] = (short)(a[i+3] & mask);
727 }
728 return new Object[]{ a, b };
729 }
730
731 @Test
732 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
733 IRNode.AND_VS, IRNode.VECTOR_SIZE_4, "> 0",
734 IRNode.STORE_VECTOR, "> 0"},
735 applyIf = {"MaxVectorSize", ">=16"},
736 applyIfPlatform = {"64-bit", "true"},
737 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
738 static Object[] test10d(short[] a, short[] b, short mask) {
// Pack starts at short index 16 (init 13 + offset 3), i.e. 32 bytes past the
// array base — see the adr comment inside the loop; expected to vectorize
// whenever MaxVectorSize >= 16 on the listed CPU features.
739 for (int i = 13; i < RANGE-16; i+=8) {
740 // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2*(3 + 13) + iter*16
741 b[i+0+3] = (short)(a[i+0+3] & mask);
742 b[i+1+3] = (short)(a[i+1+3] & mask);
743 b[i+2+3] = (short)(a[i+2+3] & mask);
744 b[i+3+3] = (short)(a[i+3+3] & mask);
745 }
746 return new Object[]{ a, b };
747 }
748
749 @Test
750 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
751 IRNode.AND_VB, "> 0",
752 IRNode.STORE_VECTOR, "> 0"},
753 applyIfPlatform = {"64-bit", "true"},
754 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
755 static Object[] test11aB(byte[] a, byte[] b, byte mask) {
// Stride-1 masked copy: the pre-loop can always reach an aligned address.
756 for (int i = 0; i < RANGE; i++) {
757 // always alignable
758 b[i+0] = (byte)(a[i+0] & mask);
759 }
760 return new Object[]{ a, b };
761 }
762
763 @Test
764 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
765 IRNode.AND_VS, "> 0",
766 IRNode.STORE_VECTOR, "> 0"},
767 applyIfPlatform = {"64-bit", "true"},
768 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
1019 IRNode.ADD_VI, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1020 IRNode.ADD_VL, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1021 IRNode.STORE_VECTOR, "> 0"},
1022 applyIfPlatform = {"riscv64", "true"},
1023 applyIfCPUFeature = {"rvv", "true"},
1024 applyIf = {"MaxVectorSize", ">=32"})
1025 static Object[] test13aIL(int[] a, long[] b) {
// Increments an int and a long array in lockstep; the (partially visible)
// IR rule above checks VECTOR_SIZE min(max_int, max_long) vectors.
1026 for (int i = 0; i < RANGE; i++) {
1027 a[i]++;
1028 b[i]++;
1029 }
1030 return new Object[]{ a, b };
1031 }
1032
1033 @Test
1034 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
1035 IRNode.LOAD_VECTOR_I, "> 0",
1036 IRNode.ADD_VB, "> 0",
1037 IRNode.ADD_VI, "> 0",
1038 IRNode.STORE_VECTOR, "> 0"},
1039 applyIfPlatform = {"64-bit", "true"},
1040 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
1041 static Object[] test13aIB(int[] a, byte[] b) {
// int/byte lockstep increment; the rule above expects byte and int vectors
// unconditionally on the listed CPU features.
1042 for (int i = 0; i < RANGE; i++) {
1043 a[i]++;
1044 b[i]++;
1045 }
1046 return new Object[]{ a, b };
1047 }
1048
1049 @Test
1050 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
1051 IRNode.LOAD_VECTOR_S, "> 0",
1052 IRNode.ADD_VI, "> 0",
1053 IRNode.ADD_VS, "> 0",
1054 IRNode.STORE_VECTOR, "> 0"},
1055 applyIfPlatform = {"64-bit", "true"},
1056 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
1057 static Object[] test13aIS(int[] a, short[] b) {
// int/short lockstep increment.
1058 for (int i = 0; i < RANGE; i++) {
1059 a[i]++;
1060 b[i]++;
1061 }
1062 return new Object[]{ a, b };
1063 }
1064
1065 @Test
1066 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
1067 IRNode.LOAD_VECTOR_S, "> 0",
1068 IRNode.LOAD_VECTOR_I, "> 0",
1069 IRNode.LOAD_VECTOR_L, "> 0",
1070 IRNode.ADD_VB, "> 0",
1071 IRNode.ADD_VS, "> 0",
1072 IRNode.ADD_VI, "> 0",
1073 IRNode.ADD_VL, "> 0",
1074 IRNode.STORE_VECTOR, "> 0"},
1075 applyIfPlatform = {"64-bit", "true"},
1076 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
1077 static Object[] test13aBSIL(byte[] a, short[] b, int[] c, long[] d) {
// byte/short/int/long lockstep increment, all starting at index 0.
1078 for (int i = 0; i < RANGE; i++) {
1079 a[i]++;
1080 b[i]++;
1081 c[i]++;
1082 d[i]++;
1083 }
1084 return new Object[]{ a, b, c, d };
1085 }
1086
1087 @Test
1088 @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1089 IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1090 IRNode.ADD_VI, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1091 IRNode.ADD_VL, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1092 IRNode.STORE_VECTOR, "> 0"},
1093 applyIfPlatform = {"64-bit", "true"},
1094 applyIfCPUFeature = {"avx2", "true"})
1095 // require avx to ensure vectors are larger than what unrolling produces
1096 @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1097 IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1098 IRNode.ADD_VI, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1099 IRNode.ADD_VL, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1100 IRNode.STORE_VECTOR, "> 0"},
1101 applyIfPlatform = {"riscv64", "true"},
1102 applyIfCPUFeature = {"rvv", "true"},
1103 applyIf = {"MaxVectorSize", ">=32"})
1104 static Object[] test13bIL(int[] a, long[] b) {
// Same as test13aIL but with init i = 1 (one-element invariant offset).
1105 for (int i = 1; i < RANGE; i++) {
1106 a[i]++;
1107 b[i]++;
1108 }
1109 return new Object[]{ a, b };
1110 }
1111
1112 @Test
1113 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
1114 IRNode.LOAD_VECTOR_I, "> 0",
1115 IRNode.ADD_VB, "> 0",
1116 IRNode.ADD_VI, "> 0",
1117 IRNode.STORE_VECTOR, "> 0"},
1118 applyIfPlatform = {"64-bit", "true"},
1119 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
1120 static Object[] test13bIB(int[] a, byte[] b) {
// int/byte lockstep increment with init i = 1.
1121 for (int i = 1; i < RANGE; i++) {
1122 a[i]++;
1123 b[i]++;
1124 }
1125 return new Object[]{ a, b };
1126 }
1127
1128 @Test
1129 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
1130 IRNode.LOAD_VECTOR_S, "> 0",
1131 IRNode.ADD_VI, "> 0",
1132 IRNode.ADD_VS, "> 0",
1133 IRNode.STORE_VECTOR, "> 0"},
1134 applyIfPlatform = {"64-bit", "true"},
1135 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
1136 static Object[] test13bIS(int[] a, short[] b) {
// int/short lockstep increment with init i = 1.
1137 for (int i = 1; i < RANGE; i++) {
1138 a[i]++;
1139 b[i]++;
1140 }
1141 return new Object[]{ a, b };
1142 }
1143
1144 @Test
1145 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
1146 IRNode.LOAD_VECTOR_S, "> 0",
1147 IRNode.LOAD_VECTOR_I, "> 0",
1148 IRNode.LOAD_VECTOR_L, "> 0",
1149 IRNode.ADD_VB, "> 0",
1150 IRNode.ADD_VS, "> 0",
1151 IRNode.ADD_VI, "> 0",
1152 IRNode.ADD_VL, "> 0",
1153 IRNode.STORE_VECTOR, "> 0"},
1154 applyIfPlatform = {"64-bit", "true"},
1155 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
1156 static Object[] test13bBSIL(byte[] a, short[] b, int[] c, long[] d) {
// byte/short/int/long lockstep increment with init i = 1.
1157 for (int i = 1; i < RANGE; i++) {
1158 a[i]++;
1159 b[i]++;
1160 c[i]++;
1161 d[i]++;
1162 }
1163 return new Object[]{ a, b, c, d };
1164 }
1165
1166 @Test
1167 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
1168 IRNode.ADD_VB, "= 0",
1169 IRNode.STORE_VECTOR, "= 0"},
1170 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
1171 applyIfPlatform = {"64-bit", "true"},
1172 applyIf = {"AlignVector", "false"})
1173 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
1174 IRNode.ADD_VB, "= 0",
1175 IRNode.STORE_VECTOR, "= 0"},
1176 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
1177 applyIfPlatform = {"64-bit", "true"},
1178 applyIf = {"AlignVector", "true"})
1179 static Object[] test14aB(byte[] a) {
1180 // non-power-of-2 stride
1181 for (int i = 0; i < RANGE-20; i+=9) {
|