            case "NoAlignVector-COH" -> { framework.addFlags("-XX:+UseCompactObjectHeaders", "-XX:-AlignVector"); }
            case "VerifyAlignVector-COH" -> { framework.addFlags("-XX:+UseCompactObjectHeaders", "-XX:+AlignVector", "-XX:+IgnoreUnrecognizedVMOptions", "-XX:+VerifyAlignVector"); }
            default -> { throw new RuntimeException("Test argument not recognized: " + args[0]); }
        }
        framework.start();
    }

    public TestAlignVector() {
        // Generate input once
        aB = generateB();
        bB = generateB();
        aS = generateS();
        bS = generateS();
        aI = generateI();
        bI = generateI();
        aL = generateL();
        bL = generateL();

        // Add all tests to list
        tests.put("test0", () -> { return test0(aB.clone(), bB.clone(), mB); });
        tests.put("test1a", () -> { return test1a(aB.clone(), bB.clone(), mB); });
        tests.put("test1b", () -> { return test1b(aB.clone(), bB.clone(), mB); });
        tests.put("test2", () -> { return test2(aB.clone(), bB.clone(), mB); });
        tests.put("test3", () -> { return test3(aB.clone(), bB.clone(), mB); });
        tests.put("test4", () -> { return test4(aB.clone(), bB.clone(), mB); });
        tests.put("test5", () -> { return test5(aB.clone(), bB.clone(), mB, 0); });
        tests.put("test6", () -> { return test6(aB.clone(), bB.clone(), mB); });
        tests.put("test7", () -> { return test7(aS.clone(), bS.clone(), mS); });
        tests.put("test8", () -> { return test8(aB.clone(), bB.clone(), mB, 0); });
        tests.put("test9", () -> { return test9(aB.clone(), bB.clone(), mB); });

        tests.put("test10a", () -> { return test10a(aB.clone(), bB.clone(), mB); });
        tests.put("test10b", () -> { return test10b(aB.clone(), bB.clone(), mB); });
        tests.put("test10c", () -> { return test10c(aS.clone(), bS.clone(), mS); });
        tests.put("test10d", () -> { return test10d(aS.clone(), bS.clone(), mS); });
        tests.put("test10e", () -> { return test10e(aS.clone(), bS.clone(), mS); });

        tests.put("test11aB", () -> { return test11aB(aB.clone(), bB.clone(), mB); });
        tests.put("test11aS", () -> { return test11aS(aS.clone(), bS.clone(), mS); });
        tests.put("test11aI", () -> { return test11aI(aI.clone(), bI.clone(), mI); });
        tests.put("test11aL", () -> { return test11aL(aL.clone(), bL.clone(), mL); });

        tests.put("test11bB", () -> { return test11bB(aB.clone(), bB.clone(), mB); });
        tests.put("test11bS", () -> { return test11bS(aS.clone(), bS.clone(), mS); });
        tests.put("test11bI", () -> { return test11bI(aI.clone(), bI.clone(), mI); });
        tests.put("test11bL", () -> { return test11bL(aL.clone(), bL.clone(), mL); });

        tests.put("test11cB", () -> { return test11cB(aB.clone(), bB.clone(), mB); });
        tests.put("test11cS", () -> { return test11cS(aS.clone(), bS.clone(), mS); });
        tests.put("test11cI", () -> { return test11cI(aI.clone(), bI.clone(), mI); });
        tests.put("test11cL", () -> { return test11cL(aL.clone(), bL.clone(), mL); });

        tests.put("test11dB", () -> { return test11dB(aB.clone(), bB.clone(), mB, 0); });
        tests.put("test11dS", () -> { return test11dS(aS.clone(), bS.clone(), mS, 0); });
        tests.put("test11dI", () -> { return test11dI(aI.clone(), bI.clone(), mI, 0); });
        tests.put("test11dL", () -> { return test11dL(aL.clone(), bL.clone(), mL, 0); });
        // ...
        tests.put("test17c", () -> { return test17c(aL.clone()); });
        tests.put("test17d", () -> { return test17d(aL.clone()); });

        tests.put("test18a", () -> { return test18a(aB.clone(), aI.clone()); });
        tests.put("test18b", () -> { return test18b(aB.clone(), aI.clone()); });

        tests.put("test19", () -> { return test19(aI.clone(), bI.clone()); });
        tests.put("test20", () -> { return test20(aB.clone()); });

        // Compute gold value for all test methods before compilation
        for (Map.Entry<String,TestFunction> entry : tests.entrySet()) {
            String name = entry.getKey();
            TestFunction test = entry.getValue();
            Object[] gold = test.run();
            golds.put(name, gold);
        }
    }

    @Warmup(100)
    @Run(test = {"test0",
                 "test1a",
                 "test1b",
                 "test2",
                 "test3",
                 "test4",
                 "test5",
                 "test6",
                 "test7",
                 "test8",
                 "test9",
                 "test10a",
                 "test10b",
                 "test10c",
                 "test10d",
                 "test10e",
                 "test11aB",
                 "test11aS",
                 "test11aI",
                 "test11aL",
                 "test11bB",
                 "test11bS",
                 "test11bI",
                 "test11bL",
                 "test11cB",
                 "test11cS",
                 "test11cI",
                 "test11cL",
                 "test11dB",
                 "test11dS",
                 "test11dI",
                 "test11dL",
                 "test12",
                 "test13aIL",
                 "test13aIB",
                 "test13aIS",
                 // ...

                  IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIf = {"MaxVectorSize", ">=8"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    static Object[] test0(byte[] a, byte[] b, byte mask) {
        for (int i = 0; i < RANGE; i+=8) {
            // Safe to vectorize with AlignVector
            b[i+0] = (byte)(a[i+0] & mask); // offset 0, align 0
            b[i+1] = (byte)(a[i+1] & mask);
            b[i+2] = (byte)(a[i+2] & mask);
            b[i+3] = (byte)(a[i+3] & mask);
        }
        return new Object[]{ a, b };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.AND_VB, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        // UNSAFE.ARRAY_BYTE_BASE_OFFSET = 16, but with compact object headers UNSAFE.ARRAY_BYTE_BASE_OFFSET=12.
        // If AlignVector=true, we need the offset to be 8-byte aligned, else the vectors are filtered out.
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
    static Object[] test1a(byte[] a, byte[] b, byte mask) {
        for (int i = 0; i < RANGE; i+=8) {
            b[i+0] = (byte)(a[i+0] & mask); // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 0 + iter*8
            b[i+1] = (byte)(a[i+1] & mask);
            b[i+2] = (byte)(a[i+2] & mask);
            b[i+3] = (byte)(a[i+3] & mask);
            b[i+4] = (byte)(a[i+4] & mask);
            b[i+5] = (byte)(a[i+5] & mask);
            b[i+6] = (byte)(a[i+6] & mask);
            b[i+7] = (byte)(a[i+7] & mask);
        }
        return new Object[]{ a, b };
    }
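    // Note: a sketch of the alignment arithmetic for test1a above, assuming the base offsets stated
    // in the comments (UNSAFE.ARRAY_BYTE_BASE_OFFSET = 16, or 12 with UseCompactObjectHeaders) and
    // an 8-byte aligned array base:
    //   without COH: adr = base + 16 + 8*iter -> always 8-byte aligned, so AlignVector is satisfied.
    //   with COH:    adr = base + 12 + 8*iter -> 4 (mod 8), so AlignVector=true filters the vectors.
    // This is why the IR rule above only applies if UseCompactObjectHeaders=false or AlignVector=false.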

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.AND_VB, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfOr = {"UseCompactObjectHeaders", "true", "AlignVector", "false"},
        // UNSAFE.ARRAY_BYTE_BASE_OFFSET = 16, but with compact object headers UNSAFE.ARRAY_BYTE_BASE_OFFSET=12.
        // If AlignVector=true, we need the offset to be 8-byte aligned, else the vectors are filtered out.
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
    static Object[] test1b(byte[] a, byte[] b, byte mask) {
        for (int i = 4; i < RANGE-8; i+=8) {
            b[i+0] = (byte)(a[i+0] & mask); // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 4 + iter*8
            b[i+1] = (byte)(a[i+1] & mask);
            b[i+2] = (byte)(a[i+2] & mask);
            b[i+3] = (byte)(a[i+3] & mask);
            b[i+4] = (byte)(a[i+4] & mask);
            b[i+5] = (byte)(a[i+5] & mask);
            b[i+6] = (byte)(a[i+6] & mask);
            b[i+7] = (byte)(a[i+7] & mask);
        }
        return new Object[]{ a, b };
    }
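    // Note: a sketch of the alignment arithmetic for test1b above, under the same assumptions as for test1a:
    //   without COH: adr = base + 16 + 4 + 8*iter -> 4 (mod 8), so AlignVector=true filters the vectors.
    //   with COH:    adr = base + 12 + 4 + 8*iter -> always 8-byte aligned, so AlignVector is satisfied.
    // This is why the IR rule above only applies if UseCompactObjectHeaders=true or AlignVector=false.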

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.AND_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test2(byte[] a, byte[] b, byte mask) {
        for (int i = 0; i < RANGE; i+=8) {
            // Cannot align with AlignVector: (3 + x*8) % 8 = 3
            b[i+3] = (byte)(a[i+3] & mask); // at alignment 3
            b[i+4] = (byte)(a[i+4] & mask);
            b[i+5] = (byte)(a[i+5] & mask);
            b[i+6] = (byte)(a[i+6] & mask);
            // ...
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test10c(short[] a, short[] b, short mask) {
        // This is not alignable with the pre-loop, because of the odd init (i = 13).
        // It seems this is not handled correctly when MaxVectorSize >= 32.
        for (int i = 13; i < RANGE-8; i+=8) {
            b[i+0] = (short)(a[i+0] & mask);
            b[i+1] = (short)(a[i+1] & mask);
            b[i+2] = (short)(a[i+2] & mask);
            b[i+3] = (short)(a[i+3] & mask);
        }
        return new Object[]{ a, b };
    }
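    // Note: a sketch of why test10c above is not alignable, assuming UNSAFE.ARRAY_SHORT_BASE_OFFSET = 16
    // (12 with UseCompactObjectHeaders) and an 8-byte aligned array base:
    //   adr = base + 16 + 2*13 + 16*iter = base + 42 + 16*iter -> 2 (mod 8)   [6 (mod 8) with COH]
    // The address advances by 16 bytes per iteration, so the pre-loop cannot change the offset modulo 8,
    // and with AlignVector=true the vectors are filtered out in every configuration.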

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VS, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"MaxVectorSize", ">=16", "UseCompactObjectHeaders", "false"},
        // UNSAFE.ARRAY_SHORT_BASE_OFFSET = 16, but with compact object headers UNSAFE.ARRAY_SHORT_BASE_OFFSET=12.
        // If AlignVector=true, we need the offset to be 8-byte aligned, else the vectors are filtered out.
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
    static Object[] test10d(short[] a, short[] b, short mask) {
        for (int i = 13; i < RANGE-16; i+=8) {
            // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2*(3 + 13) + iter*16
            b[i+0+3] = (short)(a[i+0+3] & mask);
            b[i+1+3] = (short)(a[i+1+3] & mask);
            b[i+2+3] = (short)(a[i+2+3] & mask);
            b[i+3+3] = (short)(a[i+3+3] & mask);
        }
        return new Object[]{ a, b };
    }
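    // Note: a sketch of the alignment arithmetic for test10d above, under the same assumptions as for test10c:
    //   adr = base + 16 + 2*(13 + 3) + 16*iter = base + 48 + 16*iter -> 0 (mod 8)   [4 (mod 8) with COH]
    // Without COH the accesses are always 8-byte aligned, so vectorization is expected even with
    // AlignVector=true; with COH they never are, hence the rule requires UseCompactObjectHeaders=false.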

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VS, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"MaxVectorSize", ">=16", "UseCompactObjectHeaders", "true"},
        // UNSAFE.ARRAY_SHORT_BASE_OFFSET = 16, but with compact object headers UNSAFE.ARRAY_SHORT_BASE_OFFSET=12.
        // If AlignVector=true, we need the offset to be 8-byte aligned, else the vectors are filtered out.
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
    static Object[] test10e(short[] a, short[] b, short mask) {
        for (int i = 11; i < RANGE-16; i+=8) {
            // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2*(3 + 11) + iter*16
            b[i+0+3] = (short)(a[i+0+3] & mask);
            b[i+1+3] = (short)(a[i+1+3] & mask);
            b[i+2+3] = (short)(a[i+2+3] & mask);
            b[i+3+3] = (short)(a[i+3+3] & mask);
        }
        return new Object[]{ a, b };
    }
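    // Note: a sketch of the alignment arithmetic for test10e above, under the same assumptions as for test10c:
    //   adr = base + 16 + 2*(11 + 3) + 16*iter = base + 44 + 16*iter -> 4 (mod 8)   [0 (mod 8) with COH]
    // This is the mirror image of test10d: only with UseCompactObjectHeaders=true are the accesses
    // 8-byte aligned, which is why the rule above requires UseCompactObjectHeaders=true.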

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.AND_VB, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    static Object[] test11aB(byte[] a, byte[] b, byte mask) {
        for (int i = 0; i < RANGE; i++) {
            // always alignable
            b[i+0] = (byte)(a[i+0] & mask);
        }
        return new Object[]{ a, b };
    }
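    // Note: in test11aB above the address advances by only 1 byte per iteration, so the pre-loop can
    // always reach an 8-byte aligned address regardless of the base offset (16, or 12 with compact
    // object headers). Hence the IR rule has no AlignVector or UseCompactObjectHeaders condition.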

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.AND_VS, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // ...

                  IRNode.ADD_VI, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
                  IRNode.ADD_VL, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"riscv64", "true"},
        applyIfCPUFeature = {"rvv", "true"},
        applyIf = {"MaxVectorSize", ">=32"})
    static Object[] test13aIL(int[] a, long[] b) {
        for (int i = 0; i < RANGE; i++) {
            a[i]++;
            b[i]++;
        }
        return new Object[]{ a, b };
    }
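    // Note: the IR rules for test13aIL above use IRNode.VECTOR_SIZE + "min(max_int, max_long)" because
    // the int and long elements are processed together in each iteration, so (presumably) both vectors
    // must use the same number of lanes, capped by the smaller of the maximal int and long lane counts.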

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.LOAD_VECTOR_I, "> 0",
                  IRNode.ADD_VB, "> 0",
                  IRNode.ADD_VI, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
    static Object[] test13aIB(int[] a, byte[] b) {
        for (int i = 0; i < RANGE; i++) {
            // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4*iter
            //     = 16 (or 12 if UseCompactObjectHeaders=true)
            a[i]++;
            // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 1*iter
            //     = 16 (or 12 if UseCompactObjectHeaders=true)
            b[i]++;
            // For AlignVector, all adr must be 8-byte aligned. Let's see for which iteration this can hold:
            // If UseCompactObjectHeaders=false:
            //   a: 0, 2, 4, 6, 8, ...
            //   b: 0, 8, 16, 24, 32, ...
            //   -> Ok, aligns every 8th iteration.
            // If UseCompactObjectHeaders=true:
            //   a: 1, 3, 5, 7, 9, ...
            //   b: 4, 12, 20, 28, 36, ...
            //   -> we can never align both vectors!
        }
        return new Object[]{ a, b };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
                  IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.ADD_VI, "> 0",
                  IRNode.ADD_VS, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
    static Object[] test13aIS(int[] a, short[] b) {
        for (int i = 0; i < RANGE; i++) {
            // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4*iter
            //     = 16 (or 12 if UseCompactObjectHeaders=true)
            a[i]++;
            // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2*iter
            //     = 16 (or 12 if UseCompactObjectHeaders=true)
            b[i]++;
            // For AlignVector, all adr must be 8-byte aligned. Let's see for which iteration this can hold:
            // If UseCompactObjectHeaders=false:
            //   a: iter % 2 == 0
            //   b: iter % 4 == 0
            //   -> Ok, aligns every 4th iteration.
            // If UseCompactObjectHeaders=true:
            //   a: iter % 2 == 1
            //   b: iter % 4 == 2
            //   -> we can never align both vectors!
        }
        return new Object[]{ a, b };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.LOAD_VECTOR_I, "> 0",
                  IRNode.LOAD_VECTOR_L, "> 0",
                  IRNode.ADD_VB, "> 0",
                  IRNode.ADD_VS, "> 0",
                  IRNode.ADD_VI, "> 0",
                  IRNode.ADD_VL, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
    static Object[] test13aBSIL(byte[] a, short[] b, int[] c, long[] d) {
        for (int i = 0; i < RANGE; i++) {
            // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 1*iter
            //     = 16 (or 12 if UseCompactObjectHeaders=true)
            a[i]++;
            // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2*iter
            //     = 16 (or 12 if UseCompactObjectHeaders=true)
            b[i]++;
            // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4*iter
            //     = 16 (or 12 if UseCompactObjectHeaders=true)
            c[i]++;
            // adr = base + UNSAFE.ARRAY_LONG_BASE_OFFSET + 8*iter
            //     = 16 (always)
            d[i]++;
            // If AlignVector and UseCompactObjectHeaders, and we want all adr 8-byte aligned:
            // a: iter % 8 = 4
            // c: iter % 2 = 1
            // -> can never align both vectors!
        }
        return new Object[]{ a, b, c, d };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
                  IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
                  IRNode.ADD_VI, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
                  IRNode.ADD_VL, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeature = {"avx2", "true"})
    // require avx to ensure vectors are larger than what unrolling produces
    @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
                  IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
                  IRNode.ADD_VI, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
                  IRNode.ADD_VL, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"riscv64", "true"},
        applyIfCPUFeature = {"rvv", "true"},
        applyIf = {"MaxVectorSize", ">=32"})
    static Object[] test13bIL(int[] a, long[] b) {
        for (int i = 1; i < RANGE; i++) {
            a[i]++;
            b[i]++;
        }
        return new Object[]{ a, b };
    }
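    // Note (a sketch, inferred from the adr comments used in the tests below): in test13bIL the long
    // accesses are 8-byte aligned in every iteration (ARRAY_LONG_BASE_OFFSET is 16 with and without
    // compact object headers, and the stride is 8 bytes), and the int accesses are 8-byte aligned every
    // second iteration, so the pre-loop can align both packs and no AlignVector restriction is needed.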

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.LOAD_VECTOR_I, "> 0",
                  IRNode.ADD_VB, "> 0",
                  IRNode.ADD_VI, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
    static Object[] test13bIB(int[] a, byte[] b) {
        for (int i = 1; i < RANGE; i++) {
            // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4 + 4*iter
            //     = 16 (or 12 if UseCompactObjectHeaders=true)
            a[i]++;
            // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 1 + 1*iter
            //     = 16 (or 12 if UseCompactObjectHeaders=true)
            b[i]++;
            // If AlignVector and UseCompactObjectHeaders, and we want all adr 8-byte aligned:
            // a: iter % 2 = 0
            // b: iter % 8 = 3
            // -> can never align both vectors!
        }
        return new Object[]{ a, b };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
                  IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.ADD_VI, "> 0",
                  IRNode.ADD_VS, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
    static Object[] test13bIS(int[] a, short[] b) {
        for (int i = 1; i < RANGE; i++) {
            // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4 + 4*iter
            //     = 16 (or 12 if UseCompactObjectHeaders=true)
            a[i]++;
            // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2 + 2*iter
            //     = 16 (or 12 if UseCompactObjectHeaders=true)
            b[i]++;
            // If AlignVector and UseCompactObjectHeaders, and we want all adr 8-byte aligned:
            // a: iter % 2 = 0
            // b: iter % 4 = 1
            // -> can never align both vectors!
        }
        return new Object[]{ a, b };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.LOAD_VECTOR_I, "> 0",
                  IRNode.LOAD_VECTOR_L, "> 0",
                  IRNode.ADD_VB, "> 0",
                  IRNode.ADD_VS, "> 0",
                  IRNode.ADD_VI, "> 0",
                  IRNode.ADD_VL, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
    static Object[] test13bBSIL(byte[] a, short[] b, int[] c, long[] d) {
        for (int i = 1; i < RANGE; i++) {
            // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 1 + 1*iter
            //     = 16 (or 12 if UseCompactObjectHeaders=true)
            a[i]++;
            // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2 + 2*iter
            //     = 16 (or 12 if UseCompactObjectHeaders=true)
            b[i]++;
            // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4 + 4*iter
            //     = 16 (or 12 if UseCompactObjectHeaders=true)
            c[i]++;
            // adr = base + UNSAFE.ARRAY_LONG_BASE_OFFSET + 8 + 8*iter
            //     = 16 (always)
            d[i]++;
            // If AlignVector and UseCompactObjectHeaders, and we want all adr 8-byte aligned:
            // a: iter % 8 = 3
            // c: iter % 2 = 0
            // -> can never align both vectors!
        }
        return new Object[]{ a, b, c, d };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.ADD_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "false"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.ADD_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test14aB(byte[] a) {
        // non-power-of-2 stride
        for (int i = 0; i < RANGE-20; i+=9) {