120 case "NoAlignVector-COH" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:-AlignVector"); }
121 case "VerifyAlignVector-COH" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:+AlignVector", "-XX:+IgnoreUnrecognizedVMOptions", "-XX:+VerifyAlignVector"); }
122 default -> { throw new RuntimeException("Test argument not recognized: " + args[0]); }
123 }
124 framework.start();
125 }
126
127 public TestAlignVector() {
// Generate the shared input arrays once; every registered test clones them,
// so individual runs cannot corrupt the inputs of other tests or of the
// gold (reference) run below.
128 // Generate input once
129 aB = generateB();
130 bB = generateB();
131 aS = generateS();
132 bS = generateS();
133 aI = generateI();
134 bI = generateI();
135 aL = generateL();
136 bL = generateL();
137
138 // Add all tests to list
139 tests.put("test0", () -> { return test0(aB.clone(), bB.clone(), mB); });
140 tests.put("test1a", () -> { return test1a(aB.clone(), bB.clone(), mB); });
141 tests.put("test1b", () -> { return test1b(aB.clone(), bB.clone(), mB); });
142 tests.put("test2", () -> { return test2(aB.clone(), bB.clone(), mB); });
143 tests.put("test3", () -> { return test3(aB.clone(), bB.clone(), mB); });
144 tests.put("test4", () -> { return test4(aB.clone(), bB.clone(), mB); });
145 tests.put("test5", () -> { return test5(aB.clone(), bB.clone(), mB, 0); });
146 tests.put("test6", () -> { return test6(aB.clone(), bB.clone(), mB); });
147 tests.put("test7", () -> { return test7(aS.clone(), bS.clone(), mS); });
// NOTE(review): "test8" was previously registered twice under the same key
// (once with arg 0, once with arg 1). Map.put overwrites, so the arg-0
// lambda was dead and never executed. The dead registration is removed;
// runtime behavior is unchanged.
149 tests.put("test8", () -> { return test8(aB.clone(), bB.clone(), mB, 1); });
150 tests.put("test9", () -> { return test9(aB.clone(), bB.clone(), mB); });
151
152 tests.put("test10a", () -> { return test10a(aB.clone(), bB.clone(), mB); });
153 tests.put("test10b", () -> { return test10b(aB.clone(), bB.clone(), mB); });
154 tests.put("test10c", () -> { return test10c(aS.clone(), bS.clone(), mS); });
155 tests.put("test10d", () -> { return test10d(aS.clone(), bS.clone(), mS); });
156 tests.put("test10e", () -> { return test10e(aS.clone(), bS.clone(), mS); });
157
158 tests.put("test11aB", () -> { return test11aB(aB.clone(), bB.clone(), mB); });
159 tests.put("test11aS", () -> { return test11aS(aS.clone(), bS.clone(), mS); });
160 tests.put("test11aI", () -> { return test11aI(aI.clone(), bI.clone(), mI); });
161 tests.put("test11aL", () -> { return test11aL(aL.clone(), bL.clone(), mL); });
162
163 tests.put("test11bB", () -> { return test11bB(aB.clone(), bB.clone(), mB); });
164 tests.put("test11bS", () -> { return test11bS(aS.clone(), bS.clone(), mS); });
165 tests.put("test11bI", () -> { return test11bI(aI.clone(), bI.clone(), mI); });
166 tests.put("test11bL", () -> { return test11bL(aL.clone(), bL.clone(), mL); });
167
168 tests.put("test11cB", () -> { return test11cB(aB.clone(), bB.clone(), mB); });
169 tests.put("test11cS", () -> { return test11cS(aS.clone(), bS.clone(), mS); });
170 tests.put("test11cI", () -> { return test11cI(aI.clone(), bI.clone(), mI); });
171 tests.put("test11cL", () -> { return test11cL(aL.clone(), bL.clone(), mL); });
172
173 tests.put("test11dB", () -> { return test11dB(aB.clone(), bB.clone(), mB, 0); });
174 tests.put("test11dS", () -> { return test11dS(aS.clone(), bS.clone(), mS, 0); });
175 tests.put("test11dI", () -> { return test11dI(aI.clone(), bI.clone(), mI, 0); });
176 tests.put("test11dL", () -> { return test11dL(aL.clone(), bL.clone(), mL, 0); });
206 tests.put("test17c", () -> { return test17c(aL.clone()); });
207 tests.put("test17d", () -> { return test17d(aL.clone()); });
208
209 tests.put("test18a", () -> { return test18a(aB.clone(), aI.clone()); });
210 tests.put("test18b", () -> { return test18b(aB.clone(), aI.clone()); });
211
212 tests.put("test19", () -> { return test19(aI.clone(), bI.clone()); });
213 tests.put("test20", () -> { return test20(aB.clone()); });
214
// Run every test once (interpreted) to capture the expected "gold" results
// before the IR framework compiles the test methods.
215 // Compute gold value for all test methods before compilation
216 for (Map.Entry<String,TestFunction> entry : tests.entrySet()) {
217 String name = entry.getKey();
218 TestFunction test = entry.getValue();
219 Object[] gold = test.run();
220 golds.put(name, gold);
221 }
222 }
223
224 @Warmup(100)
225 @Run(test = {"test0",
226 "test1a",
227 "test1b",
228 "test2",
229 "test3",
230 "test4",
231 "test5",
232 "test6",
233 "test7",
234 "test8",
235 "test9",
236 "test10a",
237 "test10b",
238 "test10c",
239 "test10d",
240 "test10e",
241 "test11aB",
242 "test11aS",
243 "test11aI",
244 "test11aL",
245 "test11bB",
246 "test11bS",
247 "test11bI",
248 "test11bL",
249 "test11cB",
250 "test11cS",
251 "test11cI",
252 "test11cL",
253 "test11dB",
254 "test11dS",
255 "test11dI",
256 "test11dL",
257 "test12",
258 "test13aIL",
259 "test13aIB",
260 "test13aIS",
411 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
412 IRNode.STORE_VECTOR, "> 0"},
413 applyIf = {"MaxVectorSize", ">=8"},
414 applyIfPlatform = {"64-bit", "true"},
415 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
// Masks 4 consecutive bytes per stride-8 iteration. Offset 0 is always
// 8-byte alignable, so the (partially visible) @IR rule above expects
// 4-byte vectors whenever MaxVectorSize >= 8.
416 static Object[] test0(byte[] a, byte[] b, byte mask) {
417 for (int i = 0; i < RANGE; i+=8) {
418 // Safe to vectorize with AlignVector
419 b[i+0] = (byte)(a[i+0] & mask); // offset 0, align 0
420 b[i+1] = (byte)(a[i+1] & mask);
421 b[i+2] = (byte)(a[i+2] & mask);
422 b[i+3] = (byte)(a[i+3] & mask);
423 }
// Return the mutated arrays so the framework can verify them against gold.
424 return new Object[]{ a, b };
425 }
426
// Full 8-byte pack at offset 0. Vectorization is expected unless both
// UseCompactObjectHeaders and AlignVector are enabled (COH makes the array
// base offset 12, which is not 8-byte aligned).
427 @Test
428 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
429 IRNode.AND_VB, "> 0",
430 IRNode.STORE_VECTOR, "> 0"},
431 applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
432 // UNSAFE.ARRAY_BYTE_BASE_OFFSET = 16, but with compact object headers UNSAFE.ARRAY_BYTE_BASE_OFFSET=12.
433 // If AlignVector=true, we need the offset to be 8-byte aligned, else the vectors are filtered out.
434 applyIfPlatform = {"64-bit", "true"},
435 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
436 static Object[] test1a(byte[] a, byte[] b, byte mask) {
437 for (int i = 0; i < RANGE; i+=8) {
438 b[i+0] = (byte)(a[i+0] & mask); // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 0 + iter*8
439 b[i+1] = (byte)(a[i+1] & mask);
440 b[i+2] = (byte)(a[i+2] & mask);
441 b[i+3] = (byte)(a[i+3] & mask);
442 b[i+4] = (byte)(a[i+4] & mask);
443 b[i+5] = (byte)(a[i+5] & mask);
444 b[i+6] = (byte)(a[i+6] & mask);
445 b[i+7] = (byte)(a[i+7] & mask);
446 }
447 return new Object[]{ a, b };
448 }
449
// COH counterpart of test1a: the loop starts at offset 4, so with compact
// object headers (base offset 12) the addresses 12 + 4 + iter*8 ARE 8-byte
// aligned; hence the applyIfOr condition is inverted relative to test1a.
450 @Test
451 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
452 IRNode.AND_VB, "> 0",
453 IRNode.STORE_VECTOR, "> 0"},
454 applyIfOr = {"UseCompactObjectHeaders", "true", "AlignVector", "false"},
455 // UNSAFE.ARRAY_BYTE_BASE_OFFSET = 16, but with compact object headers UNSAFE.ARRAY_BYTE_BASE_OFFSET=12.
456 // If AlignVector=true, we need the offset to be 8-byte aligned, else the vectors are filtered out.
457 applyIfPlatform = {"64-bit", "true"},
458 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
459 static Object[] test1b(byte[] a, byte[] b, byte mask) {
460 for (int i = 4; i < RANGE-8; i+=8) {
461 b[i+0] = (byte)(a[i+0] & mask); // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 4 + iter*8
462 b[i+1] = (byte)(a[i+1] & mask);
463 b[i+2] = (byte)(a[i+2] & mask);
464 b[i+3] = (byte)(a[i+3] & mask);
465 b[i+4] = (byte)(a[i+4] & mask);
466 b[i+5] = (byte)(a[i+5] & mask);
467 b[i+6] = (byte)(a[i+6] & mask);
468 b[i+7] = (byte)(a[i+7] & mask);
469 }
470 return new Object[]{ a, b };
471 }
472
473 @Test
474 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
475 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
476 IRNode.STORE_VECTOR, "> 0"},
477 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
478 applyIfPlatform = {"64-bit", "true"},
479 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
480 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
481 IRNode.AND_VB, "= 0",
482 IRNode.STORE_VECTOR, "= 0"},
483 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
484 applyIfPlatform = {"64-bit", "true"},
485 applyIf = {"AlignVector", "true"})
486 static Object[] test2(byte[] a, byte[] b, byte mask) {
487 for (int i = 0; i < RANGE; i+=8) {
488 // Cannot align with AlignVector: 3 + x * 8 % 8 = 3
489 b[i+3] = (byte)(a[i+3] & mask); // at alignment 3
490 b[i+4] = (byte)(a[i+4] & mask);
491 b[i+5] = (byte)(a[i+5] & mask);
492 b[i+6] = (byte)(a[i+6] & mask);
744 IRNode.STORE_VECTOR, "= 0"},
745 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
746 applyIfPlatform = {"64-bit", "true"},
747 applyIf = {"AlignVector", "true"})
// Short variant with odd loop init (i = 13): the pre-loop cannot fix up the
// misalignment, so the (partially visible) @IR rule above expects no
// vectorization when AlignVector=true.
748 static Object[] test10c(short[] a, short[] b, short mask) {
749 // This is not alignable with pre-loop, because of odd init.
750 // Seems not correctly handled with MaxVectorSize >= 32.
751 for (int i = 13; i < RANGE-8; i+=8) {
752 b[i+0] = (short)(a[i+0] & mask);
753 b[i+1] = (short)(a[i+1] & mask);
754 b[i+2] = (short)(a[i+2] & mask);
755 b[i+3] = (short)(a[i+3] & mask);
756 }
757 return new Object[]{ a, b };
758 }
759
// Odd init plus constant offset +3: with the default 16-byte base offset the
// access 16 + 2*(3 + 13) + iter*16 is 8-byte aligned, so 4-short vectors are
// expected; with compact object headers (base 12) it is not (see test10e).
760 @Test
761 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
762 IRNode.AND_VS, IRNode.VECTOR_SIZE_4, "> 0",
763 IRNode.STORE_VECTOR, "> 0"},
764 applyIfAnd = {"MaxVectorSize", ">=16", "UseCompactObjectHeaders", "false"},
765 // UNSAFE.ARRAY_BYTE_BASE_OFFSET = 16, but with compact object headers UNSAFE.ARRAY_BYTE_BASE_OFFSET=12.
766 // If AlignVector=true, we need the offset to be 8-byte aligned, else the vectors are filtered out.
767 applyIfPlatform = {"64-bit", "true"},
768 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
769 static Object[] test10d(short[] a, short[] b, short mask) {
770 for (int i = 13; i < RANGE-16; i+=8) {
771 // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2*(3 + 13) + iter*16
772 b[i+0+3] = (short)(a[i+0+3] & mask);
773 b[i+1+3] = (short)(a[i+1+3] & mask);
774 b[i+2+3] = (short)(a[i+2+3] & mask);
775 b[i+3+3] = (short)(a[i+3+3] & mask);
776 }
777 return new Object[]{ a, b };
778 }
779
// COH counterpart of test10d: init 11 shifts the constant part so that with
// compact object headers (base 12) the address 12 + 2*(3 + 11) + iter*16 is
// 8-byte aligned, and 4-short vectors are expected.
780 @Test
781 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
782 IRNode.AND_VS, IRNode.VECTOR_SIZE_4, "> 0",
783 IRNode.STORE_VECTOR, "> 0"},
784 applyIfAnd = {"MaxVectorSize", ">=16", "UseCompactObjectHeaders", "true"},
785 // UNSAFE.ARRAY_BYTE_BASE_OFFSET = 16, but with compact object headers UNSAFE.ARRAY_BYTE_BASE_OFFSET=12.
786 // If AlignVector=true, we need the offset to be 8-byte aligned, else the vectors are filtered out.
787 applyIfPlatform = {"64-bit", "true"},
788 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
789 static Object[] test10e(short[] a, short[] b, short mask) {
790 for (int i = 11; i < RANGE-16; i+=8) {
791 // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2*(3 + 11) + iter*16
792 b[i+0+3] = (short)(a[i+0+3] & mask);
793 b[i+1+3] = (short)(a[i+1+3] & mask);
794 b[i+2+3] = (short)(a[i+2+3] & mask);
795 b[i+3+3] = (short)(a[i+3+3] & mask);
796 }
797 return new Object[]{ a, b };
798 }
799
// Simple stride-1 byte loop: trivially alignable, so vectorization is
// expected unconditionally (given the platform/CPU-feature preconditions).
800 @Test
801 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
802 IRNode.AND_VB, "> 0",
803 IRNode.STORE_VECTOR, "> 0"},
804 applyIfPlatform = {"64-bit", "true"},
805 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
806 static Object[] test11aB(byte[] a, byte[] b, byte mask) {
807 for (int i = 0; i < RANGE; i++) {
808 // always alignable
809 b[i+0] = (byte)(a[i+0] & mask);
810 }
811 return new Object[]{ a, b };
812 }
813
814 @Test
815 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
816 IRNode.AND_VS, "> 0",
817 IRNode.STORE_VECTOR, "> 0"},
818 applyIfPlatform = {"64-bit", "true"},
819 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
1061 IRNode.ADD_VI, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1062 IRNode.ADD_VL, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1063 IRNode.STORE_VECTOR, "> 0"},
1064 applyIfPlatform = {"64-bit", "true"},
1065 applyIfCPUFeatureOr = {"avx2", "true"})
1066 // require avx to ensure vectors are larger than what unrolling produces
// Mixed int/long increment loop; the (partially visible) @IR rule above
// expects both element types vectorized at the same element count.
1067 static Object[] test13aIL(int[] a, long[] b) {
1068 for (int i = 0; i < RANGE; i++) {
1069 a[i]++;
1070 b[i]++;
1071 }
1072 return new Object[]{ a, b };
1073 }
1074
// Mixed int/byte increment loop. With UseCompactObjectHeaders=true and
// AlignVector=true the two arrays can never both be 8-byte aligned, hence
// the applyIfOr precondition.
// NOTE(review): the original per-array address comments below had a (int[])
// and b (byte[]) swapped; corrected here to match the (correct) comments in
// test13bIB.
1075 @Test
1076 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
1077 IRNode.LOAD_VECTOR_I, "> 0",
1078 IRNode.ADD_VB, "> 0",
1079 IRNode.ADD_VI, "> 0",
1080 IRNode.STORE_VECTOR, "> 0"},
1081 applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
1082 applyIfPlatform = {"64-bit", "true"},
1083 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
1084 static Object[] test13aIB(int[] a, byte[] b) {
1085 for (int i = 0; i < RANGE; i++) {
1086 // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4*iter
1087 // = 16 (or 12 if UseCompactObjectHeaders=true)
1088 a[i]++;
1089 // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 1*iter
1090 // = 16 (or 12 if UseCompactObjectHeaders=true)
1091 b[i]++;
1092 // For AlignVector, all adr must be 8-byte aligned. Let's see for which iteration this can hold:
1093 // If UseCompactObjectHeaders=false (aligned iterations):
1094 // a: 0, 2, 4, 6, 8, ...
1095 // b: 0, 8, 16, 24, 32, ...
1096 // -> Ok, aligns every 8th iteration.
1097 // If UseCompactObjectHeaders=true (aligned iterations):
1098 // a: 1, 3, 5, 7, 9, ...
1099 // b: 4, 12, 20, 28, 36, ...
1100 // -> we can never align both vectors!
1101 }
1102 return new Object[]{ a, b };
1103 }
1104
// Mixed int/short increment loop; with COH + AlignVector the two arrays can
// never both be 8-byte aligned, hence the applyIfOr precondition.
1105 @Test
1106 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
1107 IRNode.LOAD_VECTOR_S, "> 0",
1108 IRNode.ADD_VI, "> 0",
1109 IRNode.ADD_VS, "> 0",
1110 IRNode.STORE_VECTOR, "> 0"},
1111 applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
1112 applyIfPlatform = {"64-bit", "true"},
1113 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
1114 static Object[] test13aIS(int[] a, short[] b) {
1115 for (int i = 0; i < RANGE; i++) {
// NOTE(review): comment fixed — a is int[], so the INT base offset applies
// (the original comment said ARRAY_BYTE_BASE_OFFSET).
1116 // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4*iter
1117 // = 16 (or 12 if UseCompactObjectHeaders=true)
1118 a[i]++;
1119 // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2*iter
1120 // = 16 (or 12 if UseCompactObjectHeaders=true)
1121 b[i]++;
1122 // For AlignVector, all adr must be 8-byte aligned. Let's see for which iteration this can hold:
1123 // If UseCompactObjectHeaders=false:
1124 // a: iter % 2 == 0
1125 // b: iter % 4 == 0
1126 // -> Ok, aligns every 4th iteration.
1127 // If UseCompactObjectHeaders=true:
1128 // a: iter % 2 = 1
1129 // b: iter % 4 = 2
1130 // -> we can never align both vectors!
1131 }
1132 return new Object[]{ a, b };
1133 }
1134
// Four-array (byte/short/int/long) increment loop; with COH + AlignVector
// the byte and int arrays can never be simultaneously 8-byte aligned, hence
// the applyIfOr precondition.
1135 @Test
1136 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
1137 IRNode.LOAD_VECTOR_S, "> 0",
1138 IRNode.LOAD_VECTOR_I, "> 0",
1139 IRNode.LOAD_VECTOR_L, "> 0",
1140 IRNode.ADD_VB, "> 0",
1141 IRNode.ADD_VS, "> 0",
1142 IRNode.ADD_VI, "> 0",
1143 IRNode.ADD_VL, "> 0",
1144 IRNode.STORE_VECTOR, "> 0"},
1145 applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
1146 applyIfPlatform = {"64-bit", "true"},
1147 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
1148 static Object[] test13aBSIL(byte[] a, short[] b, int[] c, long[] d) {
1149 for (int i = 0; i < RANGE; i++) {
1150 // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 1*iter
1151 // = 16 (or 12 if UseCompactObjectHeaders=true)
1152 a[i]++;
1153 // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2*iter
1154 // = 16 (or 12 if UseCompactObjectHeaders=true)
1155 b[i]++;
1156 // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4*iter
1157 // = 16 (or 12 if UseCompactObjectHeaders=true)
1158 c[i]++;
1159 // adr = base + UNSAFE.ARRAY_LONG_BASE_OFFSET + 8*iter
1160 // = 16 (always)
1161 d[i]++;
1162 // If AlignVector and UseCompactObjectHeaders, and we want all adr 8-byte aligned:
1163 // a: iter % 8 = 4
1164 // c: iter % 2 = 1
1165 // -> can never align both vectors!
1166 }
1167 return new Object[]{ a, b, c, d };
1168 }
1169
// Like test13aIL but with init i = 1 (constant offset); both element types
// are still expected to vectorize at the same element count.
1170 @Test
1171 @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1172 IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1173 IRNode.ADD_VI, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1174 IRNode.ADD_VL, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1175 IRNode.STORE_VECTOR, "> 0"},
1176 applyIfPlatform = {"64-bit", "true"},
1177 applyIfCPUFeatureOr = {"avx2", "true"})
1178 // require avx to ensure vectors are larger than what unrolling produces
1179 static Object[] test13bIL(int[] a, long[] b) {
1180 for (int i = 1; i < RANGE; i++) {
1181 a[i]++;
1182 b[i]++;
1183 }
1184 return new Object[]{ a, b };
1185 }
1186
// Like test13aIB but with init i = 1; with COH + AlignVector the int and
// byte arrays can never both be 8-byte aligned, hence the applyIfOr.
1187 @Test
1188 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
1189 IRNode.LOAD_VECTOR_I, "> 0",
1190 IRNode.ADD_VB, "> 0",
1191 IRNode.ADD_VI, "> 0",
1192 IRNode.STORE_VECTOR, "> 0"},
1193 applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
1194 applyIfPlatform = {"64-bit", "true"},
1195 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
1196 static Object[] test13bIB(int[] a, byte[] b) {
1197 for (int i = 1; i < RANGE; i++) {
1198 // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4 + 4*iter
1199 // = 16 (or 12 if UseCompactObjectHeaders=true)
1200 a[i]++;
1201 // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 1 + 1*iter
1202 // = 16 (or 12 if UseCompactObjectHeaders=true)
1203 b[i]++;
1204 // If AlignVector and UseCompactObjectHeaders, and we want all adr 8-byte aligned:
1205 // a: iter % 2 = 0
1206 // b: iter % 8 = 3
1207 // -> can never align both vectors!
1208 }
1209 return new Object[]{ a, b };
1210 }
1211
// Like test13aIS but with init i = 1; with COH + AlignVector the int and
// short arrays can never both be 8-byte aligned, hence the applyIfOr.
1212 @Test
1213 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
1214 IRNode.LOAD_VECTOR_S, "> 0",
1215 IRNode.ADD_VI, "> 0",
1216 IRNode.ADD_VS, "> 0",
1217 IRNode.STORE_VECTOR, "> 0"},
1218 applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
1219 applyIfPlatform = {"64-bit", "true"},
1220 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
1221 static Object[] test13bIS(int[] a, short[] b) {
1222 for (int i = 1; i < RANGE; i++) {
1223 // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4 + 4*iter
1224 // = 16 (or 12 if UseCompactObjectHeaders=true)
1225 a[i]++;
1226 // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2 + 2*iter
1227 // = 16 (or 12 if UseCompactObjectHeaders=true)
1228 b[i]++;
1229 // If AlignVector and UseCompactObjectHeaders, and we want all adr 8-byte aligned:
1230 // a: iter % 2 = 0
1231 // b: iter % 4 = 1
1232 // -> can never align both vectors!
1233 }
1234 return new Object[]{ a, b };
1235 }
1236
// Like test13aBSIL but with init i = 1; with COH + AlignVector the byte and
// int arrays can never be simultaneously 8-byte aligned, hence the applyIfOr.
1237 @Test
1238 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
1239 IRNode.LOAD_VECTOR_S, "> 0",
1240 IRNode.LOAD_VECTOR_I, "> 0",
1241 IRNode.LOAD_VECTOR_L, "> 0",
1242 IRNode.ADD_VB, "> 0",
1243 IRNode.ADD_VS, "> 0",
1244 IRNode.ADD_VI, "> 0",
1245 IRNode.ADD_VL, "> 0",
1246 IRNode.STORE_VECTOR, "> 0"},
1247 applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
1248 applyIfPlatform = {"64-bit", "true"},
1249 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
1250 static Object[] test13bBSIL(byte[] a, short[] b, int[] c, long[] d) {
1251 for (int i = 1; i < RANGE; i++) {
1252 // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 1 + 1*iter
1253 // = 16 (or 12 if UseCompactObjectHeaders=true)
1254 a[i]++;
1255 // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2 + 2*iter
1256 // = 16 (or 12 if UseCompactObjectHeaders=true)
1257 b[i]++;
1258 // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4 + 4*iter
1259 // = 16 (or 12 if UseCompactObjectHeaders=true)
1260 c[i]++;
1261 // adr = base + UNSAFE.ARRAY_LONG_BASE_OFFSET + 8 + 8*iter
1262 // = 16 (always)
1263 d[i]++;
1264 // If AlignVector and UseCompactObjectHeaders, and we want all adr 8-byte aligned:
1265 // a: iter % 8 = 3
1266 // c: iter % 2 = 0
1267 // -> can never align both vectors!
1268 }
1269 return new Object[]{ a, b, c, d };
1270 }
1271
1272 @Test
1273 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
1274 IRNode.ADD_VB, "= 0",
1275 IRNode.STORE_VECTOR, "= 0"},
1276 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
1277 applyIfPlatform = {"64-bit", "true"},
1278 applyIf = {"AlignVector", "false"})
1279 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
1280 IRNode.ADD_VB, "= 0",
1281 IRNode.STORE_VECTOR, "= 0"},
1282 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
1283 applyIfPlatform = {"64-bit", "true"},
1284 applyIf = {"AlignVector", "true"})
1285 static Object[] test14aB(byte[] a) {
1286 // non-power-of-2 stride
1287 for (int i = 0; i < RANGE-20; i+=9) {
|
120 case "NoAlignVector-COH" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:-AlignVector"); }
121 case "VerifyAlignVector-COH" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:+AlignVector", "-XX:+IgnoreUnrecognizedVMOptions", "-XX:+VerifyAlignVector"); }
122 default -> { throw new RuntimeException("Test argument not recognized: " + args[0]); }
123 }
124 framework.start();
125 }
126
127 public TestAlignVector() {
// Generate the shared input arrays once; every registered test clones them,
// so individual runs cannot corrupt the inputs of other tests or of the
// gold (reference) run below.
128 // Generate input once
129 aB = generateB();
130 bB = generateB();
131 aS = generateS();
132 bS = generateS();
133 aI = generateI();
134 bI = generateI();
135 aL = generateL();
136 bL = generateL();
137
138 // Add all tests to list
139 tests.put("test0", () -> { return test0(aB.clone(), bB.clone(), mB); });
140 tests.put("test1", () -> { return test1(aB.clone(), bB.clone(), mB); });
141 tests.put("test2", () -> { return test2(aB.clone(), bB.clone(), mB); });
142 tests.put("test3", () -> { return test3(aB.clone(), bB.clone(), mB); });
143 tests.put("test4", () -> { return test4(aB.clone(), bB.clone(), mB); });
144 tests.put("test5", () -> { return test5(aB.clone(), bB.clone(), mB, 0); });
145 tests.put("test6", () -> { return test6(aB.clone(), bB.clone(), mB); });
146 tests.put("test7", () -> { return test7(aS.clone(), bS.clone(), mS); });
// NOTE(review): "test8" was previously registered twice under the same key
// (once with arg 0, once with arg 1). Map.put overwrites, so the arg-0
// lambda was dead and never executed. The dead registration is removed;
// runtime behavior is unchanged.
148 tests.put("test8", () -> { return test8(aB.clone(), bB.clone(), mB, 1); });
149 tests.put("test9", () -> { return test9(aB.clone(), bB.clone(), mB); });
150
151 tests.put("test10a", () -> { return test10a(aB.clone(), bB.clone(), mB); });
152 tests.put("test10b", () -> { return test10b(aB.clone(), bB.clone(), mB); });
153 tests.put("test10c", () -> { return test10c(aS.clone(), bS.clone(), mS); });
154 tests.put("test10d", () -> { return test10d(aS.clone(), bS.clone(), mS); });
155
156 tests.put("test11aB", () -> { return test11aB(aB.clone(), bB.clone(), mB); });
157 tests.put("test11aS", () -> { return test11aS(aS.clone(), bS.clone(), mS); });
158 tests.put("test11aI", () -> { return test11aI(aI.clone(), bI.clone(), mI); });
159 tests.put("test11aL", () -> { return test11aL(aL.clone(), bL.clone(), mL); });
160
161 tests.put("test11bB", () -> { return test11bB(aB.clone(), bB.clone(), mB); });
162 tests.put("test11bS", () -> { return test11bS(aS.clone(), bS.clone(), mS); });
163 tests.put("test11bI", () -> { return test11bI(aI.clone(), bI.clone(), mI); });
164 tests.put("test11bL", () -> { return test11bL(aL.clone(), bL.clone(), mL); });
165
166 tests.put("test11cB", () -> { return test11cB(aB.clone(), bB.clone(), mB); });
167 tests.put("test11cS", () -> { return test11cS(aS.clone(), bS.clone(), mS); });
168 tests.put("test11cI", () -> { return test11cI(aI.clone(), bI.clone(), mI); });
169 tests.put("test11cL", () -> { return test11cL(aL.clone(), bL.clone(), mL); });
170
171 tests.put("test11dB", () -> { return test11dB(aB.clone(), bB.clone(), mB, 0); });
172 tests.put("test11dS", () -> { return test11dS(aS.clone(), bS.clone(), mS, 0); });
173 tests.put("test11dI", () -> { return test11dI(aI.clone(), bI.clone(), mI, 0); });
174 tests.put("test11dL", () -> { return test11dL(aL.clone(), bL.clone(), mL, 0); });
204 tests.put("test17c", () -> { return test17c(aL.clone()); });
205 tests.put("test17d", () -> { return test17d(aL.clone()); });
206
207 tests.put("test18a", () -> { return test18a(aB.clone(), aI.clone()); });
208 tests.put("test18b", () -> { return test18b(aB.clone(), aI.clone()); });
209
210 tests.put("test19", () -> { return test19(aI.clone(), bI.clone()); });
211 tests.put("test20", () -> { return test20(aB.clone()); });
212
// Run every test once (interpreted) to capture the expected "gold" results
// before the IR framework compiles the test methods.
213 // Compute gold value for all test methods before compilation
214 for (Map.Entry<String,TestFunction> entry : tests.entrySet()) {
215 String name = entry.getKey();
216 TestFunction test = entry.getValue();
217 Object[] gold = test.run();
218 golds.put(name, gold);
219 }
220 }
221
222 @Warmup(100)
223 @Run(test = {"test0",
224 "test1",
225 "test2",
226 "test3",
227 "test4",
228 "test5",
229 "test6",
230 "test7",
231 "test8",
232 "test9",
233 "test10a",
234 "test10b",
235 "test10c",
236 "test10d",
237 "test11aB",
238 "test11aS",
239 "test11aI",
240 "test11aL",
241 "test11bB",
242 "test11bS",
243 "test11bI",
244 "test11bL",
245 "test11cB",
246 "test11cS",
247 "test11cI",
248 "test11cL",
249 "test11dB",
250 "test11dS",
251 "test11dI",
252 "test11dL",
253 "test12",
254 "test13aIL",
255 "test13aIB",
256 "test13aIS",
407 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
408 IRNode.STORE_VECTOR, "> 0"},
409 applyIf = {"MaxVectorSize", ">=8"},
410 applyIfPlatform = {"64-bit", "true"},
411 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
// Masks 4 consecutive bytes per stride-8 iteration. Offset 0 is always
// 8-byte alignable, so the (partially visible) @IR rule above expects
// 4-byte vectors whenever MaxVectorSize >= 8.
412 static Object[] test0(byte[] a, byte[] b, byte mask) {
413 for (int i = 0; i < RANGE; i+=8) {
414 // Safe to vectorize with AlignVector
415 b[i+0] = (byte)(a[i+0] & mask); // offset 0, align 0
416 b[i+1] = (byte)(a[i+1] & mask);
417 b[i+2] = (byte)(a[i+2] & mask);
418 b[i+3] = (byte)(a[i+3] & mask);
419 }
// Return the mutated arrays so the framework can verify them against gold.
420 return new Object[]{ a, b };
421 }
422
// Full 8-byte pack at offset 0: alignable in every iteration, so
// vectorization is expected unconditionally (given platform/CPU features).
423 @Test
424 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
425 IRNode.AND_VB, "> 0",
426 IRNode.STORE_VECTOR, "> 0"},
427 applyIfPlatform = {"64-bit", "true"},
428 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
429 static Object[] test1(byte[] a, byte[] b, byte mask) {
430 for (int i = 0; i < RANGE; i+=8) {
431 b[i+0] = (byte)(a[i+0] & mask); // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 0 + iter*8
432 b[i+1] = (byte)(a[i+1] & mask);
433 b[i+2] = (byte)(a[i+2] & mask);
434 b[i+3] = (byte)(a[i+3] & mask);
435 b[i+4] = (byte)(a[i+4] & mask);
436 b[i+5] = (byte)(a[i+5] & mask);
437 b[i+6] = (byte)(a[i+6] & mask);
438 b[i+7] = (byte)(a[i+7] & mask);
439 }
440 return new Object[]{ a, b };
441 }
442
443 @Test
444 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
445 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
446 IRNode.STORE_VECTOR, "> 0"},
447 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
448 applyIfPlatform = {"64-bit", "true"},
449 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
450 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
451 IRNode.AND_VB, "= 0",
452 IRNode.STORE_VECTOR, "= 0"},
453 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
454 applyIfPlatform = {"64-bit", "true"},
455 applyIf = {"AlignVector", "true"})
456 static Object[] test2(byte[] a, byte[] b, byte mask) {
457 for (int i = 0; i < RANGE; i+=8) {
458 // Cannot align with AlignVector: 3 + x * 8 % 8 = 3
459 b[i+3] = (byte)(a[i+3] & mask); // at alignment 3
460 b[i+4] = (byte)(a[i+4] & mask);
461 b[i+5] = (byte)(a[i+5] & mask);
462 b[i+6] = (byte)(a[i+6] & mask);
714 IRNode.STORE_VECTOR, "= 0"},
715 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
716 applyIfPlatform = {"64-bit", "true"},
717 applyIf = {"AlignVector", "true"})
// Short variant with odd loop init (i = 13): the pre-loop cannot fix up the
// misalignment, so the (partially visible) @IR rule above expects no
// vectorization when AlignVector=true.
718 static Object[] test10c(short[] a, short[] b, short mask) {
719 // This is not alignable with pre-loop, because of odd init.
720 // Seems not correctly handled with MaxVectorSize >= 32.
721 for (int i = 13; i < RANGE-8; i+=8) {
722 b[i+0] = (short)(a[i+0] & mask);
723 b[i+1] = (short)(a[i+1] & mask);
724 b[i+2] = (short)(a[i+2] & mask);
725 b[i+3] = (short)(a[i+3] & mask);
726 }
727 return new Object[]{ a, b };
728 }
729
// Odd init plus constant offset +3: the address 16 + 2*(3 + 13) + iter*16 is
// 8-byte aligned, so 4-short vectors are expected when MaxVectorSize >= 16.
730 @Test
731 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
732 IRNode.AND_VS, IRNode.VECTOR_SIZE_4, "> 0",
733 IRNode.STORE_VECTOR, "> 0"},
734 applyIf = {"MaxVectorSize", ">=16"},
735 applyIfPlatform = {"64-bit", "true"},
736 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
737 static Object[] test10d(short[] a, short[] b, short mask) {
738 for (int i = 13; i < RANGE-16; i+=8) {
739 // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2*(3 + 13) + iter*16
740 b[i+0+3] = (short)(a[i+0+3] & mask);
741 b[i+1+3] = (short)(a[i+1+3] & mask);
742 b[i+2+3] = (short)(a[i+2+3] & mask);
743 b[i+3+3] = (short)(a[i+3+3] & mask);
744 }
745 return new Object[]{ a, b };
746 }
747
// Simple stride-1 byte loop: trivially alignable, so vectorization is
// expected unconditionally (given the platform/CPU-feature preconditions).
748 @Test
749 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
750 IRNode.AND_VB, "> 0",
751 IRNode.STORE_VECTOR, "> 0"},
752 applyIfPlatform = {"64-bit", "true"},
753 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
754 static Object[] test11aB(byte[] a, byte[] b, byte mask) {
755 for (int i = 0; i < RANGE; i++) {
756 // always alignable
757 b[i+0] = (byte)(a[i+0] & mask);
758 }
759 return new Object[]{ a, b };
760 }
761
762 @Test
763 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
764 IRNode.AND_VS, "> 0",
765 IRNode.STORE_VECTOR, "> 0"},
766 applyIfPlatform = {"64-bit", "true"},
767 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
1009 IRNode.ADD_VI, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1010 IRNode.ADD_VL, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1011 IRNode.STORE_VECTOR, "> 0"},
1012 applyIfPlatform = {"64-bit", "true"},
1013 applyIfCPUFeatureOr = {"avx2", "true"})
1014 // require avx to ensure vectors are larger than what unrolling produces
// Mixed int/long increment loop; the (partially visible) @IR rule above
// expects both element types vectorized at the same element count.
1015 static Object[] test13aIL(int[] a, long[] b) {
1016 for (int i = 0; i < RANGE; i++) {
1017 a[i]++;
1018 b[i]++;
1019 }
1020 return new Object[]{ a, b };
1021 }
1022
// Mixed int/byte increment loop from offset 0; expects both element types
// to vectorize (avx2 or asimd required).
1023 @Test
1024 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
1025 IRNode.LOAD_VECTOR_I, "> 0",
1026 IRNode.ADD_VB, "> 0",
1027 IRNode.ADD_VI, "> 0",
1028 IRNode.STORE_VECTOR, "> 0"},
1029 applyIfPlatform = {"64-bit", "true"},
1030 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
1031 static Object[] test13aIB(int[] a, byte[] b) {
1032 for (int i = 0; i < RANGE; i++) {
1033 a[i]++;
1034 b[i]++;
1035 }
1036 return new Object[]{ a, b };
1037 }
1038
// Mixed int/short increment loop from offset 0; expects both element types
// to vectorize (avx2 or asimd required).
1039 @Test
1040 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
1041 IRNode.LOAD_VECTOR_S, "> 0",
1042 IRNode.ADD_VI, "> 0",
1043 IRNode.ADD_VS, "> 0",
1044 IRNode.STORE_VECTOR, "> 0"},
1045 applyIfPlatform = {"64-bit", "true"},
1046 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
1047 static Object[] test13aIS(int[] a, short[] b) {
1048 for (int i = 0; i < RANGE; i++) {
1049 a[i]++;
1050 b[i]++;
1051 }
1052 return new Object[]{ a, b };
1053 }
1054
// Four-array (byte/short/int/long) increment loop from offset 0; expects
// all four element types to vectorize (avx2 or asimd required).
1055 @Test
1056 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
1057 IRNode.LOAD_VECTOR_S, "> 0",
1058 IRNode.LOAD_VECTOR_I, "> 0",
1059 IRNode.LOAD_VECTOR_L, "> 0",
1060 IRNode.ADD_VB, "> 0",
1061 IRNode.ADD_VS, "> 0",
1062 IRNode.ADD_VI, "> 0",
1063 IRNode.ADD_VL, "> 0",
1064 IRNode.STORE_VECTOR, "> 0"},
1065 applyIfPlatform = {"64-bit", "true"},
1066 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
1067 static Object[] test13aBSIL(byte[] a, short[] b, int[] c, long[] d) {
1068 for (int i = 0; i < RANGE; i++) {
1069 a[i]++;
1070 b[i]++;
1071 c[i]++;
1072 d[i]++;
1073 }
1074 return new Object[]{ a, b, c, d };
1075 }
1076
// Like test13aIL but with init i = 1 (constant offset); both element types
// are still expected to vectorize at the same element count.
1077 @Test
1078 @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1079 IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1080 IRNode.ADD_VI, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1081 IRNode.ADD_VL, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1082 IRNode.STORE_VECTOR, "> 0"},
1083 applyIfPlatform = {"64-bit", "true"},
1084 applyIfCPUFeatureOr = {"avx2", "true"})
1085 // require avx to ensure vectors are larger than what unrolling produces
1086 static Object[] test13bIL(int[] a, long[] b) {
1087 for (int i = 1; i < RANGE; i++) {
1088 a[i]++;
1089 b[i]++;
1090 }
1091 return new Object[]{ a, b };
1092 }
1093
// Like test13aIB but with init i = 1; expects both element types to
// vectorize (avx2 or asimd required).
1094 @Test
1095 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
1096 IRNode.LOAD_VECTOR_I, "> 0",
1097 IRNode.ADD_VB, "> 0",
1098 IRNode.ADD_VI, "> 0",
1099 IRNode.STORE_VECTOR, "> 0"},
1100 applyIfPlatform = {"64-bit", "true"},
1101 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
1102 static Object[] test13bIB(int[] a, byte[] b) {
1103 for (int i = 1; i < RANGE; i++) {
1104 a[i]++;
1105 b[i]++;
1106 }
1107 return new Object[]{ a, b };
1108 }
1109
// Like test13aIS but with init i = 1; expects both element types to
// vectorize (avx2 or asimd required).
1110 @Test
1111 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
1112 IRNode.LOAD_VECTOR_S, "> 0",
1113 IRNode.ADD_VI, "> 0",
1114 IRNode.ADD_VS, "> 0",
1115 IRNode.STORE_VECTOR, "> 0"},
1116 applyIfPlatform = {"64-bit", "true"},
1117 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
1118 static Object[] test13bIS(int[] a, short[] b) {
1119 for (int i = 1; i < RANGE; i++) {
1120 a[i]++;
1121 b[i]++;
1122 }
1123 return new Object[]{ a, b };
1124 }
1125
// Like test13aBSIL but with init i = 1; expects all four element types to
// vectorize (avx2 or asimd required).
1126 @Test
1127 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
1128 IRNode.LOAD_VECTOR_S, "> 0",
1129 IRNode.LOAD_VECTOR_I, "> 0",
1130 IRNode.LOAD_VECTOR_L, "> 0",
1131 IRNode.ADD_VB, "> 0",
1132 IRNode.ADD_VS, "> 0",
1133 IRNode.ADD_VI, "> 0",
1134 IRNode.ADD_VL, "> 0",
1135 IRNode.STORE_VECTOR, "> 0"},
1136 applyIfPlatform = {"64-bit", "true"},
1137 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
1138 static Object[] test13bBSIL(byte[] a, short[] b, int[] c, long[] d) {
1139 for (int i = 1; i < RANGE; i++) {
1140 a[i]++;
1141 b[i]++;
1142 c[i]++;
1143 d[i]++;
1144 }
1145 return new Object[]{ a, b, c, d };
1146 }
1147
1148 @Test
1149 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
1150 IRNode.ADD_VB, "= 0",
1151 IRNode.STORE_VECTOR, "= 0"},
1152 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
1153 applyIfPlatform = {"64-bit", "true"},
1154 applyIf = {"AlignVector", "false"})
1155 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
1156 IRNode.ADD_VB, "= 0",
1157 IRNode.STORE_VECTOR, "= 0"},
1158 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
1159 applyIfPlatform = {"64-bit", "true"},
1160 applyIf = {"AlignVector", "true"})
1161 static Object[] test14aB(byte[] a) {
1162 // non-power-of-2 stride
1163 for (int i = 0; i < RANGE-20; i+=9) {
|