120 case "NoAlignVector-COH" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:-AlignVector"); }
121 case "VerifyAlignVector-COH" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:+AlignVector", "-XX:+IgnoreUnrecognizedVMOptions", "-XX:+VerifyAlignVector"); }
122 default -> { throw new RuntimeException("Test argument not recognized: " + args[0]); }
123 }
124 framework.start();
125 }
126
127 public TestAlignVector() {
// Constructor: generate the shared input arrays once, register every test
// method as a lambda in the 'tests' map, then run each lambda in the
// interpreter to record its "gold" result before JIT compilation.
128 // Generate input once
129 aB = generateB();
130 bB = generateB();
131 aS = generateS();
132 bS = generateS();
133 aI = generateI();
134 bI = generateI();
135 aL = generateL();
136 bL = generateL();
137
138 // Add all tests to list
139 tests.put("test0", () -> { return test0(aB.clone(), bB.clone(), mB); });
140 tests.put("test1a", () -> { return test1a(aB.clone(), bB.clone(), mB); });
141 tests.put("test1b", () -> { return test1b(aB.clone(), bB.clone(), mB); });
142 tests.put("test2", () -> { return test2(aB.clone(), bB.clone(), mB); });
143 tests.put("test3", () -> { return test3(aB.clone(), bB.clone(), mB); });
144 tests.put("test4", () -> { return test4(aB.clone(), bB.clone(), mB); });
145 tests.put("test5", () -> { return test5(aB.clone(), bB.clone(), mB, 0); });
146 tests.put("test6", () -> { return test6(aB.clone(), bB.clone(), mB); });
147 tests.put("test7", () -> { return test7(aS.clone(), bS.clone(), mS); });
148 tests.put("test8", () -> { return test8(aB.clone(), bB.clone(), mB, 0); });
// NOTE(review): duplicate map key "test8" — the put on the next line replaces
// the entry above, so test8(..., 0) is never executed nor gold-checked. If both
// offsets are meant to be covered, use distinct keys (e.g. "test8a"/"test8b")
// and add them to the @Run list — TODO confirm intent.
149 tests.put("test8", () -> { return test8(aB.clone(), bB.clone(), mB, 1); });
150 tests.put("test9", () -> { return test9(aB.clone(), bB.clone(), mB); });
151
152 tests.put("test10a", () -> { return test10a(aB.clone(), bB.clone(), mB); });
153 tests.put("test10b", () -> { return test10b(aB.clone(), bB.clone(), mB); });
154 tests.put("test10c", () -> { return test10c(aS.clone(), bS.clone(), mS); });
155 tests.put("test10d", () -> { return test10d(aS.clone(), bS.clone(), mS); });
156 tests.put("test10e", () -> { return test10e(aS.clone(), bS.clone(), mS); });
157
158 tests.put("test11aB", () -> { return test11aB(aB.clone(), bB.clone(), mB); });
159 tests.put("test11aS", () -> { return test11aS(aS.clone(), bS.clone(), mS); });
160 tests.put("test11aI", () -> { return test11aI(aI.clone(), bI.clone(), mI); });
161 tests.put("test11aL", () -> { return test11aL(aL.clone(), bL.clone(), mL); });
162
163 tests.put("test11bB", () -> { return test11bB(aB.clone(), bB.clone(), mB); });
164 tests.put("test11bS", () -> { return test11bS(aS.clone(), bS.clone(), mS); });
165 tests.put("test11bI", () -> { return test11bI(aI.clone(), bI.clone(), mI); });
166 tests.put("test11bL", () -> { return test11bL(aL.clone(), bL.clone(), mL); });
167
168 tests.put("test11cB", () -> { return test11cB(aB.clone(), bB.clone(), mB); });
169 tests.put("test11cS", () -> { return test11cS(aS.clone(), bS.clone(), mS); });
170 tests.put("test11cI", () -> { return test11cI(aI.clone(), bI.clone(), mI); });
171 tests.put("test11cL", () -> { return test11cL(aL.clone(), bL.clone(), mL); });
172
173 tests.put("test11dB", () -> { return test11dB(aB.clone(), bB.clone(), mB, 0); });
174 tests.put("test11dS", () -> { return test11dS(aS.clone(), bS.clone(), mS, 0); });
175 tests.put("test11dI", () -> { return test11dI(aI.clone(), bI.clone(), mI, 0); });
176 tests.put("test11dL", () -> { return test11dL(aL.clone(), bL.clone(), mL, 0); });
206 tests.put("test17c", () -> { return test17c(aL.clone()); });
207 tests.put("test17d", () -> { return test17d(aL.clone()); });
208
209 tests.put("test18a", () -> { return test18a(aB.clone(), aI.clone()); });
210 tests.put("test18b", () -> { return test18b(aB.clone(), aI.clone()); });
211
212 tests.put("test19", () -> { return test19(aI.clone(), bI.clone()); });
213 tests.put("test20", () -> { return test20(aB.clone()); });
214
215 // Compute gold value for all test methods before compilation
216 for (Map.Entry<String,TestFunction> entry : tests.entrySet()) {
217 String name = entry.getKey();
218 TestFunction test = entry.getValue();
219 Object[] gold = test.run();
220 golds.put(name, gold);
221 }
222 }
223
224 @Warmup(100)
225 @Run(test = {"test0",
226 "test1a",
227 "test1b",
228 "test2",
229 "test3",
230 "test4",
231 "test5",
232 "test6",
233 "test7",
234 "test8",
235 "test9",
236 "test10a",
237 "test10b",
238 "test10c",
239 "test10d",
240 "test10e",
241 "test11aB",
242 "test11aS",
243 "test11aI",
244 "test11aL",
245 "test11bB",
246 "test11bS",
247 "test11bI",
248 "test11bL",
249 "test11cB",
250 "test11cS",
251 "test11cI",
252 "test11cL",
253 "test11dB",
254 "test11dS",
255 "test11dI",
256 "test11dL",
257 "test12",
258 "test13aIL",
259 "test13aIB",
260 "test13aIS",
411 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
412 IRNode.STORE_VECTOR, "> 0"},
413 applyIf = {"MaxVectorSize", ">=8"},
414 applyIfPlatform = {"64-bit", "true"},
415 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
// test0: hand-unrolled mask-and-store of the first 4 bytes in each stride-8
// group. Offset 0 keeps the packs alignable, so the @IR rule above expects
// 4-byte vectors even with AlignVector. Returns {a, b} for gold comparison.
416 static Object[] test0(byte[] a, byte[] b, byte mask) {
417 for (int i = 0; i < RANGE; i+=8) {
418 // Safe to vectorize with AlignVector
419 b[i+0] = (byte)(a[i+0] & mask); // offset 0, align 0
420 b[i+1] = (byte)(a[i+1] & mask);
421 b[i+2] = (byte)(a[i+2] & mask);
422 b[i+3] = (byte)(a[i+3] & mask);
423 }
424 return new Object[]{ a, b };
425 }
426
// test1a/test1b: same full-width (8 bytes per iteration) mask-and-store, but
// with loop init 0 vs 4. Which variant can be 8-byte aligned depends on the
// array base offset, hence the complementary UseCompactObjectHeaders
// conditions in the two @IR rules.
427 @Test
428 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
429 IRNode.AND_VB, "> 0",
430 IRNode.STORE_VECTOR, "> 0"},
431 applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
432 // UNSAFE.ARRAY_BYTE_BASE_OFFSET = 16, but with compact object headers UNSAFE.ARRAY_BYTE_BASE_OFFSET=12.
433 // If AlignVector=true, we need the offset to be 8-byte aligned, else the vectors are filtered out.
434 applyIfPlatform = {"64-bit", "true"},
435 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
436 static Object[] test1a(byte[] a, byte[] b, byte mask) {
437 for (int i = 0; i < RANGE; i+=8) {
438 b[i+0] = (byte)(a[i+0] & mask); // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 0 + iter*8
439 b[i+1] = (byte)(a[i+1] & mask);
440 b[i+2] = (byte)(a[i+2] & mask);
441 b[i+3] = (byte)(a[i+3] & mask);
442 b[i+4] = (byte)(a[i+4] & mask);
443 b[i+5] = (byte)(a[i+5] & mask);
444 b[i+6] = (byte)(a[i+6] & mask);
445 b[i+7] = (byte)(a[i+7] & mask);
446 }
447 return new Object[]{ a, b };
448 }
449
// test1b: loop init 4 shifts all addresses by 4 bytes relative to test1a, so
// the alignable case flips to UseCompactObjectHeaders=true (base offset 12,
// 12 + 4 = 16 is 8-byte aligned).
450 @Test
451 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
452 IRNode.AND_VB, "> 0",
453 IRNode.STORE_VECTOR, "> 0"},
454 applyIfOr = {"UseCompactObjectHeaders", "true", "AlignVector", "false"},
455 // UNSAFE.ARRAY_BYTE_BASE_OFFSET = 16, but with compact object headers UNSAFE.ARRAY_BYTE_BASE_OFFSET=12.
456 // If AlignVector=true, we need the offset to be 8-byte aligned, else the vectors are filtered out.
457 applyIfPlatform = {"64-bit", "true"},
458 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
459 static Object[] test1b(byte[] a, byte[] b, byte mask) {
460 for (int i = 4; i < RANGE-8; i+=8) {
461 b[i+0] = (byte)(a[i+0] & mask); // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 4 + iter*8
462 b[i+1] = (byte)(a[i+1] & mask);
463 b[i+2] = (byte)(a[i+2] & mask);
464 b[i+3] = (byte)(a[i+3] & mask);
465 b[i+4] = (byte)(a[i+4] & mask);
466 b[i+5] = (byte)(a[i+5] & mask);
467 b[i+6] = (byte)(a[i+6] & mask);
468 b[i+7] = (byte)(a[i+7] & mask);
469 }
470 return new Object[]{ a, b };
471 }
472
473 @Test
474 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
475 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
476 IRNode.STORE_VECTOR, "> 0"},
477 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
478 applyIfPlatform = {"64-bit", "true"},
479 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
480 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
481 IRNode.AND_VB, "= 0",
482 IRNode.STORE_VECTOR, "= 0"},
483 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
484 applyIfPlatform = {"64-bit", "true"},
485 applyIf = {"AlignVector", "true"})
486 static Object[] test2(byte[] a, byte[] b, byte mask) {
487 for (int i = 0; i < RANGE; i+=8) {
488 // Cannot align with AlignVector: 3 + x * 8 % 8 = 3
489 b[i+3] = (byte)(a[i+3] & mask); // at alignment 3
490 b[i+4] = (byte)(a[i+4] & mask);
491 b[i+5] = (byte)(a[i+5] & mask);
492 b[i+6] = (byte)(a[i+6] & mask);
744 IRNode.STORE_VECTOR, "= 0"},
745 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
746 applyIfPlatform = {"64-bit", "true"},
747 applyIf = {"AlignVector", "true"})
// test10c: short variant with odd loop init (13), so the pre-loop cannot
// produce an aligned main loop; the @IR rule above expects no vectorization
// when AlignVector=true.
748 static Object[] test10c(short[] a, short[] b, short mask) {
749 // This is not alignable with pre-loop, because of odd init.
750 // Seems not correctly handled with MaxVectorSize >= 32.
751 for (int i = 13; i < RANGE-8; i+=8) {
752 b[i+0] = (short)(a[i+0] & mask);
753 b[i+1] = (short)(a[i+1] & mask);
754 b[i+2] = (short)(a[i+2] & mask);
755 b[i+3] = (short)(a[i+3] & mask);
756 }
757 return new Object[]{ a, b };
758 }
759
// test10d/test10e: short variants with constant in-loop offset (+3) and loop
// init 13 vs 11. The constant address term 2*(3+init) decides whether 8-byte
// alignment is reachable for base offset 16 (test10d, COH=false) or base
// offset 12 (test10e, COH=true).
760 @Test
761 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
762 IRNode.AND_VS, IRNode.VECTOR_SIZE_4, "> 0",
763 IRNode.STORE_VECTOR, "> 0"},
764 applyIfAnd = {"MaxVectorSize", ">=16", "UseCompactObjectHeaders", "false"},
765 // UNSAFE.ARRAY_BYTE_BASE_OFFSET = 16, but with compact object headers UNSAFE.ARRAY_BYTE_BASE_OFFSET=12.
766 // If AlignVector=true, we need the offset to be 8-byte aligned, else the vectors are filtered out.
767 applyIfPlatform = {"64-bit", "true"},
768 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
769 static Object[] test10d(short[] a, short[] b, short mask) {
770 for (int i = 13; i < RANGE-16; i+=8) {
771 // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2*(3 + 13) + iter*16
772 b[i+0+3] = (short)(a[i+0+3] & mask);
773 b[i+1+3] = (short)(a[i+1+3] & mask);
774 b[i+2+3] = (short)(a[i+2+3] & mask);
775 b[i+3+3] = (short)(a[i+3+3] & mask);
776 }
777 return new Object[]{ a, b };
778 }
779
// test10e: same as test10d but init 11; expected to vectorize only with
// UseCompactObjectHeaders=true (see applyIfAnd below).
780 @Test
781 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
782 IRNode.AND_VS, IRNode.VECTOR_SIZE_4, "> 0",
783 IRNode.STORE_VECTOR, "> 0"},
784 applyIfAnd = {"MaxVectorSize", ">=16", "UseCompactObjectHeaders", "true"},
785 // UNSAFE.ARRAY_BYTE_BASE_OFFSET = 16, but with compact object headers UNSAFE.ARRAY_BYTE_BASE_OFFSET=12.
786 // If AlignVector=true, we need the offset to be 8-byte aligned, else the vectors are filtered out.
787 applyIfPlatform = {"64-bit", "true"},
788 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
789 static Object[] test10e(short[] a, short[] b, short mask) {
790 for (int i = 11; i < RANGE-16; i+=8) {
791 // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2*(3 + 11) + iter*16
792 b[i+0+3] = (short)(a[i+0+3] & mask);
793 b[i+1+3] = (short)(a[i+1+3] & mask);
794 b[i+2+3] = (short)(a[i+2+3] & mask);
795 b[i+3+3] = (short)(a[i+3+3] & mask);
796 }
797 return new Object[]{ a, b };
798 }
799
// test11aB: simple unit-stride byte copy-with-mask from i=0; always
// alignable, so vectorization is expected unconditionally (no AlignVector
// restriction in the @IR rule).
800 @Test
801 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
802 IRNode.AND_VB, "> 0",
803 IRNode.STORE_VECTOR, "> 0"},
804 applyIfPlatform = {"64-bit", "true"},
805 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
806 static Object[] test11aB(byte[] a, byte[] b, byte mask) {
807 for (int i = 0; i < RANGE; i++) {
808 // always alignable
809 b[i+0] = (byte)(a[i+0] & mask);
810 }
811 return new Object[]{ a, b };
812 }
813
814 @Test
815 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
816 IRNode.AND_VS, "> 0",
817 IRNode.STORE_VECTOR, "> 0"},
818 applyIfPlatform = {"64-bit", "true"},
819 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
1062 IRNode.ADD_VI, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1063 IRNode.ADD_VL, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1064 IRNode.STORE_VECTOR, "> 0"},
1065 applyIfPlatform = {"64-bit", "true"},
1066 applyIfCPUFeatureOr = {"avx2", "true"})
1067 // require avx to ensure vectors are larger than what unrolling produces
// test13aIL: increments an int[] and a long[] in the same loop; the @IR rule
// above expects both to vectorize at the min(max_int, max_long) vector size.
1068 static Object[] test13aIL(int[] a, long[] b) {
1069 for (int i = 0; i < RANGE; i++) {
1070 a[i]++;
1071 b[i]++;
1072 }
1073 return new Object[]{ a, b };
1074 }
1075
// test13aIB: int[] and byte[] incremented together from i=0. With
// UseCompactObjectHeaders=true and AlignVector=true the two arrays can never
// be simultaneously 8-byte aligned (see per-line analysis below), hence the
// applyIfOr condition.
1076 @Test
1077 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
1078 IRNode.LOAD_VECTOR_I, "> 0",
1079 IRNode.ADD_VB, "> 0",
1080 IRNode.ADD_VI, "> 0",
1081 IRNode.STORE_VECTOR, "> 0"},
1082 applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
1083 applyIfPlatform = {"64-bit", "true"},
1084 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
1085 static Object[] test13aIB(int[] a, byte[] b) {
1086 for (int i = 0; i < RANGE; i++) {
1087 // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4*iter
1088 // = 16 (or 12 if UseCompactObjectHeaders=true)
1089 a[i]++;
1090 // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 1*iter
1091 // = 16 (or 12 if UseCompactObjectHeaders=true)
1092 b[i]++;
1093 // For AlignVector, all adr must be 8-byte aligned. Let's see for which iteration this can hold:
1094 // If UseCompactObjectHeaders=false:
1095 // a: 0, 2, 4, 6, 8, ...
1096 // b: 0, 8, 16, 24, 32, ...
1097 // -> Ok, aligns every 8th iteration.
1098 // If UseCompactObjectHeaders=true:
1099 // a: 1, 3, 5, 7, 9, ...
1100 // b: 4, 12, 20, 28, 36, ...
1101 // -> we can never align both vectors!
1102 }
1103 return new Object[]{ a, b };
1104 }
1105
// test13aIS: same pattern for int[] + short[]; with compact headers the
// aligned iterations of a (odd) and b (iter % 4 == 2) never coincide.
1106 @Test
1107 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
1108 IRNode.LOAD_VECTOR_S, "> 0",
1109 IRNode.ADD_VI, "> 0",
1110 IRNode.ADD_VS, "> 0",
1111 IRNode.STORE_VECTOR, "> 0"},
1112 applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
1113 applyIfPlatform = {"64-bit", "true"},
1114 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
1115 static Object[] test13aIS(int[] a, short[] b) {
1116 for (int i = 0; i < RANGE; i++) {
1117 // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4*iter
1118 // = 16 (or 12 if UseCompactObjectHeaders=true)
1119 a[i]++;
1120 // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2*iter
1121 // = 16 (or 12 if UseCompactObjectHeaders=true)
1122 b[i]++;
1123 // For AlignVector, all adr must be 8-byte aligned. Let's see for which iteration this can hold:
1124 // If UseCompactObjectHeaders=false:
1125 // a: iter % 2 == 0
1126 // b: iter % 4 == 0
1127 // -> Ok, aligns every 4th iteration.
1128 // If UseCompactObjectHeaders=true:
1129 // a: iter % 2 = 1
1130 // b: iter % 4 = 2
1131 // -> we can never align both vectors!
1132 }
1133 return new Object[]{ a, b };
1134 }
1135
// test13aBSIL: all four element widths in one loop; byte (a) and int (c)
// already have incompatible alignment phases under compact headers.
1136 @Test
1137 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
1138 IRNode.LOAD_VECTOR_S, "> 0",
1139 IRNode.LOAD_VECTOR_I, "> 0",
1140 IRNode.LOAD_VECTOR_L, "> 0",
1141 IRNode.ADD_VB, "> 0",
1142 IRNode.ADD_VS, "> 0",
1143 IRNode.ADD_VI, "> 0",
1144 IRNode.ADD_VL, "> 0",
1145 IRNode.STORE_VECTOR, "> 0"},
1146 applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
1147 applyIfPlatform = {"64-bit", "true"},
1148 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
1149 static Object[] test13aBSIL(byte[] a, short[] b, int[] c, long[] d) {
1150 for (int i = 0; i < RANGE; i++) {
1151 // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 1*iter
1152 // = 16 (or 12 if UseCompactObjectHeaders=true)
1153 a[i]++;
1154 // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2*iter
1155 // = 16 (or 12 if UseCompactObjectHeaders=true)
1156 b[i]++;
1157 // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4*iter
1158 // = 16 (or 12 if UseCompactObjectHeaders=true)
1159 c[i]++;
1160 // adr = base + UNSAFE.ARRAY_LONG_BASE_OFFSET + 8*iter
1161 // = 16 (always)
1162 d[i]++;
1163 // If AlignVector and UseCompactObjectHeaders, and we want all adr 8-byte aligned:
1164 // a: iter % 8 = 4
1165 // c: iter % 2 = 1
1166 // -> can never align both vectors!
1167 }
1168 return new Object[]{ a, b, c, d };
1169 }
1170
// test13b* family: identical bodies to test13a*, but the loop starts at i=1,
// shifting every address by one element width; the alignment analysis in the
// per-line comments changes accordingly.
1171 @Test
1172 @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1173 IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1174 IRNode.ADD_VI, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1175 IRNode.ADD_VL, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1176 IRNode.STORE_VECTOR, "> 0"},
1177 applyIfPlatform = {"64-bit", "true"},
1178 applyIfCPUFeatureOr = {"avx2", "true"})
1179 // require avx to ensure vectors are larger than what unrolling produces
1180 static Object[] test13bIL(int[] a, long[] b) {
1181 for (int i = 1; i < RANGE; i++) {
1182 a[i]++;
1183 b[i]++;
1184 }
1185 return new Object[]{ a, b };
1186 }
1187
// test13bIB: int[] + byte[] from i=1; under compact headers the aligned
// phases (a even, b iter % 8 == 3) never coincide.
1188 @Test
1189 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
1190 IRNode.LOAD_VECTOR_I, "> 0",
1191 IRNode.ADD_VB, "> 0",
1192 IRNode.ADD_VI, "> 0",
1193 IRNode.STORE_VECTOR, "> 0"},
1194 applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
1195 applyIfPlatform = {"64-bit", "true"},
1196 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
1197 static Object[] test13bIB(int[] a, byte[] b) {
1198 for (int i = 1; i < RANGE; i++) {
1199 // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4 + 4*iter
1200 // = 16 (or 12 if UseCompactObjectHeaders=true)
1201 a[i]++;
1202 // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 1 + 1*iter
1203 // = 16 (or 12 if UseCompactObjectHeaders=true)
1204 b[i]++;
1205 // If AlignVector and UseCompactObjectHeaders, and we want all adr 8-byte aligned:
1206 // a: iter % 2 = 0
1207 // b: iter % 8 = 3
1208 // -> can never align both vectors!
1209 }
1210 return new Object[]{ a, b };
1211 }
1212
// test13bIS: int[] + short[] from i=1; same never-both-aligned argument.
1213 @Test
1214 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
1215 IRNode.LOAD_VECTOR_S, "> 0",
1216 IRNode.ADD_VI, "> 0",
1217 IRNode.ADD_VS, "> 0",
1218 IRNode.STORE_VECTOR, "> 0"},
1219 applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
1220 applyIfPlatform = {"64-bit", "true"},
1221 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
1222 static Object[] test13bIS(int[] a, short[] b) {
1223 for (int i = 1; i < RANGE; i++) {
1224 // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4 + 4*iter
1225 // = 16 (or 12 if UseCompactObjectHeaders=true)
1226 a[i]++;
1227 // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2 + 2*iter
1228 // = 16 (or 12 if UseCompactObjectHeaders=true)
1229 b[i]++;
1230 // If AlignVector and UseCompactObjectHeaders, and we want all adr 8-byte aligned:
1231 // a: iter % 2 = 0
1232 // b: iter % 4 = 1
1233 // -> can never align both vectors!
1234 }
1235 return new Object[]{ a, b };
1236 }
1237
// test13bBSIL: all four widths from i=1; byte (a) and int (c) phases clash
// under compact headers.
1238 @Test
1239 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
1240 IRNode.LOAD_VECTOR_S, "> 0",
1241 IRNode.LOAD_VECTOR_I, "> 0",
1242 IRNode.LOAD_VECTOR_L, "> 0",
1243 IRNode.ADD_VB, "> 0",
1244 IRNode.ADD_VS, "> 0",
1245 IRNode.ADD_VI, "> 0",
1246 IRNode.ADD_VL, "> 0",
1247 IRNode.STORE_VECTOR, "> 0"},
1248 applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
1249 applyIfPlatform = {"64-bit", "true"},
1250 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
1251 static Object[] test13bBSIL(byte[] a, short[] b, int[] c, long[] d) {
1252 for (int i = 1; i < RANGE; i++) {
1253 // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 1 + 1*iter
1254 // = 16 (or 12 if UseCompactObjectHeaders=true)
1255 a[i]++;
1256 // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2 + 2*iter
1257 // = 16 (or 12 if UseCompactObjectHeaders=true)
1258 b[i]++;
1259 // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4 + 4*iter
1260 // = 16 (or 12 if UseCompactObjectHeaders=true)
1261 c[i]++;
1262 // adr = base + UNSAFE.ARRAY_LONG_BASE_OFFSET + 8 + 8*iter
1263 // = 16 (always)
1264 d[i]++;
1265 // If AlignVector and UseCompactObjectHeaders, and we want all adr 8-byte aligned:
1266 // a: iter % 8 = 3
1267 // c: iter % 2 = 0
1268 // -> can never align both vectors!
1269 }
1270 return new Object[]{ a, b, c, d };
1271 }
1272
1273 @Test
1274 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
1275 IRNode.ADD_VB, "= 0",
1276 IRNode.STORE_VECTOR, "= 0"},
1277 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
1278 applyIfPlatform = {"64-bit", "true"},
1279 applyIf = {"AlignVector", "false"})
1280 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
1281 IRNode.ADD_VB, "= 0",
1282 IRNode.STORE_VECTOR, "= 0"},
1283 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
1284 applyIfPlatform = {"64-bit", "true"},
1285 applyIf = {"AlignVector", "true"})
1286 static Object[] test14aB(byte[] a) {
1287 // non-power-of-2 stride
1288 for (int i = 0; i < RANGE-20; i+=9) {
// NOTE(review): everything below this marker appears to be a second, older
// revision of this same file concatenated here (duplicate TestAlignVector(),
// test0, test1, test10c, ... definitions without the UseCompactObjectHeaders
// variants). Likely an extraction/merge artifact — confirm and keep one copy.
120 case "NoAlignVector-COH" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:-AlignVector"); }
121 case "VerifyAlignVector-COH" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:+AlignVector", "-XX:+IgnoreUnrecognizedVMOptions", "-XX:+VerifyAlignVector"); }
122 default -> { throw new RuntimeException("Test argument not recognized: " + args[0]); }
123 }
124 framework.start();
125 }
126
127 public TestAlignVector() {
// Constructor (older revision of this file): generate inputs once, register
// the test lambdas, then run each one pre-compilation to record gold results.
128 // Generate input once
129 aB = generateB();
130 bB = generateB();
131 aS = generateS();
132 bS = generateS();
133 aI = generateI();
134 bI = generateI();
135 aL = generateL();
136 bL = generateL();
137
138 // Add all tests to list
139 tests.put("test0", () -> { return test0(aB.clone(), bB.clone(), mB); });
140 tests.put("test1", () -> { return test1(aB.clone(), bB.clone(), mB); });
141 tests.put("test2", () -> { return test2(aB.clone(), bB.clone(), mB); });
142 tests.put("test3", () -> { return test3(aB.clone(), bB.clone(), mB); });
143 tests.put("test4", () -> { return test4(aB.clone(), bB.clone(), mB); });
144 tests.put("test5", () -> { return test5(aB.clone(), bB.clone(), mB, 0); });
145 tests.put("test6", () -> { return test6(aB.clone(), bB.clone(), mB); });
146 tests.put("test7", () -> { return test7(aS.clone(), bS.clone(), mS); });
147 tests.put("test8", () -> { return test8(aB.clone(), bB.clone(), mB, 0); });
// NOTE(review): duplicate map key "test8" — the next put overwrites this
// entry, so test8(..., 0) is never run nor gold-checked. TODO confirm intent
// (distinct keys would also need matching @Run entries).
148 tests.put("test8", () -> { return test8(aB.clone(), bB.clone(), mB, 1); });
149 tests.put("test9", () -> { return test9(aB.clone(), bB.clone(), mB); });
150
151 tests.put("test10a", () -> { return test10a(aB.clone(), bB.clone(), mB); });
152 tests.put("test10b", () -> { return test10b(aB.clone(), bB.clone(), mB); });
153 tests.put("test10c", () -> { return test10c(aS.clone(), bS.clone(), mS); });
154 tests.put("test10d", () -> { return test10d(aS.clone(), bS.clone(), mS); });
155
156 tests.put("test11aB", () -> { return test11aB(aB.clone(), bB.clone(), mB); });
157 tests.put("test11aS", () -> { return test11aS(aS.clone(), bS.clone(), mS); });
158 tests.put("test11aI", () -> { return test11aI(aI.clone(), bI.clone(), mI); });
159 tests.put("test11aL", () -> { return test11aL(aL.clone(), bL.clone(), mL); });
160
161 tests.put("test11bB", () -> { return test11bB(aB.clone(), bB.clone(), mB); });
162 tests.put("test11bS", () -> { return test11bS(aS.clone(), bS.clone(), mS); });
163 tests.put("test11bI", () -> { return test11bI(aI.clone(), bI.clone(), mI); });
164 tests.put("test11bL", () -> { return test11bL(aL.clone(), bL.clone(), mL); });
165
166 tests.put("test11cB", () -> { return test11cB(aB.clone(), bB.clone(), mB); });
167 tests.put("test11cS", () -> { return test11cS(aS.clone(), bS.clone(), mS); });
168 tests.put("test11cI", () -> { return test11cI(aI.clone(), bI.clone(), mI); });
169 tests.put("test11cL", () -> { return test11cL(aL.clone(), bL.clone(), mL); });
170
171 tests.put("test11dB", () -> { return test11dB(aB.clone(), bB.clone(), mB, 0); });
172 tests.put("test11dS", () -> { return test11dS(aS.clone(), bS.clone(), mS, 0); });
173 tests.put("test11dI", () -> { return test11dI(aI.clone(), bI.clone(), mI, 0); });
174 tests.put("test11dL", () -> { return test11dL(aL.clone(), bL.clone(), mL, 0); });
204 tests.put("test17c", () -> { return test17c(aL.clone()); });
205 tests.put("test17d", () -> { return test17d(aL.clone()); });
206
207 tests.put("test18a", () -> { return test18a(aB.clone(), aI.clone()); });
208 tests.put("test18b", () -> { return test18b(aB.clone(), aI.clone()); });
209
210 tests.put("test19", () -> { return test19(aI.clone(), bI.clone()); });
211 tests.put("test20", () -> { return test20(aB.clone()); });
212
213 // Compute gold value for all test methods before compilation
214 for (Map.Entry<String,TestFunction> entry : tests.entrySet()) {
215 String name = entry.getKey();
216 TestFunction test = entry.getValue();
217 Object[] gold = test.run();
218 golds.put(name, gold);
219 }
220 }
221
222 @Warmup(100)
223 @Run(test = {"test0",
224 "test1",
225 "test2",
226 "test3",
227 "test4",
228 "test5",
229 "test6",
230 "test7",
231 "test8",
232 "test9",
233 "test10a",
234 "test10b",
235 "test10c",
236 "test10d",
237 "test11aB",
238 "test11aS",
239 "test11aI",
240 "test11aL",
241 "test11bB",
242 "test11bS",
243 "test11bI",
244 "test11bL",
245 "test11cB",
246 "test11cS",
247 "test11cI",
248 "test11cL",
249 "test11dB",
250 "test11dS",
251 "test11dI",
252 "test11dL",
253 "test12",
254 "test13aIL",
255 "test13aIB",
256 "test13aIS",
407 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
408 IRNode.STORE_VECTOR, "> 0"},
409 applyIf = {"MaxVectorSize", ">=8"},
410 applyIfPlatform = {"64-bit", "true"},
411 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
// test0 (older revision): masks the first 4 bytes of each stride-8 group at
// offset 0; alignable, so 4-byte vectors are expected per the @IR rule above.
412 static Object[] test0(byte[] a, byte[] b, byte mask) {
413 for (int i = 0; i < RANGE; i+=8) {
414 // Safe to vectorize with AlignVector
415 b[i+0] = (byte)(a[i+0] & mask); // offset 0, align 0
416 b[i+1] = (byte)(a[i+1] & mask);
417 b[i+2] = (byte)(a[i+2] & mask);
418 b[i+3] = (byte)(a[i+3] & mask);
419 }
420 return new Object[]{ a, b };
421 }
422
// test1 (older revision, before the test1a/test1b split): full-width 8-byte
// group mask-and-store from i=0; vectorization expected unconditionally.
423 @Test
424 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
425 IRNode.AND_VB, "> 0",
426 IRNode.STORE_VECTOR, "> 0"},
427 applyIfPlatform = {"64-bit", "true"},
428 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
429 static Object[] test1(byte[] a, byte[] b, byte mask) {
430 for (int i = 0; i < RANGE; i+=8) {
431 b[i+0] = (byte)(a[i+0] & mask); // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 0 + iter*8
432 b[i+1] = (byte)(a[i+1] & mask);
433 b[i+2] = (byte)(a[i+2] & mask);
434 b[i+3] = (byte)(a[i+3] & mask);
435 b[i+4] = (byte)(a[i+4] & mask);
436 b[i+5] = (byte)(a[i+5] & mask);
437 b[i+6] = (byte)(a[i+6] & mask);
438 b[i+7] = (byte)(a[i+7] & mask);
439 }
440 return new Object[]{ a, b };
441 }
442
443 @Test
444 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
445 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
446 IRNode.STORE_VECTOR, "> 0"},
447 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
448 applyIfPlatform = {"64-bit", "true"},
449 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
450 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
451 IRNode.AND_VB, "= 0",
452 IRNode.STORE_VECTOR, "= 0"},
453 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
454 applyIfPlatform = {"64-bit", "true"},
455 applyIf = {"AlignVector", "true"})
456 static Object[] test2(byte[] a, byte[] b, byte mask) {
457 for (int i = 0; i < RANGE; i+=8) {
458 // Cannot align with AlignVector: 3 + x * 8 % 8 = 3
459 b[i+3] = (byte)(a[i+3] & mask); // at alignment 3
460 b[i+4] = (byte)(a[i+4] & mask);
461 b[i+5] = (byte)(a[i+5] & mask);
462 b[i+6] = (byte)(a[i+6] & mask);
714 IRNode.STORE_VECTOR, "= 0"},
715 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
716 applyIfPlatform = {"64-bit", "true"},
717 applyIf = {"AlignVector", "true"})
// test10c (older revision): odd loop init (13) defeats pre-loop alignment;
// no vectorization expected with AlignVector=true (see @IR rule above).
718 static Object[] test10c(short[] a, short[] b, short mask) {
719 // This is not alignable with pre-loop, because of odd init.
720 // Seems not correctly handled with MaxVectorSize >= 32.
721 for (int i = 13; i < RANGE-8; i+=8) {
722 b[i+0] = (short)(a[i+0] & mask);
723 b[i+1] = (short)(a[i+1] & mask);
724 b[i+2] = (short)(a[i+2] & mask);
725 b[i+3] = (short)(a[i+3] & mask);
726 }
727 return new Object[]{ a, b };
728 }
729
// test10d (older revision): short copy with constant +3 in-loop offset and
// init 13; constant address term is 2*(3+13), see comment inside the loop.
730 @Test
731 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
732 IRNode.AND_VS, IRNode.VECTOR_SIZE_4, "> 0",
733 IRNode.STORE_VECTOR, "> 0"},
734 applyIf = {"MaxVectorSize", ">=16"},
735 applyIfPlatform = {"64-bit", "true"},
736 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
737 static Object[] test10d(short[] a, short[] b, short mask) {
738 for (int i = 13; i < RANGE-16; i+=8) {
739 // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2*(3 + 13) + iter*16
740 b[i+0+3] = (short)(a[i+0+3] & mask);
741 b[i+1+3] = (short)(a[i+1+3] & mask);
742 b[i+2+3] = (short)(a[i+2+3] & mask);
743 b[i+3+3] = (short)(a[i+3+3] & mask);
744 }
745 return new Object[]{ a, b };
746 }
747
// test11aB (older revision): unit-stride byte mask-copy from i=0; always
// alignable, so vectorization expected with no AlignVector restriction.
748 @Test
749 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
750 IRNode.AND_VB, "> 0",
751 IRNode.STORE_VECTOR, "> 0"},
752 applyIfPlatform = {"64-bit", "true"},
753 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
754 static Object[] test11aB(byte[] a, byte[] b, byte mask) {
755 for (int i = 0; i < RANGE; i++) {
756 // always alignable
757 b[i+0] = (byte)(a[i+0] & mask);
758 }
759 return new Object[]{ a, b };
760 }
761
762 @Test
763 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
764 IRNode.AND_VS, "> 0",
765 IRNode.STORE_VECTOR, "> 0"},
766 applyIfPlatform = {"64-bit", "true"},
767 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
1010 IRNode.ADD_VI, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1011 IRNode.ADD_VL, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1012 IRNode.STORE_VECTOR, "> 0"},
1013 applyIfPlatform = {"64-bit", "true"},
1014 applyIfCPUFeatureOr = {"avx2", "true"})
1015 // require avx to ensure vectors are larger than what unrolling produces
// test13aIL (older revision): int[] and long[] incremented in one loop;
// both expected to vectorize at min(max_int, max_long) size (@IR rule above).
1016 static Object[] test13aIL(int[] a, long[] b) {
1017 for (int i = 0; i < RANGE; i++) {
1018 a[i]++;
1019 b[i]++;
1020 }
1021 return new Object[]{ a, b };
1022 }
1023
// test13aIB (older revision): int[] + byte[] incremented together from i=0;
// this revision has no UseCompactObjectHeaders condition on the @IR rule.
1024 @Test
1025 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
1026 IRNode.LOAD_VECTOR_I, "> 0",
1027 IRNode.ADD_VB, "> 0",
1028 IRNode.ADD_VI, "> 0",
1029 IRNode.STORE_VECTOR, "> 0"},
1030 applyIfPlatform = {"64-bit", "true"},
1031 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
1032 static Object[] test13aIB(int[] a, byte[] b) {
1033 for (int i = 0; i < RANGE; i++) {
1034 a[i]++;
1035 b[i]++;
1036 }
1037 return new Object[]{ a, b };
1038 }
1039
// test13aIS (older revision): same pattern for int[] + short[].
1040 @Test
1041 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
1042 IRNode.LOAD_VECTOR_S, "> 0",
1043 IRNode.ADD_VI, "> 0",
1044 IRNode.ADD_VS, "> 0",
1045 IRNode.STORE_VECTOR, "> 0"},
1046 applyIfPlatform = {"64-bit", "true"},
1047 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
1048 static Object[] test13aIS(int[] a, short[] b) {
1049 for (int i = 0; i < RANGE; i++) {
1050 a[i]++;
1051 b[i]++;
1052 }
1053 return new Object[]{ a, b };
1054 }
1055
// test13aBSIL (older revision): byte/short/int/long incremented in one loop.
1056 @Test
1057 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
1058 IRNode.LOAD_VECTOR_S, "> 0",
1059 IRNode.LOAD_VECTOR_I, "> 0",
1060 IRNode.LOAD_VECTOR_L, "> 0",
1061 IRNode.ADD_VB, "> 0",
1062 IRNode.ADD_VS, "> 0",
1063 IRNode.ADD_VI, "> 0",
1064 IRNode.ADD_VL, "> 0",
1065 IRNode.STORE_VECTOR, "> 0"},
1066 applyIfPlatform = {"64-bit", "true"},
1067 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
1068 static Object[] test13aBSIL(byte[] a, short[] b, int[] c, long[] d) {
1069 for (int i = 0; i < RANGE; i++) {
1070 a[i]++;
1071 b[i]++;
1072 c[i]++;
1073 d[i]++;
1074 }
1075 return new Object[]{ a, b, c, d };
1076 }
1077
// test13b* family (older revision): same bodies as test13a*, but the loops
// start at i=1, shifting every access by one element width.
1078 @Test
1079 @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1080 IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1081 IRNode.ADD_VI, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1082 IRNode.ADD_VL, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1083 IRNode.STORE_VECTOR, "> 0"},
1084 applyIfPlatform = {"64-bit", "true"},
1085 applyIfCPUFeatureOr = {"avx2", "true"})
1086 // require avx to ensure vectors are larger than what unrolling produces
1087 static Object[] test13bIL(int[] a, long[] b) {
1088 for (int i = 1; i < RANGE; i++) {
1089 a[i]++;
1090 b[i]++;
1091 }
1092 return new Object[]{ a, b };
1093 }
1094
// test13bIB (older revision): int[] + byte[] from i=1.
1095 @Test
1096 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
1097 IRNode.LOAD_VECTOR_I, "> 0",
1098 IRNode.ADD_VB, "> 0",
1099 IRNode.ADD_VI, "> 0",
1100 IRNode.STORE_VECTOR, "> 0"},
1101 applyIfPlatform = {"64-bit", "true"},
1102 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
1103 static Object[] test13bIB(int[] a, byte[] b) {
1104 for (int i = 1; i < RANGE; i++) {
1105 a[i]++;
1106 b[i]++;
1107 }
1108 return new Object[]{ a, b };
1109 }
1110
// test13bIS (older revision): int[] + short[] from i=1.
1111 @Test
1112 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
1113 IRNode.LOAD_VECTOR_S, "> 0",
1114 IRNode.ADD_VI, "> 0",
1115 IRNode.ADD_VS, "> 0",
1116 IRNode.STORE_VECTOR, "> 0"},
1117 applyIfPlatform = {"64-bit", "true"},
1118 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
1119 static Object[] test13bIS(int[] a, short[] b) {
1120 for (int i = 1; i < RANGE; i++) {
1121 a[i]++;
1122 b[i]++;
1123 }
1124 return new Object[]{ a, b };
1125 }
1126
// test13bBSIL (older revision): all four element widths from i=1.
1127 @Test
1128 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
1129 IRNode.LOAD_VECTOR_S, "> 0",
1130 IRNode.LOAD_VECTOR_I, "> 0",
1131 IRNode.LOAD_VECTOR_L, "> 0",
1132 IRNode.ADD_VB, "> 0",
1133 IRNode.ADD_VS, "> 0",
1134 IRNode.ADD_VI, "> 0",
1135 IRNode.ADD_VL, "> 0",
1136 IRNode.STORE_VECTOR, "> 0"},
1137 applyIfPlatform = {"64-bit", "true"},
1138 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
1139 static Object[] test13bBSIL(byte[] a, short[] b, int[] c, long[] d) {
1140 for (int i = 1; i < RANGE; i++) {
1141 a[i]++;
1142 b[i]++;
1143 c[i]++;
1144 d[i]++;
1145 }
1146 return new Object[]{ a, b, c, d };
1147 }
1148
1149 @Test
1150 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
1151 IRNode.ADD_VB, "= 0",
1152 IRNode.STORE_VECTOR, "= 0"},
1153 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
1154 applyIfPlatform = {"64-bit", "true"},
1155 applyIf = {"AlignVector", "false"})
1156 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
1157 IRNode.ADD_VB, "= 0",
1158 IRNode.STORE_VECTOR, "= 0"},
1159 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
1160 applyIfPlatform = {"64-bit", "true"},
1161 applyIf = {"AlignVector", "true"})
1162 static Object[] test14aB(byte[] a) {
1163 // non-power-of-2 stride
1164 for (int i = 0; i < RANGE-20; i+=9) {
|