        test8(goldI8, goldI8, goldF8, goldF8, goldL8, goldL8);
        init(goldI9, goldF9, goldL9);
        test9(goldI9, goldI9, goldF9, goldF9, goldL9, goldL9);
        init(goldI10, goldF10, goldL10);
        test10(goldI10, goldI10, goldF10, goldF10, goldL10, goldL10);
    }

    @Run(test = "test0")
    @Warmup(100)
    public void runTest0() {
        int[] dataI = new int[RANGE];
        float[] dataF = new float[RANGE];
        init(dataI, dataF);
        test0(dataI, dataI, dataF, dataF);
        verify("test0", dataI, goldI0);
        verify("test0", dataF, goldF0);
    }

    @Test
    @IR(counts = {IRNode.ADD_VI, "> 0", IRNode.MUL_VF, "> 0"},
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static void test0(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb) {
        for (int i = 0; i < RANGE; i+=2) {
            // Hand-unrolled 2x. Int and Float slices are completely separate.
            dataIb[i+0] = dataIa[i+0] + 3;
            dataIb[i+1] = dataIa[i+1] + 3;
            dataFb[i+0] = dataFa[i+0] * 1.3f;
            dataFb[i+1] = dataFa[i+1] * 1.3f;
            // With AlignVector, we need 8-byte alignment of vector loads/stores.
            // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
            //   adr = base + 16 + 8*i  ->  always           adr = base + 12 + 8*i  ->  never
            //   -> vectorize                                -> no vectorization
        }
    }
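
    // Worked example of the alignment arithmetic in the comments above
    // (illustrative note): with the 16-byte array base offset, 16 % 8 == 0, so
    // adr = base + 16 + 8*i is 8-byte aligned for every i and AlignVector
    // accepts the vector accesses. With compact object headers the payload
    // starts at offset 12, and 12 % 8 == 4, so adr = base + 12 + 8*i is never
    // 8-byte aligned and the vectorization is rejected.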

    @Run(test = "test1")
    @Warmup(100)
    public void runTest1() {
        int[] dataI = new int[RANGE];
        float[] dataF = new float[RANGE];
        init(dataI, dataF);
        test1(dataI, dataI, dataF, dataF);
        verify("test1", dataI, goldI1);
        verify("test1", dataF, goldF1);
    }

    @Test
    @IR(counts = {IRNode.ADD_VI, "> 0", IRNode.MUL_VF, "> 0", IRNode.VECTOR_CAST_F2I, "> 0", IRNode.VECTOR_CAST_I2F, "> 0"},
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
    static void test1(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb) {
        for (int i = 0; i < RANGE; i+=2) {
            // Hand-unrolled 2x. Converts int to float and back. StoreF -> LoadF dependency.
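            // Note: runTest1 passes the same array as dataFa and dataFb, so the
            // StoreF into dataFa[i] is a true memory dependency of the LoadF
            // from dataFb[i] below.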
            dataFa[i+0] = dataIa[i+0] + 3;
            dataFa[i+1] = dataIa[i+1] + 3;
            dataIb[i+0] = (int)(dataFb[i+0] * 1.3f);
            dataIb[i+1] = (int)(dataFb[i+1] * 1.3f);
            // With AlignVector, we need 8-byte alignment of vector loads/stores.
            // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
            //   adr = base + 16 + 8*i  ->  always           adr = base + 12 + 8*i  ->  never
            //   -> vectorize                                -> no vectorization
        }
    }

    @Run(test = "test2")
    public void runTest2() {
        int[] dataI = new int[RANGE];
        float[] dataF = new float[RANGE];
        init(dataI, dataF);
        test2(dataI, dataI, dataF, dataF);
        verify("test2", dataI, goldI2);
        verify("test2", dataF, goldF2);
    }

    @Test
    @IR(counts = {IRNode.ADD_VI, "> 0", IRNode.MUL_VI, "> 0"},
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static void test2(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb) {
        for (int i = 0; i < RANGE; i+=2) {
            // int and float arrays are two slices. But we pretend both are of type int.
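            // The Unsafe accesses reinterpret raw bits rather than convert values:
            // putInt stores int bits into the float array's payload, and getInt
            // below reads a float element's bits back as an int.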
            unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0, dataIa[i+0] + 1);
            unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, dataIa[i+1] + 1);
            dataIb[i+0] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0);
            dataIb[i+1] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4);
            // With AlignVector, we need 8-byte alignment of vector loads/stores.
            // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
            //   adr = base + 16 + 8*i  ->  always           adr = base + 12 + 8*i  ->  never
            //   -> vectorize                                -> no vectorization
        }
    }
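
    // A scalar sketch of what test2 computes, using the public bit-conversion
    // API instead of Unsafe (illustrative only: the harness does not call this
    // method, and the name test2Reference is ours):
    static void test2Reference(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb) {
        for (int i = 0; i < RANGE; i++) {
            // putInt into the float array == store raw int bits into a float slot.
            dataFa[i] = Float.intBitsToFloat(dataIa[i] + 1);
            // getInt from the float array == read a float slot's raw bits as int.
            dataIb[i] = 11 * Float.floatToRawIntBits(dataFb[i]);
        }
    }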

    @Run(test = "test3")
    @Warmup(100)
    public void runTest3() {
        int[] dataI = new int[RANGE];
        float[] dataF = new float[RANGE];
        init(dataI, dataF);
        test3(dataI, dataI, dataF, dataF);
        verify("test3", dataI, goldI3);
        verify("test3", dataF, goldF3);
    }

    @Test
    @IR(counts = {IRNode.ADD_VI, "> 0", IRNode.MUL_VF, "> 0"},
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static void test3(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb) {
        for (int i = 0; i < RANGE; i+=2) {
            // Inverted order. But because we operate on separate slices, this
            // should safely vectorize: each statement is independent of the
            // others, so they may be reordered.
            dataIb[i+0] = dataIa[i+0] + 3;
            dataFb[i+1] = dataFa[i+1] * 1.3f;
            dataFb[i+0] = dataFa[i+0] * 1.3f;
            dataIb[i+1] = dataIa[i+1] + 3;
            // With AlignVector, we need 8-byte alignment of vector loads/stores.
            // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
            //   adr = base + 16 + 8*i  ->  always           adr = base + 12 + 8*i  ->  never
            //   -> vectorize                                -> no vectorization
        }
    }

    @Run(test = "test4")
    @Warmup(100)
    public void runTest4() {
        int[] dataI = new int[RANGE];
        float[] dataF = new float[RANGE];
        init(dataI, dataF);
        test4(dataI, dataI, dataF, dataF);
        verify("test4", dataI, goldI4);
        verify("test4", dataF, goldF4);
    }

    @Test
    static void test4(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb) {
        for (int i = 0; i < RANGE; i+=2) {
            // Same as test2, except that reordering leads to different semantics:
            // [A,B] and [X,Y] are both packs that are internally independent,
            // but the dependencies A -> X (StoreF -> LoadF) and Y -> B
            // (LoadF -> StoreF) block the reordering.
            unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0, dataIa[i+0] + 1); // A
            dataIb[i+0] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0); // X
            dataIb[i+1] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4); // Y
            unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, dataIa[i+1] + 1); // B
        }
    }

    @Run(test = "test6")
    public void runTest6() {
        int[] dataI = new int[RANGE];
        float[] dataF = new float[RANGE];
        long[] dataL = new long[RANGE];
        init(dataI, dataF, dataL);
        test6(dataI, dataI, dataF, dataF, dataL, dataL);
        verify("test6", dataI, goldI6);
        verify("test6", dataF, goldF6);
        verify("test6", dataL, goldL6);
    }

    @Test
    @IR(counts = {IRNode.ADD_VI, "> 0", IRNode.MUL_VI, "> 0", IRNode.ADD_VF, "> 0"},
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static void test6(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb,
                      long[] dataLa, long[] dataLb) {
        for (int i = 0; i < RANGE; i+=2) {
            // Chain of parallelizable op and conversion
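            // Three raw-bit stages: int[] values into the float[] payload, the
            // float[] payload (as ints) into the long[] payload, and the long[]
            // payload (as floats) into the int[] payload. Each stage is
            // independently parallelizable.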
            int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0) + 3;
            int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4) + 3;
            unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0, v00);
            unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, v01);
            int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0) * 45;
            int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4) * 45;
            unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0, v10);
            unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4, v11);
            float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0) + 0.55f;
            float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4) + 0.55f;
            unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0, v20);
            unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4, v21);
            // With AlignVector, we need 8-byte alignment of vector loads/stores.
            // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
            //   adr = base + 16 + 8*i  ->  always           adr = base + 12 + 8*i  ->  never
            //   -> vectorize                                -> no vectorization
        }
    }

    @Run(test = "test7")
    public void runTest7() {
        int[] dataI = new int[RANGE];
        float[] dataF = new float[RANGE];
        long[] dataL = new long[RANGE];
        init(dataI, dataF, dataL);
        test7(dataI, dataI, dataF, dataF, dataL, dataL);
        verify("test7", dataI, goldI7);
        verify("test7", dataF, goldF7);
        verify("test7", dataL, goldL7);
    }

    @Test
    static void test7(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb,
                      long[] dataLa, long[] dataLb) {
        for (int i = 0; i < RANGE; i+=2) {
            // Cycle involving 3 memory slices