1 /*
2 * Copyright (c) 2020, 2023, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "precompiled.hpp"
26 #include "asm/assembler.hpp"
27 #include "asm/assembler.inline.hpp"
28 #include "opto/c2_MacroAssembler.hpp"
29 #include "opto/intrinsicnode.hpp"
30 #include "runtime/vm_version.hpp"
31
// BLOCK_COMMENT(str) forwards str to block_comment() in non-PRODUCT builds and
// expands to nothing in PRODUCT builds.
#ifdef PRODUCT
#define BLOCK_COMMENT(str) // nothing
#else
#define BLOCK_COMMENT(str) block_comment(str)
#endif
// BIND(label) binds the label and, via BLOCK_COMMENT, records "<label>:".
// The expansion is wrapped in do { } while (0) so that it is a single
// statement: it can be used as the body of an unbraced if/else or loop without
// the block comment escaping the guard. Use it as `BIND(label);`.
#define BIND(label) do { bind(label); BLOCK_COMMENT(#label ":"); } while (0)
38
39
40 void C2_MacroAssembler::fast_lock_lightweight(ConditionRegister flag, Register obj, Register box,
41 Register tmp1, Register tmp2, Register tmp3) {
42 compiler_fast_lock_lightweight_object(flag, obj, tmp1, tmp2, tmp3);
43 }
44
45 void C2_MacroAssembler::fast_unlock_lightweight(ConditionRegister flag, Register obj, Register box,
46 Register tmp1, Register tmp2, Register tmp3) {
47 compiler_fast_unlock_lightweight_object(flag, obj, tmp1, tmp2, tmp3);
48 }
49
50 // Intrinsics for CompactStrings
51
52 // Compress char[] to byte[] by compressing 16 bytes at once.
53 void C2_MacroAssembler::string_compress_16(Register src, Register dst, Register cnt,
54 Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5,
55 Label& Lfailure, bool ascii) {
56
57 const Register tmp0 = R0;
58 const int byte_mask = ascii ? 0x7F : 0xFF;
59 assert_different_registers(src, dst, cnt, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5);
60 Label Lloop, Lslow;
61
62 // Check if cnt >= 8 (= 16 bytes)
63 lis(tmp1, byte_mask); // tmp1 = 0x00FF00FF00FF00FF (non ascii case)
64 srwi_(tmp2, cnt, 3);
65 beq(CCR0, Lslow);
66 ori(tmp1, tmp1, byte_mask);
67 rldimi(tmp1, tmp1, 32, 0);
68 mtctr(tmp2);
69
70 // 2x unrolled loop
71 bind(Lloop);
72 ld(tmp2, 0, src); // _0_1_2_3 (Big Endian)
73 ld(tmp4, 8, src); // _4_5_6_7
74
75 orr(tmp0, tmp2, tmp4);
76 rldicl(tmp3, tmp2, 6*8, 64-24); // _____1_2
77 rldimi(tmp2, tmp2, 2*8, 2*8); // _0_2_3_3
78 rldicl(tmp5, tmp4, 6*8, 64-24); // _____5_6
79 rldimi(tmp4, tmp4, 2*8, 2*8); // _4_6_7_7
80
81 andc_(tmp0, tmp0, tmp1);
82 bne(CCR0, Lfailure); // Not latin1/ascii.
83 addi(src, src, 16);
84
85 rlwimi(tmp3, tmp2, 0*8, 24, 31);// _____1_3
86 srdi(tmp2, tmp2, 3*8); // ____0_2_
87 rlwimi(tmp5, tmp4, 0*8, 24, 31);// _____5_7
88 srdi(tmp4, tmp4, 3*8); // ____4_6_
89
90 orr(tmp2, tmp2, tmp3); // ____0123
91 orr(tmp4, tmp4, tmp5); // ____4567
92
93 stw(tmp2, 0, dst);
94 stw(tmp4, 4, dst);
95 addi(dst, dst, 8);
96 bdnz(Lloop);
97
98 bind(Lslow); // Fallback to slow version
99 }
100
101 // Compress char[] to byte[]. cnt must be positive int.
102 void C2_MacroAssembler::string_compress(Register src, Register dst, Register cnt, Register tmp,
103 Label& Lfailure, bool ascii) {
104 const int byte_mask = ascii ? 0x7F : 0xFF;
105 Label Lloop;
106 mtctr(cnt);
107
108 bind(Lloop);
109 lhz(tmp, 0, src);
110 cmplwi(CCR0, tmp, byte_mask);
111 bgt(CCR0, Lfailure); // Not latin1/ascii.
112 addi(src, src, 2);
113 stb(tmp, 0, dst);
114 addi(dst, dst, 1);
115 bdnz(Lloop);
116 }
117
118 void C2_MacroAssembler::encode_iso_array(Register src, Register dst, Register len,
119 Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5,
120 Register result, bool ascii) {
121 Label Lslow, Lfailure1, Lfailure2, Ldone;
122
123 string_compress_16(src, dst, len, tmp1, tmp2, tmp3, tmp4, tmp5, Lfailure1, ascii);
124 rldicl_(result, len, 0, 64-3); // Remaining characters.
125 beq(CCR0, Ldone);
126 bind(Lslow);
127 string_compress(src, dst, result, tmp2, Lfailure2, ascii);
128 li(result, 0);
129 b(Ldone);
130
131 bind(Lfailure1);
132 mr(result, len);
133 mfctr(tmp1);
134 rldimi_(result, tmp1, 3, 0); // Remaining characters.
135 beq(CCR0, Ldone);
136 b(Lslow);
137
138 bind(Lfailure2);
139 mfctr(result); // Remaining characters.
140
141 bind(Ldone);
142 subf(result, result, len);
143 }
144
145 // Inflate byte[] to char[] by inflating 16 bytes at once.
146 void C2_MacroAssembler::string_inflate_16(Register src, Register dst, Register cnt,
147 Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5) {
148 const Register tmp0 = R0;
149 assert_different_registers(src, dst, cnt, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5);
150 Label Lloop, Lslow;
151
152 // Check if cnt >= 8
153 srwi_(tmp2, cnt, 3);
154 beq(CCR0, Lslow);
155 lis(tmp1, 0xFF); // tmp1 = 0x00FF00FF
156 ori(tmp1, tmp1, 0xFF);
157 mtctr(tmp2);
158
159 // 2x unrolled loop
160 bind(Lloop);
161 lwz(tmp2, 0, src); // ____0123 (Big Endian)
162 lwz(tmp4, 4, src); // ____4567
163 addi(src, src, 8);
164
165 rldicl(tmp3, tmp2, 7*8, 64-8); // _______2
166 rlwimi(tmp2, tmp2, 3*8, 16, 23);// ____0113
167 rldicl(tmp5, tmp4, 7*8, 64-8); // _______6
168 rlwimi(tmp4, tmp4, 3*8, 16, 23);// ____4557
169
170 andc(tmp0, tmp2, tmp1); // ____0_1_
171 rlwimi(tmp2, tmp3, 2*8, 0, 23); // _____2_3
172 andc(tmp3, tmp4, tmp1); // ____4_5_
173 rlwimi(tmp4, tmp5, 2*8, 0, 23); // _____6_7
174
175 rldimi(tmp2, tmp0, 3*8, 0*8); // _0_1_2_3
176 rldimi(tmp4, tmp3, 3*8, 0*8); // _4_5_6_7
177
178 std(tmp2, 0, dst);
179 std(tmp4, 8, dst);
180 addi(dst, dst, 16);
181 bdnz(Lloop);
182
183 bind(Lslow); // Fallback to slow version
184 }
185
186 // Inflate byte[] to char[]. cnt must be positive int.
187 void C2_MacroAssembler::string_inflate(Register src, Register dst, Register cnt, Register tmp) {
188 Label Lloop;
189 mtctr(cnt);
190
191 bind(Lloop);
192 lbz(tmp, 0, src);
193 addi(src, src, 1);
194 sth(tmp, 0, dst);
195 addi(dst, dst, 2);
196 bdnz(Lloop);
197 }
198
199 void C2_MacroAssembler::string_compare(Register str1, Register str2,
200 Register cnt1, Register cnt2,
201 Register tmp1, Register result, int ae) {
202 const Register tmp0 = R0,
203 diff = tmp1;
204
205 assert_different_registers(str1, str2, cnt1, cnt2, tmp0, tmp1, result);
206 Label Ldone, Lslow, Lloop, Lreturn_diff;
207
208 // Note: Making use of the fact that compareTo(a, b) == -compareTo(b, a)
209 // we interchange str1 and str2 in the UL case and negate the result.
210 // Like this, str1 is always latin1 encoded, except for the UU case.
211 // In addition, we need 0 (or sign which is 0) extend.
212
213 if (ae == StrIntrinsicNode::UU) {
214 srwi(cnt1, cnt1, 1);
215 } else {
216 clrldi(cnt1, cnt1, 32);
217 }
218
219 if (ae != StrIntrinsicNode::LL) {
220 srwi(cnt2, cnt2, 1);
221 } else {
222 clrldi(cnt2, cnt2, 32);
223 }
224
225 // See if the lengths are different, and calculate min in cnt1.
226 // Save diff in case we need it for a tie-breaker.
227 subf_(diff, cnt2, cnt1); // diff = cnt1 - cnt2
228 // if (diff > 0) { cnt1 = cnt2; }
229 if (VM_Version::has_isel()) {
230 isel(cnt1, CCR0, Assembler::greater, /*invert*/ false, cnt2);
231 } else {
232 Label Lskip;
233 blt(CCR0, Lskip);
234 mr(cnt1, cnt2);
235 bind(Lskip);
236 }
237
238 // Rename registers
239 Register chr1 = result;
240 Register chr2 = tmp0;
241
242 // Compare multiple characters in fast loop (only implemented for same encoding).
243 int stride1 = 8, stride2 = 8;
244 if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
245 int log2_chars_per_iter = (ae == StrIntrinsicNode::LL) ? 3 : 2;
246 Label Lfastloop, Lskipfast;
247
248 srwi_(tmp0, cnt1, log2_chars_per_iter);
249 beq(CCR0, Lskipfast);
250 rldicl(cnt2, cnt1, 0, 64 - log2_chars_per_iter); // Remaining characters.
251 li(cnt1, 1 << log2_chars_per_iter); // Initialize for failure case: Rescan characters from current iteration.
252 mtctr(tmp0);
253
254 bind(Lfastloop);
255 ld(chr1, 0, str1);
256 ld(chr2, 0, str2);
257 cmpd(CCR0, chr1, chr2);
258 bne(CCR0, Lslow);
259 addi(str1, str1, stride1);
260 addi(str2, str2, stride2);
261 bdnz(Lfastloop);
262 mr(cnt1, cnt2); // Remaining characters.
263 bind(Lskipfast);
264 }
265
266 // Loop which searches the first difference character by character.
267 cmpwi(CCR0, cnt1, 0);
268 beq(CCR0, Lreturn_diff);
269 bind(Lslow);
270 mtctr(cnt1);
271
272 switch (ae) {
273 case StrIntrinsicNode::LL: stride1 = 1; stride2 = 1; break;
274 case StrIntrinsicNode::UL: // fallthru (see comment above)
275 case StrIntrinsicNode::LU: stride1 = 1; stride2 = 2; break;
276 case StrIntrinsicNode::UU: stride1 = 2; stride2 = 2; break;
277 default: ShouldNotReachHere(); break;
278 }
279
280 bind(Lloop);
281 if (stride1 == 1) { lbz(chr1, 0, str1); } else { lhz(chr1, 0, str1); }
282 if (stride2 == 1) { lbz(chr2, 0, str2); } else { lhz(chr2, 0, str2); }
283 subf_(result, chr2, chr1); // result = chr1 - chr2
284 bne(CCR0, Ldone);
285 addi(str1, str1, stride1);
286 addi(str2, str2, stride2);
287 bdnz(Lloop);
288
289 // If strings are equal up to min length, return the length difference.
290 bind(Lreturn_diff);
291 mr(result, diff);
292
293 // Otherwise, return the difference between the first mismatched chars.
294 bind(Ldone);
295 if (ae == StrIntrinsicNode::UL) {
296 neg(result, result); // Negate result (see note above).
297 }
298 }
299
300 void C2_MacroAssembler::array_equals(bool is_array_equ, Register ary1, Register ary2,
301 Register limit, Register tmp1, Register result, bool is_byte) {
302 const Register tmp0 = R0;
303 assert_different_registers(ary1, ary2, limit, tmp0, tmp1, result);
304 Label Ldone, Lskiploop, Lloop, Lfastloop, Lskipfast;
305 bool limit_needs_shift = false;
306
307 if (is_array_equ) {
308 const int length_offset = arrayOopDesc::length_offset_in_bytes();
309 const int base_offset = arrayOopDesc::base_offset_in_bytes(is_byte ? T_BYTE : T_CHAR);
310
311 // Return true if the same array.
312 cmpd(CCR0, ary1, ary2);
313 beq(CCR0, Lskiploop);
314
315 // Return false if one of them is null.
316 cmpdi(CCR0, ary1, 0);
317 cmpdi(CCR1, ary2, 0);
318 li(result, 0);
319 cror(CCR0, Assembler::equal, CCR1, Assembler::equal);
320 beq(CCR0, Ldone);
321
322 // Load the lengths of arrays.
323 lwz(limit, length_offset, ary1);
324 lwz(tmp0, length_offset, ary2);
325
326 // Return false if the two arrays are not equal length.
327 cmpw(CCR0, limit, tmp0);
328 bne(CCR0, Ldone);
329
330 // Load array addresses.
331 addi(ary1, ary1, base_offset);
332 addi(ary2, ary2, base_offset);
333 } else {
334 limit_needs_shift = !is_byte;
335 li(result, 0); // Assume not equal.
336 }
337
338 // Rename registers
339 Register chr1 = tmp0;
340 Register chr2 = tmp1;
341
342 // Compare 8 bytes per iteration in fast loop.
343 const int log2_chars_per_iter = is_byte ? 3 : 2;
344
345 srwi_(tmp0, limit, log2_chars_per_iter + (limit_needs_shift ? 1 : 0));
346 beq(CCR0, Lskipfast);
347 mtctr(tmp0);
348
349 bind(Lfastloop);
350 ld(chr1, 0, ary1);
351 ld(chr2, 0, ary2);
352 addi(ary1, ary1, 8);
353 addi(ary2, ary2, 8);
354 cmpd(CCR0, chr1, chr2);
355 bne(CCR0, Ldone);
356 bdnz(Lfastloop);
357
358 bind(Lskipfast);
359 rldicl_(limit, limit, limit_needs_shift ? 64 - 1 : 0, 64 - log2_chars_per_iter); // Remaining characters.
360 beq(CCR0, Lskiploop);
361 mtctr(limit);
362
363 // Character by character.
364 bind(Lloop);
365 if (is_byte) {
366 lbz(chr1, 0, ary1);
367 lbz(chr2, 0, ary2);
368 addi(ary1, ary1, 1);
369 addi(ary2, ary2, 1);
370 } else {
371 lhz(chr1, 0, ary1);
372 lhz(chr2, 0, ary2);
373 addi(ary1, ary1, 2);
374 addi(ary2, ary2, 2);
375 }
376 cmpw(CCR0, chr1, chr2);
377 bne(CCR0, Ldone);
378 bdnz(Lloop);
379
380 bind(Lskiploop);
381 li(result, 1); // All characters are equal.
382 bind(Ldone);
383 }
384
385 void C2_MacroAssembler::string_indexof(Register result, Register haystack, Register haycnt,
386 Register needle, ciTypeArray* needle_values, Register needlecnt, int needlecntval,
387 Register tmp1, Register tmp2, Register tmp3, Register tmp4, int ae) {
388
389 // Ensure 0<needlecnt<=haycnt in ideal graph as prerequisite!
390 Label L_TooShort, L_Found, L_NotFound, L_End;
391 Register last_addr = haycnt, // Kill haycnt at the beginning.
392 addr = tmp1,
393 n_start = tmp2,
394 ch1 = tmp3,
395 ch2 = R0;
396
397 assert(ae != StrIntrinsicNode::LU, "Invalid encoding");
398 const int h_csize = (ae == StrIntrinsicNode::LL) ? 1 : 2;
399 const int n_csize = (ae == StrIntrinsicNode::UU) ? 2 : 1;
400
401 // **************************************************************************************************
402 // Prepare for main loop: optimized for needle count >=2, bail out otherwise.
403 // **************************************************************************************************
404
405 // Compute last haystack addr to use if no match gets found.
406 clrldi(haycnt, haycnt, 32); // Ensure positive int is valid as 64 bit value.
407 addi(addr, haystack, -h_csize); // Accesses use pre-increment.
408 if (needlecntval == 0) { // variable needlecnt
409 cmpwi(CCR6, needlecnt, 2);
410 clrldi(needlecnt, needlecnt, 32); // Ensure positive int is valid as 64 bit value.
411 blt(CCR6, L_TooShort); // Variable needlecnt: handle short needle separately.
412 }
413
414 if (n_csize == 2) { lwz(n_start, 0, needle); } else { lhz(n_start, 0, needle); } // Load first 2 characters of needle.
415
416 if (needlecntval == 0) { // variable needlecnt
417 subf(ch1, needlecnt, haycnt); // Last character index to compare is haycnt-needlecnt.
418 addi(needlecnt, needlecnt, -2); // Rest of needle.
419 } else { // constant needlecnt
420 guarantee(needlecntval != 1, "IndexOf with single-character needle must be handled separately");
421 assert((needlecntval & 0x7fff) == needlecntval, "wrong immediate");
422 addi(ch1, haycnt, -needlecntval); // Last character index to compare is haycnt-needlecnt.
423 if (needlecntval > 3) { li(needlecnt, needlecntval - 2); } // Rest of needle.
424 }
425
426 if (h_csize == 2) { slwi(ch1, ch1, 1); } // Scale to number of bytes.
427
428 if (ae ==StrIntrinsicNode::UL) {
429 srwi(tmp4, n_start, 1*8); // ___0
430 rlwimi(n_start, tmp4, 2*8, 0, 23); // _0_1
431 }
432
433 add(last_addr, haystack, ch1); // Point to last address to compare (haystack+2*(haycnt-needlecnt)).
434
435 // Main Loop (now we have at least 2 characters).
436 Label L_OuterLoop, L_InnerLoop, L_FinalCheck, L_Comp1, L_Comp2;
437 bind(L_OuterLoop); // Search for 1st 2 characters.
438 Register addr_diff = tmp4;
439 subf(addr_diff, addr, last_addr); // Difference between already checked address and last address to check.
440 addi(addr, addr, h_csize); // This is the new address we want to use for comparing.
441 srdi_(ch2, addr_diff, h_csize);
442 beq(CCR0, L_FinalCheck); // 2 characters left?
443 mtctr(ch2); // num of characters / 2
444 bind(L_InnerLoop); // Main work horse (2x unrolled search loop)
445 if (h_csize == 2) { // Load 2 characters of haystack (ignore alignment).
446 lwz(ch1, 0, addr);
447 lwz(ch2, 2, addr);
448 } else {
449 lhz(ch1, 0, addr);
450 lhz(ch2, 1, addr);
451 }
452 cmpw(CCR0, ch1, n_start); // Compare 2 characters (1 would be sufficient but try to reduce branches to CompLoop).
453 cmpw(CCR1, ch2, n_start);
454 beq(CCR0, L_Comp1); // Did we find the needle start?
455 beq(CCR1, L_Comp2);
456 addi(addr, addr, 2 * h_csize);
457 bdnz(L_InnerLoop);
458 bind(L_FinalCheck);
459 andi_(addr_diff, addr_diff, h_csize); // Remaining characters not covered by InnerLoop: (num of characters) & 1.
460 beq(CCR0, L_NotFound);
461 if (h_csize == 2) { lwz(ch1, 0, addr); } else { lhz(ch1, 0, addr); } // One position left at which we have to compare.
462 cmpw(CCR1, ch1, n_start);
463 beq(CCR1, L_Comp1);
464 bind(L_NotFound);
465 li(result, -1); // not found
466 b(L_End);
467
468 // **************************************************************************************************
469 // Special Case: unfortunately, the variable needle case can be called with needlecnt<2
470 // **************************************************************************************************
471 if (needlecntval == 0) { // We have to handle these cases separately.
472 Label L_OneCharLoop;
473 bind(L_TooShort);
474 mtctr(haycnt);
475 if (n_csize == 2) { lhz(n_start, 0, needle); } else { lbz(n_start, 0, needle); } // First character of needle
476 bind(L_OneCharLoop);
477 if (h_csize == 2) { lhzu(ch1, 2, addr); } else { lbzu(ch1, 1, addr); }
478 cmpw(CCR1, ch1, n_start);
479 beq(CCR1, L_Found); // Did we find the one character needle?
480 bdnz(L_OneCharLoop);
481 li(result, -1); // Not found.
482 b(L_End);
483 }
484
485 // **************************************************************************************************
486 // Regular Case Part II: compare rest of needle (first 2 characters have been compared already)
487 // **************************************************************************************************
488
489 // Compare the rest
490 bind(L_Comp2);
491 addi(addr, addr, h_csize); // First comparison has failed, 2nd one hit.
492 bind(L_Comp1); // Addr points to possible needle start.
493 if (needlecntval != 2) { // Const needlecnt==2?
494 if (needlecntval != 3) {
495 if (needlecntval == 0) { beq(CCR6, L_Found); } // Variable needlecnt==2?
496 Register n_ind = tmp4,
497 h_ind = n_ind;
498 li(n_ind, 2 * n_csize); // First 2 characters are already compared, use index 2.
499 mtctr(needlecnt); // Decremented by 2, still > 0.
500 Label L_CompLoop;
501 bind(L_CompLoop);
502 if (ae ==StrIntrinsicNode::UL) {
503 h_ind = ch1;
504 sldi(h_ind, n_ind, 1);
505 }
506 if (n_csize == 2) { lhzx(ch2, needle, n_ind); } else { lbzx(ch2, needle, n_ind); }
507 if (h_csize == 2) { lhzx(ch1, addr, h_ind); } else { lbzx(ch1, addr, h_ind); }
508 cmpw(CCR1, ch1, ch2);
509 bne(CCR1, L_OuterLoop);
510 addi(n_ind, n_ind, n_csize);
511 bdnz(L_CompLoop);
512 } else { // No loop required if there's only one needle character left.
513 if (n_csize == 2) { lhz(ch2, 2 * 2, needle); } else { lbz(ch2, 2 * 1, needle); }
514 if (h_csize == 2) { lhz(ch1, 2 * 2, addr); } else { lbz(ch1, 2 * 1, addr); }
515 cmpw(CCR1, ch1, ch2);
516 bne(CCR1, L_OuterLoop);
517 }
518 }
519 // Return index ...
520 bind(L_Found);
521 subf(result, haystack, addr); // relative to haystack, ...
522 if (h_csize == 2) { srdi(result, result, 1); } // in characters.
523 bind(L_End);
524 } // string_indexof
525
526 void C2_MacroAssembler::string_indexof_char(Register result, Register haystack, Register haycnt,
527 Register needle, jchar needleChar, Register tmp1, Register tmp2, bool is_byte) {
528 assert_different_registers(haystack, haycnt, needle, tmp1, tmp2);
529
530 Label L_InnerLoop, L_FinalCheck, L_Found1, L_Found2, L_NotFound, L_End;
531 Register addr = tmp1,
532 ch1 = tmp2,
533 ch2 = R0;
534
535 const int h_csize = is_byte ? 1 : 2;
536
537 //4:
538 srwi_(tmp2, haycnt, 1); // Shift right by exact_log2(UNROLL_FACTOR).
539 mr(addr, haystack);
540 beq(CCR0, L_FinalCheck);
541 mtctr(tmp2); // Move to count register.
542 //8:
543 bind(L_InnerLoop); // Main work horse (2x unrolled search loop).
544 if (!is_byte) {
545 lhz(ch1, 0, addr);
546 lhz(ch2, 2, addr);
547 } else {
548 lbz(ch1, 0, addr);
549 lbz(ch2, 1, addr);
550 }
551 (needle != R0) ? cmpw(CCR0, ch1, needle) : cmplwi(CCR0, ch1, (unsigned int)needleChar);
552 (needle != R0) ? cmpw(CCR1, ch2, needle) : cmplwi(CCR1, ch2, (unsigned int)needleChar);
553 beq(CCR0, L_Found1); // Did we find the needle?
554 beq(CCR1, L_Found2);
555 addi(addr, addr, 2 * h_csize);
556 bdnz(L_InnerLoop);
557 //16:
558 bind(L_FinalCheck);
559 andi_(R0, haycnt, 1);
560 beq(CCR0, L_NotFound);
561 if (!is_byte) { lhz(ch1, 0, addr); } else { lbz(ch1, 0, addr); } // One position left at which we have to compare.
562 (needle != R0) ? cmpw(CCR1, ch1, needle) : cmplwi(CCR1, ch1, (unsigned int)needleChar);
563 beq(CCR1, L_Found1);
564 //21:
565 bind(L_NotFound);
566 li(result, -1); // Not found.
567 b(L_End);
568
569 bind(L_Found2);
570 addi(addr, addr, h_csize);
571 //24:
572 bind(L_Found1); // Return index ...
573 subf(result, haystack, addr); // relative to haystack, ...
574 if (!is_byte) { srdi(result, result, 1); } // in characters.
575 bind(L_End);
576 } // string_indexof_char
577
578
579 void C2_MacroAssembler::count_positives(Register src, Register cnt, Register result,
580 Register tmp1, Register tmp2) {
581 const Register tmp0 = R0;
582 assert_different_registers(src, result, cnt, tmp0, tmp1, tmp2);
583 Label Lfastloop, Lslow, Lloop, Ldone;
584
585 // Check if cnt >= 8 (= 16 bytes)
586 lis(tmp1, (int)(short)0x8080); // tmp1 = 0x8080808080808080
587 srwi_(tmp2, cnt, 4);
588 mr(result, src); // Use result reg to point to the current position.
589 beq(CCR0, Lslow);
590 ori(tmp1, tmp1, 0x8080);
591 rldimi(tmp1, tmp1, 32, 0);
592 mtctr(tmp2);
593
594 // 2x unrolled loop
595 bind(Lfastloop);
596 ld(tmp2, 0, result);
597 ld(tmp0, 8, result);
598
599 orr(tmp0, tmp2, tmp0);
600
601 and_(tmp0, tmp0, tmp1);
602 bne(CCR0, Lslow); // Found negative byte.
603 addi(result, result, 16);
604 bdnz(Lfastloop);
605
606 bind(Lslow); // Fallback to slow version.
607 subf(tmp0, src, result); // Bytes known positive.
608 subf_(tmp0, tmp0, cnt); // Remaining Bytes.
609 beq(CCR0, Ldone);
610 mtctr(tmp0);
611 bind(Lloop);
612 lbz(tmp0, 0, result);
613 andi_(tmp0, tmp0, 0x80);
614 bne(CCR0, Ldone); // Found negative byte.
615 addi(result, result, 1);
616 bdnz(Lloop);
617
618 bind(Ldone);
619 subf(result, src, result); // Result is offset from src.
620 }