1 /*
  2  * Copyright (c) 2020, 2023, Oracle and/or its affiliates. All rights reserved.
  3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4  *
  5  * This code is free software; you can redistribute it and/or modify it
  6  * under the terms of the GNU General Public License version 2 only, as
  7  * published by the Free Software Foundation.
  8  *
  9  * This code is distributed in the hope that it will be useful, but WITHOUT
 10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 12  * version 2 for more details (a copy is included in the LICENSE file that
 13  * accompanied this code).
 14  *
 15  * You should have received a copy of the GNU General Public License version
 16  * 2 along with this work; if not, write to the Free Software Foundation,
 17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 18  *
 19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 20  * or visit www.oracle.com if you need additional information or have any
 21  * questions.
 22  *
 23  */
 24 
 25 #include "precompiled.hpp"
 26 #include "asm/assembler.hpp"
 27 #include "asm/assembler.inline.hpp"
 28 #include "opto/c2_MacroAssembler.hpp"
 29 #include "opto/intrinsicnode.hpp"
 30 #include "runtime/vm_version.hpp"
 31 
 32 #ifdef PRODUCT
 33 #define BLOCK_COMMENT(str) // nothing
 34 #else
 35 #define BLOCK_COMMENT(str) block_comment(str)
 36 #endif
 37 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
 38 
 39 // Intrinsics for CompactStrings
 40 
 41 // Compress char[] to byte[] by compressing 16 bytes at once.
 42 void C2_MacroAssembler::string_compress_16(Register src, Register dst, Register cnt,
 43                                            Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5,
 44                                            Label& Lfailure, bool ascii) {
 45 
 46   const Register tmp0 = R0;
 47   const int byte_mask = ascii ? 0x7F : 0xFF;
 48   assert_different_registers(src, dst, cnt, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5);
 49   Label Lloop, Lslow;
 50 
 51   // Check if cnt >= 8 (= 16 bytes)
 52   lis(tmp1, byte_mask);           // tmp1 = 0x00FF00FF00FF00FF (non ascii case)
 53   srwi_(tmp2, cnt, 3);
 54   beq(CCR0, Lslow);
 55   ori(tmp1, tmp1, byte_mask);
 56   rldimi(tmp1, tmp1, 32, 0);
 57   mtctr(tmp2);
 58 
 59   // 2x unrolled loop
 60   bind(Lloop);
 61   ld(tmp2, 0, src);               // _0_1_2_3 (Big Endian)
 62   ld(tmp4, 8, src);               // _4_5_6_7
 63 
 64   orr(tmp0, tmp2, tmp4);
 65   rldicl(tmp3, tmp2, 6*8, 64-24); // _____1_2
 66   rldimi(tmp2, tmp2, 2*8, 2*8);   // _0_2_3_3
 67   rldicl(tmp5, tmp4, 6*8, 64-24); // _____5_6
 68   rldimi(tmp4, tmp4, 2*8, 2*8);   // _4_6_7_7
 69 
 70   andc_(tmp0, tmp0, tmp1);
 71   bne(CCR0, Lfailure);            // Not latin1/ascii.
 72   addi(src, src, 16);
 73 
 74   rlwimi(tmp3, tmp2, 0*8, 24, 31);// _____1_3
 75   srdi(tmp2, tmp2, 3*8);          // ____0_2_
 76   rlwimi(tmp5, tmp4, 0*8, 24, 31);// _____5_7
 77   srdi(tmp4, tmp4, 3*8);          // ____4_6_
 78 
 79   orr(tmp2, tmp2, tmp3);          // ____0123
 80   orr(tmp4, tmp4, tmp5);          // ____4567
 81 
 82   stw(tmp2, 0, dst);
 83   stw(tmp4, 4, dst);
 84   addi(dst, dst, 8);
 85   bdnz(Lloop);
 86 
 87   bind(Lslow);                    // Fallback to slow version
 88 }
 89 
 90 // Compress char[] to byte[]. cnt must be positive int.
 91 void C2_MacroAssembler::string_compress(Register src, Register dst, Register cnt, Register tmp,
 92                                         Label& Lfailure, bool ascii) {
 93   const int byte_mask = ascii ? 0x7F : 0xFF;
 94   Label Lloop;
 95   mtctr(cnt);
 96 
 97   bind(Lloop);
 98   lhz(tmp, 0, src);
 99   cmplwi(CCR0, tmp, byte_mask);
100   bgt(CCR0, Lfailure);            // Not latin1/ascii.
101   addi(src, src, 2);
102   stb(tmp, 0, dst);
103   addi(dst, dst, 1);
104   bdnz(Lloop);
105 }
106 
107 void C2_MacroAssembler::encode_iso_array(Register src, Register dst, Register len,
108                                          Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5,
109                                          Register result, bool ascii) {
110   Label Lslow, Lfailure1, Lfailure2, Ldone;
111 
112   string_compress_16(src, dst, len, tmp1, tmp2, tmp3, tmp4, tmp5, Lfailure1, ascii);
113   rldicl_(result, len, 0, 64-3); // Remaining characters.
114   beq(CCR0, Ldone);
115   bind(Lslow);
116   string_compress(src, dst, result, tmp2, Lfailure2, ascii);
117   li(result, 0);
118   b(Ldone);
119 
120   bind(Lfailure1);
121   mr(result, len);
122   mfctr(tmp1);
123   rldimi_(result, tmp1, 3, 0); // Remaining characters.
124   beq(CCR0, Ldone);
125   b(Lslow);
126 
127   bind(Lfailure2);
128   mfctr(result); // Remaining characters.
129 
130   bind(Ldone);
131   subf(result, result, len);
132 }
133 
134 // Inflate byte[] to char[] by inflating 16 bytes at once.
135 void C2_MacroAssembler::string_inflate_16(Register src, Register dst, Register cnt,
136                                           Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5) {
137   const Register tmp0 = R0;
138   assert_different_registers(src, dst, cnt, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5);
139   Label Lloop, Lslow;
140 
141   // Check if cnt >= 8
142   srwi_(tmp2, cnt, 3);
143   beq(CCR0, Lslow);
144   lis(tmp1, 0xFF);                // tmp1 = 0x00FF00FF
145   ori(tmp1, tmp1, 0xFF);
146   mtctr(tmp2);
147 
148   // 2x unrolled loop
149   bind(Lloop);
150   lwz(tmp2, 0, src);              // ____0123 (Big Endian)
151   lwz(tmp4, 4, src);              // ____4567
152   addi(src, src, 8);
153 
154   rldicl(tmp3, tmp2, 7*8, 64-8);  // _______2
155   rlwimi(tmp2, tmp2, 3*8, 16, 23);// ____0113
156   rldicl(tmp5, tmp4, 7*8, 64-8);  // _______6
157   rlwimi(tmp4, tmp4, 3*8, 16, 23);// ____4557
158 
159   andc(tmp0, tmp2, tmp1);         // ____0_1_
160   rlwimi(tmp2, tmp3, 2*8, 0, 23); // _____2_3
161   andc(tmp3, tmp4, tmp1);         // ____4_5_
162   rlwimi(tmp4, tmp5, 2*8, 0, 23); // _____6_7
163 
164   rldimi(tmp2, tmp0, 3*8, 0*8);   // _0_1_2_3
165   rldimi(tmp4, tmp3, 3*8, 0*8);   // _4_5_6_7
166 
167   std(tmp2, 0, dst);
168   std(tmp4, 8, dst);
169   addi(dst, dst, 16);
170   bdnz(Lloop);
171 
172   bind(Lslow);                    // Fallback to slow version
173 }
174 
175 // Inflate byte[] to char[]. cnt must be positive int.
176 void C2_MacroAssembler::string_inflate(Register src, Register dst, Register cnt, Register tmp) {
177   Label Lloop;
178   mtctr(cnt);
179 
180   bind(Lloop);
181   lbz(tmp, 0, src);
182   addi(src, src, 1);
183   sth(tmp, 0, dst);
184   addi(dst, dst, 2);
185   bdnz(Lloop);
186 }
187 
188 void C2_MacroAssembler::string_compare(Register str1, Register str2,
189                                        Register cnt1, Register cnt2,
190                                        Register tmp1, Register result, int ae) {
191   const Register tmp0 = R0,
192                  diff = tmp1;
193 
194   assert_different_registers(str1, str2, cnt1, cnt2, tmp0, tmp1, result);
195   Label Ldone, Lslow, Lloop, Lreturn_diff;
196 
197   // Note: Making use of the fact that compareTo(a, b) == -compareTo(b, a)
198   // we interchange str1 and str2 in the UL case and negate the result.
199   // Like this, str1 is always latin1 encoded, except for the UU case.
200   // In addition, we need 0 (or sign which is 0) extend.
201 
202   if (ae == StrIntrinsicNode::UU) {
203     srwi(cnt1, cnt1, 1);
204   } else {
205     clrldi(cnt1, cnt1, 32);
206   }
207 
208   if (ae != StrIntrinsicNode::LL) {
209     srwi(cnt2, cnt2, 1);
210   } else {
211     clrldi(cnt2, cnt2, 32);
212   }
213 
214   // See if the lengths are different, and calculate min in cnt1.
215   // Save diff in case we need it for a tie-breaker.
216   subf_(diff, cnt2, cnt1); // diff = cnt1 - cnt2
217   // if (diff > 0) { cnt1 = cnt2; }
218   if (VM_Version::has_isel()) {
219     isel(cnt1, CCR0, Assembler::greater, /*invert*/ false, cnt2);
220   } else {
221     Label Lskip;
222     blt(CCR0, Lskip);
223     mr(cnt1, cnt2);
224     bind(Lskip);
225   }
226 
227   // Rename registers
228   Register chr1 = result;
229   Register chr2 = tmp0;
230 
231   // Compare multiple characters in fast loop (only implemented for same encoding).
232   int stride1 = 8, stride2 = 8;
233   if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
234     int log2_chars_per_iter = (ae == StrIntrinsicNode::LL) ? 3 : 2;
235     Label Lfastloop, Lskipfast;
236 
237     srwi_(tmp0, cnt1, log2_chars_per_iter);
238     beq(CCR0, Lskipfast);
239     rldicl(cnt2, cnt1, 0, 64 - log2_chars_per_iter); // Remaining characters.
240     li(cnt1, 1 << log2_chars_per_iter); // Initialize for failure case: Rescan characters from current iteration.
241     mtctr(tmp0);
242 
243     bind(Lfastloop);
244     ld(chr1, 0, str1);
245     ld(chr2, 0, str2);
246     cmpd(CCR0, chr1, chr2);
247     bne(CCR0, Lslow);
248     addi(str1, str1, stride1);
249     addi(str2, str2, stride2);
250     bdnz(Lfastloop);
251     mr(cnt1, cnt2); // Remaining characters.
252     bind(Lskipfast);
253   }
254 
255   // Loop which searches the first difference character by character.
256   cmpwi(CCR0, cnt1, 0);
257   beq(CCR0, Lreturn_diff);
258   bind(Lslow);
259   mtctr(cnt1);
260 
261   switch (ae) {
262     case StrIntrinsicNode::LL: stride1 = 1; stride2 = 1; break;
263     case StrIntrinsicNode::UL: // fallthru (see comment above)
264     case StrIntrinsicNode::LU: stride1 = 1; stride2 = 2; break;
265     case StrIntrinsicNode::UU: stride1 = 2; stride2 = 2; break;
266     default: ShouldNotReachHere(); break;
267   }
268 
269   bind(Lloop);
270   if (stride1 == 1) { lbz(chr1, 0, str1); } else { lhz(chr1, 0, str1); }
271   if (stride2 == 1) { lbz(chr2, 0, str2); } else { lhz(chr2, 0, str2); }
272   subf_(result, chr2, chr1); // result = chr1 - chr2
273   bne(CCR0, Ldone);
274   addi(str1, str1, stride1);
275   addi(str2, str2, stride2);
276   bdnz(Lloop);
277 
278   // If strings are equal up to min length, return the length difference.
279   bind(Lreturn_diff);
280   mr(result, diff);
281 
282   // Otherwise, return the difference between the first mismatched chars.
283   bind(Ldone);
284   if (ae == StrIntrinsicNode::UL) {
285     neg(result, result); // Negate result (see note above).
286   }
287 }
288 
289 void C2_MacroAssembler::array_equals(bool is_array_equ, Register ary1, Register ary2,
290                                      Register limit, Register tmp1, Register result, bool is_byte) {
291   const Register tmp0 = R0;
292   assert_different_registers(ary1, ary2, limit, tmp0, tmp1, result);
293   Label Ldone, Lskiploop, Lloop, Lfastloop, Lskipfast;
294   bool limit_needs_shift = false;
295 
296   if (is_array_equ) {
297     const int length_offset = arrayOopDesc::length_offset_in_bytes();
298     const int base_offset   = arrayOopDesc::base_offset_in_bytes(is_byte ? T_BYTE : T_CHAR);
299 
300     // Return true if the same array.
301     cmpd(CCR0, ary1, ary2);
302     beq(CCR0, Lskiploop);
303 
304     // Return false if one of them is null.
305     cmpdi(CCR0, ary1, 0);
306     cmpdi(CCR1, ary2, 0);
307     li(result, 0);
308     cror(CCR0, Assembler::equal, CCR1, Assembler::equal);
309     beq(CCR0, Ldone);
310 
311     // Load the lengths of arrays.
312     lwz(limit, length_offset, ary1);
313     lwz(tmp0, length_offset, ary2);
314 
315     // Return false if the two arrays are not equal length.
316     cmpw(CCR0, limit, tmp0);
317     bne(CCR0, Ldone);
318 
319     // Load array addresses.
320     addi(ary1, ary1, base_offset);
321     addi(ary2, ary2, base_offset);
322   } else {
323     limit_needs_shift = !is_byte;
324     li(result, 0); // Assume not equal.
325   }
326 
327   // Rename registers
328   Register chr1 = tmp0;
329   Register chr2 = tmp1;
330 
331   // Compare 8 bytes per iteration in fast loop.
332   const int log2_chars_per_iter = is_byte ? 3 : 2;
333 
334   srwi_(tmp0, limit, log2_chars_per_iter + (limit_needs_shift ? 1 : 0));
335   beq(CCR0, Lskipfast);
336   mtctr(tmp0);
337 
338   bind(Lfastloop);
339   ld(chr1, 0, ary1);
340   ld(chr2, 0, ary2);
341   addi(ary1, ary1, 8);
342   addi(ary2, ary2, 8);
343   cmpd(CCR0, chr1, chr2);
344   bne(CCR0, Ldone);
345   bdnz(Lfastloop);
346 
347   bind(Lskipfast);
348   rldicl_(limit, limit, limit_needs_shift ? 64 - 1 : 0, 64 - log2_chars_per_iter); // Remaining characters.
349   beq(CCR0, Lskiploop);
350   mtctr(limit);
351 
352   // Character by character.
353   bind(Lloop);
354   if (is_byte) {
355     lbz(chr1, 0, ary1);
356     lbz(chr2, 0, ary2);
357     addi(ary1, ary1, 1);
358     addi(ary2, ary2, 1);
359   } else {
360     lhz(chr1, 0, ary1);
361     lhz(chr2, 0, ary2);
362     addi(ary1, ary1, 2);
363     addi(ary2, ary2, 2);
364   }
365   cmpw(CCR0, chr1, chr2);
366   bne(CCR0, Ldone);
367   bdnz(Lloop);
368 
369   bind(Lskiploop);
370   li(result, 1); // All characters are equal.
371   bind(Ldone);
372 }
373 
374 void C2_MacroAssembler::string_indexof(Register result, Register haystack, Register haycnt,
375                                        Register needle, ciTypeArray* needle_values, Register needlecnt, int needlecntval,
376                                        Register tmp1, Register tmp2, Register tmp3, Register tmp4, int ae) {
377 
378   // Ensure 0<needlecnt<=haycnt in ideal graph as prerequisite!
379   Label L_TooShort, L_Found, L_NotFound, L_End;
380   Register last_addr = haycnt, // Kill haycnt at the beginning.
381   addr      = tmp1,
382   n_start   = tmp2,
383   ch1       = tmp3,
384   ch2       = R0;
385 
386   assert(ae != StrIntrinsicNode::LU, "Invalid encoding");
387   const int h_csize = (ae == StrIntrinsicNode::LL) ? 1 : 2;
388   const int n_csize = (ae == StrIntrinsicNode::UU) ? 2 : 1;
389 
390   // **************************************************************************************************
391   // Prepare for main loop: optimized for needle count >=2, bail out otherwise.
392   // **************************************************************************************************
393 
394   // Compute last haystack addr to use if no match gets found.
395   clrldi(haycnt, haycnt, 32);         // Ensure positive int is valid as 64 bit value.
396   addi(addr, haystack, -h_csize);     // Accesses use pre-increment.
397   if (needlecntval == 0) { // variable needlecnt
398    cmpwi(CCR6, needlecnt, 2);
399    clrldi(needlecnt, needlecnt, 32);  // Ensure positive int is valid as 64 bit value.
400    blt(CCR6, L_TooShort);             // Variable needlecnt: handle short needle separately.
401   }
402 
403   if (n_csize == 2) { lwz(n_start, 0, needle); } else { lhz(n_start, 0, needle); } // Load first 2 characters of needle.
404 
405   if (needlecntval == 0) { // variable needlecnt
406    subf(ch1, needlecnt, haycnt);      // Last character index to compare is haycnt-needlecnt.
407    addi(needlecnt, needlecnt, -2);    // Rest of needle.
408   } else { // constant needlecnt
409   guarantee(needlecntval != 1, "IndexOf with single-character needle must be handled separately");
410   assert((needlecntval & 0x7fff) == needlecntval, "wrong immediate");
411    addi(ch1, haycnt, -needlecntval);  // Last character index to compare is haycnt-needlecnt.
412    if (needlecntval > 3) { li(needlecnt, needlecntval - 2); } // Rest of needle.
413   }
414 
415   if (h_csize == 2) { slwi(ch1, ch1, 1); } // Scale to number of bytes.
416 
417   if (ae ==StrIntrinsicNode::UL) {
418    srwi(tmp4, n_start, 1*8);          // ___0
419    rlwimi(n_start, tmp4, 2*8, 0, 23); // _0_1
420   }
421 
422   add(last_addr, haystack, ch1);      // Point to last address to compare (haystack+2*(haycnt-needlecnt)).
423 
424   // Main Loop (now we have at least 2 characters).
425   Label L_OuterLoop, L_InnerLoop, L_FinalCheck, L_Comp1, L_Comp2;
426   bind(L_OuterLoop); // Search for 1st 2 characters.
427   Register addr_diff = tmp4;
428    subf(addr_diff, addr, last_addr);  // Difference between already checked address and last address to check.
429    addi(addr, addr, h_csize);         // This is the new address we want to use for comparing.
430    srdi_(ch2, addr_diff, h_csize);
431    beq(CCR0, L_FinalCheck);           // 2 characters left?
432    mtctr(ch2);                        // num of characters / 2
433   bind(L_InnerLoop);                  // Main work horse (2x unrolled search loop)
434    if (h_csize == 2) {                // Load 2 characters of haystack (ignore alignment).
435     lwz(ch1, 0, addr);
436     lwz(ch2, 2, addr);
437    } else {
438     lhz(ch1, 0, addr);
439     lhz(ch2, 1, addr);
440    }
441    cmpw(CCR0, ch1, n_start);          // Compare 2 characters (1 would be sufficient but try to reduce branches to CompLoop).
442    cmpw(CCR1, ch2, n_start);
443    beq(CCR0, L_Comp1);                // Did we find the needle start?
444    beq(CCR1, L_Comp2);
445    addi(addr, addr, 2 * h_csize);
446    bdnz(L_InnerLoop);
447   bind(L_FinalCheck);
448    andi_(addr_diff, addr_diff, h_csize); // Remaining characters not covered by InnerLoop: (num of characters) & 1.
449    beq(CCR0, L_NotFound);
450    if (h_csize == 2) { lwz(ch1, 0, addr); } else { lhz(ch1, 0, addr); } // One position left at which we have to compare.
451    cmpw(CCR1, ch1, n_start);
452    beq(CCR1, L_Comp1);
453   bind(L_NotFound);
454    li(result, -1);                    // not found
455    b(L_End);
456 
457    // **************************************************************************************************
458    // Special Case: unfortunately, the variable needle case can be called with needlecnt<2
459    // **************************************************************************************************
460   if (needlecntval == 0) {           // We have to handle these cases separately.
461   Label L_OneCharLoop;
462   bind(L_TooShort);
463    mtctr(haycnt);
464    if (n_csize == 2) { lhz(n_start, 0, needle); } else { lbz(n_start, 0, needle); } // First character of needle
465   bind(L_OneCharLoop);
466    if (h_csize == 2) { lhzu(ch1, 2, addr); } else { lbzu(ch1, 1, addr); }
467    cmpw(CCR1, ch1, n_start);
468    beq(CCR1, L_Found);               // Did we find the one character needle?
469    bdnz(L_OneCharLoop);
470    li(result, -1);                   // Not found.
471    b(L_End);
472   }
473 
474   // **************************************************************************************************
475   // Regular Case Part II: compare rest of needle (first 2 characters have been compared already)
476   // **************************************************************************************************
477 
478   // Compare the rest
479   bind(L_Comp2);
480    addi(addr, addr, h_csize);        // First comparison has failed, 2nd one hit.
481   bind(L_Comp1);                     // Addr points to possible needle start.
482   if (needlecntval != 2) {           // Const needlecnt==2?
483    if (needlecntval != 3) {
484     if (needlecntval == 0) { beq(CCR6, L_Found); } // Variable needlecnt==2?
485     Register n_ind = tmp4,
486              h_ind = n_ind;
487     li(n_ind, 2 * n_csize);          // First 2 characters are already compared, use index 2.
488     mtctr(needlecnt);                // Decremented by 2, still > 0.
489    Label L_CompLoop;
490    bind(L_CompLoop);
491     if (ae ==StrIntrinsicNode::UL) {
492       h_ind = ch1;
493       sldi(h_ind, n_ind, 1);
494     }
495     if (n_csize == 2) { lhzx(ch2, needle, n_ind); } else { lbzx(ch2, needle, n_ind); }
496     if (h_csize == 2) { lhzx(ch1, addr, h_ind); } else { lbzx(ch1, addr, h_ind); }
497     cmpw(CCR1, ch1, ch2);
498     bne(CCR1, L_OuterLoop);
499     addi(n_ind, n_ind, n_csize);
500     bdnz(L_CompLoop);
501    } else { // No loop required if there's only one needle character left.
502     if (n_csize == 2) { lhz(ch2, 2 * 2, needle); } else { lbz(ch2, 2 * 1, needle); }
503     if (h_csize == 2) { lhz(ch1, 2 * 2, addr); } else { lbz(ch1, 2 * 1, addr); }
504     cmpw(CCR1, ch1, ch2);
505     bne(CCR1, L_OuterLoop);
506    }
507   }
508   // Return index ...
509   bind(L_Found);
510    subf(result, haystack, addr);     // relative to haystack, ...
511    if (h_csize == 2) { srdi(result, result, 1); } // in characters.
512   bind(L_End);
513 } // string_indexof
514 
515 void C2_MacroAssembler::string_indexof_char(Register result, Register haystack, Register haycnt,
516                                             Register needle, jchar needleChar, Register tmp1, Register tmp2, bool is_byte) {
517   assert_different_registers(haystack, haycnt, needle, tmp1, tmp2);
518 
519   Label L_InnerLoop, L_FinalCheck, L_Found1, L_Found2, L_NotFound, L_End;
520   Register addr = tmp1,
521            ch1 = tmp2,
522            ch2 = R0;
523 
524   const int h_csize = is_byte ? 1 : 2;
525 
526 //4:
527    srwi_(tmp2, haycnt, 1);   // Shift right by exact_log2(UNROLL_FACTOR).
528    mr(addr, haystack);
529    beq(CCR0, L_FinalCheck);
530    mtctr(tmp2);              // Move to count register.
531 //8:
532   bind(L_InnerLoop);         // Main work horse (2x unrolled search loop).
533    if (!is_byte) {
534     lhz(ch1, 0, addr);
535     lhz(ch2, 2, addr);
536    } else {
537     lbz(ch1, 0, addr);
538     lbz(ch2, 1, addr);
539    }
540    (needle != R0) ? cmpw(CCR0, ch1, needle) : cmplwi(CCR0, ch1, (unsigned int)needleChar);
541    (needle != R0) ? cmpw(CCR1, ch2, needle) : cmplwi(CCR1, ch2, (unsigned int)needleChar);
542    beq(CCR0, L_Found1);      // Did we find the needle?
543    beq(CCR1, L_Found2);
544    addi(addr, addr, 2 * h_csize);
545    bdnz(L_InnerLoop);
546 //16:
547   bind(L_FinalCheck);
548    andi_(R0, haycnt, 1);
549    beq(CCR0, L_NotFound);
550    if (!is_byte) { lhz(ch1, 0, addr); } else { lbz(ch1, 0, addr); } // One position left at which we have to compare.
551    (needle != R0) ? cmpw(CCR1, ch1, needle) : cmplwi(CCR1, ch1, (unsigned int)needleChar);
552    beq(CCR1, L_Found1);
553 //21:
554   bind(L_NotFound);
555    li(result, -1);           // Not found.
556    b(L_End);
557 
558   bind(L_Found2);
559    addi(addr, addr, h_csize);
560 //24:
561   bind(L_Found1);            // Return index ...
562    subf(result, haystack, addr); // relative to haystack, ...
563    if (!is_byte) { srdi(result, result, 1); } // in characters.
564   bind(L_End);
565 } // string_indexof_char
566 
567 
568 void C2_MacroAssembler::count_positives(Register src, Register cnt, Register result,
569                                         Register tmp1, Register tmp2) {
570   const Register tmp0 = R0;
571   assert_different_registers(src, result, cnt, tmp0, tmp1, tmp2);
572   Label Lfastloop, Lslow, Lloop, Ldone;
573 
574   // Check if cnt >= 8 (= 16 bytes)
575   lis(tmp1, (int)(short)0x8080);  // tmp1 = 0x8080808080808080
576   srwi_(tmp2, cnt, 4);
577   mr(result, src);                // Use result reg to point to the current position.
578   beq(CCR0, Lslow);
579   ori(tmp1, tmp1, 0x8080);
580   rldimi(tmp1, tmp1, 32, 0);
581   mtctr(tmp2);
582 
583   // 2x unrolled loop
584   bind(Lfastloop);
585   ld(tmp2, 0, result);
586   ld(tmp0, 8, result);
587 
588   orr(tmp0, tmp2, tmp0);
589 
590   and_(tmp0, tmp0, tmp1);
591   bne(CCR0, Lslow);               // Found negative byte.
592   addi(result, result, 16);
593   bdnz(Lfastloop);
594 
595   bind(Lslow);                    // Fallback to slow version.
596   subf(tmp0, src, result);        // Bytes known positive.
597   subf_(tmp0, tmp0, cnt);         // Remaining Bytes.
598   beq(CCR0, Ldone);
599   mtctr(tmp0);
600   bind(Lloop);
601   lbz(tmp0, 0, result);
602   andi_(tmp0, tmp0, 0x80);
603   bne(CCR0, Ldone);               // Found negative byte.
604   addi(result, result, 1);
605   bdnz(Lloop);
606 
607   bind(Ldone);
608   subf(result, src, result);      // Result is offset from src.
609 }