1 /* 2 * Copyright (c) 2020, 2023, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #include "precompiled.hpp" 26 #include "asm/assembler.hpp" 27 #include "asm/assembler.inline.hpp" 28 #include "opto/c2_MacroAssembler.hpp" 29 #include "opto/intrinsicnode.hpp" 30 #include "runtime/vm_version.hpp" 31 32 #ifdef PRODUCT 33 #define BLOCK_COMMENT(str) // nothing 34 #else 35 #define BLOCK_COMMENT(str) block_comment(str) 36 #endif 37 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":") 38 39 // Intrinsics for CompactStrings 40 41 // Compress char[] to byte[] by compressing 16 bytes at once. 42 void C2_MacroAssembler::string_compress_16(Register src, Register dst, Register cnt, 43 Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5, 44 Label& Lfailure, bool ascii) { 45 46 const Register tmp0 = R0; 47 const int byte_mask = ascii ? 0x7F : 0xFF; 48 assert_different_registers(src, dst, cnt, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5); 49 Label Lloop, Lslow; 50 51 // Check if cnt >= 8 (= 16 bytes) 52 lis(tmp1, byte_mask); // tmp1 = 0x00FF00FF00FF00FF (non ascii case) 53 srwi_(tmp2, cnt, 3); 54 beq(CCR0, Lslow); 55 ori(tmp1, tmp1, byte_mask); 56 rldimi(tmp1, tmp1, 32, 0); 57 mtctr(tmp2); 58 59 // 2x unrolled loop 60 bind(Lloop); 61 ld(tmp2, 0, src); // _0_1_2_3 (Big Endian) 62 ld(tmp4, 8, src); // _4_5_6_7 63 64 orr(tmp0, tmp2, tmp4); 65 rldicl(tmp3, tmp2, 6*8, 64-24); // _____1_2 66 rldimi(tmp2, tmp2, 2*8, 2*8); // _0_2_3_3 67 rldicl(tmp5, tmp4, 6*8, 64-24); // _____5_6 68 rldimi(tmp4, tmp4, 2*8, 2*8); // _4_6_7_7 69 70 andc_(tmp0, tmp0, tmp1); 71 bne(CCR0, Lfailure); // Not latin1/ascii. 72 addi(src, src, 16); 73 74 rlwimi(tmp3, tmp2, 0*8, 24, 31);// _____1_3 75 srdi(tmp2, tmp2, 3*8); // ____0_2_ 76 rlwimi(tmp5, tmp4, 0*8, 24, 31);// _____5_7 77 srdi(tmp4, tmp4, 3*8); // ____4_6_ 78 79 orr(tmp2, tmp2, tmp3); // ____0123 80 orr(tmp4, tmp4, tmp5); // ____4567 81 82 stw(tmp2, 0, dst); 83 stw(tmp4, 4, dst); 84 addi(dst, dst, 8); 85 bdnz(Lloop); 86 87 bind(Lslow); // Fallback to slow version 88 } 89 90 // Compress char[] to byte[]. cnt must be positive int. 91 void C2_MacroAssembler::string_compress(Register src, Register dst, Register cnt, Register tmp, 92 Label& Lfailure, bool ascii) { 93 const int byte_mask = ascii ? 0x7F : 0xFF; 94 Label Lloop; 95 mtctr(cnt); 96 97 bind(Lloop); 98 lhz(tmp, 0, src); 99 cmplwi(CCR0, tmp, byte_mask); 100 bgt(CCR0, Lfailure); // Not latin1/ascii. 101 addi(src, src, 2); 102 stb(tmp, 0, dst); 103 addi(dst, dst, 1); 104 bdnz(Lloop); 105 } 106 107 void C2_MacroAssembler::encode_iso_array(Register src, Register dst, Register len, 108 Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5, 109 Register result, bool ascii) { 110 Label Lslow, Lfailure1, Lfailure2, Ldone; 111 112 string_compress_16(src, dst, len, tmp1, tmp2, tmp3, tmp4, tmp5, Lfailure1, ascii); 113 rldicl_(result, len, 0, 64-3); // Remaining characters. 114 beq(CCR0, Ldone); 115 bind(Lslow); 116 string_compress(src, dst, result, tmp2, Lfailure2, ascii); 117 li(result, 0); 118 b(Ldone); 119 120 bind(Lfailure1); 121 mr(result, len); 122 mfctr(tmp1); 123 rldimi_(result, tmp1, 3, 0); // Remaining characters. 124 beq(CCR0, Ldone); 125 b(Lslow); 126 127 bind(Lfailure2); 128 mfctr(result); // Remaining characters. 129 130 bind(Ldone); 131 subf(result, result, len); 132 } 133 134 // Inflate byte[] to char[] by inflating 16 bytes at once. 135 void C2_MacroAssembler::string_inflate_16(Register src, Register dst, Register cnt, 136 Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5) { 137 const Register tmp0 = R0; 138 assert_different_registers(src, dst, cnt, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5); 139 Label Lloop, Lslow; 140 141 // Check if cnt >= 8 142 srwi_(tmp2, cnt, 3); 143 beq(CCR0, Lslow); 144 lis(tmp1, 0xFF); // tmp1 = 0x00FF00FF 145 ori(tmp1, tmp1, 0xFF); 146 mtctr(tmp2); 147 148 // 2x unrolled loop 149 bind(Lloop); 150 lwz(tmp2, 0, src); // ____0123 (Big Endian) 151 lwz(tmp4, 4, src); // ____4567 152 addi(src, src, 8); 153 154 rldicl(tmp3, tmp2, 7*8, 64-8); // _______2 155 rlwimi(tmp2, tmp2, 3*8, 16, 23);// ____0113 156 rldicl(tmp5, tmp4, 7*8, 64-8); // _______6 157 rlwimi(tmp4, tmp4, 3*8, 16, 23);// ____4557 158 159 andc(tmp0, tmp2, tmp1); // ____0_1_ 160 rlwimi(tmp2, tmp3, 2*8, 0, 23); // _____2_3 161 andc(tmp3, tmp4, tmp1); // ____4_5_ 162 rlwimi(tmp4, tmp5, 2*8, 0, 23); // _____6_7 163 164 rldimi(tmp2, tmp0, 3*8, 0*8); // _0_1_2_3 165 rldimi(tmp4, tmp3, 3*8, 0*8); // _4_5_6_7 166 167 std(tmp2, 0, dst); 168 std(tmp4, 8, dst); 169 addi(dst, dst, 16); 170 bdnz(Lloop); 171 172 bind(Lslow); // Fallback to slow version 173 } 174 175 // Inflate byte[] to char[]. cnt must be positive int. 176 void C2_MacroAssembler::string_inflate(Register src, Register dst, Register cnt, Register tmp) { 177 Label Lloop; 178 mtctr(cnt); 179 180 bind(Lloop); 181 lbz(tmp, 0, src); 182 addi(src, src, 1); 183 sth(tmp, 0, dst); 184 addi(dst, dst, 2); 185 bdnz(Lloop); 186 } 187 188 void C2_MacroAssembler::string_compare(Register str1, Register str2, 189 Register cnt1, Register cnt2, 190 Register tmp1, Register result, int ae) { 191 const Register tmp0 = R0, 192 diff = tmp1; 193 194 assert_different_registers(str1, str2, cnt1, cnt2, tmp0, tmp1, result); 195 Label Ldone, Lslow, Lloop, Lreturn_diff; 196 197 // Note: Making use of the fact that compareTo(a, b) == -compareTo(b, a) 198 // we interchange str1 and str2 in the UL case and negate the result. 199 // Like this, str1 is always latin1 encoded, except for the UU case. 200 // In addition, we need 0 (or sign which is 0) extend. 201 202 if (ae == StrIntrinsicNode::UU) { 203 srwi(cnt1, cnt1, 1); 204 } else { 205 clrldi(cnt1, cnt1, 32); 206 } 207 208 if (ae != StrIntrinsicNode::LL) { 209 srwi(cnt2, cnt2, 1); 210 } else { 211 clrldi(cnt2, cnt2, 32); 212 } 213 214 // See if the lengths are different, and calculate min in cnt1. 215 // Save diff in case we need it for a tie-breaker. 216 subf_(diff, cnt2, cnt1); // diff = cnt1 - cnt2 217 // if (diff > 0) { cnt1 = cnt2; } 218 if (VM_Version::has_isel()) { 219 isel(cnt1, CCR0, Assembler::greater, /*invert*/ false, cnt2); 220 } else { 221 Label Lskip; 222 blt(CCR0, Lskip); 223 mr(cnt1, cnt2); 224 bind(Lskip); 225 } 226 227 // Rename registers 228 Register chr1 = result; 229 Register chr2 = tmp0; 230 231 // Compare multiple characters in fast loop (only implemented for same encoding). 232 int stride1 = 8, stride2 = 8; 233 if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) { 234 int log2_chars_per_iter = (ae == StrIntrinsicNode::LL) ? 3 : 2; 235 Label Lfastloop, Lskipfast; 236 237 srwi_(tmp0, cnt1, log2_chars_per_iter); 238 beq(CCR0, Lskipfast); 239 rldicl(cnt2, cnt1, 0, 64 - log2_chars_per_iter); // Remaining characters. 240 li(cnt1, 1 << log2_chars_per_iter); // Initialize for failure case: Rescan characters from current iteration. 241 mtctr(tmp0); 242 243 bind(Lfastloop); 244 ld(chr1, 0, str1); 245 ld(chr2, 0, str2); 246 cmpd(CCR0, chr1, chr2); 247 bne(CCR0, Lslow); 248 addi(str1, str1, stride1); 249 addi(str2, str2, stride2); 250 bdnz(Lfastloop); 251 mr(cnt1, cnt2); // Remaining characters. 252 bind(Lskipfast); 253 } 254 255 // Loop which searches the first difference character by character. 256 cmpwi(CCR0, cnt1, 0); 257 beq(CCR0, Lreturn_diff); 258 bind(Lslow); 259 mtctr(cnt1); 260 261 switch (ae) { 262 case StrIntrinsicNode::LL: stride1 = 1; stride2 = 1; break; 263 case StrIntrinsicNode::UL: // fallthru (see comment above) 264 case StrIntrinsicNode::LU: stride1 = 1; stride2 = 2; break; 265 case StrIntrinsicNode::UU: stride1 = 2; stride2 = 2; break; 266 default: ShouldNotReachHere(); break; 267 } 268 269 bind(Lloop); 270 if (stride1 == 1) { lbz(chr1, 0, str1); } else { lhz(chr1, 0, str1); } 271 if (stride2 == 1) { lbz(chr2, 0, str2); } else { lhz(chr2, 0, str2); } 272 subf_(result, chr2, chr1); // result = chr1 - chr2 273 bne(CCR0, Ldone); 274 addi(str1, str1, stride1); 275 addi(str2, str2, stride2); 276 bdnz(Lloop); 277 278 // If strings are equal up to min length, return the length difference. 279 bind(Lreturn_diff); 280 mr(result, diff); 281 282 // Otherwise, return the difference between the first mismatched chars. 283 bind(Ldone); 284 if (ae == StrIntrinsicNode::UL) { 285 neg(result, result); // Negate result (see note above). 286 } 287 } 288 289 void C2_MacroAssembler::array_equals(bool is_array_equ, Register ary1, Register ary2, 290 Register limit, Register tmp1, Register result, bool is_byte) { 291 const Register tmp0 = R0; 292 assert_different_registers(ary1, ary2, limit, tmp0, tmp1, result); 293 Label Ldone, Lskiploop, Lloop, Lfastloop, Lskipfast; 294 bool limit_needs_shift = false; 295 296 if (is_array_equ) { 297 const int length_offset = arrayOopDesc::length_offset_in_bytes(); 298 const int base_offset = arrayOopDesc::base_offset_in_bytes(is_byte ? T_BYTE : T_CHAR); 299 300 // Return true if the same array. 301 cmpd(CCR0, ary1, ary2); 302 beq(CCR0, Lskiploop); 303 304 // Return false if one of them is null. 305 cmpdi(CCR0, ary1, 0); 306 cmpdi(CCR1, ary2, 0); 307 li(result, 0); 308 cror(CCR0, Assembler::equal, CCR1, Assembler::equal); 309 beq(CCR0, Ldone); 310 311 // Load the lengths of arrays. 312 lwz(limit, length_offset, ary1); 313 lwz(tmp0, length_offset, ary2); 314 315 // Return false if the two arrays are not equal length. 316 cmpw(CCR0, limit, tmp0); 317 bne(CCR0, Ldone); 318 319 // Load array addresses. 320 addi(ary1, ary1, base_offset); 321 addi(ary2, ary2, base_offset); 322 } else { 323 limit_needs_shift = !is_byte; 324 li(result, 0); // Assume not equal. 325 } 326 327 // Rename registers 328 Register chr1 = tmp0; 329 Register chr2 = tmp1; 330 331 // Compare 8 bytes per iteration in fast loop. 332 const int log2_chars_per_iter = is_byte ? 3 : 2; 333 334 srwi_(tmp0, limit, log2_chars_per_iter + (limit_needs_shift ? 1 : 0)); 335 beq(CCR0, Lskipfast); 336 mtctr(tmp0); 337 338 bind(Lfastloop); 339 ld(chr1, 0, ary1); 340 ld(chr2, 0, ary2); 341 addi(ary1, ary1, 8); 342 addi(ary2, ary2, 8); 343 cmpd(CCR0, chr1, chr2); 344 bne(CCR0, Ldone); 345 bdnz(Lfastloop); 346 347 bind(Lskipfast); 348 rldicl_(limit, limit, limit_needs_shift ? 64 - 1 : 0, 64 - log2_chars_per_iter); // Remaining characters. 349 beq(CCR0, Lskiploop); 350 mtctr(limit); 351 352 // Character by character. 353 bind(Lloop); 354 if (is_byte) { 355 lbz(chr1, 0, ary1); 356 lbz(chr2, 0, ary2); 357 addi(ary1, ary1, 1); 358 addi(ary2, ary2, 1); 359 } else { 360 lhz(chr1, 0, ary1); 361 lhz(chr2, 0, ary2); 362 addi(ary1, ary1, 2); 363 addi(ary2, ary2, 2); 364 } 365 cmpw(CCR0, chr1, chr2); 366 bne(CCR0, Ldone); 367 bdnz(Lloop); 368 369 bind(Lskiploop); 370 li(result, 1); // All characters are equal. 371 bind(Ldone); 372 } 373 374 void C2_MacroAssembler::string_indexof(Register result, Register haystack, Register haycnt, 375 Register needle, ciTypeArray* needle_values, Register needlecnt, int needlecntval, 376 Register tmp1, Register tmp2, Register tmp3, Register tmp4, int ae) { 377 378 // Ensure 0<needlecnt<=haycnt in ideal graph as prerequisite! 379 Label L_TooShort, L_Found, L_NotFound, L_End; 380 Register last_addr = haycnt, // Kill haycnt at the beginning. 381 addr = tmp1, 382 n_start = tmp2, 383 ch1 = tmp3, 384 ch2 = R0; 385 386 assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); 387 const int h_csize = (ae == StrIntrinsicNode::LL) ? 1 : 2; 388 const int n_csize = (ae == StrIntrinsicNode::UU) ? 2 : 1; 389 390 // ************************************************************************************************** 391 // Prepare for main loop: optimized for needle count >=2, bail out otherwise. 392 // ************************************************************************************************** 393 394 // Compute last haystack addr to use if no match gets found. 395 clrldi(haycnt, haycnt, 32); // Ensure positive int is valid as 64 bit value. 396 addi(addr, haystack, -h_csize); // Accesses use pre-increment. 397 if (needlecntval == 0) { // variable needlecnt 398 cmpwi(CCR6, needlecnt, 2); 399 clrldi(needlecnt, needlecnt, 32); // Ensure positive int is valid as 64 bit value. 400 blt(CCR6, L_TooShort); // Variable needlecnt: handle short needle separately. 401 } 402 403 if (n_csize == 2) { lwz(n_start, 0, needle); } else { lhz(n_start, 0, needle); } // Load first 2 characters of needle. 404 405 if (needlecntval == 0) { // variable needlecnt 406 subf(ch1, needlecnt, haycnt); // Last character index to compare is haycnt-needlecnt. 407 addi(needlecnt, needlecnt, -2); // Rest of needle. 408 } else { // constant needlecnt 409 guarantee(needlecntval != 1, "IndexOf with single-character needle must be handled separately"); 410 assert((needlecntval & 0x7fff) == needlecntval, "wrong immediate"); 411 addi(ch1, haycnt, -needlecntval); // Last character index to compare is haycnt-needlecnt. 412 if (needlecntval > 3) { li(needlecnt, needlecntval - 2); } // Rest of needle. 413 } 414 415 if (h_csize == 2) { slwi(ch1, ch1, 1); } // Scale to number of bytes. 416 417 if (ae ==StrIntrinsicNode::UL) { 418 srwi(tmp4, n_start, 1*8); // ___0 419 rlwimi(n_start, tmp4, 2*8, 0, 23); // _0_1 420 } 421 422 add(last_addr, haystack, ch1); // Point to last address to compare (haystack+2*(haycnt-needlecnt)). 423 424 // Main Loop (now we have at least 2 characters). 425 Label L_OuterLoop, L_InnerLoop, L_FinalCheck, L_Comp1, L_Comp2; 426 bind(L_OuterLoop); // Search for 1st 2 characters. 427 Register addr_diff = tmp4; 428 subf(addr_diff, addr, last_addr); // Difference between already checked address and last address to check. 429 addi(addr, addr, h_csize); // This is the new address we want to use for comparing. 430 srdi_(ch2, addr_diff, h_csize); 431 beq(CCR0, L_FinalCheck); // 2 characters left? 432 mtctr(ch2); // num of characters / 2 433 bind(L_InnerLoop); // Main work horse (2x unrolled search loop) 434 if (h_csize == 2) { // Load 2 characters of haystack (ignore alignment). 435 lwz(ch1, 0, addr); 436 lwz(ch2, 2, addr); 437 } else { 438 lhz(ch1, 0, addr); 439 lhz(ch2, 1, addr); 440 } 441 cmpw(CCR0, ch1, n_start); // Compare 2 characters (1 would be sufficient but try to reduce branches to CompLoop). 442 cmpw(CCR1, ch2, n_start); 443 beq(CCR0, L_Comp1); // Did we find the needle start? 444 beq(CCR1, L_Comp2); 445 addi(addr, addr, 2 * h_csize); 446 bdnz(L_InnerLoop); 447 bind(L_FinalCheck); 448 andi_(addr_diff, addr_diff, h_csize); // Remaining characters not covered by InnerLoop: (num of characters) & 1. 449 beq(CCR0, L_NotFound); 450 if (h_csize == 2) { lwz(ch1, 0, addr); } else { lhz(ch1, 0, addr); } // One position left at which we have to compare. 451 cmpw(CCR1, ch1, n_start); 452 beq(CCR1, L_Comp1); 453 bind(L_NotFound); 454 li(result, -1); // not found 455 b(L_End); 456 457 // ************************************************************************************************** 458 // Special Case: unfortunately, the variable needle case can be called with needlecnt<2 459 // ************************************************************************************************** 460 if (needlecntval == 0) { // We have to handle these cases separately. 461 Label L_OneCharLoop; 462 bind(L_TooShort); 463 mtctr(haycnt); 464 if (n_csize == 2) { lhz(n_start, 0, needle); } else { lbz(n_start, 0, needle); } // First character of needle 465 bind(L_OneCharLoop); 466 if (h_csize == 2) { lhzu(ch1, 2, addr); } else { lbzu(ch1, 1, addr); } 467 cmpw(CCR1, ch1, n_start); 468 beq(CCR1, L_Found); // Did we find the one character needle? 469 bdnz(L_OneCharLoop); 470 li(result, -1); // Not found. 471 b(L_End); 472 } 473 474 // ************************************************************************************************** 475 // Regular Case Part II: compare rest of needle (first 2 characters have been compared already) 476 // ************************************************************************************************** 477 478 // Compare the rest 479 bind(L_Comp2); 480 addi(addr, addr, h_csize); // First comparison has failed, 2nd one hit. 481 bind(L_Comp1); // Addr points to possible needle start. 482 if (needlecntval != 2) { // Const needlecnt==2? 483 if (needlecntval != 3) { 484 if (needlecntval == 0) { beq(CCR6, L_Found); } // Variable needlecnt==2? 485 Register n_ind = tmp4, 486 h_ind = n_ind; 487 li(n_ind, 2 * n_csize); // First 2 characters are already compared, use index 2. 488 mtctr(needlecnt); // Decremented by 2, still > 0. 489 Label L_CompLoop; 490 bind(L_CompLoop); 491 if (ae ==StrIntrinsicNode::UL) { 492 h_ind = ch1; 493 sldi(h_ind, n_ind, 1); 494 } 495 if (n_csize == 2) { lhzx(ch2, needle, n_ind); } else { lbzx(ch2, needle, n_ind); } 496 if (h_csize == 2) { lhzx(ch1, addr, h_ind); } else { lbzx(ch1, addr, h_ind); } 497 cmpw(CCR1, ch1, ch2); 498 bne(CCR1, L_OuterLoop); 499 addi(n_ind, n_ind, n_csize); 500 bdnz(L_CompLoop); 501 } else { // No loop required if there's only one needle character left. 502 if (n_csize == 2) { lhz(ch2, 2 * 2, needle); } else { lbz(ch2, 2 * 1, needle); } 503 if (h_csize == 2) { lhz(ch1, 2 * 2, addr); } else { lbz(ch1, 2 * 1, addr); } 504 cmpw(CCR1, ch1, ch2); 505 bne(CCR1, L_OuterLoop); 506 } 507 } 508 // Return index ... 509 bind(L_Found); 510 subf(result, haystack, addr); // relative to haystack, ... 511 if (h_csize == 2) { srdi(result, result, 1); } // in characters. 512 bind(L_End); 513 } // string_indexof 514 515 void C2_MacroAssembler::string_indexof_char(Register result, Register haystack, Register haycnt, 516 Register needle, jchar needleChar, Register tmp1, Register tmp2, bool is_byte) { 517 assert_different_registers(haystack, haycnt, needle, tmp1, tmp2); 518 519 Label L_InnerLoop, L_FinalCheck, L_Found1, L_Found2, L_NotFound, L_End; 520 Register addr = tmp1, 521 ch1 = tmp2, 522 ch2 = R0; 523 524 const int h_csize = is_byte ? 1 : 2; 525 526 //4: 527 srwi_(tmp2, haycnt, 1); // Shift right by exact_log2(UNROLL_FACTOR). 528 mr(addr, haystack); 529 beq(CCR0, L_FinalCheck); 530 mtctr(tmp2); // Move to count register. 531 //8: 532 bind(L_InnerLoop); // Main work horse (2x unrolled search loop). 533 if (!is_byte) { 534 lhz(ch1, 0, addr); 535 lhz(ch2, 2, addr); 536 } else { 537 lbz(ch1, 0, addr); 538 lbz(ch2, 1, addr); 539 } 540 (needle != R0) ? cmpw(CCR0, ch1, needle) : cmplwi(CCR0, ch1, (unsigned int)needleChar); 541 (needle != R0) ? cmpw(CCR1, ch2, needle) : cmplwi(CCR1, ch2, (unsigned int)needleChar); 542 beq(CCR0, L_Found1); // Did we find the needle? 543 beq(CCR1, L_Found2); 544 addi(addr, addr, 2 * h_csize); 545 bdnz(L_InnerLoop); 546 //16: 547 bind(L_FinalCheck); 548 andi_(R0, haycnt, 1); 549 beq(CCR0, L_NotFound); 550 if (!is_byte) { lhz(ch1, 0, addr); } else { lbz(ch1, 0, addr); } // One position left at which we have to compare. 551 (needle != R0) ? cmpw(CCR1, ch1, needle) : cmplwi(CCR1, ch1, (unsigned int)needleChar); 552 beq(CCR1, L_Found1); 553 //21: 554 bind(L_NotFound); 555 li(result, -1); // Not found. 556 b(L_End); 557 558 bind(L_Found2); 559 addi(addr, addr, h_csize); 560 //24: 561 bind(L_Found1); // Return index ... 562 subf(result, haystack, addr); // relative to haystack, ... 563 if (!is_byte) { srdi(result, result, 1); } // in characters. 564 bind(L_End); 565 } // string_indexof_char 566 567 568 void C2_MacroAssembler::count_positives(Register src, Register cnt, Register result, 569 Register tmp1, Register tmp2) { 570 const Register tmp0 = R0; 571 assert_different_registers(src, result, cnt, tmp0, tmp1, tmp2); 572 Label Lfastloop, Lslow, Lloop, Ldone; 573 574 // Check if cnt >= 8 (= 16 bytes) 575 lis(tmp1, (int)(short)0x8080); // tmp1 = 0x8080808080808080 576 srwi_(tmp2, cnt, 4); 577 mr(result, src); // Use result reg to point to the current position. 578 beq(CCR0, Lslow); 579 ori(tmp1, tmp1, 0x8080); 580 rldimi(tmp1, tmp1, 32, 0); 581 mtctr(tmp2); 582 583 // 2x unrolled loop 584 bind(Lfastloop); 585 ld(tmp2, 0, result); 586 ld(tmp0, 8, result); 587 588 orr(tmp0, tmp2, tmp0); 589 590 and_(tmp0, tmp0, tmp1); 591 bne(CCR0, Lslow); // Found negative byte. 592 addi(result, result, 16); 593 bdnz(Lfastloop); 594 595 bind(Lslow); // Fallback to slow version. 596 subf(tmp0, src, result); // Bytes known positive. 597 subf_(tmp0, tmp0, cnt); // Remaining Bytes. 598 beq(CCR0, Ldone); 599 mtctr(tmp0); 600 bind(Lloop); 601 lbz(tmp0, 0, result); 602 andi_(tmp0, tmp0, 0x80); 603 bne(CCR0, Ldone); // Found negative byte. 604 addi(result, result, 1); 605 bdnz(Lloop); 606 607 bind(Ldone); 608 subf(result, src, result); // Result is offset from src. 609 }