/*
 * Copyright (c) 2020, 2023, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "opto/c2_MacroAssembler.hpp"
#include "opto/intrinsicnode.hpp"
#include "runtime/vm_version.hpp"

// BLOCK_COMMENT expands to nothing in PRODUCT builds and to block_comment(str)
// otherwise. BIND binds a label and, via BLOCK_COMMENT, annotates it with its name.
#ifdef PRODUCT
#define BLOCK_COMMENT(str) // nothing
#else
#define BLOCK_COMMENT(str) block_comment(str)
#endif
#define BIND(label)        bind(label); BLOCK_COMMENT(#label ":")


// Lightweight-locking fast path for C2: forwards to
// compiler_fast_lock_lightweight_object(). The 'box' register is part of the
// signature but is not used by this implementation.
void C2_MacroAssembler::fast_lock_lightweight(ConditionRegister flag, Register obj, Register box,
                                              Register tmp1, Register tmp2, Register tmp3) {
  compiler_fast_lock_lightweight_object(flag, obj, tmp1, tmp2, tmp3);
}

// Lightweight-unlocking fast path for C2: forwards to
// compiler_fast_unlock_lightweight_object(). The 'box' register is part of the
// signature but is not used by this implementation.
void C2_MacroAssembler::fast_unlock_lightweight(ConditionRegister flag, Register obj, Register box,
                                                Register tmp1, Register tmp2, Register tmp3) {
  compiler_fast_unlock_lightweight_object(flag, obj, tmp1, tmp2, tmp3);
}

// Intrinsics for CompactStrings

// Compress char[] to byte[] by compressing 16 bytes at once.
//
// Emits a loop running cnt >> 3 times; each iteration reads 8 chars (16 bytes)
// from src and writes 8 bytes to dst. If cnt < 8 no loop iteration is executed
// and src/dst are left untouched. The cnt & 7 tail is NOT handled here.
//
// src, dst  - advanced past the characters that were compressed.
// cnt       - number of chars; only read.
// tmp1-tmp5 - killed (tmp1 and tmp2 are written even when cnt < 8). R0 is killed, too.
// Lfailure  - branch target taken when a group of 8 chars contains a char with
//             bits set outside byte_mask. At that point src/dst still address
//             the failing group and CTR holds the number of groups not yet
//             completed (including the failing one).
// ascii     - selects byte_mask 0x7F instead of 0xFF.
void C2_MacroAssembler::string_compress_16(Register src, Register dst, Register cnt,
                                           Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5,
                                           Label& Lfailure, bool ascii) {

  const Register tmp0 = R0;
  const int byte_mask = ascii ? 0x7F : 0xFF;
  assert_different_registers(src, dst, cnt, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5);
  Label Lloop, Lslow;

  // Check if cnt >= 8 (= 16 bytes)
  // The mask is built in three steps (lis, ori, rldimi) which are interleaved
  // with the count check.
  lis(tmp1, byte_mask);           // tmp1 = 0x00FF00FF00FF00FF (non ascii case), once ori/rldimi below are done
  srwi_(tmp2, cnt, 3);            // Number of 8-char groups; sets CCR0.
  beq(CCR0, Lslow);
  ori(tmp1, tmp1, byte_mask);
  rldimi(tmp1, tmp1, 32, 0);      // Replicate the low word into the high word.
  mtctr(tmp2);

  // 2x unrolled loop
  bind(Lloop);
  ld(tmp2, 0, src);               // _0_1_2_3 (Big Endian)
  ld(tmp4, 8, src);               // _4_5_6_7

  orr(tmp0, tmp2, tmp4);          // Combine both doublewords so one test covers all 8 chars.
  rldicl(tmp3, tmp2, 6*8, 64-24); // _____1_2
  rldimi(tmp2, tmp2, 2*8, 2*8);   // _0_2_3_3
  rldicl(tmp5, tmp4, 6*8, 64-24); // _____5_6
  rldimi(tmp4, tmp4, 2*8, 2*8);   // _4_6_7_7

  andc_(tmp0, tmp0, tmp1);        // Any bit outside the mask set?
  bne(CCR0, Lfailure);            // Not latin1/ascii.
  addi(src, src, 16);             // Only advanced after the check passed.

  rlwimi(tmp3, tmp2, 0*8, 24, 31);// _____1_3
  srdi(tmp2, tmp2, 3*8);          // ____0_2_
  rlwimi(tmp5, tmp4, 0*8, 24, 31);// _____5_7
  srdi(tmp4, tmp4, 3*8);          // ____4_6_

  orr(tmp2, tmp2, tmp3);          // ____0123
  orr(tmp4, tmp4, tmp5);          // ____4567

  stw(tmp2, 0, dst);
  stw(tmp4, 4, dst);
  addi(dst, dst, 8);
  bdnz(Lloop);

  bind(Lslow);                    // Fallback to slow version
}

// Compress char[] to byte[]. cnt must be positive int.
//
// Emits a loop running cnt times (via CTR), one char per iteration.
//
// src, dst - advanced by 2 resp. 1 per compressed char.
// tmp      - killed.
// Lfailure - branch target taken when a char is (unsigned) greater than
//            byte_mask. src/dst then still address the failing char and CTR
//            holds the number of chars not yet stored (including the failing one).
// ascii    - selects byte_mask 0x7F instead of 0xFF.
void C2_MacroAssembler::string_compress(Register src, Register dst, Register cnt, Register tmp,
                                        Label& Lfailure, bool ascii) {
  const int byte_mask = ascii ? 0x7F : 0xFF;
  Label Lloop;
  mtctr(cnt);

  bind(Lloop);
  lhz(tmp, 0, src);
  cmplwi(CCR0, tmp, byte_mask);
  bgt(CCR0, Lfailure);            // Not latin1/ascii.
  addi(src, src, 2);
  stb(tmp, 0, dst);
  addi(dst, dst, 1);
  bdnz(Lloop);
}

// Encode char[] to byte[] (ISO-8859-1, or ASCII if 'ascii' is set) and compute
// in 'result' the number of chars that were encoded before the first char which
// does not fit (result == len if all chars fit).
//
// Strategy: run the 8-chars-at-once loop first, then the char-by-char loop for
// the rest. If the fast loop hits a failing group, the remaining chars
// (including that group) are rescanned char by char to find the exact position.
//
// Internally 'result' first holds the number of REMAINING chars; the final
// subf at Ldone converts it into len - remaining.
//
// src, dst  - advanced past the encoded characters.
// tmp1-tmp5 - killed. R0 is killed by string_compress_16.
void C2_MacroAssembler::encode_iso_array(Register src, Register dst, Register len,
                                         Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5,
                                         Register result, bool ascii) {
  Label Lslow, Lfailure1, Lfailure2, Ldone;

  string_compress_16(src, dst, len, tmp1, tmp2, tmp3, tmp4, tmp5, Lfailure1, ascii);
  rldicl_(result, len, 0, 64-3);  // Remaining characters.
  beq(CCR0, Ldone);               // No tail: result == 0 remaining.
  bind(Lslow);
  string_compress(src, dst, result, tmp2, Lfailure2, ascii);
  li(result, 0);                  // Everything encoded, nothing remaining.
  b(Ldone);

  // Fast loop failed: remaining = CTR * 8 + (len & 7).
  bind(Lfailure1);
  mr(result, len);
  mfctr(tmp1);
  rldimi_(result, tmp1, 3, 0);    // Remaining characters.
  beq(CCR0, Ldone);
  b(Lslow);                       // Rescan char by char to locate the failing char.

  // Slow loop failed: CTR is the number of chars not encoded.
  bind(Lfailure2);
  mfctr(result);                  // Remaining characters.

  bind(Ldone);
  subf(result, result, len);      // result = len - remaining.
}

// Inflate byte[] to char[] by inflating 16 bytes at once.
//
// Emits a loop running cnt >> 3 times; each iteration reads 8 bytes from src
// and writes 8 chars (16 bytes) to dst. If cnt < 8 no loop iteration is
// executed. The cnt & 7 tail is NOT handled here.
//
// src, dst  - advanced past the characters that were inflated.
// cnt       - number of bytes; only read.
// tmp1-tmp5 - killed (tmp2 is written even when cnt < 8). R0 is killed, too.
void C2_MacroAssembler::string_inflate_16(Register src, Register dst, Register cnt,
                                          Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5) {
  const Register tmp0 = R0;
  assert_different_registers(src, dst, cnt, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5);
  Label Lloop, Lslow;

  // Check if cnt >= 8
  srwi_(tmp2, cnt, 3);
  beq(CCR0, Lslow);
  lis(tmp1, 0xFF);                // tmp1 = 0x00FF00FF
  ori(tmp1, tmp1, 0xFF);
  mtctr(tmp2);

  // 2x unrolled loop
  bind(Lloop);
  lwz(tmp2, 0, src);              // ____0123 (Big Endian)
  lwz(tmp4, 4, src);              // ____4567
  addi(src, src, 8);

  rldicl(tmp3, tmp2, 7*8, 64-8);  // _______2
  rlwimi(tmp2, tmp2, 3*8, 16, 23);// ____0113
  rldicl(tmp5, tmp4, 7*8, 64-8);  // _______6
  rlwimi(tmp4, tmp4, 3*8, 16, 23);// ____4557

  andc(tmp0, tmp2, tmp1);         // ____0_1_
  rlwimi(tmp2, tmp3, 2*8, 0, 23); // _____2_3
  andc(tmp3, tmp4, tmp1);         // ____4_5_
  rlwimi(tmp4, tmp5, 2*8, 0, 23); // _____6_7

  rldimi(tmp2, tmp0, 3*8, 0*8);   // _0_1_2_3
  rldimi(tmp4, tmp3, 3*8, 0*8);   // _4_5_6_7

  std(tmp2, 0, dst);
  std(tmp4, 8, dst);
  addi(dst, dst, 16);
  bdnz(Lloop);

  bind(Lslow);                    // Fallback to slow version
}

// Inflate byte[] to char[]. cnt must be positive int.
//
// Emits a loop running cnt times (via CTR): load one byte zero-extended, store
// it as a halfword. src is advanced by 1 and dst by 2 per element; tmp is killed.
void C2_MacroAssembler::string_inflate(Register src, Register dst, Register cnt, Register tmp) {
  Label Lloop;
  mtctr(cnt);

  bind(Lloop);
  lbz(tmp, 0, src);
  addi(src, src, 1);
  sth(tmp, 0, dst);
  addi(dst, dst, 2);
  bdnz(Lloop);
}

// Compare two strings and produce in 'result' either the difference of the
// first mismatching characters (chr1 - chr2) or, if the strings are equal up
// to the shorter length, the length difference (cnt1 - cnt2).
//
// str1, str2 - addresses of the character data; advanced (killed).
// cnt1, cnt2 - lengths; killed. cnt1 is halved for UU and cnt2 is halved for
//              everything but LL (presumably converting byte counts into char
//              counts -- confirm against the caller).
// tmp1       - killed; holds the length difference. R0 is killed, too.
// ae         - argument encoding, one of StrIntrinsicNode::LL/LU/UL/UU.
void C2_MacroAssembler::string_compare(Register str1, Register str2,
                                       Register cnt1, Register cnt2,
                                       Register tmp1, Register result, int ae) {
  const Register tmp0 = R0,
                 diff = tmp1;

  assert_different_registers(str1, str2, cnt1, cnt2, tmp0, tmp1, result);
  Label Ldone, Lslow, Lloop, Lreturn_diff;

  // Note: Making use of the fact that compareTo(a, b) == -compareTo(b, a)
  // we interchange str1 and str2 in the UL case and negate the result.
  // Like this, str1 is always latin1 encoded, except for the UU case.
  // In addition, we need 0 (or sign which is 0) extend.
  // NOTE(review): only the negation is visible in this function; the operand
  // interchange for UL is presumably done by the caller -- confirm.

  if (ae == StrIntrinsicNode::UU) {
    srwi(cnt1, cnt1, 1);
  } else {
    clrldi(cnt1, cnt1, 32);
  }

  if (ae != StrIntrinsicNode::LL) {
    srwi(cnt2, cnt2, 1);
  } else {
    clrldi(cnt2, cnt2, 32);
  }

  // See if the lengths are different, and calculate min in cnt1.
  // Save diff in case we need it for a tie-breaker.
  subf_(diff, cnt2, cnt1); // diff = cnt1 - cnt2
  // if (diff > 0) { cnt1 = cnt2; }
  if (VM_Version::has_isel()) {
    isel(cnt1, CCR0, Assembler::greater, /*invert*/ false, cnt2);
  } else {
    // Branching variant: also copies when diff == 0, which is harmless since
    // cnt1 == cnt2 in that case.
    Label Lskip;
    blt(CCR0, Lskip);
    mr(cnt1, cnt2);
    bind(Lskip);
  }

  // Rename registers
  Register chr1 = result;
  Register chr2 = tmp0;

  // Compare multiple characters in fast loop (only implemented for same encoding).
  // stride1/stride2 are the per-iteration address increments; 8 bytes in the
  // fast loop, re-assigned per encoding for the character loop below.
  int stride1 = 8, stride2 = 8;
  if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
    int log2_chars_per_iter = (ae == StrIntrinsicNode::LL) ? 3 : 2;
    Label Lfastloop, Lskipfast;

    srwi_(tmp0, cnt1, log2_chars_per_iter);
    beq(CCR0, Lskipfast);
    rldicl(cnt2, cnt1, 0, 64 - log2_chars_per_iter); // Remaining characters.
    li(cnt1, 1 << log2_chars_per_iter); // Initialize for failure case: Rescan characters from current iteration.
    mtctr(tmp0);

    bind(Lfastloop);
    ld(chr1, 0, str1);
    ld(chr2, 0, str2);
    cmpd(CCR0, chr1, chr2);
    bne(CCR0, Lslow);             // str1/str2 not advanced yet: slow loop rescans this 8-byte chunk.
    addi(str1, str1, stride1);
    addi(str2, str2, stride2);
    bdnz(Lfastloop);
    mr(cnt1, cnt2); // Remaining characters.
    bind(Lskipfast);
  }

  // Loop which searches the first difference character by character.
  cmpwi(CCR0, cnt1, 0);
  beq(CCR0, Lreturn_diff);
  bind(Lslow);
  mtctr(cnt1);

  switch (ae) {
    case StrIntrinsicNode::LL: stride1 = 1; stride2 = 1; break;
    case StrIntrinsicNode::UL: // fallthru (see comment above)
    case StrIntrinsicNode::LU: stride1 = 1; stride2 = 2; break;
    case StrIntrinsicNode::UU: stride1 = 2; stride2 = 2; break;
    default: ShouldNotReachHere(); break;
  }

  bind(Lloop);
  if (stride1 == 1) { lbz(chr1, 0, str1); } else { lhz(chr1, 0, str1); }
  if (stride2 == 1) { lbz(chr2, 0, str2); } else { lhz(chr2, 0, str2); }
  subf_(result, chr2, chr1); // result = chr1 - chr2
  bne(CCR0, Ldone);
  addi(str1, str1, stride1);
  addi(str2, str2, stride2);
  bdnz(Lloop);

  // If strings are equal up to min length, return the length difference.
  bind(Lreturn_diff);
  mr(result, diff);

  // Otherwise, return the difference between the first mismatched chars.
  bind(Ldone);
  if (ae == StrIntrinsicNode::UL) {
    neg(result, result); // Negate result (see note above).
  }
}

// Compare two arrays (or two raw memory ranges) for equality.
// result is set to 1 if they are equal and to 0 otherwise.
//
// is_array_equ - true:  ary1/ary2 are array oops. Identity and null are checked,
//                       the lengths are loaded from the array headers (into
//                       'limit' and R0) and ary1/ary2 are advanced to the first
//                       element.
//                false: ary1/ary2 already address the data and 'limit' holds
//                       the length. For !is_byte the limit is halved before use
//                       (presumably it is passed in bytes -- confirm against
//                       the caller).
// ary1, ary2   - killed (advanced).
// limit, tmp1  - killed. R0 is killed, too.
// is_byte      - element size: true = 1 byte, false = 2 bytes.
void C2_MacroAssembler::array_equals(bool is_array_equ, Register ary1, Register ary2,
                                     Register limit, Register tmp1, Register result, bool is_byte) {
  const Register tmp0 = R0;
  assert_different_registers(ary1, ary2, limit, tmp0, tmp1, result);
  Label Ldone, Lskiploop, Lloop, Lfastloop, Lskipfast;
  bool limit_needs_shift = false;

  if (is_array_equ) {
    const int length_offset = arrayOopDesc::length_offset_in_bytes();
    const int base_offset   = arrayOopDesc::base_offset_in_bytes(is_byte ? T_BYTE : T_CHAR);

    // Return true if the same array.
    cmpd(CCR0, ary1, ary2);
    beq(CCR0, Lskiploop);

    // Return false if one of them is null.
    cmpdi(CCR0, ary1, 0);
    cmpdi(CCR1, ary2, 0);
    li(result, 0);
    cror(CCR0, Assembler::equal, CCR1, Assembler::equal);
    beq(CCR0, Ldone);

    // Load the lengths of arrays.
    lwz(limit, length_offset, ary1);
    lwz(tmp0, length_offset, ary2);

    // Return false if the two arrays are not equal length.
    cmpw(CCR0, limit, tmp0);
    bne(CCR0, Ldone);

    // Load array addresses.
    addi(ary1, ary1, base_offset);
    addi(ary2, ary2, base_offset);
  } else {
    limit_needs_shift = !is_byte;
    li(result, 0); // Assume not equal.
  }

  // Rename registers
  Register chr1 = tmp0;
  Register chr2 = tmp1;

  // Compare 8 bytes per iteration in fast loop.
  const int log2_chars_per_iter = is_byte ? 3 : 2;

  // Number of 8-byte iterations; the extra shift folds in the halving of limit.
  srwi_(tmp0, limit, log2_chars_per_iter + (limit_needs_shift ? 1 : 0));
  beq(CCR0, Lskipfast);
  mtctr(tmp0);

  bind(Lfastloop);
  ld(chr1, 0, ary1);
  ld(chr2, 0, ary2);
  addi(ary1, ary1, 8);
  addi(ary2, ary2, 8);
  cmpd(CCR0, chr1, chr2);
  bne(CCR0, Ldone);               // result is still 0 here.
  bdnz(Lfastloop);

  bind(Lskipfast);
  // Rotating left by 63 is a rotate right by 1, i.e. the halving of limit.
  rldicl_(limit, limit, limit_needs_shift ? 64 - 1 : 0, 64 - log2_chars_per_iter); // Remaining characters.
  beq(CCR0, Lskiploop);
  mtctr(limit);

  // Character by character.
  bind(Lloop);
  if (is_byte) {
    lbz(chr1, 0, ary1);
    lbz(chr2, 0, ary2);
    addi(ary1, ary1, 1);
    addi(ary2, ary2, 1);
  } else {
    lhz(chr1, 0, ary1);
    lhz(chr2, 0, ary2);
    addi(ary1, ary1, 2);
    addi(ary2, ary2, 2);
  }
  cmpw(CCR0, chr1, chr2);
  bne(CCR0, Ldone);
  bdnz(Lloop);

  bind(Lskiploop);
  li(result, 1); // All characters are equal.
  bind(Ldone);
}

// Search 'needle' in 'haystack' and return in 'result' the index (in
// characters) of the first occurrence, or -1 if there is none.
//
// haystack, haycnt - haystack address and length; haycnt is killed (reused as
//                    last_addr).
// needle           - needle address.
// needle_values    - not used in this function.
// needlecnt        - needle length register; read for the variable case, and
//                    killed whenever it is used as loop count for the rest of
//                    the needle.
// needlecntval     - 0: the needle length is variable and taken from needlecnt;
//                    otherwise: constant needle length (must not be 1).
// tmp1-tmp4        - killed. R0 is killed, too. CCR0, CCR1 and CCR6 are used.
// ae               - argument encoding (LL, UL or UU; LU is rejected by the
//                    assert). The haystack uses 1-byte chars for LL only, the
//                    needle uses 2-byte chars for UU only.
void C2_MacroAssembler::string_indexof(Register result, Register haystack, Register haycnt,
                                       Register needle, ciTypeArray* needle_values, Register needlecnt, int needlecntval,
                                       Register tmp1, Register tmp2, Register tmp3, Register tmp4, int ae) {

  // Ensure 0<needlecnt<=haycnt in ideal graph as prerequisite!
  Label L_TooShort, L_Found, L_NotFound, L_End;
  Register last_addr = haycnt, // Kill haycnt at the beginning.
  addr      = tmp1,
  n_start   = tmp2,
  ch1       = tmp3,
  ch2       = R0;

  assert(ae != StrIntrinsicNode::LU, "Invalid encoding");
  const int h_csize = (ae == StrIntrinsicNode::LL) ? 1 : 2;
  const int n_csize = (ae == StrIntrinsicNode::UU) ? 2 : 1;

  // **************************************************************************************************
  // Prepare for main loop: optimized for needle count >=2, bail out otherwise.
  // **************************************************************************************************

  // Compute last haystack addr to use if no match gets found.
  clrldi(haycnt, haycnt, 32);         // Ensure positive int is valid as 64 bit value.
  addi(addr, haystack, -h_csize);     // Accesses use pre-increment.
  if (needlecntval == 0) { // variable needlecnt
    cmpwi(CCR6, needlecnt, 2);        // CCR6 is tested again at L_Comp1 (needlecnt == 2?).
    clrldi(needlecnt, needlecnt, 32); // Ensure positive int is valid as 64 bit value.
    blt(CCR6, L_TooShort);            // Variable needlecnt: handle short needle separately.
  }

  if (n_csize == 2) { lwz(n_start, 0, needle); } else { lhz(n_start, 0, needle); } // Load first 2 characters of needle.

  if (needlecntval == 0) { // variable needlecnt
    subf(ch1, needlecnt, haycnt);     // Last character index to compare is haycnt-needlecnt.
    addi(needlecnt, needlecnt, -2);   // Rest of needle.
  } else { // constant needlecnt
    guarantee(needlecntval != 1, "IndexOf with single-character needle must be handled separately");
    assert((needlecntval & 0x7fff) == needlecntval, "wrong immediate");
    addi(ch1, haycnt, -needlecntval); // Last character index to compare is haycnt-needlecnt.
    if (needlecntval > 3) { li(needlecnt, needlecntval - 2); } // Rest of needle.
  }

  if (h_csize == 2) { slwi(ch1, ch1, 1); } // Scale to number of bytes.

  // UL: widen the two 1-byte needle chars to two 2-byte chars so that they can
  // be compared against 2 haystack chars at once.
  if (ae ==StrIntrinsicNode::UL) {
    srwi(tmp4, n_start, 1*8);          // ___0
    rlwimi(n_start, tmp4, 2*8, 0, 23); // _0_1
  }

  add(last_addr, haystack, ch1);      // Point to last address to compare (haystack+2*(haycnt-needlecnt)).

  // Main Loop (now we have at least 2 characters).
  Label L_OuterLoop, L_InnerLoop, L_FinalCheck, L_Comp1, L_Comp2;
  bind(L_OuterLoop); // Search for 1st 2 characters.
  Register addr_diff = tmp4;
  subf(addr_diff, addr, last_addr);   // Difference between already checked address and last address to check.
  addi(addr, addr, h_csize);          // This is the new address we want to use for comparing.
  srdi_(ch2, addr_diff, h_csize);     // addr_diff is in bytes: shifting by h_csize yields (num of characters) / 2.
  beq(CCR0, L_FinalCheck);            // 2 characters left?
  mtctr(ch2);                         // num of characters / 2
  bind(L_InnerLoop);                  // Main work horse (2x unrolled search loop)
  if (h_csize == 2) {                 // Load 2 characters of haystack (ignore alignment).
    lwz(ch1, 0, addr);
    lwz(ch2, 2, addr);
  } else {
    lhz(ch1, 0, addr);
    lhz(ch2, 1, addr);
  }
  cmpw(CCR0, ch1, n_start);           // Compare 2 characters (1 would be sufficient but try to reduce branches to CompLoop).
  cmpw(CCR1, ch2, n_start);
  beq(CCR0, L_Comp1);                 // Did we find the needle start?
  beq(CCR1, L_Comp2);
  addi(addr, addr, 2 * h_csize);
  bdnz(L_InnerLoop);
  bind(L_FinalCheck);
  andi_(addr_diff, addr_diff, h_csize); // Remaining characters not covered by InnerLoop: (num of characters) & 1.
  beq(CCR0, L_NotFound);
  if (h_csize == 2) { lwz(ch1, 0, addr); } else { lhz(ch1, 0, addr); } // One position left at which we have to compare.
  cmpw(CCR1, ch1, n_start);
  beq(CCR1, L_Comp1);
  bind(L_NotFound);
  li(result, -1);                     // not found
  b(L_End);

  // **************************************************************************************************
  // Special Case: unfortunately, the variable needle case can be called with needlecnt<2
  // **************************************************************************************************
  if (needlecntval == 0) {           // We have to handle these cases separately.
    Label L_OneCharLoop;
    bind(L_TooShort);
    mtctr(haycnt);                    // haycnt still holds the count on this path (last_addr is computed later).
    if (n_csize == 2) { lhz(n_start, 0, needle); } else { lbz(n_start, 0, needle); } // First character of needle
    bind(L_OneCharLoop);
    if (h_csize == 2) { lhzu(ch1, 2, addr); } else { lbzu(ch1, 1, addr); } // Load with update: addr is advanced first.
    cmpw(CCR1, ch1, n_start);
    beq(CCR1, L_Found);               // Did we find the one character needle?
    bdnz(L_OneCharLoop);
    li(result, -1);                   // Not found.
    b(L_End);
  }

  // **************************************************************************************************
  // Regular Case Part II: compare rest of needle (first 2 characters have been compared already)
  // **************************************************************************************************

  // Compare the rest
  bind(L_Comp2);
  addi(addr, addr, h_csize);          // First comparison has failed, 2nd one hit.
  bind(L_Comp1);                      // Addr points to possible needle start.
  if (needlecntval != 2) {            // Const needlecnt==2?
    if (needlecntval != 3) {
      if (needlecntval == 0) { beq(CCR6, L_Found); } // Variable needlecnt==2?
      Register n_ind = tmp4,
               h_ind = n_ind;
      li(n_ind, 2 * n_csize);         // First 2 characters are already compared, use index 2.
      mtctr(needlecnt);               // Decremented by 2, still > 0.
      Label L_CompLoop;
      bind(L_CompLoop);
      if (ae ==StrIntrinsicNode::UL) {
        // Needle has 1-byte chars, haystack 2-byte chars: the haystack byte
        // index is twice the needle byte index. ch1 is free to hold it since
        // it gets reloaded below.
        h_ind = ch1;
        sldi(h_ind, n_ind, 1);
      }
      if (n_csize == 2) { lhzx(ch2, needle, n_ind); } else { lbzx(ch2, needle, n_ind); }
      if (h_csize == 2) { lhzx(ch1, addr, h_ind); } else { lbzx(ch1, addr, h_ind); }
      cmpw(CCR1, ch1, ch2);
      bne(CCR1, L_OuterLoop);         // Mismatch: continue the search behind addr.
      addi(n_ind, n_ind, n_csize);
      bdnz(L_CompLoop);
    } else { // No loop required if there's only one needle character left.
      if (n_csize == 2) { lhz(ch2, 2 * 2, needle); } else { lbz(ch2, 2 * 1, needle); }
      if (h_csize == 2) { lhz(ch1, 2 * 2, addr); } else { lbz(ch1, 2 * 1, addr); }
      cmpw(CCR1, ch1, ch2);
      bne(CCR1, L_OuterLoop);
    }
  }
  // Return index ...
  bind(L_Found);
  subf(result, haystack, addr);       // relative to haystack, ...
  if (h_csize == 2) { srdi(result, result, 1); } // in characters.
  bind(L_End);
} // string_indexof

// Search a single character in 'haystack' and return in 'result' the index (in
// characters) of the first occurrence, or -1 if there is none.
//
// haystack, haycnt - haystack address and length in characters; only read.
// needle           - register holding the character to search for. If R0 is
//                    passed, the constant needleChar is used as immediate instead.
// needleChar       - character constant, only used if needle == R0.
// tmp1, tmp2       - killed. R0 is killed, too. CCR0 and CCR1 are used.
// is_byte          - haystack element size: true = 1 byte, false = 2 bytes.
void C2_MacroAssembler::string_indexof_char(Register result, Register haystack, Register haycnt,
                                            Register needle, jchar needleChar, Register tmp1, Register tmp2, bool is_byte) {
  assert_different_registers(haystack, haycnt, needle, tmp1, tmp2);

  Label L_InnerLoop, L_FinalCheck, L_Found1, L_Found2, L_NotFound, L_End;
  Register addr = tmp1,
           ch1 = tmp2,
           ch2 = R0;

  const int h_csize = is_byte ? 1 : 2;

//4:
  srwi_(tmp2, haycnt, 1);   // Shift right by exact_log2(UNROLL_FACTOR).
  mr(addr, haystack);
  beq(CCR0, L_FinalCheck);
  mtctr(tmp2);              // Move to count register.
//8:
  bind(L_InnerLoop);        // Main work horse (2x unrolled search loop).
  if (!is_byte) {
    lhz(ch1, 0, addr);
    lhz(ch2, 2, addr);
  } else {
    lbz(ch1, 0, addr);
    lbz(ch2, 1, addr);
  }
  // Compare against the needle register or, if none was passed, the immediate.
  (needle != R0) ? cmpw(CCR0, ch1, needle) : cmplwi(CCR0, ch1, (unsigned int)needleChar);
  (needle != R0) ? cmpw(CCR1, ch2, needle) : cmplwi(CCR1, ch2, (unsigned int)needleChar);
  beq(CCR0, L_Found1);      // Did we find the needle?
  beq(CCR1, L_Found2);
  addi(addr, addr, 2 * h_csize);
  bdnz(L_InnerLoop);
//16:
  bind(L_FinalCheck);
  andi_(R0, haycnt, 1);     // Odd count: one character not covered by the unrolled loop.
  beq(CCR0, L_NotFound);
  if (!is_byte) { lhz(ch1, 0, addr); } else { lbz(ch1, 0, addr); } // One position left at which we have to compare.
  (needle != R0) ? cmpw(CCR1, ch1, needle) : cmplwi(CCR1, ch1, (unsigned int)needleChar);
  beq(CCR1, L_Found1);
//21:
  bind(L_NotFound);
  li(result, -1);           // Not found.
  b(L_End);

  bind(L_Found2);
  addi(addr, addr, h_csize); // Second character of the pair matched.
//24:
  bind(L_Found1);           // Return index ...
  subf(result, haystack, addr); // relative to haystack, ...
  if (!is_byte) { srdi(result, result, 1); } // in characters.
  bind(L_End);
} // string_indexof_char


// Count the leading bytes of src[0..cnt) which do not have the most significant
// bit set. 'result' receives the offset of the first byte with that bit set, or
// cnt if there is no such byte.
//
// A 16-bytes-at-once loop skips over chunks without such a byte; the chunk in
// which one is detected (and the tail) is scanned byte by byte.
//
// src, cnt   - only read.
// result     - used as the running pointer, converted to an offset at the end.
// tmp1, tmp2 - killed. R0 is killed, too.
void C2_MacroAssembler::count_positives(Register src, Register cnt, Register result,
                                        Register tmp1, Register tmp2) {
  const Register tmp0 = R0;
  assert_different_registers(src, result, cnt, tmp0, tmp1, tmp2);
  Label Lfastloop, Lslow, Lloop, Ldone;

  // Check if cnt >= 16 (bytes); the fast loop consumes 16 bytes per iteration.
  lis(tmp1, (int)(short)0x8080);  // tmp1 = 0x8080808080808080
  srwi_(tmp2, cnt, 4);
  mr(result, src);                // Use result reg to point to the current position.
  beq(CCR0, Lslow);
  ori(tmp1, tmp1, 0x8080);
  rldimi(tmp1, tmp1, 32, 0);
  mtctr(tmp2);

  // 2x unrolled loop
  bind(Lfastloop);
  ld(tmp2, 0, result);
  ld(tmp0, 8, result);

  orr(tmp0, tmp2, tmp0);

  and_(tmp0, tmp0, tmp1);
  bne(CCR0, Lslow);               // Found negative byte.
  addi(result, result, 16);
  bdnz(Lfastloop);

  bind(Lslow);                    // Fallback to slow version.
  subf(tmp0, src, result);        // Bytes known positive.
  subf_(tmp0, tmp0, cnt);         // Remaining Bytes.
  beq(CCR0, Ldone);
  mtctr(tmp0);
  bind(Lloop);
  lbz(tmp0, 0, result);
  andi_(tmp0, tmp0, 0x80);
  bne(CCR0, Ldone);               // Found negative byte.
  addi(result, result, 1);
  bdnz(Lloop);

  bind(Ldone);
  subf(result, src, result);      // Result is offset from src.
}