1 /* 2 * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. 3 * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. 4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 5 * 6 * This code is free software; you can redistribute it and/or modify it 7 * under the terms of the GNU General Public License version 2 only, as 8 * published by the Free Software Foundation. 9 * 10 * This code is distributed in the hope that it will be useful, but WITHOUT 11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 13 * version 2 for more details (a copy is included in the LICENSE file that 14 * accompanied this code). 15 * 16 * You should have received a copy of the GNU General Public License version 17 * 2 along with this work; if not, write to the Free Software Foundation, 18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 19 * 20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 21 * or visit www.oracle.com if you need additional information or have any 22 * questions. 23 * 24 */ 25 26 #include "precompiled.hpp" 27 #include "asm/assembler.hpp" 28 #include "asm/assembler.inline.hpp" 29 #include "opto/c2_MacroAssembler.hpp" 30 #include "opto/intrinsicnode.hpp" 31 #include "opto/subnode.hpp" 32 #include "runtime/stubRoutines.hpp" 33 34 #ifdef PRODUCT 35 #define BLOCK_COMMENT(str) /* nothing */ 36 #define STOP(error) stop(error) 37 #else 38 #define BLOCK_COMMENT(str) block_comment(str) 39 #define STOP(error) block_comment(error); stop(error) 40 #endif 41 42 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":") 43 44 // short string 45 // StringUTF16.indexOfChar 46 // StringLatin1.indexOfChar 47 void C2_MacroAssembler::string_indexof_char_short(Register str1, Register cnt1, 48 Register ch, Register result, 49 bool isL) 50 { 51 Register ch1 = t0; 52 Register index = t1; 53 54 BLOCK_COMMENT("string_indexof_char_short {"); 55 56 Label LOOP, LOOP1, LOOP4, LOOP8; 57 Label MATCH, MATCH1, MATCH2, MATCH3, 58 MATCH4, MATCH5, MATCH6, MATCH7, NOMATCH; 59 60 mv(result, -1); 61 mv(index, zr); 62 63 bind(LOOP); 64 addi(t0, index, 8); 65 ble(t0, cnt1, LOOP8); 66 addi(t0, index, 4); 67 ble(t0, cnt1, LOOP4); 68 j(LOOP1); 69 70 bind(LOOP8); 71 isL ? lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0)); 72 beq(ch, ch1, MATCH); 73 isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2)); 74 beq(ch, ch1, MATCH1); 75 isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4)); 76 beq(ch, ch1, MATCH2); 77 isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6)); 78 beq(ch, ch1, MATCH3); 79 isL ? lbu(ch1, Address(str1, 4)) : lhu(ch1, Address(str1, 8)); 80 beq(ch, ch1, MATCH4); 81 isL ? lbu(ch1, Address(str1, 5)) : lhu(ch1, Address(str1, 10)); 82 beq(ch, ch1, MATCH5); 83 isL ? lbu(ch1, Address(str1, 6)) : lhu(ch1, Address(str1, 12)); 84 beq(ch, ch1, MATCH6); 85 isL ? lbu(ch1, Address(str1, 7)) : lhu(ch1, Address(str1, 14)); 86 beq(ch, ch1, MATCH7); 87 addi(index, index, 8); 88 addi(str1, str1, isL ? 8 : 16); 89 blt(index, cnt1, LOOP); 90 j(NOMATCH); 91 92 bind(LOOP4); 93 isL ? lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0)); 94 beq(ch, ch1, MATCH); 95 isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2)); 96 beq(ch, ch1, MATCH1); 97 isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4)); 98 beq(ch, ch1, MATCH2); 99 isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6)); 100 beq(ch, ch1, MATCH3); 101 addi(index, index, 4); 102 addi(str1, str1, isL ? 4 : 8); 103 bge(index, cnt1, NOMATCH); 104 105 bind(LOOP1); 106 isL ? lbu(ch1, Address(str1)) : lhu(ch1, Address(str1)); 107 beq(ch, ch1, MATCH); 108 addi(index, index, 1); 109 addi(str1, str1, isL ? 1 : 2); 110 blt(index, cnt1, LOOP1); 111 j(NOMATCH); 112 113 bind(MATCH1); 114 addi(index, index, 1); 115 j(MATCH); 116 117 bind(MATCH2); 118 addi(index, index, 2); 119 j(MATCH); 120 121 bind(MATCH3); 122 addi(index, index, 3); 123 j(MATCH); 124 125 bind(MATCH4); 126 addi(index, index, 4); 127 j(MATCH); 128 129 bind(MATCH5); 130 addi(index, index, 5); 131 j(MATCH); 132 133 bind(MATCH6); 134 addi(index, index, 6); 135 j(MATCH); 136 137 bind(MATCH7); 138 addi(index, index, 7); 139 140 bind(MATCH); 141 mv(result, index); 142 bind(NOMATCH); 143 BLOCK_COMMENT("} string_indexof_char_short"); 144 } 145 146 // StringUTF16.indexOfChar 147 // StringLatin1.indexOfChar 148 void C2_MacroAssembler::string_indexof_char(Register str1, Register cnt1, 149 Register ch, Register result, 150 Register tmp1, Register tmp2, 151 Register tmp3, Register tmp4, 152 bool isL) 153 { 154 Label CH1_LOOP, HIT, NOMATCH, DONE, DO_LONG; 155 Register ch1 = t0; 156 Register orig_cnt = t1; 157 Register mask1 = tmp3; 158 Register mask2 = tmp2; 159 Register match_mask = tmp1; 160 Register trailing_char = tmp4; 161 Register unaligned_elems = tmp4; 162 163 BLOCK_COMMENT("string_indexof_char {"); 164 beqz(cnt1, NOMATCH); 165 166 addi(t0, cnt1, isL ? -32 : -16); 167 bgtz(t0, DO_LONG); 168 string_indexof_char_short(str1, cnt1, ch, result, isL); 169 j(DONE); 170 171 bind(DO_LONG); 172 mv(orig_cnt, cnt1); 173 if (AvoidUnalignedAccesses) { 174 Label ALIGNED; 175 andi(unaligned_elems, str1, 0x7); 176 beqz(unaligned_elems, ALIGNED); 177 sub(unaligned_elems, unaligned_elems, 8); 178 neg(unaligned_elems, unaligned_elems); 179 if (!isL) { 180 srli(unaligned_elems, unaligned_elems, 1); 181 } 182 // do unaligned part per element 183 string_indexof_char_short(str1, unaligned_elems, ch, result, isL); 184 bgez(result, DONE); 185 mv(orig_cnt, cnt1); 186 sub(cnt1, cnt1, unaligned_elems); 187 bind(ALIGNED); 188 } 189 190 // duplicate ch 191 if (isL) { 192 slli(ch1, ch, 8); 193 orr(ch, ch1, ch); 194 } 195 slli(ch1, ch, 16); 196 orr(ch, ch1, ch); 197 slli(ch1, ch, 32); 198 orr(ch, ch1, ch); 199 200 if (!isL) { 201 slli(cnt1, cnt1, 1); 202 } 203 204 uint64_t mask0101 = UCONST64(0x0101010101010101); 205 uint64_t mask0001 = UCONST64(0x0001000100010001); 206 mv(mask1, isL ? mask0101 : mask0001); 207 uint64_t mask7f7f = UCONST64(0x7f7f7f7f7f7f7f7f); 208 uint64_t mask7fff = UCONST64(0x7fff7fff7fff7fff); 209 mv(mask2, isL ? mask7f7f : mask7fff); 210 211 bind(CH1_LOOP); 212 ld(ch1, Address(str1)); 213 addi(str1, str1, 8); 214 addi(cnt1, cnt1, -8); 215 compute_match_mask(ch1, ch, match_mask, mask1, mask2); 216 bnez(match_mask, HIT); 217 bgtz(cnt1, CH1_LOOP); 218 j(NOMATCH); 219 220 bind(HIT); 221 ctzc_bit(trailing_char, match_mask, isL, ch1, result); 222 srli(trailing_char, trailing_char, 3); 223 addi(cnt1, cnt1, 8); 224 ble(cnt1, trailing_char, NOMATCH); 225 // match case 226 if (!isL) { 227 srli(cnt1, cnt1, 1); 228 srli(trailing_char, trailing_char, 1); 229 } 230 231 sub(result, orig_cnt, cnt1); 232 add(result, result, trailing_char); 233 j(DONE); 234 235 bind(NOMATCH); 236 mv(result, -1); 237 238 bind(DONE); 239 BLOCK_COMMENT("} string_indexof_char"); 240 } 241 242 typedef void (MacroAssembler::* load_chr_insn)(Register rd, const Address &adr, Register temp); 243 244 // Search for needle in haystack and return index or -1 245 // x10: result 246 // x11: haystack 247 // x12: haystack_len 248 // x13: needle 249 // x14: needle_len 250 void C2_MacroAssembler::string_indexof(Register haystack, Register needle, 251 Register haystack_len, Register needle_len, 252 Register tmp1, Register tmp2, 253 Register tmp3, Register tmp4, 254 Register tmp5, Register tmp6, 255 Register result, int ae) 256 { 257 assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); 258 259 Label LINEARSEARCH, LINEARSTUB, DONE, NOMATCH; 260 261 Register ch1 = t0; 262 Register ch2 = t1; 263 Register nlen_tmp = tmp1; // needle len tmp 264 Register hlen_tmp = tmp2; // haystack len tmp 265 Register result_tmp = tmp4; 266 267 bool isLL = ae == StrIntrinsicNode::LL; 268 269 bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; 270 bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; 271 int needle_chr_shift = needle_isL ? 0 : 1; 272 int haystack_chr_shift = haystack_isL ? 0 : 1; 273 int needle_chr_size = needle_isL ? 1 : 2; 274 int haystack_chr_size = haystack_isL ? 1 : 2; 275 load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::lbu : 276 (load_chr_insn)&MacroAssembler::lhu; 277 load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::lbu : 278 (load_chr_insn)&MacroAssembler::lhu; 279 280 BLOCK_COMMENT("string_indexof {"); 281 282 // Note, inline_string_indexOf() generates checks: 283 // if (pattern.count > src.count) return -1; 284 // if (pattern.count == 0) return 0; 285 286 // We have two strings, a source string in haystack, haystack_len and a pattern string 287 // in needle, needle_len. Find the first occurence of pattern in source or return -1. 288 289 // For larger pattern and source we use a simplified Boyer Moore algorithm. 290 // With a small pattern and source we use linear scan. 291 292 // needle_len >=8 && needle_len < 256 && needle_len < haystack_len/4, use bmh algorithm. 293 sub(result_tmp, haystack_len, needle_len); 294 // needle_len < 8, use linear scan 295 sub(t0, needle_len, 8); 296 bltz(t0, LINEARSEARCH); 297 // needle_len >= 256, use linear scan 298 sub(t0, needle_len, 256); 299 bgez(t0, LINEARSTUB); 300 // needle_len >= haystack_len/4, use linear scan 301 srli(t0, haystack_len, 2); 302 bge(needle_len, t0, LINEARSTUB); 303 304 // Boyer-Moore-Horspool introduction: 305 // The Boyer Moore alogorithm is based on the description here:- 306 // 307 // http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm 308 // 309 // This describes and algorithm with 2 shift rules. The 'Bad Character' rule 310 // and the 'Good Suffix' rule. 311 // 312 // These rules are essentially heuristics for how far we can shift the 313 // pattern along the search string. 314 // 315 // The implementation here uses the 'Bad Character' rule only because of the 316 // complexity of initialisation for the 'Good Suffix' rule. 317 // 318 // This is also known as the Boyer-Moore-Horspool algorithm: 319 // 320 // http://en.wikipedia.org/wiki/Boyer-Moore-Horspool_algorithm 321 // 322 // #define ASIZE 256 323 // 324 // int bm(unsigned char *pattern, int m, unsigned char *src, int n) { 325 // int i, j; 326 // unsigned c; 327 // unsigned char bc[ASIZE]; 328 // 329 // /* Preprocessing */ 330 // for (i = 0; i < ASIZE; ++i) 331 // bc[i] = m; 332 // for (i = 0; i < m - 1; ) { 333 // c = pattern[i]; 334 // ++i; 335 // // c < 256 for Latin1 string, so, no need for branch 336 // #ifdef PATTERN_STRING_IS_LATIN1 337 // bc[c] = m - i; 338 // #else 339 // if (c < ASIZE) bc[c] = m - i; 340 // #endif 341 // } 342 // 343 // /* Searching */ 344 // j = 0; 345 // while (j <= n - m) { 346 // c = src[i+j]; 347 // if (pattern[m-1] == c) 348 // int k; 349 // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); 350 // if (k < 0) return j; 351 // // c < 256 for Latin1 string, so, no need for branch 352 // #ifdef SOURCE_STRING_IS_LATIN1_AND_PATTERN_STRING_IS_LATIN1 353 // // LL case: (c< 256) always true. Remove branch 354 // j += bc[pattern[j+m-1]]; 355 // #endif 356 // #ifdef SOURCE_STRING_IS_UTF_AND_PATTERN_STRING_IS_UTF 357 // // UU case: need if (c<ASIZE) check. Skip 1 character if not. 358 // if (c < ASIZE) 359 // j += bc[pattern[j+m-1]]; 360 // else 361 // j += 1 362 // #endif 363 // #ifdef SOURCE_IS_UTF_AND_PATTERN_IS_LATIN1 364 // // UL case: need if (c<ASIZE) check. Skip <pattern length> if not. 365 // if (c < ASIZE) 366 // j += bc[pattern[j+m-1]]; 367 // else 368 // j += m 369 // #endif 370 // } 371 // return -1; 372 // } 373 374 // temp register:t0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, result 375 Label BCLOOP, BCSKIP, BMLOOPSTR2, BMLOOPSTR1, BMSKIP, BMADV, BMMATCH, 376 BMLOOPSTR1_LASTCMP, BMLOOPSTR1_CMP, BMLOOPSTR1_AFTER_LOAD, BM_INIT_LOOP; 377 378 Register haystack_end = haystack_len; 379 Register skipch = tmp2; 380 381 // pattern length is >=8, so, we can read at least 1 register for cases when 382 // UTF->Latin1 conversion is not needed(8 LL or 4UU) and half register for 383 // UL case. We'll re-read last character in inner pre-loop code to have 384 // single outer pre-loop load 385 const int firstStep = isLL ? 7 : 3; 386 387 const int ASIZE = 256; 388 const int STORE_BYTES = 8; // 8 bytes stored per instruction(sd) 389 390 sub(sp, sp, ASIZE); 391 392 // init BC offset table with default value: needle_len 393 slli(t0, needle_len, 8); 394 orr(t0, t0, needle_len); // [63...16][needle_len][needle_len] 395 slli(tmp1, t0, 16); 396 orr(t0, tmp1, t0); // [63...32][needle_len][needle_len][needle_len][needle_len] 397 slli(tmp1, t0, 32); 398 orr(tmp5, tmp1, t0); // tmp5: 8 elements [needle_len] 399 400 mv(ch1, sp); // ch1 is t0 401 mv(tmp6, ASIZE / STORE_BYTES); // loop iterations 402 403 bind(BM_INIT_LOOP); 404 // for (i = 0; i < ASIZE; ++i) 405 // bc[i] = m; 406 for (int i = 0; i < 4; i++) { 407 sd(tmp5, Address(ch1, i * wordSize)); 408 } 409 add(ch1, ch1, 32); 410 sub(tmp6, tmp6, 4); 411 bgtz(tmp6, BM_INIT_LOOP); 412 413 sub(nlen_tmp, needle_len, 1); // m - 1, index of the last element in pattern 414 Register orig_haystack = tmp5; 415 mv(orig_haystack, haystack); 416 // result_tmp = tmp4 417 shadd(haystack_end, result_tmp, haystack, haystack_end, haystack_chr_shift); 418 sub(ch2, needle_len, 1); // bc offset init value, ch2 is t1 419 mv(tmp3, needle); 420 421 // for (i = 0; i < m - 1; ) { 422 // c = pattern[i]; 423 // ++i; 424 // // c < 256 for Latin1 string, so, no need for branch 425 // #ifdef PATTERN_STRING_IS_LATIN1 426 // bc[c] = m - i; 427 // #else 428 // if (c < ASIZE) bc[c] = m - i; 429 // #endif 430 // } 431 bind(BCLOOP); 432 (this->*needle_load_1chr)(ch1, Address(tmp3), noreg); 433 add(tmp3, tmp3, needle_chr_size); 434 if (!needle_isL) { 435 // ae == StrIntrinsicNode::UU 436 mv(tmp6, ASIZE); 437 bgeu(ch1, tmp6, BCSKIP); 438 } 439 add(tmp4, sp, ch1); 440 sb(ch2, Address(tmp4)); // store skip offset to BC offset table 441 442 bind(BCSKIP); 443 sub(ch2, ch2, 1); // for next pattern element, skip distance -1 444 bgtz(ch2, BCLOOP); 445 446 // tmp6: pattern end, address after needle 447 shadd(tmp6, needle_len, needle, tmp6, needle_chr_shift); 448 if (needle_isL == haystack_isL) { 449 // load last 8 bytes (8LL/4UU symbols) 450 ld(tmp6, Address(tmp6, -wordSize)); 451 } else { 452 // UL: from UTF-16(source) search Latin1(pattern) 453 lwu(tmp6, Address(tmp6, -wordSize / 2)); // load last 4 bytes(4 symbols) 454 // convert Latin1 to UTF. eg: 0x0000abcd -> 0x0a0b0c0d 455 // We'll have to wait until load completed, but it's still faster than per-character loads+checks 456 srli(tmp3, tmp6, BitsPerByte * (wordSize / 2 - needle_chr_size)); // pattern[m-1], eg:0x0000000a 457 slli(ch2, tmp6, XLEN - 24); 458 srli(ch2, ch2, XLEN - 8); // pattern[m-2], 0x0000000b 459 slli(ch1, tmp6, XLEN - 16); 460 srli(ch1, ch1, XLEN - 8); // pattern[m-3], 0x0000000c 461 andi(tmp6, tmp6, 0xff); // pattern[m-4], 0x0000000d 462 slli(ch2, ch2, 16); 463 orr(ch2, ch2, ch1); // 0x00000b0c 464 slli(result, tmp3, 48); // use result as temp register 465 orr(tmp6, tmp6, result); // 0x0a00000d 466 slli(result, ch2, 16); 467 orr(tmp6, tmp6, result); // UTF-16:0x0a0b0c0d 468 } 469 470 // i = m - 1; 471 // skipch = j + i; 472 // if (skipch == pattern[m - 1] 473 // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); 474 // else 475 // move j with bad char offset table 476 bind(BMLOOPSTR2); 477 // compare pattern to source string backward 478 shadd(result, nlen_tmp, haystack, result, haystack_chr_shift); 479 (this->*haystack_load_1chr)(skipch, Address(result), noreg); 480 sub(nlen_tmp, nlen_tmp, firstStep); // nlen_tmp is positive here, because needle_len >= 8 481 if (needle_isL == haystack_isL) { 482 // re-init tmp3. It's for free because it's executed in parallel with 483 // load above. Alternative is to initialize it before loop, but it'll 484 // affect performance on in-order systems with 2 or more ld/st pipelines 485 srli(tmp3, tmp6, BitsPerByte * (wordSize - needle_chr_size)); // UU/LL: pattern[m-1] 486 } 487 if (!isLL) { // UU/UL case 488 slli(ch2, nlen_tmp, 1); // offsets in bytes 489 } 490 bne(tmp3, skipch, BMSKIP); // if not equal, skipch is bad char 491 add(result, haystack, isLL ? nlen_tmp : ch2); 492 ld(ch2, Address(result)); // load 8 bytes from source string 493 mv(ch1, tmp6); 494 if (isLL) { 495 j(BMLOOPSTR1_AFTER_LOAD); 496 } else { 497 sub(nlen_tmp, nlen_tmp, 1); // no need to branch for UU/UL case. cnt1 >= 8 498 j(BMLOOPSTR1_CMP); 499 } 500 501 bind(BMLOOPSTR1); 502 shadd(ch1, nlen_tmp, needle, ch1, needle_chr_shift); 503 (this->*needle_load_1chr)(ch1, Address(ch1), noreg); 504 shadd(ch2, nlen_tmp, haystack, ch2, haystack_chr_shift); 505 (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); 506 507 bind(BMLOOPSTR1_AFTER_LOAD); 508 sub(nlen_tmp, nlen_tmp, 1); 509 bltz(nlen_tmp, BMLOOPSTR1_LASTCMP); 510 511 bind(BMLOOPSTR1_CMP); 512 beq(ch1, ch2, BMLOOPSTR1); 513 514 bind(BMSKIP); 515 if (!isLL) { 516 // if we've met UTF symbol while searching Latin1 pattern, then we can 517 // skip needle_len symbols 518 if (needle_isL != haystack_isL) { 519 mv(result_tmp, needle_len); 520 } else { 521 mv(result_tmp, 1); 522 } 523 mv(t0, ASIZE); 524 bgeu(skipch, t0, BMADV); 525 } 526 add(result_tmp, sp, skipch); 527 lbu(result_tmp, Address(result_tmp)); // load skip offset 528 529 bind(BMADV); 530 sub(nlen_tmp, needle_len, 1); 531 // move haystack after bad char skip offset 532 shadd(haystack, result_tmp, haystack, result, haystack_chr_shift); 533 ble(haystack, haystack_end, BMLOOPSTR2); 534 add(sp, sp, ASIZE); 535 j(NOMATCH); 536 537 bind(BMLOOPSTR1_LASTCMP); 538 bne(ch1, ch2, BMSKIP); 539 540 bind(BMMATCH); 541 sub(result, haystack, orig_haystack); 542 if (!haystack_isL) { 543 srli(result, result, 1); 544 } 545 add(sp, sp, ASIZE); 546 j(DONE); 547 548 bind(LINEARSTUB); 549 sub(t0, needle_len, 16); // small patterns still should be handled by simple algorithm 550 bltz(t0, LINEARSEARCH); 551 mv(result, zr); 552 RuntimeAddress stub = NULL; 553 if (isLL) { 554 stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ll()); 555 assert(stub.target() != NULL, "string_indexof_linear_ll stub has not been generated"); 556 } else if (needle_isL) { 557 stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ul()); 558 assert(stub.target() != NULL, "string_indexof_linear_ul stub has not been generated"); 559 } else { 560 stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_uu()); 561 assert(stub.target() != NULL, "string_indexof_linear_uu stub has not been generated"); 562 } 563 trampoline_call(stub); 564 j(DONE); 565 566 bind(NOMATCH); 567 mv(result, -1); 568 j(DONE); 569 570 bind(LINEARSEARCH); 571 string_indexof_linearscan(haystack, needle, haystack_len, needle_len, tmp1, tmp2, tmp3, tmp4, -1, result, ae); 572 573 bind(DONE); 574 BLOCK_COMMENT("} string_indexof"); 575 } 576 577 // string_indexof 578 // result: x10 579 // src: x11 580 // src_count: x12 581 // pattern: x13 582 // pattern_count: x14 or 1/2/3/4 583 void C2_MacroAssembler::string_indexof_linearscan(Register haystack, Register needle, 584 Register haystack_len, Register needle_len, 585 Register tmp1, Register tmp2, 586 Register tmp3, Register tmp4, 587 int needle_con_cnt, Register result, int ae) 588 { 589 // Note: 590 // needle_con_cnt > 0 means needle_len register is invalid, needle length is constant 591 // for UU/LL: needle_con_cnt[1, 4], UL: needle_con_cnt = 1 592 assert(needle_con_cnt <= 4, "Invalid needle constant count"); 593 assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); 594 595 Register ch1 = t0; 596 Register ch2 = t1; 597 Register hlen_neg = haystack_len, nlen_neg = needle_len; 598 Register nlen_tmp = tmp1, hlen_tmp = tmp2, result_tmp = tmp4; 599 600 bool isLL = ae == StrIntrinsicNode::LL; 601 602 bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; 603 bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; 604 int needle_chr_shift = needle_isL ? 0 : 1; 605 int haystack_chr_shift = haystack_isL ? 0 : 1; 606 int needle_chr_size = needle_isL ? 1 : 2; 607 int haystack_chr_size = haystack_isL ? 1 : 2; 608 609 load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::lbu : 610 (load_chr_insn)&MacroAssembler::lhu; 611 load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::lbu : 612 (load_chr_insn)&MacroAssembler::lhu; 613 load_chr_insn load_2chr = isLL ? (load_chr_insn)&MacroAssembler::lhu : (load_chr_insn)&MacroAssembler::lwu; 614 load_chr_insn load_4chr = isLL ? (load_chr_insn)&MacroAssembler::lwu : (load_chr_insn)&MacroAssembler::ld; 615 616 Label DO1, DO2, DO3, MATCH, NOMATCH, DONE; 617 618 Register first = tmp3; 619 620 if (needle_con_cnt == -1) { 621 Label DOSHORT, FIRST_LOOP, STR2_NEXT, STR1_LOOP, STR1_NEXT; 622 623 sub(t0, needle_len, needle_isL == haystack_isL ? 4 : 2); 624 bltz(t0, DOSHORT); 625 626 (this->*needle_load_1chr)(first, Address(needle), noreg); 627 slli(t0, needle_len, needle_chr_shift); 628 add(needle, needle, t0); 629 neg(nlen_neg, t0); 630 slli(t0, result_tmp, haystack_chr_shift); 631 add(haystack, haystack, t0); 632 neg(hlen_neg, t0); 633 634 bind(FIRST_LOOP); 635 add(t0, haystack, hlen_neg); 636 (this->*haystack_load_1chr)(ch2, Address(t0), noreg); 637 beq(first, ch2, STR1_LOOP); 638 639 bind(STR2_NEXT); 640 add(hlen_neg, hlen_neg, haystack_chr_size); 641 blez(hlen_neg, FIRST_LOOP); 642 j(NOMATCH); 643 644 bind(STR1_LOOP); 645 add(nlen_tmp, nlen_neg, needle_chr_size); 646 add(hlen_tmp, hlen_neg, haystack_chr_size); 647 bgez(nlen_tmp, MATCH); 648 649 bind(STR1_NEXT); 650 add(ch1, needle, nlen_tmp); 651 (this->*needle_load_1chr)(ch1, Address(ch1), noreg); 652 add(ch2, haystack, hlen_tmp); 653 (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); 654 bne(ch1, ch2, STR2_NEXT); 655 add(nlen_tmp, nlen_tmp, needle_chr_size); 656 add(hlen_tmp, hlen_tmp, haystack_chr_size); 657 bltz(nlen_tmp, STR1_NEXT); 658 j(MATCH); 659 660 bind(DOSHORT); 661 if (needle_isL == haystack_isL) { 662 sub(t0, needle_len, 2); 663 bltz(t0, DO1); 664 bgtz(t0, DO3); 665 } 666 } 667 668 if (needle_con_cnt == 4) { 669 Label CH1_LOOP; 670 (this->*load_4chr)(ch1, Address(needle), noreg); 671 sub(result_tmp, haystack_len, 4); 672 slli(tmp3, result_tmp, haystack_chr_shift); // result as tmp 673 add(haystack, haystack, tmp3); 674 neg(hlen_neg, tmp3); 675 676 bind(CH1_LOOP); 677 add(ch2, haystack, hlen_neg); 678 (this->*load_4chr)(ch2, Address(ch2), noreg); 679 beq(ch1, ch2, MATCH); 680 add(hlen_neg, hlen_neg, haystack_chr_size); 681 blez(hlen_neg, CH1_LOOP); 682 j(NOMATCH); 683 } 684 685 if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 2) { 686 Label CH1_LOOP; 687 BLOCK_COMMENT("string_indexof DO2 {"); 688 bind(DO2); 689 (this->*load_2chr)(ch1, Address(needle), noreg); 690 if (needle_con_cnt == 2) { 691 sub(result_tmp, haystack_len, 2); 692 } 693 slli(tmp3, result_tmp, haystack_chr_shift); 694 add(haystack, haystack, tmp3); 695 neg(hlen_neg, tmp3); 696 697 bind(CH1_LOOP); 698 add(tmp3, haystack, hlen_neg); 699 (this->*load_2chr)(ch2, Address(tmp3), noreg); 700 beq(ch1, ch2, MATCH); 701 add(hlen_neg, hlen_neg, haystack_chr_size); 702 blez(hlen_neg, CH1_LOOP); 703 j(NOMATCH); 704 BLOCK_COMMENT("} string_indexof DO2"); 705 } 706 707 if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 3) { 708 Label FIRST_LOOP, STR2_NEXT, STR1_LOOP; 709 BLOCK_COMMENT("string_indexof DO3 {"); 710 711 bind(DO3); 712 (this->*load_2chr)(first, Address(needle), noreg); 713 (this->*needle_load_1chr)(ch1, Address(needle, 2 * needle_chr_size), noreg); 714 if (needle_con_cnt == 3) { 715 sub(result_tmp, haystack_len, 3); 716 } 717 slli(hlen_tmp, result_tmp, haystack_chr_shift); 718 add(haystack, haystack, hlen_tmp); 719 neg(hlen_neg, hlen_tmp); 720 721 bind(FIRST_LOOP); 722 add(ch2, haystack, hlen_neg); 723 (this->*load_2chr)(ch2, Address(ch2), noreg); 724 beq(first, ch2, STR1_LOOP); 725 726 bind(STR2_NEXT); 727 add(hlen_neg, hlen_neg, haystack_chr_size); 728 blez(hlen_neg, FIRST_LOOP); 729 j(NOMATCH); 730 731 bind(STR1_LOOP); 732 add(hlen_tmp, hlen_neg, 2 * haystack_chr_size); 733 add(ch2, haystack, hlen_tmp); 734 (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); 735 bne(ch1, ch2, STR2_NEXT); 736 j(MATCH); 737 BLOCK_COMMENT("} string_indexof DO3"); 738 } 739 740 if (needle_con_cnt == -1 || needle_con_cnt == 1) { 741 Label DO1_LOOP; 742 743 BLOCK_COMMENT("string_indexof DO1 {"); 744 bind(DO1); 745 (this->*needle_load_1chr)(ch1, Address(needle), noreg); 746 sub(result_tmp, haystack_len, 1); 747 mv(tmp3, result_tmp); 748 if (haystack_chr_shift) { 749 slli(tmp3, result_tmp, haystack_chr_shift); 750 } 751 add(haystack, haystack, tmp3); 752 neg(hlen_neg, tmp3); 753 754 bind(DO1_LOOP); 755 add(tmp3, haystack, hlen_neg); 756 (this->*haystack_load_1chr)(ch2, Address(tmp3), noreg); 757 beq(ch1, ch2, MATCH); 758 add(hlen_neg, hlen_neg, haystack_chr_size); 759 blez(hlen_neg, DO1_LOOP); 760 BLOCK_COMMENT("} string_indexof DO1"); 761 } 762 763 bind(NOMATCH); 764 mv(result, -1); 765 j(DONE); 766 767 bind(MATCH); 768 srai(t0, hlen_neg, haystack_chr_shift); 769 add(result, result_tmp, t0); 770 771 bind(DONE); 772 } 773 774 // Compare strings. 775 void C2_MacroAssembler::string_compare(Register str1, Register str2, 776 Register cnt1, Register cnt2, Register result, Register tmp1, Register tmp2, 777 Register tmp3, int ae) 778 { 779 Label DONE, SHORT_LOOP, SHORT_STRING, SHORT_LAST, TAIL, STUB, 780 DIFFERENCE, NEXT_WORD, SHORT_LOOP_TAIL, SHORT_LAST2, SHORT_LAST_INIT, 781 SHORT_LOOP_START, TAIL_CHECK, L; 782 783 const int STUB_THRESHOLD = 64 + 8; 784 bool isLL = ae == StrIntrinsicNode::LL; 785 bool isLU = ae == StrIntrinsicNode::LU; 786 bool isUL = ae == StrIntrinsicNode::UL; 787 788 bool str1_isL = isLL || isLU; 789 bool str2_isL = isLL || isUL; 790 791 // for L strings, 1 byte for 1 character 792 // for U strings, 2 bytes for 1 character 793 int str1_chr_size = str1_isL ? 1 : 2; 794 int str2_chr_size = str2_isL ? 1 : 2; 795 int minCharsInWord = isLL ? wordSize : wordSize / 2; 796 797 load_chr_insn str1_load_chr = str1_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; 798 load_chr_insn str2_load_chr = str2_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; 799 800 BLOCK_COMMENT("string_compare {"); 801 802 // Bizzarely, the counts are passed in bytes, regardless of whether they 803 // are L or U strings, however the result is always in characters. 804 if (!str1_isL) { 805 sraiw(cnt1, cnt1, 1); 806 } 807 if (!str2_isL) { 808 sraiw(cnt2, cnt2, 1); 809 } 810 811 // Compute the minimum of the string lengths and save the difference in result. 812 sub(result, cnt1, cnt2); 813 bgt(cnt1, cnt2, L); 814 mv(cnt2, cnt1); 815 bind(L); 816 817 // A very short string 818 li(t0, minCharsInWord); 819 ble(cnt2, t0, SHORT_STRING); 820 821 // Compare longwords 822 // load first parts of strings and finish initialization while loading 823 { 824 if (str1_isL == str2_isL) { // LL or UU 825 // load 8 bytes once to compare 826 ld(tmp1, Address(str1)); 827 beq(str1, str2, DONE); 828 ld(tmp2, Address(str2)); 829 li(t0, STUB_THRESHOLD); 830 bge(cnt2, t0, STUB); 831 sub(cnt2, cnt2, minCharsInWord); 832 beqz(cnt2, TAIL_CHECK); 833 // convert cnt2 from characters to bytes 834 if (!str1_isL) { 835 slli(cnt2, cnt2, 1); 836 } 837 add(str2, str2, cnt2); 838 add(str1, str1, cnt2); 839 sub(cnt2, zr, cnt2); 840 } else if (isLU) { // LU case 841 lwu(tmp1, Address(str1)); 842 ld(tmp2, Address(str2)); 843 li(t0, STUB_THRESHOLD); 844 bge(cnt2, t0, STUB); 845 addi(cnt2, cnt2, -4); 846 add(str1, str1, cnt2); 847 sub(cnt1, zr, cnt2); 848 slli(cnt2, cnt2, 1); 849 add(str2, str2, cnt2); 850 inflate_lo32(tmp3, tmp1); 851 mv(tmp1, tmp3); 852 sub(cnt2, zr, cnt2); 853 addi(cnt1, cnt1, 4); 854 } else { // UL case 855 ld(tmp1, Address(str1)); 856 lwu(tmp2, Address(str2)); 857 li(t0, STUB_THRESHOLD); 858 bge(cnt2, t0, STUB); 859 addi(cnt2, cnt2, -4); 860 slli(t0, cnt2, 1); 861 sub(cnt1, zr, t0); 862 add(str1, str1, t0); 863 add(str2, str2, cnt2); 864 inflate_lo32(tmp3, tmp2); 865 mv(tmp2, tmp3); 866 sub(cnt2, zr, cnt2); 867 addi(cnt1, cnt1, 8); 868 } 869 addi(cnt2, cnt2, isUL ? 4 : 8); 870 bgez(cnt2, TAIL); 871 xorr(tmp3, tmp1, tmp2); 872 bnez(tmp3, DIFFERENCE); 873 874 // main loop 875 bind(NEXT_WORD); 876 if (str1_isL == str2_isL) { // LL or UU 877 add(t0, str1, cnt2); 878 ld(tmp1, Address(t0)); 879 add(t0, str2, cnt2); 880 ld(tmp2, Address(t0)); 881 addi(cnt2, cnt2, 8); 882 } else if (isLU) { // LU case 883 add(t0, str1, cnt1); 884 lwu(tmp1, Address(t0)); 885 add(t0, str2, cnt2); 886 ld(tmp2, Address(t0)); 887 addi(cnt1, cnt1, 4); 888 inflate_lo32(tmp3, tmp1); 889 mv(tmp1, tmp3); 890 addi(cnt2, cnt2, 8); 891 } else { // UL case 892 add(t0, str2, cnt2); 893 lwu(tmp2, Address(t0)); 894 add(t0, str1, cnt1); 895 ld(tmp1, Address(t0)); 896 inflate_lo32(tmp3, tmp2); 897 mv(tmp2, tmp3); 898 addi(cnt1, cnt1, 8); 899 addi(cnt2, cnt2, 4); 900 } 901 bgez(cnt2, TAIL); 902 903 xorr(tmp3, tmp1, tmp2); 904 beqz(tmp3, NEXT_WORD); 905 j(DIFFERENCE); 906 bind(TAIL); 907 xorr(tmp3, tmp1, tmp2); 908 bnez(tmp3, DIFFERENCE); 909 // Last longword. In the case where length == 4 we compare the 910 // same longword twice, but that's still faster than another 911 // conditional branch. 912 if (str1_isL == str2_isL) { // LL or UU 913 ld(tmp1, Address(str1)); 914 ld(tmp2, Address(str2)); 915 } else if (isLU) { // LU case 916 lwu(tmp1, Address(str1)); 917 ld(tmp2, Address(str2)); 918 inflate_lo32(tmp3, tmp1); 919 mv(tmp1, tmp3); 920 } else { // UL case 921 lwu(tmp2, Address(str2)); 922 ld(tmp1, Address(str1)); 923 inflate_lo32(tmp3, tmp2); 924 mv(tmp2, tmp3); 925 } 926 bind(TAIL_CHECK); 927 xorr(tmp3, tmp1, tmp2); 928 beqz(tmp3, DONE); 929 930 // Find the first different characters in the longwords and 931 // compute their difference. 932 bind(DIFFERENCE); 933 ctzc_bit(result, tmp3, isLL); // count zero from lsb to msb 934 srl(tmp1, tmp1, result); 935 srl(tmp2, tmp2, result); 936 if (isLL) { 937 andi(tmp1, tmp1, 0xFF); 938 andi(tmp2, tmp2, 0xFF); 939 } else { 940 andi(tmp1, tmp1, 0xFFFF); 941 andi(tmp2, tmp2, 0xFFFF); 942 } 943 sub(result, tmp1, tmp2); 944 j(DONE); 945 } 946 947 bind(STUB); 948 RuntimeAddress stub = NULL; 949 switch (ae) { 950 case StrIntrinsicNode::LL: 951 stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LL()); 952 break; 953 case StrIntrinsicNode::UU: 954 stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UU()); 955 break; 956 case StrIntrinsicNode::LU: 957 stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LU()); 958 break; 959 case StrIntrinsicNode::UL: 960 stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UL()); 961 break; 962 default: 963 ShouldNotReachHere(); 964 } 965 assert(stub.target() != NULL, "compare_long_string stub has not been generated"); 966 trampoline_call(stub); 967 j(DONE); 968 969 bind(SHORT_STRING); 970 // Is the minimum length zero? 971 beqz(cnt2, DONE); 972 // arrange code to do most branches while loading and loading next characters 973 // while comparing previous 974 (this->*str1_load_chr)(tmp1, Address(str1), t0); 975 addi(str1, str1, str1_chr_size); 976 addi(cnt2, cnt2, -1); 977 beqz(cnt2, SHORT_LAST_INIT); 978 (this->*str2_load_chr)(cnt1, Address(str2), t0); 979 addi(str2, str2, str2_chr_size); 980 j(SHORT_LOOP_START); 981 bind(SHORT_LOOP); 982 addi(cnt2, cnt2, -1); 983 beqz(cnt2, SHORT_LAST); 984 bind(SHORT_LOOP_START); 985 (this->*str1_load_chr)(tmp2, Address(str1), t0); 986 addi(str1, str1, str1_chr_size); 987 (this->*str2_load_chr)(t0, Address(str2), t0); 988 addi(str2, str2, str2_chr_size); 989 bne(tmp1, cnt1, SHORT_LOOP_TAIL); 990 addi(cnt2, cnt2, -1); 991 beqz(cnt2, SHORT_LAST2); 992 (this->*str1_load_chr)(tmp1, Address(str1), t0); 993 addi(str1, str1, str1_chr_size); 994 (this->*str2_load_chr)(cnt1, Address(str2), t0); 995 addi(str2, str2, str2_chr_size); 996 beq(tmp2, t0, SHORT_LOOP); 997 sub(result, tmp2, t0); 998 j(DONE); 999 bind(SHORT_LOOP_TAIL); 1000 sub(result, tmp1, cnt1); 1001 j(DONE); 1002 bind(SHORT_LAST2); 1003 beq(tmp2, t0, DONE); 1004 sub(result, tmp2, t0); 1005 1006 j(DONE); 1007 bind(SHORT_LAST_INIT); 1008 (this->*str2_load_chr)(cnt1, Address(str2), t0); 1009 addi(str2, str2, str2_chr_size); 1010 bind(SHORT_LAST); 1011 beq(tmp1, cnt1, DONE); 1012 sub(result, tmp1, cnt1); 1013 1014 bind(DONE); 1015 1016 BLOCK_COMMENT("} string_compare"); 1017 } 1018 1019 void C2_MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3, 1020 Register tmp4, Register tmp5, Register tmp6, Register result, 1021 Register cnt1, int elem_size) { 1022 Label DONE, SAME, NEXT_DWORD, SHORT, TAIL, TAIL2, IS_TMP5_ZR; 1023 Register tmp1 = t0; 1024 Register tmp2 = t1; 1025 Register cnt2 = tmp2; // cnt2 only used in array length compare 1026 Register elem_per_word = tmp6; 1027 int log_elem_size = exact_log2(elem_size); 1028 int length_offset = arrayOopDesc::length_offset_in_bytes(); 1029 int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE); 1030 1031 assert(elem_size == 1 || elem_size == 2, "must be char or byte"); 1032 assert_different_registers(a1, a2, result, cnt1, t0, t1, tmp3, tmp4, tmp5, tmp6); 1033 li(elem_per_word, wordSize / elem_size); 1034 1035 BLOCK_COMMENT("arrays_equals {"); 1036 1037 // if (a1 == a2), return true 1038 beq(a1, a2, SAME); 1039 1040 mv(result, false); 1041 beqz(a1, DONE); 1042 beqz(a2, DONE); 1043 lwu(cnt1, Address(a1, length_offset)); 1044 lwu(cnt2, Address(a2, length_offset)); 1045 bne(cnt2, cnt1, DONE); 1046 beqz(cnt1, SAME); 1047 1048 slli(tmp5, cnt1, 3 + log_elem_size); 1049 sub(tmp5, zr, tmp5); 1050 add(a1, a1, base_offset); 1051 add(a2, a2, base_offset); 1052 ld(tmp3, Address(a1, 0)); 1053 ld(tmp4, Address(a2, 0)); 1054 ble(cnt1, elem_per_word, SHORT); // short or same 1055 1056 // Main 16 byte comparison loop with 2 exits 1057 bind(NEXT_DWORD); { 1058 ld(tmp1, Address(a1, wordSize)); 1059 ld(tmp2, Address(a2, wordSize)); 1060 sub(cnt1, cnt1, 2 * wordSize / elem_size); 1061 blez(cnt1, TAIL); 1062 bne(tmp3, tmp4, DONE); 1063 ld(tmp3, Address(a1, 2 * wordSize)); 1064 ld(tmp4, Address(a2, 2 * wordSize)); 1065 add(a1, a1, 2 * wordSize); 1066 add(a2, a2, 2 * wordSize); 1067 ble(cnt1, elem_per_word, TAIL2); 1068 } beq(tmp1, tmp2, NEXT_DWORD); 1069 j(DONE); 1070 1071 bind(TAIL); 1072 xorr(tmp4, tmp3, tmp4); 1073 xorr(tmp2, tmp1, tmp2); 1074 sll(tmp2, tmp2, tmp5); 1075 orr(tmp5, tmp4, tmp2); 1076 j(IS_TMP5_ZR); 1077 1078 bind(TAIL2); 1079 bne(tmp1, tmp2, DONE); 1080 1081 bind(SHORT); 1082 xorr(tmp4, tmp3, tmp4); 1083 sll(tmp5, tmp4, tmp5); 1084 1085 bind(IS_TMP5_ZR); 1086 bnez(tmp5, DONE); 1087 1088 bind(SAME); 1089 mv(result, true); 1090 // That's it. 1091 bind(DONE); 1092 1093 BLOCK_COMMENT("} array_equals"); 1094 } 1095 1096 // Compare Strings 1097 1098 // For Strings we're passed the address of the first characters in a1 1099 // and a2 and the length in cnt1. 1100 // elem_size is the element size in bytes: either 1 or 2. 1101 // There are two implementations. For arrays >= 8 bytes, all 1102 // comparisons (including the final one, which may overlap) are 1103 // performed 8 bytes at a time. For strings < 8 bytes, we compare a 1104 // halfword, then a short, and then a byte. 1105 1106 void C2_MacroAssembler::string_equals(Register a1, Register a2, 1107 Register result, Register cnt1, int elem_size) 1108 { 1109 Label SAME, DONE, SHORT, NEXT_WORD; 1110 Register tmp1 = t0; 1111 Register tmp2 = t1; 1112 1113 assert(elem_size == 1 || elem_size == 2, "must be 2 or 1 byte"); 1114 assert_different_registers(a1, a2, result, cnt1, t0, t1); 1115 1116 BLOCK_COMMENT("string_equals {"); 1117 1118 mv(result, false); 1119 1120 // Check for short strings, i.e. smaller than wordSize. 1121 sub(cnt1, cnt1, wordSize); 1122 bltz(cnt1, SHORT); 1123 1124 // Main 8 byte comparison loop. 1125 bind(NEXT_WORD); { 1126 ld(tmp1, Address(a1, 0)); 1127 add(a1, a1, wordSize); 1128 ld(tmp2, Address(a2, 0)); 1129 add(a2, a2, wordSize); 1130 sub(cnt1, cnt1, wordSize); 1131 bne(tmp1, tmp2, DONE); 1132 } bgtz(cnt1, NEXT_WORD); 1133 1134 // Last longword. In the case where length == 4 we compare the 1135 // same longword twice, but that's still faster than another 1136 // conditional branch. 1137 // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when 1138 // length == 4. 1139 add(tmp1, a1, cnt1); 1140 ld(tmp1, Address(tmp1, 0)); 1141 add(tmp2, a2, cnt1); 1142 ld(tmp2, Address(tmp2, 0)); 1143 bne(tmp1, tmp2, DONE); 1144 j(SAME); 1145 1146 bind(SHORT); 1147 Label TAIL03, TAIL01; 1148 1149 // 0-7 bytes left. 1150 andi(t0, cnt1, 4); 1151 beqz(t0, TAIL03); 1152 { 1153 lwu(tmp1, Address(a1, 0)); 1154 add(a1, a1, 4); 1155 lwu(tmp2, Address(a2, 0)); 1156 add(a2, a2, 4); 1157 bne(tmp1, tmp2, DONE); 1158 } 1159 1160 bind(TAIL03); 1161 // 0-3 bytes left. 1162 andi(t0, cnt1, 2); 1163 beqz(t0, TAIL01); 1164 { 1165 lhu(tmp1, Address(a1, 0)); 1166 add(a1, a1, 2); 1167 lhu(tmp2, Address(a2, 0)); 1168 add(a2, a2, 2); 1169 bne(tmp1, tmp2, DONE); 1170 } 1171 1172 bind(TAIL01); 1173 if (elem_size == 1) { // Only needed when comparing 1-byte elements 1174 // 0-1 bytes left. 1175 andi(t0, cnt1, 1); 1176 beqz(t0, SAME); 1177 { 1178 lbu(tmp1, a1, 0); 1179 lbu(tmp2, a2, 0); 1180 bne(tmp1, tmp2, DONE); 1181 } 1182 } 1183 1184 // Arrays are equal. 1185 bind(SAME); 1186 mv(result, true); 1187 1188 // That's it. 1189 bind(DONE); 1190 BLOCK_COMMENT("} string_equals"); 1191 } 1192 1193 typedef void (Assembler::*conditional_branch_insn)(Register op1, Register op2, Label& label, bool is_far); 1194 typedef void (MacroAssembler::*float_conditional_branch_insn)(FloatRegister op1, FloatRegister op2, Label& label, 1195 bool is_far, bool is_unordered); 1196 1197 static conditional_branch_insn conditional_branches[] = 1198 { 1199 /* SHORT branches */ 1200 (conditional_branch_insn)&Assembler::beq, 1201 (conditional_branch_insn)&Assembler::bgt, 1202 NULL, // BoolTest::overflow 1203 (conditional_branch_insn)&Assembler::blt, 1204 (conditional_branch_insn)&Assembler::bne, 1205 (conditional_branch_insn)&Assembler::ble, 1206 NULL, // BoolTest::no_overflow 1207 (conditional_branch_insn)&Assembler::bge, 1208 1209 /* UNSIGNED branches */ 1210 (conditional_branch_insn)&Assembler::beq, 1211 (conditional_branch_insn)&Assembler::bgtu, 1212 NULL, 1213 (conditional_branch_insn)&Assembler::bltu, 1214 (conditional_branch_insn)&Assembler::bne, 1215 (conditional_branch_insn)&Assembler::bleu, 1216 NULL, 1217 (conditional_branch_insn)&Assembler::bgeu 1218 }; 1219 1220 static float_conditional_branch_insn float_conditional_branches[] = 1221 { 1222 /* FLOAT SHORT branches */ 1223 (float_conditional_branch_insn)&MacroAssembler::float_beq, 1224 (float_conditional_branch_insn)&MacroAssembler::float_bgt, 1225 NULL, // BoolTest::overflow 1226 (float_conditional_branch_insn)&MacroAssembler::float_blt, 1227 (float_conditional_branch_insn)&MacroAssembler::float_bne, 1228 (float_conditional_branch_insn)&MacroAssembler::float_ble, 1229 NULL, // BoolTest::no_overflow 1230 (float_conditional_branch_insn)&MacroAssembler::float_bge, 1231 1232 /* DOUBLE SHORT branches */ 1233 (float_conditional_branch_insn)&MacroAssembler::double_beq, 1234 (float_conditional_branch_insn)&MacroAssembler::double_bgt, 1235 NULL, 1236 (float_conditional_branch_insn)&MacroAssembler::double_blt, 1237 (float_conditional_branch_insn)&MacroAssembler::double_bne, 1238 (float_conditional_branch_insn)&MacroAssembler::double_ble, 1239 NULL, 1240 (float_conditional_branch_insn)&MacroAssembler::double_bge 1241 }; 1242 1243 void C2_MacroAssembler::cmp_branch(int cmpFlag, Register op1, Register op2, Label& label, bool is_far) { 1244 assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(conditional_branches) / sizeof(conditional_branches[0])), 1245 "invalid conditional branch index"); 1246 (this->*conditional_branches[cmpFlag])(op1, op2, label, is_far); 1247 } 1248 1249 // This is a function should only be used by C2. Flip the unordered when unordered-greater, C2 would use 1250 // unordered-lesser instead of unordered-greater. Finally, commute the result bits at function do_one_bytecode(). 1251 void C2_MacroAssembler::float_cmp_branch(int cmpFlag, FloatRegister op1, FloatRegister op2, Label& label, bool is_far) { 1252 assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(float_conditional_branches) / sizeof(float_conditional_branches[0])), 1253 "invalid float conditional branch index"); 1254 int booltest_flag = cmpFlag & ~(C2_MacroAssembler::double_branch_mask); 1255 (this->*float_conditional_branches[cmpFlag])(op1, op2, label, is_far, 1256 (booltest_flag == (BoolTest::ge) || booltest_flag == (BoolTest::gt)) ? false : true); 1257 } 1258 1259 void C2_MacroAssembler::enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { 1260 switch (cmpFlag) { 1261 case BoolTest::eq: 1262 case BoolTest::le: 1263 beqz(op1, L, is_far); 1264 break; 1265 case BoolTest::ne: 1266 case BoolTest::gt: 1267 bnez(op1, L, is_far); 1268 break; 1269 default: 1270 ShouldNotReachHere(); 1271 } 1272 } 1273 1274 void C2_MacroAssembler::enc_cmpEqNe_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { 1275 switch (cmpFlag) { 1276 case BoolTest::eq: 1277 beqz(op1, L, is_far); 1278 break; 1279 case BoolTest::ne: 1280 bnez(op1, L, is_far); 1281 break; 1282 default: 1283 ShouldNotReachHere(); 1284 } 1285 } 1286 1287 void C2_MacroAssembler::enc_cmove(int cmpFlag, Register op1, Register op2, Register dst, Register src) { 1288 Label L; 1289 cmp_branch(cmpFlag ^ (1 << neg_cond_bits), op1, op2, L); 1290 mv(dst, src); 1291 bind(L); 1292 } 1293 1294 // Set dst to NaN if any NaN input. 1295 void C2_MacroAssembler::minmax_FD(FloatRegister dst, FloatRegister src1, FloatRegister src2, 1296 bool is_double, bool is_min) { 1297 assert_different_registers(dst, src1, src2); 1298 1299 Label Done; 1300 fsflags(zr); 1301 if (is_double) { 1302 is_min ? fmin_d(dst, src1, src2) 1303 : fmax_d(dst, src1, src2); 1304 // Checking NaNs 1305 flt_d(zr, src1, src2); 1306 } else { 1307 is_min ? fmin_s(dst, src1, src2) 1308 : fmax_s(dst, src1, src2); 1309 // Checking NaNs 1310 flt_s(zr, src1, src2); 1311 } 1312 1313 frflags(t0); 1314 beqz(t0, Done); 1315 1316 // In case of NaNs 1317 is_double ? fadd_d(dst, src1, src2) 1318 : fadd_s(dst, src1, src2); 1319 1320 bind(Done); 1321 } 1322 1323 void C2_MacroAssembler::element_compare(Register a1, Register a2, Register result, Register cnt, Register tmp1, Register tmp2, 1324 VectorRegister vr1, VectorRegister vr2, VectorRegister vrs, bool islatin, Label &DONE) { 1325 Label loop; 1326 Assembler::SEW sew = islatin ? Assembler::e8 : Assembler::e16; 1327 1328 bind(loop); 1329 vsetvli(tmp1, cnt, sew, Assembler::m2); 1330 vlex_v(vr1, a1, sew); 1331 vlex_v(vr2, a2, sew); 1332 vmsne_vv(vrs, vr1, vr2); 1333 vfirst_m(tmp2, vrs); 1334 bgez(tmp2, DONE); 1335 sub(cnt, cnt, tmp1); 1336 if (!islatin) { 1337 slli(tmp1, tmp1, 1); // get byte counts 1338 } 1339 add(a1, a1, tmp1); 1340 add(a2, a2, tmp1); 1341 bnez(cnt, loop); 1342 1343 mv(result, true); 1344 } 1345 1346 void C2_MacroAssembler::string_equals_v(Register a1, Register a2, Register result, Register cnt, int elem_size) { 1347 Label DONE; 1348 Register tmp1 = t0; 1349 Register tmp2 = t1; 1350 1351 BLOCK_COMMENT("string_equals_v {"); 1352 1353 mv(result, false); 1354 1355 if (elem_size == 2) { 1356 srli(cnt, cnt, 1); 1357 } 1358 1359 element_compare(a1, a2, result, cnt, tmp1, tmp2, v0, v2, v0, elem_size == 1, DONE); 1360 1361 bind(DONE); 1362 BLOCK_COMMENT("} string_equals_v"); 1363 } 1364 1365 // used by C2 ClearArray patterns. 1366 // base: Address of a buffer to be zeroed 1367 // cnt: Count in HeapWords 1368 // 1369 // base, cnt, v0, v1 and t0 are clobbered. 1370 void C2_MacroAssembler::clear_array_v(Register base, Register cnt) { 1371 Label loop; 1372 1373 // making zero words 1374 vsetvli(t0, cnt, Assembler::e64, Assembler::m4); 1375 vxor_vv(v0, v0, v0); 1376 1377 bind(loop); 1378 vsetvli(t0, cnt, Assembler::e64, Assembler::m4); 1379 vse64_v(v0, base); 1380 sub(cnt, cnt, t0); 1381 shadd(base, t0, base, t0, 3); 1382 bnez(cnt, loop); 1383 } 1384 1385 void C2_MacroAssembler::arrays_equals_v(Register a1, Register a2, Register result, 1386 Register cnt1, int elem_size) { 1387 Label DONE; 1388 Register tmp1 = t0; 1389 Register tmp2 = t1; 1390 Register cnt2 = tmp2; 1391 int length_offset = arrayOopDesc::length_offset_in_bytes(); 1392 int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE); 1393 1394 BLOCK_COMMENT("arrays_equals_v {"); 1395 1396 // if (a1 == a2), return true 1397 mv(result, true); 1398 beq(a1, a2, DONE); 1399 1400 mv(result, false); 1401 // if a1 == null or a2 == null, return false 1402 beqz(a1, DONE); 1403 beqz(a2, DONE); 1404 // if (a1.length != a2.length), return false 1405 lwu(cnt1, Address(a1, length_offset)); 1406 lwu(cnt2, Address(a2, length_offset)); 1407 bne(cnt1, cnt2, DONE); 1408 1409 la(a1, Address(a1, base_offset)); 1410 la(a2, Address(a2, base_offset)); 1411 1412 element_compare(a1, a2, result, cnt1, tmp1, tmp2, v0, v2, v0, elem_size == 1, DONE); 1413 1414 bind(DONE); 1415 1416 BLOCK_COMMENT("} arrays_equals_v"); 1417 } 1418 1419 void C2_MacroAssembler::string_compare_v(Register str1, Register str2, Register cnt1, Register cnt2, 1420 Register result, Register tmp1, Register tmp2, int encForm) { 1421 Label DIFFERENCE, DONE, L, loop; 1422 bool encLL = encForm == StrIntrinsicNode::LL; 1423 bool encLU = encForm == StrIntrinsicNode::LU; 1424 bool encUL = encForm == StrIntrinsicNode::UL; 1425 1426 bool str1_isL = encLL || encLU; 1427 bool str2_isL = encLL || encUL; 1428 1429 int minCharsInWord = encLL ? wordSize : wordSize / 2; 1430 1431 BLOCK_COMMENT("string_compare {"); 1432 1433 // for Lating strings, 1 byte for 1 character 1434 // for UTF16 strings, 2 bytes for 1 character 1435 if (!str1_isL) 1436 sraiw(cnt1, cnt1, 1); 1437 if (!str2_isL) 1438 sraiw(cnt2, cnt2, 1); 1439 1440 // if str1 == str2, return the difference 1441 // save the minimum of the string lengths in cnt2. 1442 sub(result, cnt1, cnt2); 1443 bgt(cnt1, cnt2, L); 1444 mv(cnt2, cnt1); 1445 bind(L); 1446 1447 if (str1_isL == str2_isL) { // LL or UU 1448 element_compare(str1, str2, zr, cnt2, tmp1, tmp2, v2, v4, v1, encLL, DIFFERENCE); 1449 j(DONE); 1450 } else { // LU or UL 1451 Register strL = encLU ? str1 : str2; 1452 Register strU = encLU ? str2 : str1; 1453 VectorRegister vstr1 = encLU ? v4 : v0; 1454 VectorRegister vstr2 = encLU ? v0 : v4; 1455 1456 bind(loop); 1457 vsetvli(tmp1, cnt2, Assembler::e8, Assembler::m2); 1458 vle8_v(vstr1, strL); 1459 vsetvli(tmp1, cnt2, Assembler::e16, Assembler::m4); 1460 vzext_vf2(vstr2, vstr1); 1461 vle16_v(vstr1, strU); 1462 vmsne_vv(v0, vstr2, vstr1); 1463 vfirst_m(tmp2, v0); 1464 bgez(tmp2, DIFFERENCE); 1465 sub(cnt2, cnt2, tmp1); 1466 add(strL, strL, tmp1); 1467 shadd(strU, tmp1, strU, tmp1, 1); 1468 bnez(cnt2, loop); 1469 j(DONE); 1470 } 1471 bind(DIFFERENCE); 1472 slli(tmp1, tmp2, 1); 1473 add(str1, str1, str1_isL ? tmp2 : tmp1); 1474 add(str2, str2, str2_isL ? tmp2 : tmp1); 1475 str1_isL ? lbu(tmp1, Address(str1, 0)) : lhu(tmp1, Address(str1, 0)); 1476 str2_isL ? lbu(tmp2, Address(str2, 0)) : lhu(tmp2, Address(str2, 0)); 1477 sub(result, tmp1, tmp2); 1478 1479 bind(DONE); 1480 } 1481 1482 void C2_MacroAssembler::byte_array_inflate_v(Register src, Register dst, Register len, Register tmp) { 1483 Label loop; 1484 assert_different_registers(src, dst, len, tmp, t0); 1485 1486 BLOCK_COMMENT("byte_array_inflate_v {"); 1487 bind(loop); 1488 vsetvli(tmp, len, Assembler::e8, Assembler::m2); 1489 vle8_v(v2, src); 1490 vsetvli(t0, len, Assembler::e16, Assembler::m4); 1491 vzext_vf2(v0, v2); 1492 vse16_v(v0, dst); 1493 sub(len, len, tmp); 1494 add(src, src, tmp); 1495 shadd(dst, tmp, dst, tmp, 1); 1496 bnez(len, loop); 1497 BLOCK_COMMENT("} byte_array_inflate_v"); 1498 } 1499 1500 // Compress char[] array to byte[]. 1501 // result: the array length if every element in array can be encoded; 0, otherwise. 1502 void C2_MacroAssembler::char_array_compress_v(Register src, Register dst, Register len, Register result, Register tmp) { 1503 Label done; 1504 encode_iso_array_v(src, dst, len, result, tmp); 1505 beqz(len, done); 1506 mv(result, zr); 1507 bind(done); 1508 } 1509 1510 // result: the number of elements had been encoded. 1511 void C2_MacroAssembler::encode_iso_array_v(Register src, Register dst, Register len, Register result, Register tmp) { 1512 Label loop, DIFFERENCE, DONE; 1513 1514 BLOCK_COMMENT("encode_iso_array_v {"); 1515 mv(result, 0); 1516 1517 bind(loop); 1518 mv(tmp, 0xff); 1519 vsetvli(t0, len, Assembler::e16, Assembler::m2); 1520 vle16_v(v2, src); 1521 // if element > 0xff, stop 1522 vmsgtu_vx(v1, v2, tmp); 1523 vfirst_m(tmp, v1); 1524 vmsbf_m(v0, v1); 1525 // compress char to byte 1526 vsetvli(t0, len, Assembler::e8); 1527 vncvt_x_x_w(v1, v2, Assembler::v0_t); 1528 vse8_v(v1, dst, Assembler::v0_t); 1529 1530 bgez(tmp, DIFFERENCE); 1531 add(result, result, t0); 1532 add(dst, dst, t0); 1533 sub(len, len, t0); 1534 shadd(src, t0, src, t0, 1); 1535 bnez(len, loop); 1536 j(DONE); 1537 1538 bind(DIFFERENCE); 1539 add(result, result, tmp); 1540 1541 bind(DONE); 1542 BLOCK_COMMENT("} encode_iso_array_v"); 1543 } 1544 1545 void C2_MacroAssembler::has_negatives_v(Register ary, Register len, 1546 Register result, Register tmp) { 1547 Label LOOP, DONE; 1548 1549 BLOCK_COMMENT("has_negatives_v {"); 1550 assert_different_registers(ary, len, result, tmp); 1551 1552 mv(result, true); 1553 1554 bind(LOOP); 1555 vsetvli(t0, len, Assembler::e8, Assembler::m4); 1556 vle8_v(v0, ary); 1557 vmslt_vx(v0, v0, zr); 1558 vfirst_m(tmp, v0); 1559 bgez(tmp, DONE); 1560 1561 sub(len, len, t0); 1562 add(ary, ary, t0); 1563 bnez(len, LOOP); 1564 mv(result, false); 1565 1566 bind(DONE); 1567 BLOCK_COMMENT("} has_negatives_v"); 1568 } 1569 1570 void C2_MacroAssembler::string_indexof_char_v(Register str1, Register cnt1, 1571 Register ch, Register result, 1572 Register tmp1, Register tmp2, 1573 bool isL) { 1574 mv(result, zr); 1575 1576 Label loop, MATCH, DONE; 1577 Assembler::SEW sew = isL ? Assembler::e8 : Assembler::e16; 1578 bind(loop); 1579 vsetvli(tmp1, cnt1, sew, Assembler::m4); 1580 vlex_v(v0, str1, sew); 1581 vmseq_vx(v0, v0, ch); 1582 vfirst_m(tmp2, v0); 1583 bgez(tmp2, MATCH); // if equal, return index 1584 1585 add(result, result, tmp1); 1586 sub(cnt1, cnt1, tmp1); 1587 if (!isL) slli(tmp1, tmp1, 1); 1588 add(str1, str1, tmp1); 1589 bnez(cnt1, loop); 1590 1591 mv(result, -1); 1592 j(DONE); 1593 1594 bind(MATCH); 1595 add(result, result, tmp2); 1596 1597 bind(DONE); 1598 } 1599 1600 // Set dst to NaN if any NaN input. 1601 void C2_MacroAssembler::minmax_FD_v(VectorRegister dst, VectorRegister src1, VectorRegister src2, 1602 bool is_double, bool is_min) { 1603 assert_different_registers(dst, src1, src2); 1604 1605 vsetvli(t0, x0, is_double ? Assembler::e64 : Assembler::e32); 1606 1607 is_min ? vfmin_vv(dst, src1, src2) 1608 : vfmax_vv(dst, src1, src2); 1609 1610 vmfne_vv(v0, src1, src1); 1611 vfadd_vv(dst, src1, src1, Assembler::v0_t); 1612 vmfne_vv(v0, src2, src2); 1613 vfadd_vv(dst, src2, src2, Assembler::v0_t); 1614 } 1615 1616 // Set dst to NaN if any NaN input. 1617 void C2_MacroAssembler::reduce_minmax_FD_v(FloatRegister dst, 1618 FloatRegister src1, VectorRegister src2, 1619 VectorRegister tmp1, VectorRegister tmp2, 1620 bool is_double, bool is_min) { 1621 assert_different_registers(src2, tmp1, tmp2); 1622 1623 Label L_done, L_NaN; 1624 vsetvli(t0, x0, is_double ? Assembler::e64 : Assembler::e32); 1625 vfmv_s_f(tmp2, src1); 1626 1627 is_min ? vfredmin_vs(tmp1, src2, tmp2) 1628 : vfredmax_vs(tmp1, src2, tmp2); 1629 1630 fsflags(zr); 1631 // Checking NaNs 1632 vmflt_vf(tmp2, src2, src1); 1633 frflags(t0); 1634 bnez(t0, L_NaN); 1635 j(L_done); 1636 1637 bind(L_NaN); 1638 vfmv_s_f(tmp2, src1); 1639 vfredsum_vs(tmp1, src2, tmp2); 1640 1641 bind(L_done); 1642 vfmv_f_s(dst, tmp1); 1643 }