1 /* 2 * Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #include "precompiled.hpp" 26 #include "asm/assembler.hpp" 27 #include "asm/assembler.inline.hpp" 28 #include "opto/c2_MacroAssembler.hpp" 29 #include "opto/intrinsicnode.hpp" 30 #include "opto/subnode.hpp" 31 #include "runtime/stubRoutines.hpp" 32 33 #ifdef PRODUCT 34 #define BLOCK_COMMENT(str) /* nothing */ 35 #define STOP(error) stop(error) 36 #else 37 #define BLOCK_COMMENT(str) block_comment(str) 38 #define STOP(error) block_comment(error); stop(error) 39 #endif 40 41 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":") 42 43 typedef void (MacroAssembler::* chr_insn)(Register Rt, const Address &adr); 44 45 // Search for str1 in str2 and return index or -1 46 // Clobbers: rscratch1, rscratch2, rflags. May also clobber v0-v1, when icnt1==-1. 47 void C2_MacroAssembler::string_indexof(Register str2, Register str1, 48 Register cnt2, Register cnt1, 49 Register tmp1, Register tmp2, 50 Register tmp3, Register tmp4, 51 Register tmp5, Register tmp6, 52 int icnt1, Register result, int ae) { 53 // NOTE: tmp5, tmp6 can be zr depending on specific method version 54 Label LINEARSEARCH, LINEARSTUB, LINEAR_MEDIUM, DONE, NOMATCH, MATCH; 55 56 Register ch1 = rscratch1; 57 Register ch2 = rscratch2; 58 Register cnt1tmp = tmp1; 59 Register cnt2tmp = tmp2; 60 Register cnt1_neg = cnt1; 61 Register cnt2_neg = cnt2; 62 Register result_tmp = tmp4; 63 64 bool isL = ae == StrIntrinsicNode::LL; 65 66 bool str1_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; 67 bool str2_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; 68 int str1_chr_shift = str1_isL ? 0:1; 69 int str2_chr_shift = str2_isL ? 0:1; 70 int str1_chr_size = str1_isL ? 1:2; 71 int str2_chr_size = str2_isL ? 1:2; 72 chr_insn str1_load_1chr = str1_isL ? (chr_insn)&MacroAssembler::ldrb : 73 (chr_insn)&MacroAssembler::ldrh; 74 chr_insn str2_load_1chr = str2_isL ? (chr_insn)&MacroAssembler::ldrb : 75 (chr_insn)&MacroAssembler::ldrh; 76 chr_insn load_2chr = isL ? (chr_insn)&MacroAssembler::ldrh : (chr_insn)&MacroAssembler::ldrw; 77 chr_insn load_4chr = isL ? (chr_insn)&MacroAssembler::ldrw : (chr_insn)&MacroAssembler::ldr; 78 79 // Note, inline_string_indexOf() generates checks: 80 // if (substr.count > string.count) return -1; 81 // if (substr.count == 0) return 0; 82 83 // We have two strings, a source string in str2, cnt2 and a pattern string 84 // in str1, cnt1. Find the 1st occurence of pattern in source or return -1. 85 86 // For larger pattern and source we use a simplified Boyer Moore algorithm. 87 // With a small pattern and source we use linear scan. 88 89 if (icnt1 == -1) { 90 sub(result_tmp, cnt2, cnt1); 91 cmp(cnt1, (u1)8); // Use Linear Scan if cnt1 < 8 || cnt1 >= 256 92 br(LT, LINEARSEARCH); 93 dup(v0, T16B, cnt1); // done in separate FPU pipeline. Almost no penalty 94 subs(zr, cnt1, 256); 95 lsr(tmp1, cnt2, 2); 96 ccmp(cnt1, tmp1, 0b0000, LT); // Source must be 4 * pattern for BM 97 br(GE, LINEARSTUB); 98 } 99 100 // The Boyer Moore alogorithm is based on the description here:- 101 // 102 // http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm 103 // 104 // This describes and algorithm with 2 shift rules. The 'Bad Character' rule 105 // and the 'Good Suffix' rule. 106 // 107 // These rules are essentially heuristics for how far we can shift the 108 // pattern along the search string. 109 // 110 // The implementation here uses the 'Bad Character' rule only because of the 111 // complexity of initialisation for the 'Good Suffix' rule. 112 // 113 // This is also known as the Boyer-Moore-Horspool algorithm:- 114 // 115 // http://en.wikipedia.org/wiki/Boyer-Moore-Horspool_algorithm 116 // 117 // This particular implementation has few java-specific optimizations. 118 // 119 // #define ASIZE 256 120 // 121 // int bm(unsigned char *x, int m, unsigned char *y, int n) { 122 // int i, j; 123 // unsigned c; 124 // unsigned char bc[ASIZE]; 125 // 126 // /* Preprocessing */ 127 // for (i = 0; i < ASIZE; ++i) 128 // bc[i] = m; 129 // for (i = 0; i < m - 1; ) { 130 // c = x[i]; 131 // ++i; 132 // // c < 256 for Latin1 string, so, no need for branch 133 // #ifdef PATTERN_STRING_IS_LATIN1 134 // bc[c] = m - i; 135 // #else 136 // if (c < ASIZE) bc[c] = m - i; 137 // #endif 138 // } 139 // 140 // /* Searching */ 141 // j = 0; 142 // while (j <= n - m) { 143 // c = y[i+j]; 144 // if (x[m-1] == c) 145 // for (i = m - 2; i >= 0 && x[i] == y[i + j]; --i); 146 // if (i < 0) return j; 147 // // c < 256 for Latin1 string, so, no need for branch 148 // #ifdef SOURCE_STRING_IS_LATIN1 149 // // LL case: (c< 256) always true. Remove branch 150 // j += bc[y[j+m-1]]; 151 // #endif 152 // #ifndef PATTERN_STRING_IS_UTF 153 // // UU case: need if (c<ASIZE) check. Skip 1 character if not. 154 // if (c < ASIZE) 155 // j += bc[y[j+m-1]]; 156 // else 157 // j += 1 158 // #endif 159 // #ifdef PATTERN_IS_LATIN1_AND_SOURCE_IS_UTF 160 // // UL case: need if (c<ASIZE) check. Skip <pattern length> if not. 161 // if (c < ASIZE) 162 // j += bc[y[j+m-1]]; 163 // else 164 // j += m 165 // #endif 166 // } 167 // } 168 169 if (icnt1 == -1) { 170 Label BCLOOP, BCSKIP, BMLOOPSTR2, BMLOOPSTR1, BMSKIP, BMADV, BMMATCH, 171 BMLOOPSTR1_LASTCMP, BMLOOPSTR1_CMP, BMLOOPSTR1_AFTER_LOAD, BM_INIT_LOOP; 172 Register cnt1end = tmp2; 173 Register str2end = cnt2; 174 Register skipch = tmp2; 175 176 // str1 length is >=8, so, we can read at least 1 register for cases when 177 // UTF->Latin1 conversion is not needed(8 LL or 4UU) and half register for 178 // UL case. We'll re-read last character in inner pre-loop code to have 179 // single outer pre-loop load 180 const int firstStep = isL ? 7 : 3; 181 182 const int ASIZE = 256; 183 const int STORED_BYTES = 32; // amount of bytes stored per instruction 184 sub(sp, sp, ASIZE); 185 mov(tmp5, ASIZE/STORED_BYTES); // loop iterations 186 mov(ch1, sp); 187 BIND(BM_INIT_LOOP); 188 stpq(v0, v0, Address(post(ch1, STORED_BYTES))); 189 subs(tmp5, tmp5, 1); 190 br(GT, BM_INIT_LOOP); 191 192 sub(cnt1tmp, cnt1, 1); 193 mov(tmp5, str2); 194 add(str2end, str2, result_tmp, LSL, str2_chr_shift); 195 sub(ch2, cnt1, 1); 196 mov(tmp3, str1); 197 BIND(BCLOOP); 198 (this->*str1_load_1chr)(ch1, Address(post(tmp3, str1_chr_size))); 199 if (!str1_isL) { 200 subs(zr, ch1, ASIZE); 201 br(HS, BCSKIP); 202 } 203 strb(ch2, Address(sp, ch1)); 204 BIND(BCSKIP); 205 subs(ch2, ch2, 1); 206 br(GT, BCLOOP); 207 208 add(tmp6, str1, cnt1, LSL, str1_chr_shift); // address after str1 209 if (str1_isL == str2_isL) { 210 // load last 8 bytes (8LL/4UU symbols) 211 ldr(tmp6, Address(tmp6, -wordSize)); 212 } else { 213 ldrw(tmp6, Address(tmp6, -wordSize/2)); // load last 4 bytes(4 symbols) 214 // convert Latin1 to UTF. We'll have to wait until load completed, but 215 // it's still faster than per-character loads+checks 216 lsr(tmp3, tmp6, BitsPerByte * (wordSize/2 - str1_chr_size)); // str1[N-1] 217 ubfx(ch1, tmp6, 8, 8); // str1[N-2] 218 ubfx(ch2, tmp6, 16, 8); // str1[N-3] 219 andr(tmp6, tmp6, 0xFF); // str1[N-4] 220 orr(ch2, ch1, ch2, LSL, 16); 221 orr(tmp6, tmp6, tmp3, LSL, 48); 222 orr(tmp6, tmp6, ch2, LSL, 16); 223 } 224 BIND(BMLOOPSTR2); 225 (this->*str2_load_1chr)(skipch, Address(str2, cnt1tmp, Address::lsl(str2_chr_shift))); 226 sub(cnt1tmp, cnt1tmp, firstStep); // cnt1tmp is positive here, because cnt1 >= 8 227 if (str1_isL == str2_isL) { 228 // re-init tmp3. It's for free because it's executed in parallel with 229 // load above. Alternative is to initialize it before loop, but it'll 230 // affect performance on in-order systems with 2 or more ld/st pipelines 231 lsr(tmp3, tmp6, BitsPerByte * (wordSize - str1_chr_size)); 232 } 233 if (!isL) { // UU/UL case 234 lsl(ch2, cnt1tmp, 1); // offset in bytes 235 } 236 cmp(tmp3, skipch); 237 br(NE, BMSKIP); 238 ldr(ch2, Address(str2, isL ? cnt1tmp : ch2)); 239 mov(ch1, tmp6); 240 if (isL) { 241 b(BMLOOPSTR1_AFTER_LOAD); 242 } else { 243 sub(cnt1tmp, cnt1tmp, 1); // no need to branch for UU/UL case. cnt1 >= 8 244 b(BMLOOPSTR1_CMP); 245 } 246 BIND(BMLOOPSTR1); 247 (this->*str1_load_1chr)(ch1, Address(str1, cnt1tmp, Address::lsl(str1_chr_shift))); 248 (this->*str2_load_1chr)(ch2, Address(str2, cnt1tmp, Address::lsl(str2_chr_shift))); 249 BIND(BMLOOPSTR1_AFTER_LOAD); 250 subs(cnt1tmp, cnt1tmp, 1); 251 br(LT, BMLOOPSTR1_LASTCMP); 252 BIND(BMLOOPSTR1_CMP); 253 cmp(ch1, ch2); 254 br(EQ, BMLOOPSTR1); 255 BIND(BMSKIP); 256 if (!isL) { 257 // if we've met UTF symbol while searching Latin1 pattern, then we can 258 // skip cnt1 symbols 259 if (str1_isL != str2_isL) { 260 mov(result_tmp, cnt1); 261 } else { 262 mov(result_tmp, 1); 263 } 264 subs(zr, skipch, ASIZE); 265 br(HS, BMADV); 266 } 267 ldrb(result_tmp, Address(sp, skipch)); // load skip distance 268 BIND(BMADV); 269 sub(cnt1tmp, cnt1, 1); 270 add(str2, str2, result_tmp, LSL, str2_chr_shift); 271 cmp(str2, str2end); 272 br(LE, BMLOOPSTR2); 273 add(sp, sp, ASIZE); 274 b(NOMATCH); 275 BIND(BMLOOPSTR1_LASTCMP); 276 cmp(ch1, ch2); 277 br(NE, BMSKIP); 278 BIND(BMMATCH); 279 sub(result, str2, tmp5); 280 if (!str2_isL) lsr(result, result, 1); 281 add(sp, sp, ASIZE); 282 b(DONE); 283 284 BIND(LINEARSTUB); 285 cmp(cnt1, (u1)16); // small patterns still should be handled by simple algorithm 286 br(LT, LINEAR_MEDIUM); 287 mov(result, zr); 288 RuntimeAddress stub = NULL; 289 if (isL) { 290 stub = RuntimeAddress(StubRoutines::aarch64::string_indexof_linear_ll()); 291 assert(stub.target() != NULL, "string_indexof_linear_ll stub has not been generated"); 292 } else if (str1_isL) { 293 stub = RuntimeAddress(StubRoutines::aarch64::string_indexof_linear_ul()); 294 assert(stub.target() != NULL, "string_indexof_linear_ul stub has not been generated"); 295 } else { 296 stub = RuntimeAddress(StubRoutines::aarch64::string_indexof_linear_uu()); 297 assert(stub.target() != NULL, "string_indexof_linear_uu stub has not been generated"); 298 } 299 trampoline_call(stub); 300 b(DONE); 301 } 302 303 BIND(LINEARSEARCH); 304 { 305 Label DO1, DO2, DO3; 306 307 Register str2tmp = tmp2; 308 Register first = tmp3; 309 310 if (icnt1 == -1) 311 { 312 Label DOSHORT, FIRST_LOOP, STR2_NEXT, STR1_LOOP, STR1_NEXT; 313 314 cmp(cnt1, u1(str1_isL == str2_isL ? 4 : 2)); 315 br(LT, DOSHORT); 316 BIND(LINEAR_MEDIUM); 317 (this->*str1_load_1chr)(first, Address(str1)); 318 lea(str1, Address(str1, cnt1, Address::lsl(str1_chr_shift))); 319 sub(cnt1_neg, zr, cnt1, LSL, str1_chr_shift); 320 lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift))); 321 sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift); 322 323 BIND(FIRST_LOOP); 324 (this->*str2_load_1chr)(ch2, Address(str2, cnt2_neg)); 325 cmp(first, ch2); 326 br(EQ, STR1_LOOP); 327 BIND(STR2_NEXT); 328 adds(cnt2_neg, cnt2_neg, str2_chr_size); 329 br(LE, FIRST_LOOP); 330 b(NOMATCH); 331 332 BIND(STR1_LOOP); 333 adds(cnt1tmp, cnt1_neg, str1_chr_size); 334 add(cnt2tmp, cnt2_neg, str2_chr_size); 335 br(GE, MATCH); 336 337 BIND(STR1_NEXT); 338 (this->*str1_load_1chr)(ch1, Address(str1, cnt1tmp)); 339 (this->*str2_load_1chr)(ch2, Address(str2, cnt2tmp)); 340 cmp(ch1, ch2); 341 br(NE, STR2_NEXT); 342 adds(cnt1tmp, cnt1tmp, str1_chr_size); 343 add(cnt2tmp, cnt2tmp, str2_chr_size); 344 br(LT, STR1_NEXT); 345 b(MATCH); 346 347 BIND(DOSHORT); 348 if (str1_isL == str2_isL) { 349 cmp(cnt1, (u1)2); 350 br(LT, DO1); 351 br(GT, DO3); 352 } 353 } 354 355 if (icnt1 == 4) { 356 Label CH1_LOOP; 357 358 (this->*load_4chr)(ch1, str1); 359 sub(result_tmp, cnt2, 4); 360 lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift))); 361 sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift); 362 363 BIND(CH1_LOOP); 364 (this->*load_4chr)(ch2, Address(str2, cnt2_neg)); 365 cmp(ch1, ch2); 366 br(EQ, MATCH); 367 adds(cnt2_neg, cnt2_neg, str2_chr_size); 368 br(LE, CH1_LOOP); 369 b(NOMATCH); 370 } 371 372 if ((icnt1 == -1 && str1_isL == str2_isL) || icnt1 == 2) { 373 Label CH1_LOOP; 374 375 BIND(DO2); 376 (this->*load_2chr)(ch1, str1); 377 if (icnt1 == 2) { 378 sub(result_tmp, cnt2, 2); 379 } 380 lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift))); 381 sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift); 382 BIND(CH1_LOOP); 383 (this->*load_2chr)(ch2, Address(str2, cnt2_neg)); 384 cmp(ch1, ch2); 385 br(EQ, MATCH); 386 adds(cnt2_neg, cnt2_neg, str2_chr_size); 387 br(LE, CH1_LOOP); 388 b(NOMATCH); 389 } 390 391 if ((icnt1 == -1 && str1_isL == str2_isL) || icnt1 == 3) { 392 Label FIRST_LOOP, STR2_NEXT, STR1_LOOP; 393 394 BIND(DO3); 395 (this->*load_2chr)(first, str1); 396 (this->*str1_load_1chr)(ch1, Address(str1, 2*str1_chr_size)); 397 if (icnt1 == 3) { 398 sub(result_tmp, cnt2, 3); 399 } 400 lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift))); 401 sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift); 402 BIND(FIRST_LOOP); 403 (this->*load_2chr)(ch2, Address(str2, cnt2_neg)); 404 cmpw(first, ch2); 405 br(EQ, STR1_LOOP); 406 BIND(STR2_NEXT); 407 adds(cnt2_neg, cnt2_neg, str2_chr_size); 408 br(LE, FIRST_LOOP); 409 b(NOMATCH); 410 411 BIND(STR1_LOOP); 412 add(cnt2tmp, cnt2_neg, 2*str2_chr_size); 413 (this->*str2_load_1chr)(ch2, Address(str2, cnt2tmp)); 414 cmp(ch1, ch2); 415 br(NE, STR2_NEXT); 416 b(MATCH); 417 } 418 419 if (icnt1 == -1 || icnt1 == 1) { 420 Label CH1_LOOP, HAS_ZERO, DO1_SHORT, DO1_LOOP; 421 422 BIND(DO1); 423 (this->*str1_load_1chr)(ch1, str1); 424 cmp(cnt2, (u1)8); 425 br(LT, DO1_SHORT); 426 427 sub(result_tmp, cnt2, 8/str2_chr_size); 428 sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift); 429 mov(tmp3, str2_isL ? 0x0101010101010101 : 0x0001000100010001); 430 lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift))); 431 432 if (str2_isL) { 433 orr(ch1, ch1, ch1, LSL, 8); 434 } 435 orr(ch1, ch1, ch1, LSL, 16); 436 orr(ch1, ch1, ch1, LSL, 32); 437 BIND(CH1_LOOP); 438 ldr(ch2, Address(str2, cnt2_neg)); 439 eor(ch2, ch1, ch2); 440 sub(tmp1, ch2, tmp3); 441 orr(tmp2, ch2, str2_isL ? 0x7f7f7f7f7f7f7f7f : 0x7fff7fff7fff7fff); 442 bics(tmp1, tmp1, tmp2); 443 br(NE, HAS_ZERO); 444 adds(cnt2_neg, cnt2_neg, 8); 445 br(LT, CH1_LOOP); 446 447 cmp(cnt2_neg, (u1)8); 448 mov(cnt2_neg, 0); 449 br(LT, CH1_LOOP); 450 b(NOMATCH); 451 452 BIND(HAS_ZERO); 453 rev(tmp1, tmp1); 454 clz(tmp1, tmp1); 455 add(cnt2_neg, cnt2_neg, tmp1, LSR, 3); 456 b(MATCH); 457 458 BIND(DO1_SHORT); 459 mov(result_tmp, cnt2); 460 lea(str2, Address(str2, cnt2, Address::lsl(str2_chr_shift))); 461 sub(cnt2_neg, zr, cnt2, LSL, str2_chr_shift); 462 BIND(DO1_LOOP); 463 (this->*str2_load_1chr)(ch2, Address(str2, cnt2_neg)); 464 cmpw(ch1, ch2); 465 br(EQ, MATCH); 466 adds(cnt2_neg, cnt2_neg, str2_chr_size); 467 br(LT, DO1_LOOP); 468 } 469 } 470 BIND(NOMATCH); 471 mov(result, -1); 472 b(DONE); 473 BIND(MATCH); 474 add(result, result_tmp, cnt2_neg, ASR, str2_chr_shift); 475 BIND(DONE); 476 } 477 478 typedef void (MacroAssembler::* chr_insn)(Register Rt, const Address &adr); 479 typedef void (MacroAssembler::* uxt_insn)(Register Rd, Register Rn); 480 481 void C2_MacroAssembler::string_indexof_char(Register str1, Register cnt1, 482 Register ch, Register result, 483 Register tmp1, Register tmp2, Register tmp3) 484 { 485 Label CH1_LOOP, HAS_ZERO, DO1_SHORT, DO1_LOOP, MATCH, NOMATCH, DONE; 486 Register cnt1_neg = cnt1; 487 Register ch1 = rscratch1; 488 Register result_tmp = rscratch2; 489 490 cbz(cnt1, NOMATCH); 491 492 cmp(cnt1, (u1)4); 493 br(LT, DO1_SHORT); 494 495 orr(ch, ch, ch, LSL, 16); 496 orr(ch, ch, ch, LSL, 32); 497 498 sub(cnt1, cnt1, 4); 499 mov(result_tmp, cnt1); 500 lea(str1, Address(str1, cnt1, Address::uxtw(1))); 501 sub(cnt1_neg, zr, cnt1, LSL, 1); 502 503 mov(tmp3, 0x0001000100010001); 504 505 BIND(CH1_LOOP); 506 ldr(ch1, Address(str1, cnt1_neg)); 507 eor(ch1, ch, ch1); 508 sub(tmp1, ch1, tmp3); 509 orr(tmp2, ch1, 0x7fff7fff7fff7fff); 510 bics(tmp1, tmp1, tmp2); 511 br(NE, HAS_ZERO); 512 adds(cnt1_neg, cnt1_neg, 8); 513 br(LT, CH1_LOOP); 514 515 cmp(cnt1_neg, (u1)8); 516 mov(cnt1_neg, 0); 517 br(LT, CH1_LOOP); 518 b(NOMATCH); 519 520 BIND(HAS_ZERO); 521 rev(tmp1, tmp1); 522 clz(tmp1, tmp1); 523 add(cnt1_neg, cnt1_neg, tmp1, LSR, 3); 524 b(MATCH); 525 526 BIND(DO1_SHORT); 527 mov(result_tmp, cnt1); 528 lea(str1, Address(str1, cnt1, Address::uxtw(1))); 529 sub(cnt1_neg, zr, cnt1, LSL, 1); 530 BIND(DO1_LOOP); 531 ldrh(ch1, Address(str1, cnt1_neg)); 532 cmpw(ch, ch1); 533 br(EQ, MATCH); 534 adds(cnt1_neg, cnt1_neg, 2); 535 br(LT, DO1_LOOP); 536 BIND(NOMATCH); 537 mov(result, -1); 538 b(DONE); 539 BIND(MATCH); 540 add(result, result_tmp, cnt1_neg, ASR, 1); 541 BIND(DONE); 542 } 543 544 void C2_MacroAssembler::stringL_indexof_char(Register str1, Register cnt1, 545 Register ch, Register result, 546 Register tmp1, Register tmp2, Register tmp3) 547 { 548 Label CH1_LOOP, HAS_ZERO, DO1_SHORT, DO1_LOOP, MATCH, NOMATCH, DONE; 549 Register cnt1_neg = cnt1; 550 Register ch1 = rscratch1; 551 Register result_tmp = rscratch2; 552 553 cbz(cnt1, NOMATCH); 554 555 cmp(cnt1, (u1)8); 556 br(LT, DO1_SHORT); 557 558 orr(ch, ch, ch, LSL, 8); 559 orr(ch, ch, ch, LSL, 16); 560 orr(ch, ch, ch, LSL, 32); 561 562 sub(cnt1, cnt1, 8); 563 mov(result_tmp, cnt1); 564 lea(str1, Address(str1, cnt1)); 565 sub(cnt1_neg, zr, cnt1); 566 567 mov(tmp3, 0x0101010101010101); 568 569 BIND(CH1_LOOP); 570 ldr(ch1, Address(str1, cnt1_neg)); 571 eor(ch1, ch, ch1); 572 sub(tmp1, ch1, tmp3); 573 orr(tmp2, ch1, 0x7f7f7f7f7f7f7f7f); 574 bics(tmp1, tmp1, tmp2); 575 br(NE, HAS_ZERO); 576 adds(cnt1_neg, cnt1_neg, 8); 577 br(LT, CH1_LOOP); 578 579 cmp(cnt1_neg, (u1)8); 580 mov(cnt1_neg, 0); 581 br(LT, CH1_LOOP); 582 b(NOMATCH); 583 584 BIND(HAS_ZERO); 585 rev(tmp1, tmp1); 586 clz(tmp1, tmp1); 587 add(cnt1_neg, cnt1_neg, tmp1, LSR, 3); 588 b(MATCH); 589 590 BIND(DO1_SHORT); 591 mov(result_tmp, cnt1); 592 lea(str1, Address(str1, cnt1)); 593 sub(cnt1_neg, zr, cnt1); 594 BIND(DO1_LOOP); 595 ldrb(ch1, Address(str1, cnt1_neg)); 596 cmp(ch, ch1); 597 br(EQ, MATCH); 598 adds(cnt1_neg, cnt1_neg, 1); 599 br(LT, DO1_LOOP); 600 BIND(NOMATCH); 601 mov(result, -1); 602 b(DONE); 603 BIND(MATCH); 604 add(result, result_tmp, cnt1_neg); 605 BIND(DONE); 606 } 607 608 // Compare strings. 609 void C2_MacroAssembler::string_compare(Register str1, Register str2, 610 Register cnt1, Register cnt2, Register result, Register tmp1, Register tmp2, 611 FloatRegister vtmp1, FloatRegister vtmp2, FloatRegister vtmp3, int ae) { 612 Label DONE, SHORT_LOOP, SHORT_STRING, SHORT_LAST, TAIL, STUB, 613 DIFF, NEXT_WORD, SHORT_LOOP_TAIL, SHORT_LAST2, SHORT_LAST_INIT, 614 SHORT_LOOP_START, TAIL_CHECK; 615 616 bool isLL = ae == StrIntrinsicNode::LL; 617 bool isLU = ae == StrIntrinsicNode::LU; 618 bool isUL = ae == StrIntrinsicNode::UL; 619 620 // The stub threshold for LL strings is: 72 (64 + 8) chars 621 // UU: 36 chars, or 72 bytes (valid for the 64-byte large loop with prefetch) 622 // LU/UL: 24 chars, or 48 bytes (valid for the 16-character loop at least) 623 const u1 stub_threshold = isLL ? 72 : ((isLU || isUL) ? 24 : 36); 624 625 bool str1_isL = isLL || isLU; 626 bool str2_isL = isLL || isUL; 627 628 int str1_chr_shift = str1_isL ? 0 : 1; 629 int str2_chr_shift = str2_isL ? 0 : 1; 630 int str1_chr_size = str1_isL ? 1 : 2; 631 int str2_chr_size = str2_isL ? 1 : 2; 632 int minCharsInWord = isLL ? wordSize : wordSize/2; 633 634 FloatRegister vtmpZ = vtmp1, vtmp = vtmp2; 635 chr_insn str1_load_chr = str1_isL ? (chr_insn)&MacroAssembler::ldrb : 636 (chr_insn)&MacroAssembler::ldrh; 637 chr_insn str2_load_chr = str2_isL ? (chr_insn)&MacroAssembler::ldrb : 638 (chr_insn)&MacroAssembler::ldrh; 639 uxt_insn ext_chr = isLL ? (uxt_insn)&MacroAssembler::uxtbw : 640 (uxt_insn)&MacroAssembler::uxthw; 641 642 BLOCK_COMMENT("string_compare {"); 643 644 // Bizzarely, the counts are passed in bytes, regardless of whether they 645 // are L or U strings, however the result is always in characters. 646 if (!str1_isL) asrw(cnt1, cnt1, 1); 647 if (!str2_isL) asrw(cnt2, cnt2, 1); 648 649 // Compute the minimum of the string lengths and save the difference. 650 subsw(result, cnt1, cnt2); 651 cselw(cnt2, cnt1, cnt2, Assembler::LE); // min 652 653 // A very short string 654 cmpw(cnt2, minCharsInWord); 655 br(Assembler::LE, SHORT_STRING); 656 657 // Compare longwords 658 // load first parts of strings and finish initialization while loading 659 { 660 if (str1_isL == str2_isL) { // LL or UU 661 ldr(tmp1, Address(str1)); 662 cmp(str1, str2); 663 br(Assembler::EQ, DONE); 664 ldr(tmp2, Address(str2)); 665 cmp(cnt2, stub_threshold); 666 br(GE, STUB); 667 subsw(cnt2, cnt2, minCharsInWord); 668 br(EQ, TAIL_CHECK); 669 lea(str2, Address(str2, cnt2, Address::uxtw(str2_chr_shift))); 670 lea(str1, Address(str1, cnt2, Address::uxtw(str1_chr_shift))); 671 sub(cnt2, zr, cnt2, LSL, str2_chr_shift); 672 } else if (isLU) { 673 ldrs(vtmp, Address(str1)); 674 ldr(tmp2, Address(str2)); 675 cmp(cnt2, stub_threshold); 676 br(GE, STUB); 677 subw(cnt2, cnt2, 4); 678 eor(vtmpZ, T16B, vtmpZ, vtmpZ); 679 lea(str1, Address(str1, cnt2, Address::uxtw(str1_chr_shift))); 680 lea(str2, Address(str2, cnt2, Address::uxtw(str2_chr_shift))); 681 zip1(vtmp, T8B, vtmp, vtmpZ); 682 sub(cnt1, zr, cnt2, LSL, str1_chr_shift); 683 sub(cnt2, zr, cnt2, LSL, str2_chr_shift); 684 add(cnt1, cnt1, 4); 685 fmovd(tmp1, vtmp); 686 } else { // UL case 687 ldr(tmp1, Address(str1)); 688 ldrs(vtmp, Address(str2)); 689 cmp(cnt2, stub_threshold); 690 br(GE, STUB); 691 subw(cnt2, cnt2, 4); 692 lea(str1, Address(str1, cnt2, Address::uxtw(str1_chr_shift))); 693 eor(vtmpZ, T16B, vtmpZ, vtmpZ); 694 lea(str2, Address(str2, cnt2, Address::uxtw(str2_chr_shift))); 695 sub(cnt1, zr, cnt2, LSL, str1_chr_shift); 696 zip1(vtmp, T8B, vtmp, vtmpZ); 697 sub(cnt2, zr, cnt2, LSL, str2_chr_shift); 698 add(cnt1, cnt1, 8); 699 fmovd(tmp2, vtmp); 700 } 701 adds(cnt2, cnt2, isUL ? 4 : 8); 702 br(GE, TAIL); 703 eor(rscratch2, tmp1, tmp2); 704 cbnz(rscratch2, DIFF); 705 // main loop 706 bind(NEXT_WORD); 707 if (str1_isL == str2_isL) { 708 ldr(tmp1, Address(str1, cnt2)); 709 ldr(tmp2, Address(str2, cnt2)); 710 adds(cnt2, cnt2, 8); 711 } else if (isLU) { 712 ldrs(vtmp, Address(str1, cnt1)); 713 ldr(tmp2, Address(str2, cnt2)); 714 add(cnt1, cnt1, 4); 715 zip1(vtmp, T8B, vtmp, vtmpZ); 716 fmovd(tmp1, vtmp); 717 adds(cnt2, cnt2, 8); 718 } else { // UL 719 ldrs(vtmp, Address(str2, cnt2)); 720 ldr(tmp1, Address(str1, cnt1)); 721 zip1(vtmp, T8B, vtmp, vtmpZ); 722 add(cnt1, cnt1, 8); 723 fmovd(tmp2, vtmp); 724 adds(cnt2, cnt2, 4); 725 } 726 br(GE, TAIL); 727 728 eor(rscratch2, tmp1, tmp2); 729 cbz(rscratch2, NEXT_WORD); 730 b(DIFF); 731 bind(TAIL); 732 eor(rscratch2, tmp1, tmp2); 733 cbnz(rscratch2, DIFF); 734 // Last longword. In the case where length == 4 we compare the 735 // same longword twice, but that's still faster than another 736 // conditional branch. 737 if (str1_isL == str2_isL) { 738 ldr(tmp1, Address(str1)); 739 ldr(tmp2, Address(str2)); 740 } else if (isLU) { 741 ldrs(vtmp, Address(str1)); 742 ldr(tmp2, Address(str2)); 743 zip1(vtmp, T8B, vtmp, vtmpZ); 744 fmovd(tmp1, vtmp); 745 } else { // UL 746 ldrs(vtmp, Address(str2)); 747 ldr(tmp1, Address(str1)); 748 zip1(vtmp, T8B, vtmp, vtmpZ); 749 fmovd(tmp2, vtmp); 750 } 751 bind(TAIL_CHECK); 752 eor(rscratch2, tmp1, tmp2); 753 cbz(rscratch2, DONE); 754 755 // Find the first different characters in the longwords and 756 // compute their difference. 757 bind(DIFF); 758 rev(rscratch2, rscratch2); 759 clz(rscratch2, rscratch2); 760 andr(rscratch2, rscratch2, isLL ? -8 : -16); 761 lsrv(tmp1, tmp1, rscratch2); 762 (this->*ext_chr)(tmp1, tmp1); 763 lsrv(tmp2, tmp2, rscratch2); 764 (this->*ext_chr)(tmp2, tmp2); 765 subw(result, tmp1, tmp2); 766 b(DONE); 767 } 768 769 bind(STUB); 770 RuntimeAddress stub = NULL; 771 switch(ae) { 772 case StrIntrinsicNode::LL: 773 stub = RuntimeAddress(StubRoutines::aarch64::compare_long_string_LL()); 774 break; 775 case StrIntrinsicNode::UU: 776 stub = RuntimeAddress(StubRoutines::aarch64::compare_long_string_UU()); 777 break; 778 case StrIntrinsicNode::LU: 779 stub = RuntimeAddress(StubRoutines::aarch64::compare_long_string_LU()); 780 break; 781 case StrIntrinsicNode::UL: 782 stub = RuntimeAddress(StubRoutines::aarch64::compare_long_string_UL()); 783 break; 784 default: 785 ShouldNotReachHere(); 786 } 787 assert(stub.target() != NULL, "compare_long_string stub has not been generated"); 788 trampoline_call(stub); 789 b(DONE); 790 791 bind(SHORT_STRING); 792 // Is the minimum length zero? 793 cbz(cnt2, DONE); 794 // arrange code to do most branches while loading and loading next characters 795 // while comparing previous 796 (this->*str1_load_chr)(tmp1, Address(post(str1, str1_chr_size))); 797 subs(cnt2, cnt2, 1); 798 br(EQ, SHORT_LAST_INIT); 799 (this->*str2_load_chr)(cnt1, Address(post(str2, str2_chr_size))); 800 b(SHORT_LOOP_START); 801 bind(SHORT_LOOP); 802 subs(cnt2, cnt2, 1); 803 br(EQ, SHORT_LAST); 804 bind(SHORT_LOOP_START); 805 (this->*str1_load_chr)(tmp2, Address(post(str1, str1_chr_size))); 806 (this->*str2_load_chr)(rscratch1, Address(post(str2, str2_chr_size))); 807 cmp(tmp1, cnt1); 808 br(NE, SHORT_LOOP_TAIL); 809 subs(cnt2, cnt2, 1); 810 br(EQ, SHORT_LAST2); 811 (this->*str1_load_chr)(tmp1, Address(post(str1, str1_chr_size))); 812 (this->*str2_load_chr)(cnt1, Address(post(str2, str2_chr_size))); 813 cmp(tmp2, rscratch1); 814 br(EQ, SHORT_LOOP); 815 sub(result, tmp2, rscratch1); 816 b(DONE); 817 bind(SHORT_LOOP_TAIL); 818 sub(result, tmp1, cnt1); 819 b(DONE); 820 bind(SHORT_LAST2); 821 cmp(tmp2, rscratch1); 822 br(EQ, DONE); 823 sub(result, tmp2, rscratch1); 824 825 b(DONE); 826 bind(SHORT_LAST_INIT); 827 (this->*str2_load_chr)(cnt1, Address(post(str2, str2_chr_size))); 828 bind(SHORT_LAST); 829 cmp(tmp1, cnt1); 830 br(EQ, DONE); 831 sub(result, tmp1, cnt1); 832 833 bind(DONE); 834 835 BLOCK_COMMENT("} string_compare"); 836 } 837 838 void C2_MacroAssembler::neon_compare(FloatRegister dst, BasicType bt, FloatRegister src1, 839 FloatRegister src2, int cond, bool isQ) { 840 SIMD_Arrangement size = esize2arrangement(type2aelembytes(bt), isQ); 841 if (bt == T_FLOAT || bt == T_DOUBLE) { 842 switch (cond) { 843 case BoolTest::eq: fcmeq(dst, size, src1, src2); break; 844 case BoolTest::ne: { 845 fcmeq(dst, size, src1, src2); 846 notr(dst, T16B, dst); 847 break; 848 } 849 case BoolTest::ge: fcmge(dst, size, src1, src2); break; 850 case BoolTest::gt: fcmgt(dst, size, src1, src2); break; 851 case BoolTest::le: fcmge(dst, size, src2, src1); break; 852 case BoolTest::lt: fcmgt(dst, size, src2, src1); break; 853 default: 854 assert(false, "unsupported"); 855 ShouldNotReachHere(); 856 } 857 } else { 858 switch (cond) { 859 case BoolTest::eq: cmeq(dst, size, src1, src2); break; 860 case BoolTest::ne: { 861 cmeq(dst, size, src1, src2); 862 notr(dst, T16B, dst); 863 break; 864 } 865 case BoolTest::ge: cmge(dst, size, src1, src2); break; 866 case BoolTest::gt: cmgt(dst, size, src1, src2); break; 867 case BoolTest::le: cmge(dst, size, src2, src1); break; 868 case BoolTest::lt: cmgt(dst, size, src2, src1); break; 869 case BoolTest::uge: cmhs(dst, size, src1, src2); break; 870 case BoolTest::ugt: cmhi(dst, size, src1, src2); break; 871 case BoolTest::ult: cmhi(dst, size, src2, src1); break; 872 case BoolTest::ule: cmhs(dst, size, src2, src1); break; 873 default: 874 assert(false, "unsupported"); 875 ShouldNotReachHere(); 876 } 877 } 878 }