1 /* 2 * Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 
 *
 */

#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "opto/c2_CodeStubs.hpp"
#include "opto/c2_MacroAssembler.hpp"
#include "opto/compile.hpp"
#include "opto/output.hpp"
#include "opto/intrinsicnode.hpp"
#include "opto/subnode.hpp"
#include "runtime/stubRoutines.hpp"
#include "utilities/globalDefinitions.hpp"

#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#define STOP(error) stop(error)
#else
#define BLOCK_COMMENT(str) block_comment(str)
// NOTE(review): two-statement macro — unsafe under an unbraced `if`; only the
// first statement would be guarded. Wrap call sites in braces if used conditionally.
#define STOP(error) block_comment(error); stop(error)
#endif

// Bind a label and emit a block comment with its name (non-product builds).
#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")

// Pointer-to-member type for a single-character load (ldrb/ldrh/ldrw/ldr),
// selected per string encoding.
typedef void (MacroAssembler::* chr_insn)(Register Rt, const Address &adr);

// C2 fast-path monitor enter for LM_LEGACY / LM_MONITOR.
// objectReg = oop to lock, boxReg = on-stack BasicLock box.
// tmpReg/tmp2Reg are clobbered; rscratch1 is clobbered as CAS result.
// Contract with the C2 continuation: flags == EQ on success, NE -> slow path.
void C2_MacroAssembler::fast_lock(Register objectReg, Register boxReg, Register tmpReg,
                                  Register tmp2Reg, Register tmp3Reg) {
  Register oop = objectReg;
  Register box = boxReg;
  Register disp_hdr = tmpReg;
  Register tmp = tmp2Reg;
  Label cont;
  Label object_has_monitor;
  Label cas_failed;

  assert(LockingMode != LM_LIGHTWEIGHT, "lightweight locking should use fast_lock_lightweight");
  assert_different_registers(oop, box, tmp, disp_hdr);

  // Load markWord from object into displaced_header.
  ldr(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes()));

  if (DiagnoseSyncOnValueBasedClasses != 0) {
    // Value-based classes must never be synchronized on: divert to slow path
    // (flags == NE from tstw) so the runtime can diagnose it.
    load_klass(tmp, oop);
    ldrw(tmp, Address(tmp, Klass::access_flags_offset()));
    tstw(tmp, JVM_ACC_IS_VALUE_BASED_CLASS);
    br(Assembler::NE, cont);
  }

  if (UseBiasedLocking && !UseOptoBiasInlining) {
    biased_locking_enter(box, oop, disp_hdr, tmp, true, cont);
  }

  // Check for existing monitor (markWord tagged 0b10).
  tbnz(disp_hdr, exact_log2(markWord::monitor_value), object_has_monitor);

  if (LockingMode == LM_MONITOR) {
    tst(oop, oop); // Set NE to indicate 'failure' -> take slow-path. We know that oop != 0.
    b(cont);
  } else {
    assert(LockingMode == LM_LEGACY, "must be");
    // Set tmp to be (markWord of object | UNLOCK_VALUE).
    orr(tmp, disp_hdr, markWord::unlocked_value);

    // Initialize the box. (Must happen before we update the object mark!)
    str(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // Compare object markWord with an unlocked value (tmp) and if
    // equal exchange the stack address of our box with object markWord.
    // On failure disp_hdr contains the possibly locked markWord.
    cmpxchg(oop, tmp, box, Assembler::xword, /*acquire*/ true,
            /*release*/ true, /*weak*/ false, disp_hdr);
    br(Assembler::EQ, cont);

    assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

    // If the compare-and-exchange succeeded, then we found an unlocked
    // object, will have now locked it will continue at label cont

    bind(cas_failed);
    // We did not see an unlocked object so try the fast recursive case.

    // Check if the owner is self by comparing the value in the
    // markWord of object (disp_hdr) with the stack pointer.
    mov(rscratch1, sp);
    sub(disp_hdr, disp_hdr, rscratch1);
    // Mask covering everything outside the current page plus the lock bits:
    // zero iff the mark holds a stack address within this thread's page.
    mov(tmp, (address) (~(os::vm_page_size()-1) | markWord::lock_mask_in_place));
    // If condition is true we are cont and hence we can store 0 as the
    // displaced header in the box, which indicates that it is a recursive lock.
    ands(tmp/*==0?*/, disp_hdr, tmp); // Sets flags for result
    str(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes()));
    b(cont);
  }

  // Handle existing monitor.
  bind(object_has_monitor);

  // The object's monitor m is unlocked iff m->owner == NULL,
  // otherwise m->owner may contain a thread or a stack address.
  //
  // Try to CAS m->owner from NULL to current thread.
  add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes()-markWord::monitor_value));
  cmpxchg(tmp, zr, rthread, Assembler::xword, /*acquire*/ true,
          /*release*/ true, /*weak*/ false, rscratch1); // Sets flags for result

  // Store a non-null value into the box to avoid looking like a re-entrant
  // lock. The fast-path monitor unlock code checks for
  // markWord::monitor_value so use markWord::unused_mark which has the
  // relevant bit set, and also matches ObjectSynchronizer::enter.
  mov(tmp, (address)markWord::unused_mark().value());
  str(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes()));

  br(Assembler::EQ, cont); // CAS success means locking succeeded

  cmp(rscratch1, rthread);
  br(Assembler::NE, cont); // Check for recursive locking

  // Recursive lock case
  increment(Address(disp_hdr, ObjectMonitor::recursions_offset_in_bytes() - markWord::monitor_value), 1);
  // flag == EQ still from the cmp above, checking if this is a reentrant lock

  bind(cont);
  // flag == EQ indicates success
  // flag == NE indicates failure
}

// C2 fast-path monitor exit for LM_LEGACY / LM_MONITOR.
// Mirror image of fast_lock: flags == EQ on success, NE -> slow path.
// Clobbers tmpReg/tmp2Reg and rscratch1.
void C2_MacroAssembler::fast_unlock(Register objectReg, Register boxReg, Register tmpReg,
                                    Register tmp2Reg) {
  Register oop = objectReg;
  Register box = boxReg;
  Register disp_hdr = tmpReg;
  Register tmp = tmp2Reg;
  Label cont;
  Label object_has_monitor;

  assert(LockingMode != LM_LIGHTWEIGHT, "lightweight locking should use fast_unlock_lightweight");
  assert_different_registers(oop, box, tmp, disp_hdr);

  if (UseBiasedLocking && !UseOptoBiasInlining) {
    biased_locking_exit(oop, tmp, cont);
  }

  if (LockingMode == LM_LEGACY) {
    // Find the lock address and load the displaced header from the stack.
    ldr(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // If the displaced header is 0, we have a recursive unlock.
    cmp(disp_hdr, zr);
    br(Assembler::EQ, cont);
  }

  // Handle existing monitor.
  ldr(tmp, Address(oop, oopDesc::mark_offset_in_bytes()));
  tbnz(tmp, exact_log2(markWord::monitor_value), object_has_monitor);

  if (LockingMode == LM_MONITOR) {
    tst(oop, oop); // Set NE to indicate 'failure' -> take slow-path. We know that oop != 0.
    b(cont);
  } else {
    assert(LockingMode == LM_LEGACY, "must be");
    // Check if it is still a light weight lock, this is true if we
    // see the stack address of the basicLock in the markWord of the
    // object.

    cmpxchg(oop, box, disp_hdr, Assembler::xword, /*acquire*/ false,
            /*release*/ true, /*weak*/ false, tmp);
    b(cont);
  }

  assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

  // Handle existing monitor.
  bind(object_has_monitor);
  STATIC_ASSERT(markWord::monitor_value <= INT_MAX);
  // Strip the tag bits: tmp now points at the ObjectMonitor.
  add(tmp, tmp, -(int)markWord::monitor_value); // monitor

  ldr(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes()));

  Label notRecursive;
  cbz(disp_hdr, notRecursive);

  // Recursive lock: just decrement the recursion count.
  sub(disp_hdr, disp_hdr, 1u);
  str(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes()));
  cmp(disp_hdr, disp_hdr); // Sets flags for result (always EQ -> success)
  b(cont);

  bind(notRecursive);
  // Only release the monitor on the fast path if both waiter lists are empty;
  // otherwise a successor must be woken, which is the slow path's job.
  ldr(rscratch1, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes()));
  ldr(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes()));
  orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if both are 0.
  cmp(rscratch1, zr); // Sets flags for result
  cbnz(rscratch1, cont);
  // need a release store here
  lea(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
  stlr(zr, tmp); // set unowned

  bind(cont);
  // flag == EQ indicates success
  // flag == NE indicates failure
}

// C2 fast-path monitor enter for LM_LIGHTWEIGHT (lock-stack based locking).
// obj = oop to lock; t1-t3 are clobbered temps. rscratch is untouched here;
// the CAS result register is noreg/t3 as annotated below.
// Contract: falls through / branches to 'locked' with flags == EQ on success,
// to 'slow_path' with flags == NE on failure.
void C2_MacroAssembler::fast_lock_lightweight(Register obj, Register t1,
                                              Register t2, Register t3) {
  assert(LockingMode == LM_LIGHTWEIGHT, "must be");
  assert_different_registers(obj, t1, t2, t3);

  // Handle inflated monitor.
  Label inflated;
  // Finish fast lock successfully. MUST branch to with flag == EQ
  Label locked;
  // Finish fast lock unsuccessfully. MUST branch to with flag == NE
  Label slow_path;

  if (DiagnoseSyncOnValueBasedClasses != 0) {
    load_klass(t1, obj);
    ldrw(t1, Address(t1, Klass::access_flags_offset()));
    tstw(t1, JVM_ACC_IS_VALUE_BASED_CLASS);
    br(Assembler::NE, slow_path);
  }

  const Register t1_mark = t1;

  { // Lightweight locking

    // Push lock to the lock stack and finish successfully. MUST branch to with flag == EQ
    Label push;

    const Register t2_top = t2;
    const Register t3_t = t3;

    // Check if lock-stack is full.
    ldrw(t2_top, Address(rthread, JavaThread::lock_stack_top_offset()));
    cmpw(t2_top, (unsigned)LockStack::end_offset() - 1);
    br(Assembler::GT, slow_path);

    // Check if recursive: the slot just below top already holds this obj.
    subw(t3_t, t2_top, oopSize);
    ldr(t3_t, Address(rthread, t3_t));
    cmp(obj, t3_t);
    br(Assembler::EQ, push);

    // Relaxed normal load to check for monitor. Optimization for monitor case.
    ldr(t1_mark, Address(obj, oopDesc::mark_offset_in_bytes()));
    tbnz(t1_mark, exact_log2(markWord::monitor_value), inflated);

    // Not inflated
    assert(oopDesc::mark_offset_in_bytes() == 0, "required to avoid a lea");

    // Try to lock.
    // Transition lock-bits 0b01 => 0b00.
    // Force the unlocked bit into the expected value, clear it in the new value.
    orr(t1_mark, t1_mark, markWord::unlocked_value);
    eor(t3_t, t1_mark, markWord::unlocked_value);
    cmpxchg(/*addr*/ obj, /*expected*/ t1_mark, /*new*/ t3_t, Assembler::xword,
            /*acquire*/ true, /*release*/ false, /*weak*/ false, noreg);
    br(Assembler::NE, slow_path);

    bind(push);
    // After successful lock, push object on lock-stack.
    str(obj, Address(rthread, t2_top));
    addw(t2_top, t2_top, oopSize);
    strw(t2_top, Address(rthread, JavaThread::lock_stack_top_offset()));
    b(locked);
  }

  { // Handle inflated monitor.
    bind(inflated);

    // mark contains the tagged ObjectMonitor*.
    const Register t1_tagged_monitor = t1_mark;
    const uintptr_t monitor_tag = markWord::monitor_value;
    const Register t2_owner_addr = t2;
    const Register t3_owner = t3;

    // Compute owner address (untag by folding -monitor_tag into the offset).
    lea(t2_owner_addr, Address(t1_tagged_monitor, ObjectMonitor::owner_offset_in_bytes() - monitor_tag));

    // CAS owner (null => current thread).
    cmpxchg(t2_owner_addr, zr, rthread, Assembler::xword, /*acquire*/ true,
            /*release*/ false, /*weak*/ false, t3_owner);
    br(Assembler::EQ, locked);

    // Check if recursive.
    cmp(t3_owner, rthread);
    br(Assembler::NE, slow_path);

    // Recursive.
    increment(Address(t1_tagged_monitor, ObjectMonitor::recursions_offset_in_bytes() - monitor_tag), 1);
  }

  bind(locked);
#ifdef ASSERT
  // Check that locked label is reached with Flags == EQ.
  Label flag_correct;
  br(Assembler::EQ, flag_correct);
  stop("Fast Lock Flag != EQ");
#endif

  bind(slow_path);
#ifdef ASSERT
  // Check that slow_path label is reached with Flags == NE.
  br(Assembler::NE, flag_correct);
  stop("Fast Lock Flag != NE");
  bind(flag_correct);
#endif
  // C2 uses the value of Flags (NE vs EQ) to determine the continuation.
328 } 329 330 void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register t1, Register t2, 331 Register t3) { 332 assert(LockingMode == LM_LIGHTWEIGHT, "must be"); 333 assert_different_registers(obj, t1, t2, t3); 334 335 // Handle inflated monitor. 336 Label inflated, inflated_load_monitor; 337 // Finish fast unlock successfully. MUST branch to with flag == EQ 338 Label unlocked; 339 // Finish fast unlock unsuccessfully. MUST branch to with flag == NE 340 Label slow_path; 341 342 const Register t1_mark = t1; 343 const Register t2_top = t2; 344 const Register t3_t = t3; 345 346 { // Lightweight unlock 347 348 // Check if obj is top of lock-stack. 349 ldrw(t2_top, Address(rthread, JavaThread::lock_stack_top_offset())); 350 subw(t2_top, t2_top, oopSize); 351 ldr(t3_t, Address(rthread, t2_top)); 352 cmp(obj, t3_t); 353 // Top of lock stack was not obj. Must be monitor. 354 br(Assembler::NE, inflated_load_monitor); 355 356 // Pop lock-stack. 357 DEBUG_ONLY(str(zr, Address(rthread, t2_top));) 358 strw(t2_top, Address(rthread, JavaThread::lock_stack_top_offset())); 359 360 // Check if recursive. 361 subw(t3_t, t2_top, oopSize); 362 ldr(t3_t, Address(rthread, t3_t)); 363 cmp(obj, t3_t); 364 br(Assembler::EQ, unlocked); 365 366 // Not recursive. 367 // Load Mark. 368 ldr(t1_mark, Address(obj, oopDesc::mark_offset_in_bytes())); 369 370 // Check header for monitor (0b10). 371 tbnz(t1_mark, exact_log2(markWord::monitor_value), inflated); 372 373 // Try to unlock. Transition lock bits 0b00 => 0b01 374 assert(oopDesc::mark_offset_in_bytes() == 0, "required to avoid lea"); 375 orr(t3_t, t1_mark, markWord::unlocked_value); 376 cmpxchg(/*addr*/ obj, /*expected*/ t1_mark, /*new*/ t3_t, Assembler::xword, 377 /*acquire*/ false, /*release*/ true, /*weak*/ false, noreg); 378 br(Assembler::EQ, unlocked); 379 380 // Compare and exchange failed. 381 // Restore lock-stack and handle the unlock in runtime. 
382 DEBUG_ONLY(str(obj, Address(rthread, t2_top));) 383 addw(t2_top, t2_top, oopSize); 384 str(t2_top, Address(rthread, JavaThread::lock_stack_top_offset())); 385 b(slow_path); 386 } 387 388 389 { // Handle inflated monitor. 390 bind(inflated_load_monitor); 391 ldr(t1_mark, Address(obj, oopDesc::mark_offset_in_bytes())); 392 #ifdef ASSERT 393 tbnz(t1_mark, exact_log2(markWord::monitor_value), inflated); 394 stop("Fast Unlock not monitor"); 395 #endif 396 397 bind(inflated); 398 399 #ifdef ASSERT 400 Label check_done; 401 subw(t2_top, t2_top, oopSize); 402 cmpw(t2_top, in_bytes(JavaThread::lock_stack_base_offset())); 403 br(Assembler::LT, check_done); 404 ldr(t3_t, Address(rthread, t2_top)); 405 cmp(obj, t3_t); 406 br(Assembler::NE, inflated); 407 stop("Fast Unlock lock on stack"); 408 bind(check_done); 409 #endif 410 411 // mark contains the tagged ObjectMonitor*. 412 const Register t1_monitor = t1_mark; 413 const uintptr_t monitor_tag = markWord::monitor_value; 414 415 // Untag the monitor. 416 sub(t1_monitor, t1_mark, monitor_tag); 417 418 const Register t2_recursions = t2; 419 Label not_recursive; 420 421 // Check if recursive. 422 ldr(t2_recursions, Address(t1_monitor, ObjectMonitor::recursions_offset_in_bytes())); 423 cbz(t2_recursions, not_recursive); 424 425 // Recursive unlock. 426 sub(t2_recursions, t2_recursions, 1u); 427 str(t2_recursions, Address(t1_monitor, ObjectMonitor::recursions_offset_in_bytes())); 428 // Set flag == EQ 429 cmp(t2_recursions, t2_recursions); 430 b(unlocked); 431 432 bind(not_recursive); 433 434 Label release; 435 const Register t2_owner_addr = t2; 436 437 // Compute owner address. 438 lea(t2_owner_addr, Address(t1_monitor, ObjectMonitor::owner_offset_in_bytes())); 439 440 // Check if the entry lists are empty. 
441 ldr(rscratch1, Address(t1_monitor, ObjectMonitor::EntryList_offset_in_bytes())); 442 ldr(t3_t, Address(t1_monitor, ObjectMonitor::cxq_offset_in_bytes())); 443 orr(rscratch1, rscratch1, t3_t); 444 cmp(rscratch1, zr); 445 br(Assembler::EQ, release); 446 447 // The owner may be anonymous and we removed the last obj entry in 448 // the lock-stack. This loses the information about the owner. 449 // Write the thread to the owner field so the runtime knows the owner. 450 str(rthread, Address(t2_owner_addr)); 451 b(slow_path); 452 453 bind(release); 454 // Set owner to null. 455 // Release to satisfy the JMM 456 stlr(zr, t2_owner_addr); 457 } 458 459 bind(unlocked); 460 #ifdef ASSERT 461 // Check that unlocked label is reached with Flags == EQ. 462 Label flag_correct; 463 br(Assembler::EQ, flag_correct); 464 stop("Fast Unlock Flag != EQ"); 465 #endif 466 467 bind(slow_path); 468 #ifdef ASSERT 469 // Check that slow_path label is reached with Flags == NE. 470 br(Assembler::NE, flag_correct); 471 stop("Fast Unlock Flag != NE"); 472 bind(flag_correct); 473 #endif 474 // C2 uses the value of Flags (NE vs EQ) to determine the continuation. 475 } 476 477 // Search for str1 in str2 and return index or -1 478 // Clobbers: rscratch1, rscratch2, rflags. May also clobber v0-v1, when icnt1==-1. 
// Emit String.indexOf intrinsic: find the first occurrence of the pattern
// (str1, cnt1 chars) in the source (str2, cnt2 chars); result = index or -1.
// icnt1 is the pattern length when known at compile time, or -1.
// ae encodes the Latin1/UTF-16 combination of the two strings.
void C2_MacroAssembler::string_indexof(Register str2, Register str1,
                                       Register cnt2, Register cnt1,
                                       Register tmp1, Register tmp2,
                                       Register tmp3, Register tmp4,
                                       Register tmp5, Register tmp6,
                                       int icnt1, Register result, int ae) {
  // NOTE: tmp5, tmp6 can be zr depending on specific method version
  Label LINEARSEARCH, LINEARSTUB, LINEAR_MEDIUM, DONE, NOMATCH, MATCH;

  Register ch1 = rscratch1;
  Register ch2 = rscratch2;
  Register cnt1tmp = tmp1;
  Register cnt2tmp = tmp2;
  Register cnt1_neg = cnt1;
  Register cnt2_neg = cnt2;
  Register result_tmp = tmp4;

  bool isL = ae == StrIntrinsicNode::LL;

  // str1_isL/str2_isL: whether pattern/source are Latin1 (1 byte per char).
  bool str1_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL;
  bool str2_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU;
  int str1_chr_shift = str1_isL ? 0:1;
  int str2_chr_shift = str2_isL ? 0:1;
  int str1_chr_size = str1_isL ? 1:2;
  int str2_chr_size = str2_isL ? 1:2;
  // Encoding-dependent load instructions for 1, 2 and 4 characters.
  chr_insn str1_load_1chr = str1_isL ? (chr_insn)&MacroAssembler::ldrb :
                                       (chr_insn)&MacroAssembler::ldrh;
  chr_insn str2_load_1chr = str2_isL ? (chr_insn)&MacroAssembler::ldrb :
                                       (chr_insn)&MacroAssembler::ldrh;
  chr_insn load_2chr = isL ? (chr_insn)&MacroAssembler::ldrh : (chr_insn)&MacroAssembler::ldrw;
  chr_insn load_4chr = isL ? (chr_insn)&MacroAssembler::ldrw : (chr_insn)&MacroAssembler::ldr;

  // Note, inline_string_indexOf() generates checks:
  // if (substr.count > string.count) return -1;
  // if (substr.count == 0) return 0;

  // We have two strings, a source string in str2, cnt2 and a pattern string
  // in str1, cnt1. Find the 1st occurrence of pattern in source or return -1.

  // For larger pattern and source we use a simplified Boyer Moore algorithm.
  // With a small pattern and source we use linear scan.

  if (icnt1 == -1) {
    // Runtime-length pattern: pick an algorithm based on the sizes.
    sub(result_tmp, cnt2, cnt1);
    cmp(cnt1, (u1)8); // Use Linear Scan if cnt1 < 8 || cnt1 >= 256
    br(LT, LINEARSEARCH);
    dup(v0, T16B, cnt1); // done in separate FPU pipeline. Almost no penalty
    subs(zr, cnt1, 256);
    lsr(tmp1, cnt2, 2);
    ccmp(cnt1, tmp1, 0b0000, LT); // Source must be 4 * pattern for BM
    br(GE, LINEARSTUB);
  }

  // The Boyer Moore algorithm is based on the description here:-
  //
  // http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm
  //
  // This describes and algorithm with 2 shift rules. The 'Bad Character' rule
  // and the 'Good Suffix' rule.
  //
  // These rules are essentially heuristics for how far we can shift the
  // pattern along the search string.
  //
  // The implementation here uses the 'Bad Character' rule only because of the
  // complexity of initialisation for the 'Good Suffix' rule.
  //
  // This is also known as the Boyer-Moore-Horspool algorithm:-
  //
  // http://en.wikipedia.org/wiki/Boyer-Moore-Horspool_algorithm
  //
  // This particular implementation has few java-specific optimizations.
  //
  // #define ASIZE 256
  //
  // int bm(unsigned char *x, int m, unsigned char *y, int n) {
  //   int i, j;
  //   unsigned c;
  //   unsigned char bc[ASIZE];
  //
  //   /* Preprocessing */
  //   for (i = 0; i < ASIZE; ++i)
  //     bc[i] = m;
  //   for (i = 0; i < m - 1; ) {
  //     c = x[i];
  //     ++i;
  //     // c < 256 for Latin1 string, so, no need for branch
  //     #ifdef PATTERN_STRING_IS_LATIN1
  //     bc[c] = m - i;
  //     #else
  //     if (c < ASIZE) bc[c] = m - i;
  //     #endif
  //   }
  //
  //   /* Searching */
  //   j = 0;
  //   while (j <= n - m) {
  //     c = y[i+j];
  //     if (x[m-1] == c)
  //       for (i = m - 2; i >= 0 && x[i] == y[i + j]; --i);
  //     if (i < 0) return j;
  //     // c < 256 for Latin1 string, so, no need for branch
  //     #ifdef SOURCE_STRING_IS_LATIN1
  //     // LL case: (c< 256) always true. Remove branch
  //     j += bc[y[j+m-1]];
  //     #endif
  //     #ifndef PATTERN_STRING_IS_UTF
  //     // UU case: need if (c<ASIZE) check. Skip 1 character if not.
  //     if (c < ASIZE)
  //       j += bc[y[j+m-1]];
  //     else
  //       j += 1
  //     #endif
  //     #ifdef PATTERN_IS_LATIN1_AND_SOURCE_IS_UTF
  //     // UL case: need if (c<ASIZE) check. Skip <pattern length> if not.
  //     if (c < ASIZE)
  //       j += bc[y[j+m-1]];
  //     else
  //       j += m
  //     #endif
  //   }
  // }

  if (icnt1 == -1) {
    Label BCLOOP, BCSKIP, BMLOOPSTR2, BMLOOPSTR1, BMSKIP, BMADV, BMMATCH,
        BMLOOPSTR1_LASTCMP, BMLOOPSTR1_CMP, BMLOOPSTR1_AFTER_LOAD, BM_INIT_LOOP;
    Register cnt1end = tmp2;
    Register str2end = cnt2;
    Register skipch = tmp2;

    // str1 length is >=8, so, we can read at least 1 register for cases when
    // UTF->Latin1 conversion is not needed(8 LL or 4UU) and half register for
    // UL case. We'll re-read last character in inner pre-loop code to have
    // single outer pre-loop load
    const int firstStep = isL ?
        7 : 3;

    const int ASIZE = 256;
    const int STORED_BYTES = 32; // amount of bytes stored per instruction

    // Build the bad-character table bc[ASIZE] on the stack, initialized to
    // cnt1 (v0 was pre-filled with cnt1 by the caller path above).
    sub(sp, sp, ASIZE);
    mov(tmp5, ASIZE/STORED_BYTES); // loop iterations
    mov(ch1, sp);
    BIND(BM_INIT_LOOP);
      stpq(v0, v0, Address(post(ch1, STORED_BYTES)));
      subs(tmp5, tmp5, 1);
      br(GT, BM_INIT_LOOP);

    sub(cnt1tmp, cnt1, 1);
    mov(tmp5, str2);
    add(str2end, str2, result_tmp, LSL, str2_chr_shift);
    sub(ch2, cnt1, 1);
    mov(tmp3, str1);
    // Preprocessing: fill in skip distances for the first cnt1-1 pattern chars.
    BIND(BCLOOP);
      (this->*str1_load_1chr)(ch1, Address(post(tmp3, str1_chr_size)));
      if (!str1_isL) {
        // UTF-16 pattern char may be >= 256: no table entry then.
        subs(zr, ch1, ASIZE);
        br(HS, BCSKIP);
      }
      strb(ch2, Address(sp, ch1));
    BIND(BCSKIP);
      subs(ch2, ch2, 1);
      br(GT, BCLOOP);

    // Preload the trailing pattern characters into tmp6 for the inner compare.
    add(tmp6, str1, cnt1, LSL, str1_chr_shift); // address after str1
    if (str1_isL == str2_isL) {
      // load last 8 bytes (8LL/4UU symbols)
      ldr(tmp6, Address(tmp6, -wordSize));
    } else {
      ldrw(tmp6, Address(tmp6, -wordSize/2)); // load last 4 bytes(4 symbols)
      // convert Latin1 to UTF. We'll have to wait until load completed, but
      // it's still faster than per-character loads+checks
      lsr(tmp3, tmp6, BitsPerByte * (wordSize/2 - str1_chr_size)); // str1[N-1]
      ubfx(ch1, tmp6, 8, 8); // str1[N-2]
      ubfx(ch2, tmp6, 16, 8); // str1[N-3]
      andr(tmp6, tmp6, 0xFF); // str1[N-4]
      orr(ch2, ch1, ch2, LSL, 16);
      orr(tmp6, tmp6, tmp3, LSL, 48);
      orr(tmp6, tmp6, ch2, LSL, 16);
    }
    // Search loop: compare last pattern char against the aligned source char,
    // then walk backwards on match; otherwise skip by the bc[] distance.
    BIND(BMLOOPSTR2);
      (this->*str2_load_1chr)(skipch, Address(str2, cnt1tmp, Address::lsl(str2_chr_shift)));
      sub(cnt1tmp, cnt1tmp, firstStep); // cnt1tmp is positive here, because cnt1 >= 8
      if (str1_isL == str2_isL) {
        // re-init tmp3. It's for free because it's executed in parallel with
        // load above. Alternative is to initialize it before loop, but it'll
        // affect performance on in-order systems with 2 or more ld/st pipelines
        lsr(tmp3, tmp6, BitsPerByte * (wordSize - str1_chr_size));
      }
      if (!isL) { // UU/UL case
        lsl(ch2, cnt1tmp, 1); // offset in bytes
      }
      cmp(tmp3, skipch);
      br(NE, BMSKIP);
      ldr(ch2, Address(str2, isL ? cnt1tmp : ch2));
      mov(ch1, tmp6);
      if (isL) {
        b(BMLOOPSTR1_AFTER_LOAD);
      } else {
        sub(cnt1tmp, cnt1tmp, 1); // no need to branch for UU/UL case. cnt1 >= 8
        b(BMLOOPSTR1_CMP);
      }
    BIND(BMLOOPSTR1);
      (this->*str1_load_1chr)(ch1, Address(str1, cnt1tmp, Address::lsl(str1_chr_shift)));
      (this->*str2_load_1chr)(ch2, Address(str2, cnt1tmp, Address::lsl(str2_chr_shift)));
    BIND(BMLOOPSTR1_AFTER_LOAD);
      subs(cnt1tmp, cnt1tmp, 1);
      br(LT, BMLOOPSTR1_LASTCMP);
    BIND(BMLOOPSTR1_CMP);
      cmp(ch1, ch2);
      br(EQ, BMLOOPSTR1);
    BIND(BMSKIP);
      if (!isL) {
        // if we've met UTF symbol while searching Latin1 pattern, then we can
        // skip cnt1 symbols
        if (str1_isL != str2_isL) {
          mov(result_tmp, cnt1);
        } else {
          mov(result_tmp, 1);
        }
        subs(zr, skipch, ASIZE);
        br(HS, BMADV);
      }
      ldrb(result_tmp, Address(sp, skipch)); // load skip distance
    BIND(BMADV);
      sub(cnt1tmp, cnt1, 1);
      add(str2, str2, result_tmp, LSL, str2_chr_shift);
      cmp(str2, str2end);
      br(LE, BMLOOPSTR2);
      add(sp, sp, ASIZE); // deallocate the bc[] table before leaving
      b(NOMATCH);
    BIND(BMLOOPSTR1_LASTCMP);
      cmp(ch1, ch2);
      br(NE, BMSKIP);
    BIND(BMMATCH);
      // Match index = (current str2 position - saved str2 base), in chars.
      sub(result, str2, tmp5);
      if (!str2_isL) lsr(result, result, 1);
      add(sp, sp, ASIZE);
      b(DONE);

    BIND(LINEARSTUB);
    cmp(cnt1, (u1)16); // small patterns still should be handled by simple algorithm
    br(LT, LINEAR_MEDIUM);
    mov(result, zr);
    // Long source with moderate pattern: tail-call the generated stub.
    RuntimeAddress stub = NULL;
    if (isL) {
      stub = RuntimeAddress(StubRoutines::aarch64::string_indexof_linear_ll());
      assert(stub.target() != NULL,
             "string_indexof_linear_ll stub has not been generated");
    } else if (str1_isL) {
      stub = RuntimeAddress(StubRoutines::aarch64::string_indexof_linear_ul());
      assert(stub.target() != NULL, "string_indexof_linear_ul stub has not been generated");
    } else {
      stub = RuntimeAddress(StubRoutines::aarch64::string_indexof_linear_uu());
      assert(stub.target() != NULL, "string_indexof_linear_uu stub has not been generated");
    }
    trampoline_call(stub);
    b(DONE);
  }

  BIND(LINEARSEARCH);
  {
    Label DO1, DO2, DO3;

    Register str2tmp = tmp2;
    Register first = tmp3;

    if (icnt1 == -1)
    {
      Label DOSHORT, FIRST_LOOP, STR2_NEXT, STR1_LOOP, STR1_NEXT;

      // Patterns of fewer than 4 (same encoding) / 2 (mixed) chars get the
      // specialized DO1/DO2/DO3 code below.
      cmp(cnt1, u1(str1_isL == str2_isL ? 4 : 2));
      br(LT, DOSHORT);
    BIND(LINEAR_MEDIUM);
      // Scan for the first pattern char; on a hit, verify the rest.
      // cnt1_neg/cnt2_neg iterate from -length up to 0.
      (this->*str1_load_1chr)(first, Address(str1));
      lea(str1, Address(str1, cnt1, Address::lsl(str1_chr_shift)));
      sub(cnt1_neg, zr, cnt1, LSL, str1_chr_shift);
      lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));
      sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);

    BIND(FIRST_LOOP);
      (this->*str2_load_1chr)(ch2, Address(str2, cnt2_neg));
      cmp(first, ch2);
      br(EQ, STR1_LOOP);
    BIND(STR2_NEXT);
      adds(cnt2_neg, cnt2_neg, str2_chr_size);
      br(LE, FIRST_LOOP);
      b(NOMATCH);

    BIND(STR1_LOOP);
      adds(cnt1tmp, cnt1_neg, str1_chr_size);
      add(cnt2tmp, cnt2_neg, str2_chr_size);
      br(GE, MATCH);

    BIND(STR1_NEXT);
      (this->*str1_load_1chr)(ch1, Address(str1, cnt1tmp));
      (this->*str2_load_1chr)(ch2, Address(str2, cnt2tmp));
      cmp(ch1, ch2);
      br(NE, STR2_NEXT);
      adds(cnt1tmp, cnt1tmp, str1_chr_size);
      add(cnt2tmp, cnt2tmp, str2_chr_size);
      br(LT, STR1_NEXT);
      b(MATCH);

    BIND(DOSHORT);
      if (str1_isL == str2_isL) {
        cmp(cnt1, (u1)2);
        br(LT, DO1);
        br(GT, DO3);
      }
    }

    if (icnt1 == 4) {
      // Known 4-char pattern: compare it as one 4-char load per position.
      Label CH1_LOOP;

      (this->*load_4chr)(ch1, str1);
      sub(result_tmp, cnt2,
          4);
      lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));
      sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);

    BIND(CH1_LOOP);
      (this->*load_4chr)(ch2, Address(str2, cnt2_neg));
      cmp(ch1, ch2);
      br(EQ, MATCH);
      adds(cnt2_neg, cnt2_neg, str2_chr_size);
      br(LE, CH1_LOOP);
      b(NOMATCH);
    }

    if ((icnt1 == -1 && str1_isL == str2_isL) || icnt1 == 2) {
      // 2-char pattern: one 2-char load per position.
      Label CH1_LOOP;

      BIND(DO2);
      (this->*load_2chr)(ch1, str1);
      if (icnt1 == 2) {
        sub(result_tmp, cnt2, 2);
      }
      lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));
      sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);
    BIND(CH1_LOOP);
      (this->*load_2chr)(ch2, Address(str2, cnt2_neg));
      cmp(ch1, ch2);
      br(EQ, MATCH);
      adds(cnt2_neg, cnt2_neg, str2_chr_size);
      br(LE, CH1_LOOP);
      b(NOMATCH);
    }

    if ((icnt1 == -1 && str1_isL == str2_isL) || icnt1 == 3) {
      // 3-char pattern: match the leading 2 chars, then check the third.
      Label FIRST_LOOP, STR2_NEXT, STR1_LOOP;

      BIND(DO3);
      (this->*load_2chr)(first, str1);
      (this->*str1_load_1chr)(ch1, Address(str1, 2*str1_chr_size));
      if (icnt1 == 3) {
        sub(result_tmp, cnt2, 3);
      }
      lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));
      sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);
    BIND(FIRST_LOOP);
      (this->*load_2chr)(ch2, Address(str2, cnt2_neg));
      cmpw(first, ch2);
      br(EQ, STR1_LOOP);
    BIND(STR2_NEXT);
      adds(cnt2_neg, cnt2_neg, str2_chr_size);
      br(LE, FIRST_LOOP);
      b(NOMATCH);

    BIND(STR1_LOOP);
      add(cnt2tmp, cnt2_neg, 2*str2_chr_size);
      (this->*str2_load_1chr)(ch2, Address(str2, cnt2tmp));
      cmp(ch1, ch2);
      br(NE, STR2_NEXT);
      b(MATCH);
    }

    if (icnt1 == -1 || icnt1 == 1) {
      // Single-char pattern: SWAR search, 8 bytes of source at a time.
      Label CH1_LOOP, HAS_ZERO, DO1_SHORT, DO1_LOOP;

      BIND(DO1);
      (this->*str1_load_1chr)(ch1, str1);
      cmp(cnt2, (u1)8);
      br(LT, DO1_SHORT);

      sub(result_tmp, cnt2, 8/str2_chr_size);
      sub(cnt2_neg, zr, result_tmp, LSL,
          str2_chr_shift);
      // SWAR constants: per-lane 0x01 and 0x7f.. masks for zero-byte detection
      // on (source XOR replicated-char).
      mov(tmp3, str2_isL ? 0x0101010101010101 : 0x0001000100010001);
      lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));

      // Replicate the pattern char into every lane of ch1.
      if (str2_isL) {
        orr(ch1, ch1, ch1, LSL, 8);
      }
      orr(ch1, ch1, ch1, LSL, 16);
      orr(ch1, ch1, ch1, LSL, 32);
    BIND(CH1_LOOP);
      ldr(ch2, Address(str2, cnt2_neg));
      eor(ch2, ch1, ch2);
      // Classic "has zero lane" trick: (x - 0x01..) & ~(x | 0x7f..)
      sub(tmp1, ch2, tmp3);
      orr(tmp2, ch2, str2_isL ? 0x7f7f7f7f7f7f7f7f : 0x7fff7fff7fff7fff);
      bics(tmp1, tmp1, tmp2);
      br(NE, HAS_ZERO);
      adds(cnt2_neg, cnt2_neg, 8);
      br(LT, CH1_LOOP);

      // Process the final (possibly overlapping) 8-byte chunk.
      cmp(cnt2_neg, (u1)8);
      mov(cnt2_neg, 0);
      br(LT, CH1_LOOP);
      b(NOMATCH);

    BIND(HAS_ZERO);
      // Locate the first zero lane (little-endian -> reverse, count zeros).
      rev(tmp1, tmp1);
      clz(tmp1, tmp1);
      add(cnt2_neg, cnt2_neg, tmp1, LSR, 3);
      b(MATCH);

    BIND(DO1_SHORT);
      // Fewer than 8 source bytes: plain per-character loop.
      mov(result_tmp, cnt2);
      lea(str2, Address(str2, cnt2, Address::lsl(str2_chr_shift)));
      sub(cnt2_neg, zr, cnt2, LSL, str2_chr_shift);
    BIND(DO1_LOOP);
      (this->*str2_load_1chr)(ch2, Address(str2, cnt2_neg));
      cmpw(ch1, ch2);
      br(EQ, MATCH);
      adds(cnt2_neg, cnt2_neg, str2_chr_size);
      br(LT, DO1_LOOP);
    }
  }
  BIND(NOMATCH);
    mov(result, -1);
    b(DONE);
  BIND(MATCH);
    // result = base offset + (negative index scaled back to characters).
    add(result, result_tmp, cnt2_neg, ASR, str2_chr_shift);
  BIND(DONE);
}

typedef void (MacroAssembler::* chr_insn)(Register Rt, const Address &adr);
typedef void (MacroAssembler::* uxt_insn)(Register Rd, Register Rn);

// Emit StringUTF16.indexOfChar intrinsic: find 'ch' in the UTF-16 string
// (str1, cnt1 chars); result = index or -1. Uses a SWAR scan over 8-byte
// (4-char) chunks, falling back to a per-char loop for short strings.
// Clobbers rscratch1/rscratch2 and the given temps; flags are clobbered.
void C2_MacroAssembler::string_indexof_char(Register str1, Register cnt1,
                                            Register ch, Register result,
                                            Register tmp1, Register tmp2, Register tmp3)
{
  Label CH1_LOOP, HAS_ZERO, DO1_SHORT, DO1_LOOP, MATCH, NOMATCH, DONE;
  Register cnt1_neg = cnt1;
  Register ch1 = rscratch1;
  Register result_tmp = rscratch2;

  // Empty string: no match.
  cbz(cnt1, NOMATCH);

  cmp(cnt1, (u1)4);
  br(LT, DO1_SHORT);

  // Replicate the 16-bit char into all four lanes of ch.
  orr(ch, ch, ch, LSL, 16);
  orr(ch, ch, ch, LSL, 32);

  sub(cnt1, cnt1, 4);
  mov(result_tmp, cnt1);
  lea(str1, Address(str1, cnt1, Address::uxtw(1)));
  sub(cnt1_neg, zr, cnt1, LSL, 1);

  // Per-lane 0x0001 constant for the SWAR zero-lane test.
  mov(tmp3, 0x0001000100010001);

  BIND(CH1_LOOP);
    ldr(ch1, Address(str1, cnt1_neg));
    eor(ch1, ch, ch1);
    // Zero-lane detection: (x - 0x0001..) & ~(x | 0x7fff..)
    sub(tmp1, ch1, tmp3);
    orr(tmp2, ch1, 0x7fff7fff7fff7fff);
    bics(tmp1, tmp1, tmp2);
    br(NE, HAS_ZERO);
    adds(cnt1_neg, cnt1_neg, 8);
    br(LT, CH1_LOOP);

    // Re-scan the last (possibly overlapping) 8-byte chunk.
    cmp(cnt1_neg, (u1)8);
    mov(cnt1_neg, 0);
    br(LT, CH1_LOOP);
    b(NOMATCH);

  BIND(HAS_ZERO);
    // First zero lane -> byte offset of the matching char.
    rev(tmp1, tmp1);
    clz(tmp1, tmp1);
    add(cnt1_neg, cnt1_neg, tmp1, LSR, 3);
    b(MATCH);

  BIND(DO1_SHORT);
    // Fewer than 4 chars: simple per-character loop.
    mov(result_tmp, cnt1);
    lea(str1, Address(str1, cnt1, Address::uxtw(1)));
    sub(cnt1_neg, zr, cnt1, LSL, 1);
  BIND(DO1_LOOP);
    ldrh(ch1, Address(str1, cnt1_neg));
    cmpw(ch, ch1);
    br(EQ, MATCH);
    adds(cnt1_neg, cnt1_neg, 2);
    br(LT, DO1_LOOP);
  BIND(NOMATCH);
    mov(result, -1);
    b(DONE);
  BIND(MATCH);
    // Convert the negative byte index back to a character index.
    add(result, result_tmp, cnt1_neg, ASR, 1);
  BIND(DONE);
}

// Emit StringLatin1.indexOfChar intrinsic: find byte 'ch' in the Latin1
// string (str1, cnt1 bytes); result = index or -1. Same SWAR structure as
// string_indexof_char but with 8 one-byte lanes per 64-bit chunk.
// Clobbers rscratch1/rscratch2 and the given temps; flags are clobbered.
void C2_MacroAssembler::stringL_indexof_char(Register str1, Register cnt1,
                                             Register ch, Register result,
                                             Register tmp1, Register tmp2, Register tmp3)
{
  Label CH1_LOOP, HAS_ZERO, DO1_SHORT, DO1_LOOP, MATCH, NOMATCH, DONE;
  Register cnt1_neg = cnt1;
  Register ch1 = rscratch1;
  Register result_tmp = rscratch2;

  // Empty string: no match.
  cbz(cnt1, NOMATCH);

  cmp(cnt1, (u1)8);
  br(LT, DO1_SHORT);

  // Replicate the byte into all eight lanes of ch.
  orr(ch, ch, ch, LSL, 8);
  orr(ch, ch, ch, LSL, 16);
  orr(ch, ch, ch, LSL, 32);

  sub(cnt1, cnt1, 8);
  mov(result_tmp, cnt1);
  lea(str1, Address(str1, cnt1));
  sub(cnt1_neg, zr, cnt1);

  // Per-lane 0x01 constant for the SWAR zero-lane test.
  mov(tmp3, 0x0101010101010101);

  BIND(CH1_LOOP);
    ldr(ch1, Address(str1, cnt1_neg));
    eor(ch1, ch, ch1);
    sub(tmp1, ch1, tmp3);
    orr(tmp2, ch1, 0x7f7f7f7f7f7f7f7f);
    bics(tmp1, tmp1, tmp2);
    br(NE, HAS_ZERO);
    adds(cnt1_neg, cnt1_neg, 8);
    br(LT, CH1_LOOP);

    cmp(cnt1_neg,
(u1)8);
    // Out of full words: if the loop did not end exactly on the string
    // boundary, re-scan the final (overlapping) word at offset 0.
    mov(cnt1_neg, 0);
    br(LT, CH1_LOOP);
    b(NOMATCH);

  BIND(HAS_ZERO);
    // First matching byte: byte-reverse the mask, count leading zeros,
    // convert the bit index to a byte offset (LSR 3).
    rev(tmp1, tmp1);
    clz(tmp1, tmp1);
    add(cnt1_neg, cnt1_neg, tmp1, LSR, 3);
    b(MATCH);

  BIND(DO1_SHORT);
    // Short string: simple byte-at-a-time scan.
    mov(result_tmp, cnt1);
    lea(str1, Address(str1, cnt1));
    sub(cnt1_neg, zr, cnt1);
  BIND(DO1_LOOP);
    ldrb(ch1, Address(str1, cnt1_neg));
    cmp(ch, ch1);
    br(EQ, MATCH);
    adds(cnt1_neg, cnt1_neg, 1);
    br(LT, DO1_LOOP);
  BIND(NOMATCH);
    mov(result, -1);
    b(DONE);
  BIND(MATCH);
    // For Latin-1 the byte offset is already the character index.
    add(result, result_tmp, cnt1_neg);
  BIND(DONE);
}

// Compare strings lexicographically.  'ae' encodes the two argument
// encodings (StrIntrinsicNode::LL/UU/LU/UL; L = Latin-1 bytes, U = UTF-16
// chars, widened to UTF-16 via zip1 with a zero vector before comparing).
// On return 'result' is negative/zero/positive: the difference of the first
// pair of differing characters, or, if one string is a prefix of the other,
// the difference of the lengths.  Long strings are handed off to the
// compare_long_string_* stubs.
// NOTE(review): vtmp3 is not used on this inline path -- presumably
// reserved for the out-of-line stubs; confirm before repurposing.
void C2_MacroAssembler::string_compare(Register str1, Register str2,
    Register cnt1, Register cnt2, Register result, Register tmp1, Register tmp2,
    FloatRegister vtmp1, FloatRegister vtmp2, FloatRegister vtmp3, int ae) {
  Label DONE, SHORT_LOOP, SHORT_STRING, SHORT_LAST, TAIL, STUB,
      DIFF, NEXT_WORD, SHORT_LOOP_TAIL, SHORT_LAST2, SHORT_LAST_INIT,
      SHORT_LOOP_START, TAIL_CHECK;

  bool isLL = ae == StrIntrinsicNode::LL;
  bool isLU = ae == StrIntrinsicNode::LU;
  bool isUL = ae == StrIntrinsicNode::UL;

  // The stub threshold for LL strings is: 72 (64 + 8) chars
  // UU: 36 chars, or 72 bytes (valid for the 64-byte large loop with prefetch)
  // LU/UL: 24 chars, or 48 bytes (valid for the 16-character loop at least)
  const u1 stub_threshold = isLL ? 72 : ((isLU || isUL) ? 24 : 36);

  bool str1_isL = isLL || isLU;
  bool str2_isL = isLL || isUL;

  int str1_chr_shift = str1_isL ? 0 : 1;   // log2(bytes per char)
  int str2_chr_shift = str2_isL ? 0 : 1;
  int str1_chr_size = str1_isL ? 1 : 2;    // bytes per char
  int str2_chr_size = str2_isL ? 1 : 2;
  int minCharsInWord = isLL ? wordSize : wordSize/2;

  FloatRegister vtmpZ = vtmp1, vtmp = vtmp2;
  // Per-encoding single-character load and final widening instructions.
  chr_insn str1_load_chr = str1_isL ? (chr_insn)&MacroAssembler::ldrb :
                                      (chr_insn)&MacroAssembler::ldrh;
  chr_insn str2_load_chr = str2_isL ? (chr_insn)&MacroAssembler::ldrb :
                                      (chr_insn)&MacroAssembler::ldrh;
  uxt_insn ext_chr = isLL ? (uxt_insn)&MacroAssembler::uxtbw :
                            (uxt_insn)&MacroAssembler::uxthw;

  BLOCK_COMMENT("string_compare {");

  // Bizarrely, the counts are passed in bytes, regardless of whether they
  // are L or U strings, however the result is always in characters.
  if (!str1_isL) asrw(cnt1, cnt1, 1);
  if (!str2_isL) asrw(cnt2, cnt2, 1);

  // Compute the minimum of the string lengths and save the difference.
  subsw(result, cnt1, cnt2);
  cselw(cnt2, cnt1, cnt2, Assembler::LE); // min

  // A very short string
  cmpw(cnt2, minCharsInWord);
  br(Assembler::LE, SHORT_STRING);

  // Compare longwords
  // load first parts of strings and finish initialization while loading
  {
    if (str1_isL == str2_isL) { // LL or UU
      ldr(tmp1, Address(str1));
      // Identical array references compare equal up to min length; only the
      // length difference (already in result) matters.
      cmp(str1, str2);
      br(Assembler::EQ, DONE);
      ldr(tmp2, Address(str2));
      cmp(cnt2, stub_threshold);
      br(GE, STUB);
      subsw(cnt2, cnt2, minCharsInWord);
      br(EQ, TAIL_CHECK);
      // Point both strings one word before their ends; cnt2 becomes the
      // negative scan offset.
      lea(str2, Address(str2, cnt2, Address::uxtw(str2_chr_shift)));
      lea(str1, Address(str1, cnt2, Address::uxtw(str1_chr_shift)));
      sub(cnt2, zr, cnt2, LSL, str2_chr_shift);
    } else if (isLU) {
      // str1 is Latin-1: load 4 bytes and widen to 4 chars with zip1.
      ldrs(vtmp, Address(str1));
      ldr(tmp2, Address(str2));
      cmp(cnt2, stub_threshold);
      br(GE, STUB);
      subw(cnt2, cnt2, 4);
      eor(vtmpZ, T16B, vtmpZ, vtmpZ);   // zero vector for widening
      lea(str1, Address(str1, cnt2, Address::uxtw(str1_chr_shift)));
      lea(str2, Address(str2, cnt2, Address::uxtw(str2_chr_shift)));
      zip1(vtmp, T8B, vtmp, vtmpZ);
      // Separate negative offsets: cnt1 walks str1 in bytes, cnt2 walks
      // str2 in char-sized steps.
      sub(cnt1, zr, cnt2, LSL, str1_chr_shift);
      sub(cnt2, zr, cnt2, LSL, str2_chr_shift);
      add(cnt1, cnt1, 4);
      fmovd(tmp1, vtmp);
    } else { // UL case
      ldr(tmp1, Address(str1));
      ldrs(vtmp, Address(str2));
      cmp(cnt2, stub_threshold);
      br(GE, STUB);
      subw(cnt2, cnt2, 4);
      lea(str1, Address(str1, cnt2, Address::uxtw(str1_chr_shift)));
      eor(vtmpZ, T16B, vtmpZ, vtmpZ);
      lea(str2, Address(str2, cnt2, Address::uxtw(str2_chr_shift)));
      sub(cnt1, zr, cnt2, LSL, str1_chr_shift);
      zip1(vtmp, T8B, vtmp, vtmpZ);
      sub(cnt2, zr, cnt2, LSL, str2_chr_shift);
      add(cnt1, cnt1, 8);
      fmovd(tmp2, vtmp);
    }
    adds(cnt2, cnt2, isUL ? 4 : 8);
    br(GE, TAIL);
    eor(rscratch2, tmp1, tmp2);   // nonzero iff the first words differ
    cbnz(rscratch2, DIFF);
    // main loop
    bind(NEXT_WORD);
    if (str1_isL == str2_isL) {
      ldr(tmp1, Address(str1, cnt2));
      ldr(tmp2, Address(str2, cnt2));
      adds(cnt2, cnt2, 8);
    } else if (isLU) {
      // Load 4 Latin-1 bytes, widen to 4 chars, compare against 4 chars.
      ldrs(vtmp, Address(str1, cnt1));
      ldr(tmp2, Address(str2, cnt2));
      add(cnt1, cnt1, 4);
      zip1(vtmp, T8B, vtmp, vtmpZ);
      fmovd(tmp1, vtmp);
      adds(cnt2, cnt2, 8);
    } else { // UL
      ldrs(vtmp, Address(str2, cnt2));
      ldr(tmp1, Address(str1, cnt1));
      zip1(vtmp, T8B, vtmp, vtmpZ);
      add(cnt1, cnt1, 8);
      fmovd(tmp2, vtmp);
      adds(cnt2, cnt2, 4);
    }
    br(GE, TAIL);

    eor(rscratch2, tmp1, tmp2);
    cbz(rscratch2, NEXT_WORD);
    b(DIFF);
    bind(TAIL);
    // Words loaded by the last iteration have not been compared yet.
    eor(rscratch2, tmp1, tmp2);
    cbnz(rscratch2, DIFF);
    // Last longword.  In the case where length == 4 we compare the
    // same longword twice, but that's still faster than another
    // conditional branch.
    if (str1_isL == str2_isL) {
      ldr(tmp1, Address(str1));
      ldr(tmp2, Address(str2));
    } else if (isLU) {
      ldrs(vtmp, Address(str1));
      ldr(tmp2, Address(str2));
      zip1(vtmp, T8B, vtmp, vtmpZ);
      fmovd(tmp1, vtmp);
    } else { // UL
      ldrs(vtmp, Address(str2));
      ldr(tmp1, Address(str1));
      zip1(vtmp, T8B, vtmp, vtmpZ);
      fmovd(tmp2, vtmp);
    }
    bind(TAIL_CHECK);
    eor(rscratch2, tmp1, tmp2);
    cbz(rscratch2, DONE);

    // Find the first different characters in the longwords and
    // compute their difference.
    bind(DIFF);
    rev(rscratch2, rscratch2);
    clz(rscratch2, rscratch2);
    // Round the bit index down to a character boundary (8 or 16 bits).
    andr(rscratch2, rscratch2, isLL ? -8 : -16);
    lsrv(tmp1, tmp1, rscratch2);
    (this->*ext_chr)(tmp1, tmp1);
    lsrv(tmp2, tmp2, rscratch2);
    (this->*ext_chr)(tmp2, tmp2);
    subw(result, tmp1, tmp2);
    b(DONE);
  }

  // Long strings: tail-call the pre-generated out-of-line comparison stub
  // for this encoding pair.
  bind(STUB);
  RuntimeAddress stub = NULL;
  switch(ae) {
  case StrIntrinsicNode::LL:
    stub = RuntimeAddress(StubRoutines::aarch64::compare_long_string_LL());
    break;
  case StrIntrinsicNode::UU:
    stub = RuntimeAddress(StubRoutines::aarch64::compare_long_string_UU());
    break;
  case StrIntrinsicNode::LU:
    stub = RuntimeAddress(StubRoutines::aarch64::compare_long_string_LU());
    break;
  case StrIntrinsicNode::UL:
    stub = RuntimeAddress(StubRoutines::aarch64::compare_long_string_UL());
    break;
  default:
    ShouldNotReachHere();
  }
  assert(stub.target() != NULL, "compare_long_string stub has not been generated");
  trampoline_call(stub);
  b(DONE);

  bind(SHORT_STRING);
  // Is the minimum length zero?
  cbz(cnt2, DONE);
  // arrange code to do most branches while loading and loading next characters
  // while comparing previous.  The two in-flight character pairs alternate
  // between (tmp1, cnt1) and (tmp2, rscratch1).
  (this->*str1_load_chr)(tmp1, Address(post(str1, str1_chr_size)));
  subs(cnt2, cnt2, 1);
  br(EQ, SHORT_LAST_INIT);
  (this->*str2_load_chr)(cnt1, Address(post(str2, str2_chr_size)));
  b(SHORT_LOOP_START);
  bind(SHORT_LOOP);
  subs(cnt2, cnt2, 1);
  br(EQ, SHORT_LAST);
  bind(SHORT_LOOP_START);
  // Load the next pair before comparing the previous one.
  (this->*str1_load_chr)(tmp2, Address(post(str1, str1_chr_size)));
  (this->*str2_load_chr)(rscratch1, Address(post(str2, str2_chr_size)));
  cmp(tmp1, cnt1);
  br(NE, SHORT_LOOP_TAIL);
  subs(cnt2, cnt2, 1);
  br(EQ, SHORT_LAST2);
  (this->*str1_load_chr)(tmp1, Address(post(str1, str1_chr_size)));
  (this->*str2_load_chr)(cnt1, Address(post(str2, str2_chr_size)));
  cmp(tmp2, rscratch1);
  br(EQ, SHORT_LOOP);
  sub(result, tmp2, rscratch1);
  b(DONE);
  bind(SHORT_LOOP_TAIL);
  sub(result, tmp1, cnt1);
  b(DONE);
  bind(SHORT_LAST2);
  cmp(tmp2, rscratch1);
  br(EQ, DONE);
  sub(result, tmp2, rscratch1);

  b(DONE);
  bind(SHORT_LAST_INIT);
  (this->*str2_load_chr)(cnt1, Address(post(str2, str2_chr_size)));
  bind(SHORT_LAST);
  // Last pair: on equality the length difference (already in result) stands.
  cmp(tmp1, cnt1);
  br(EQ, DONE);
  sub(result, tmp1, cnt1);

  bind(DONE);

  BLOCK_COMMENT("} string_compare");
}

// Emit a NEON element-wise compare of src1 and src2 into dst.  'cond' is a
// BoolTest code; 'bt' selects float vs integer compare instructions and the
// element size; 'isQ' selects 64- vs 128-bit vectors.  'ne' has no direct
// instruction and is synthesized as eq followed by a bitwise NOT; le/lt
// reuse ge/gt with the operands swapped; the unsigned integer tests map to
// cmhs/cmhi.
void C2_MacroAssembler::neon_compare(FloatRegister dst, BasicType bt, FloatRegister src1,
                                     FloatRegister src2, int cond, bool isQ) {
  SIMD_Arrangement size = esize2arrangement(type2aelembytes(bt), isQ);
  if (bt == T_FLOAT || bt == T_DOUBLE) {
    switch (cond) {
      case BoolTest::eq: fcmeq(dst, size, src1, src2); break;
      case BoolTest::ne: {
        fcmeq(dst, size, src1, src2);
        notr(dst, T16B, dst);
        break;
      }
      case BoolTest::ge: fcmge(dst, size, src1, src2); break;
      case BoolTest::gt: fcmgt(dst, size, src1, src2); break;
      case BoolTest::le: fcmge(dst, size, src2, src1); break; // a <= b  <=>  b >= a
      case BoolTest::lt: fcmgt(dst, size, src2, src1); break; // a <  b  <=>  b >  a
      default:
        assert(false, "unsupported");
        ShouldNotReachHere();
    }
  } else {
    switch (cond) {
      case BoolTest::eq: cmeq(dst, size, src1, src2); break;
      case BoolTest::ne: {
        cmeq(dst, size, src1, src2);
        notr(dst, T16B, dst);
        break;
      }
      case BoolTest::ge: cmge(dst, size, src1, src2); break;
      case BoolTest::gt: cmgt(dst, size, src1, src2); break;
      case BoolTest::le: cmge(dst, size, src2, src1); break;
      case BoolTest::lt: cmgt(dst, size, src2, src1); break;
      case BoolTest::uge: cmhs(dst, size, src1, src2); break;
      case BoolTest::ugt: cmhi(dst, size, src1, src2); break;
      case BoolTest::ult: cmhi(dst, size, src2, src1); break;
      case BoolTest::ule: cmhs(dst, size, src2, src1); break;
      default:
        assert(false, "unsupported");
        ShouldNotReachHere();
    }
  }
}

// Load the narrow klass into dst from the object's mark word (which holds
// the klass bits above markWord::klass_shift).  If the mark word is overlaid
// by a monitor pointer (markWord::monitor_value set), branch to the
// C2LoadNKlassStub slow path -- presumably to recover the displaced mark
// word from the monitor; confirm against C2LoadNKlassStub.
void C2_MacroAssembler::load_nklass_compact(Register dst, Register obj, Register index, int scale, int disp) {
  C2LoadNKlassStub* stub = new (Compile::current()->comp_arena()) C2LoadNKlassStub(dst);
  Compile::current()->output()->add_stub(stub);

  // Note: Don't clobber obj anywhere in that method!

  // The incoming address is pointing into obj-start + klass_offset_in_bytes. We need to extract
  // obj-start, so that we can load from the object's mark-word instead. Usually the address
  // comes as obj-start in obj and klass_offset_in_bytes in disp. However, sometimes C2
  // emits code that pre-computes obj-start + klass_offset_in_bytes into a register, and
  // then passes that register as obj and 0 in disp. The following code extracts the base
  // and offset to load the mark-word.
  int offset = oopDesc::mark_offset_in_bytes() + disp - oopDesc::klass_offset_in_bytes();
  if (index == noreg) {
    ldr(dst, Address(obj, offset));
  } else {
    lea(dst, Address(obj, index, Address::lsl(scale)));
    ldr(dst, Address(dst, offset));
  }
  // NOTE: We can't use tbnz here, because the target is sometimes too far away
  // and cannot be encoded.
  tst(dst, markWord::monitor_value);
  br(Assembler::NE, stub->entry());
  bind(stub->continuation());
  // Shift the klass bits down into place.
  lsr(dst, dst, markWord::klass_shift);
}