1 /*
2 * Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "precompiled.hpp"
26 #include "asm/assembler.hpp"
27 #include "asm/assembler.inline.hpp"
28 #include "opto/c2_CodeStubs.hpp"
29 #include "opto/c2_MacroAssembler.hpp"
30 #include "opto/compile.hpp"
31 #include "opto/output.hpp"
32 #include "opto/intrinsicnode.hpp"
33 #include "opto/subnode.hpp"
34 #include "runtime/stubRoutines.hpp"
35 #include "utilities/globalDefinitions.hpp"
36
37 #ifdef PRODUCT
38 #define BLOCK_COMMENT(str) /* nothing */
39 #define STOP(error) stop(error)
40 #else
41 #define BLOCK_COMMENT(str) block_comment(str)
42 #define STOP(error) block_comment(error); stop(error)
43 #endif
44
45 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
46
47 typedef void (MacroAssembler::* chr_insn)(Register Rt, const Address &adr);
48
49 void C2_MacroAssembler::fast_lock(Register objectReg, Register boxReg, Register tmpReg,
50 Register tmp2Reg, Register tmp3Reg) {
51 Register oop = objectReg;
52 Register box = boxReg;
53 Register disp_hdr = tmpReg;
54 Register tmp = tmp2Reg;
55 Label cont;
56 Label object_has_monitor;
57 Label cas_failed;
58
59 assert(LockingMode != LM_LIGHTWEIGHT, "lightweight locking should use fast_lock_lightweight");
60 assert_different_registers(oop, box, tmp, disp_hdr);
61
62 // Load markWord from object into displaced_header.
63 ldr(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes()));
64
65 if (DiagnoseSyncOnValueBasedClasses != 0) {
66 load_klass(tmp, oop);
67 ldrw(tmp, Address(tmp, Klass::access_flags_offset()));
68 tstw(tmp, JVM_ACC_IS_VALUE_BASED_CLASS);
69 br(Assembler::NE, cont);
70 }
71
72 if (UseBiasedLocking && !UseOptoBiasInlining) {
73 biased_locking_enter(box, oop, disp_hdr, tmp, true, cont);
74 }
75
76 // Check for existing monitor
77 tbnz(disp_hdr, exact_log2(markWord::monitor_value), object_has_monitor);
78
79 if (LockingMode == LM_MONITOR) {
80 tst(oop, oop); // Set NE to indicate 'failure' -> take slow-path. We know that oop != 0.
81 b(cont);
82 } else {
83 assert(LockingMode == LM_LEGACY, "must be");
84 // Set tmp to be (markWord of object | UNLOCK_VALUE).
85 orr(tmp, disp_hdr, markWord::unlocked_value);
86
87 // Initialize the box. (Must happen before we update the object mark!)
88 str(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes()));
89
90 // Compare object markWord with an unlocked value (tmp) and if
91 // equal exchange the stack address of our box with object markWord.
92 // On failure disp_hdr contains the possibly locked markWord.
93 cmpxchg(oop, tmp, box, Assembler::xword, /*acquire*/ true,
94 /*release*/ true, /*weak*/ false, disp_hdr);
95 br(Assembler::EQ, cont);
96
97 assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
98
99 // If the compare-and-exchange succeeded, then we found an unlocked
100 // object, will have now locked it will continue at label cont
101
102 bind(cas_failed);
103 // We did not see an unlocked object so try the fast recursive case.
104
105 // Check if the owner is self by comparing the value in the
106 // markWord of object (disp_hdr) with the stack pointer.
107 mov(rscratch1, sp);
108 sub(disp_hdr, disp_hdr, rscratch1);
109 mov(tmp, (address) (~(os::vm_page_size()-1) | markWord::lock_mask_in_place));
110 // If condition is true we are cont and hence we can store 0 as the
111 // displaced header in the box, which indicates that it is a recursive lock.
112 ands(tmp/*==0?*/, disp_hdr, tmp); // Sets flags for result
113 str(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes()));
114 b(cont);
115 }
116
117 // Handle existing monitor.
118 bind(object_has_monitor);
119
120 // The object's monitor m is unlocked iff m->owner == NULL,
121 // otherwise m->owner may contain a thread or a stack address.
122 //
123 // Try to CAS m->owner from NULL to current thread.
124 add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes()-markWord::monitor_value));
125 cmpxchg(tmp, zr, rthread, Assembler::xword, /*acquire*/ true,
126 /*release*/ true, /*weak*/ false, rscratch1); // Sets flags for result
127
128 // Store a non-null value into the box to avoid looking like a re-entrant
129 // lock. The fast-path monitor unlock code checks for
130 // markWord::monitor_value so use markWord::unused_mark which has the
131 // relevant bit set, and also matches ObjectSynchronizer::enter.
132 mov(tmp, (address)markWord::unused_mark().value());
133 str(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes()));
134
135 br(Assembler::EQ, cont); // CAS success means locking succeeded
136
137 cmp(rscratch1, rthread);
138 br(Assembler::NE, cont); // Check for recursive locking
139
140 // Recursive lock case
141 increment(Address(disp_hdr, ObjectMonitor::recursions_offset_in_bytes() - markWord::monitor_value), 1);
142 // flag == EQ still from the cmp above, checking if this is a reentrant lock
143
144 bind(cont);
145 // flag == EQ indicates success
146 // flag == NE indicates failure
147 }
148
149 void C2_MacroAssembler::fast_unlock(Register objectReg, Register boxReg, Register tmpReg,
150 Register tmp2Reg) {
151 Register oop = objectReg;
152 Register box = boxReg;
153 Register disp_hdr = tmpReg;
154 Register tmp = tmp2Reg;
155 Label cont;
156 Label object_has_monitor;
157
158 assert(LockingMode != LM_LIGHTWEIGHT, "lightweight locking should use fast_unlock_lightweight");
159 assert_different_registers(oop, box, tmp, disp_hdr);
160
161 if (UseBiasedLocking && !UseOptoBiasInlining) {
162 biased_locking_exit(oop, tmp, cont);
163 }
164
165 if (LockingMode == LM_LEGACY) {
166 // Find the lock address and load the displaced header from the stack.
167 ldr(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));
168
169 // If the displaced header is 0, we have a recursive unlock.
170 cmp(disp_hdr, zr);
171 br(Assembler::EQ, cont);
172 }
173
174 // Handle existing monitor.
175 ldr(tmp, Address(oop, oopDesc::mark_offset_in_bytes()));
176 tbnz(tmp, exact_log2(markWord::monitor_value), object_has_monitor);
177
178 if (LockingMode == LM_MONITOR) {
179 tst(oop, oop); // Set NE to indicate 'failure' -> take slow-path. We know that oop != 0.
180 b(cont);
181 } else {
182 assert(LockingMode == LM_LEGACY, "must be");
183 // Check if it is still a light weight lock, this is is true if we
184 // see the stack address of the basicLock in the markWord of the
185 // object.
186
187 cmpxchg(oop, box, disp_hdr, Assembler::xword, /*acquire*/ false,
188 /*release*/ true, /*weak*/ false, tmp);
189 b(cont);
190 }
191
192 assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
193
194 // Handle existing monitor.
195 bind(object_has_monitor);
196 STATIC_ASSERT(markWord::monitor_value <= INT_MAX);
197 add(tmp, tmp, -(int)markWord::monitor_value); // monitor
198
199 ldr(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes()));
200
201 Label notRecursive;
202 cbz(disp_hdr, notRecursive);
203
204 // Recursive lock
205 sub(disp_hdr, disp_hdr, 1u);
206 str(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes()));
207 cmp(disp_hdr, disp_hdr); // Sets flags for result
208 b(cont);
209
210 bind(notRecursive);
211 ldr(rscratch1, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes()));
212 ldr(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes()));
213 orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if both are 0.
214 cmp(rscratch1, zr); // Sets flags for result
215 cbnz(rscratch1, cont);
216 // need a release store here
217 lea(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
218 stlr(zr, tmp); // set unowned
219
220 bind(cont);
221 // flag == EQ indicates success
222 // flag == NE indicates failure
223 }
224
// Fast-path monitor enter for LM_LIGHTWEIGHT locking.
// On exit, flags == EQ means the lock was acquired; flags == NE means the
// caller must continue in the runtime slow path. Clobbers t1-t3 and rflags.
void C2_MacroAssembler::fast_lock_lightweight(Register obj, Register t1,
                                              Register t2, Register t3) {
  assert(LockingMode == LM_LIGHTWEIGHT, "must be");
  assert_different_registers(obj, t1, t2, t3);

  // Handle inflated monitor.
  Label inflated;
  // Finish fast lock successfully. MUST branch to with flag == EQ
  Label locked;
  // Finish fast lock unsuccessfully. MUST branch to with flag == NE
  Label slow_path;

  if (DiagnoseSyncOnValueBasedClasses != 0) {
    // Synchronizing on a value-based class is diagnosed in the runtime.
    load_klass(t1, obj);
    ldrw(t1, Address(t1, Klass::access_flags_offset()));
    tstw(t1, JVM_ACC_IS_VALUE_BASED_CLASS);
    br(Assembler::NE, slow_path);
  }

  const Register t1_mark = t1;

  { // Lightweight locking

    // Push lock to the lock stack and finish successfully. MUST branch to with flag == EQ
    Label push;

    const Register t2_top = t2;
    const Register t3_t = t3;

    // Check if lock-stack is full.
    ldrw(t2_top, Address(rthread, JavaThread::lock_stack_top_offset()));
    cmpw(t2_top, (unsigned)LockStack::end_offset() - 1);
    br(Assembler::GT, slow_path);

    // Check if recursive.
    // If the entry just below the top equals obj, this is a recursive enter
    // and no CAS on the markWord is needed.
    subw(t3_t, t2_top, oopSize);
    ldr(t3_t, Address(rthread, t3_t));
    cmp(obj, t3_t);
    br(Assembler::EQ, push);

    // Relaxed normal load to check for monitor. Optimization for monitor case.
    ldr(t1_mark, Address(obj, oopDesc::mark_offset_in_bytes()));
    tbnz(t1_mark, exact_log2(markWord::monitor_value), inflated);

    // Not inflated
    assert(oopDesc::mark_offset_in_bytes() == 0, "required to avoid a lea");

    // Try to lock. Transition lock-bits 0b01 => 0b00
    // expected = mark with unlocked bit set, new = same mark with it cleared.
    orr(t1_mark, t1_mark, markWord::unlocked_value);
    eor(t3_t, t1_mark, markWord::unlocked_value);
    cmpxchg(/*addr*/ obj, /*expected*/ t1_mark, /*new*/ t3_t, Assembler::xword,
            /*acquire*/ true, /*release*/ false, /*weak*/ false, noreg);
    br(Assembler::NE, slow_path);

    bind(push);
    // After successful lock, push object on lock-stack.
    str(obj, Address(rthread, t2_top));
    addw(t2_top, t2_top, oopSize);
    strw(t2_top, Address(rthread, JavaThread::lock_stack_top_offset()));
    b(locked);
  }

  { // Handle inflated monitor.
    bind(inflated);

    // mark contains the tagged ObjectMonitor*.
    const Register t1_tagged_monitor = t1_mark;
    const uintptr_t monitor_tag = markWord::monitor_value;
    const Register t2_owner_addr = t2;
    const Register t3_owner = t3;

    // Compute owner address.
    lea(t2_owner_addr, Address(t1_tagged_monitor, ObjectMonitor::owner_offset_in_bytes() - monitor_tag));

    // CAS owner (null => current thread).
    cmpxchg(t2_owner_addr, zr, rthread, Assembler::xword, /*acquire*/ true,
            /*release*/ false, /*weak*/ false, t3_owner);
    br(Assembler::EQ, locked);

    // Check if recursive.
    cmp(t3_owner, rthread);
    br(Assembler::NE, slow_path);

    // Recursive.
    increment(Address(t1_tagged_monitor, ObjectMonitor::recursions_offset_in_bytes() - monitor_tag), 1);
    // Falls through to 'locked' with EQ still set from the cmp above.
  }

  bind(locked);
#ifdef ASSERT
  // Check that locked label is reached with Flags == EQ.
  Label flag_correct;
  br(Assembler::EQ, flag_correct);
  stop("Fast Lock Flag != EQ");
#endif

  bind(slow_path);
#ifdef ASSERT
  // Check that slow_path label is reached with Flags == NE.
  br(Assembler::NE, flag_correct);
  stop("Fast Lock Flag != NE");
  bind(flag_correct);
#endif
  // C2 uses the value of Flags (NE vs EQ) to determine the continuation.
}
329
330 void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register t1, Register t2,
331 Register t3) {
332 assert(LockingMode == LM_LIGHTWEIGHT, "must be");
333 assert_different_registers(obj, t1, t2, t3);
334
335 // Handle inflated monitor.
336 Label inflated, inflated_load_monitor;
337 // Finish fast unlock successfully. MUST branch to with flag == EQ
338 Label unlocked;
339 // Finish fast unlock unsuccessfully. MUST branch to with flag == NE
340 Label slow_path;
341
342 const Register t1_mark = t1;
343 const Register t2_top = t2;
344 const Register t3_t = t3;
345
346 { // Lightweight unlock
347
348 // Check if obj is top of lock-stack.
349 ldrw(t2_top, Address(rthread, JavaThread::lock_stack_top_offset()));
350 subw(t2_top, t2_top, oopSize);
351 ldr(t3_t, Address(rthread, t2_top));
352 cmp(obj, t3_t);
353 // Top of lock stack was not obj. Must be monitor.
354 br(Assembler::NE, inflated_load_monitor);
355
356 // Pop lock-stack.
357 DEBUG_ONLY(str(zr, Address(rthread, t2_top));)
358 strw(t2_top, Address(rthread, JavaThread::lock_stack_top_offset()));
359
360 // Check if recursive.
361 subw(t3_t, t2_top, oopSize);
362 ldr(t3_t, Address(rthread, t3_t));
363 cmp(obj, t3_t);
364 br(Assembler::EQ, unlocked);
365
366 // Not recursive.
367 // Load Mark.
368 ldr(t1_mark, Address(obj, oopDesc::mark_offset_in_bytes()));
369
370 // Check header for monitor (0b10).
371 tbnz(t1_mark, exact_log2(markWord::monitor_value), inflated);
372
373 // Try to unlock. Transition lock bits 0b00 => 0b01
374 assert(oopDesc::mark_offset_in_bytes() == 0, "required to avoid lea");
375 orr(t3_t, t1_mark, markWord::unlocked_value);
376 cmpxchg(/*addr*/ obj, /*expected*/ t1_mark, /*new*/ t3_t, Assembler::xword,
377 /*acquire*/ false, /*release*/ true, /*weak*/ false, noreg);
378 br(Assembler::EQ, unlocked);
379
380 // Compare and exchange failed.
381 // Restore lock-stack and handle the unlock in runtime.
382 DEBUG_ONLY(str(obj, Address(rthread, t2_top));)
383 addw(t2_top, t2_top, oopSize);
384 str(t2_top, Address(rthread, JavaThread::lock_stack_top_offset()));
385 b(slow_path);
386 }
387
388
389 { // Handle inflated monitor.
390 bind(inflated_load_monitor);
391 ldr(t1_mark, Address(obj, oopDesc::mark_offset_in_bytes()));
392 #ifdef ASSERT
393 tbnz(t1_mark, exact_log2(markWord::monitor_value), inflated);
394 stop("Fast Unlock not monitor");
395 #endif
396
397 bind(inflated);
398
399 #ifdef ASSERT
400 Label check_done;
401 subw(t2_top, t2_top, oopSize);
402 cmpw(t2_top, in_bytes(JavaThread::lock_stack_base_offset()));
403 br(Assembler::LT, check_done);
404 ldr(t3_t, Address(rthread, t2_top));
405 cmp(obj, t3_t);
406 br(Assembler::NE, inflated);
407 stop("Fast Unlock lock on stack");
408 bind(check_done);
409 #endif
410
411 // mark contains the tagged ObjectMonitor*.
412 const Register t1_monitor = t1_mark;
413 const uintptr_t monitor_tag = markWord::monitor_value;
414
415 // Untag the monitor.
416 sub(t1_monitor, t1_mark, monitor_tag);
417
418 const Register t2_recursions = t2;
419 Label not_recursive;
420
421 // Check if recursive.
422 ldr(t2_recursions, Address(t1_monitor, ObjectMonitor::recursions_offset_in_bytes()));
423 cbz(t2_recursions, not_recursive);
424
425 // Recursive unlock.
426 sub(t2_recursions, t2_recursions, 1u);
427 str(t2_recursions, Address(t1_monitor, ObjectMonitor::recursions_offset_in_bytes()));
428 // Set flag == EQ
429 cmp(t2_recursions, t2_recursions);
430 b(unlocked);
431
432 bind(not_recursive);
433
434 Label release;
435 const Register t2_owner_addr = t2;
436
437 // Compute owner address.
438 lea(t2_owner_addr, Address(t1_monitor, ObjectMonitor::owner_offset_in_bytes()));
439
440 // Check if the entry lists are empty.
441 ldr(rscratch1, Address(t1_monitor, ObjectMonitor::EntryList_offset_in_bytes()));
442 ldr(t3_t, Address(t1_monitor, ObjectMonitor::cxq_offset_in_bytes()));
443 orr(rscratch1, rscratch1, t3_t);
444 cmp(rscratch1, zr);
445 br(Assembler::EQ, release);
446
447 // The owner may be anonymous and we removed the last obj entry in
448 // the lock-stack. This loses the information about the owner.
449 // Write the thread to the owner field so the runtime knows the owner.
450 str(rthread, Address(t2_owner_addr));
451 b(slow_path);
452
453 bind(release);
454 // Set owner to null.
455 // Release to satisfy the JMM
456 stlr(zr, t2_owner_addr);
457 }
458
459 bind(unlocked);
460 #ifdef ASSERT
461 // Check that unlocked label is reached with Flags == EQ.
462 Label flag_correct;
463 br(Assembler::EQ, flag_correct);
464 stop("Fast Unlock Flag != EQ");
465 #endif
466
467 bind(slow_path);
468 #ifdef ASSERT
469 // Check that slow_path label is reached with Flags == NE.
470 br(Assembler::NE, flag_correct);
471 stop("Fast Unlock Flag != NE");
472 bind(flag_correct);
473 #endif
474 // C2 uses the value of Flags (NE vs EQ) to determine the continuation.
475 }
476
// Search for str1 in str2 and return index or -1
// Clobbers: rscratch1, rscratch2, rflags. May also clobber v0-v1, when icnt1==-1.
//
// str2/cnt2: source string address and length (in characters)
// str1/cnt1: pattern string address and length (in characters)
// icnt1:     pattern length when known at compile time, otherwise -1
// ae:        string encodings, a StrIntrinsicNode value (LL/UU/UL/LU)
void C2_MacroAssembler::string_indexof(Register str2, Register str1,
                                       Register cnt2, Register cnt1,
                                       Register tmp1, Register tmp2,
                                       Register tmp3, Register tmp4,
                                       Register tmp5, Register tmp6,
                                       int icnt1, Register result, int ae) {
  // NOTE: tmp5, tmp6 can be zr depending on specific method version
  Label LINEARSEARCH, LINEARSTUB, LINEAR_MEDIUM, DONE, NOMATCH, MATCH;

  Register ch1 = rscratch1;
  Register ch2 = rscratch2;
  Register cnt1tmp = tmp1;
  Register cnt2tmp = tmp2;
  Register cnt1_neg = cnt1;
  Register cnt2_neg = cnt2;
  Register result_tmp = tmp4;

  bool isL = ae == StrIntrinsicNode::LL;

  bool str1_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL;
  bool str2_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU;
  int str1_chr_shift = str1_isL ? 0:1;
  int str2_chr_shift = str2_isL ? 0:1;
  int str1_chr_size = str1_isL ? 1:2;
  int str2_chr_size = str2_isL ? 1:2;
  // Per-encoding character loads: byte for Latin1, halfword for UTF-16.
  chr_insn str1_load_1chr = str1_isL ? (chr_insn)&MacroAssembler::ldrb :
                                      (chr_insn)&MacroAssembler::ldrh;
  chr_insn str2_load_1chr = str2_isL ? (chr_insn)&MacroAssembler::ldrb :
                                      (chr_insn)&MacroAssembler::ldrh;
  chr_insn load_2chr = isL ? (chr_insn)&MacroAssembler::ldrh : (chr_insn)&MacroAssembler::ldrw;
  chr_insn load_4chr = isL ? (chr_insn)&MacroAssembler::ldrw : (chr_insn)&MacroAssembler::ldr;

  // Note, inline_string_indexOf() generates checks:
  // if (substr.count > string.count) return -1;
  // if (substr.count == 0) return 0;

  // We have two strings, a source string in str2, cnt2 and a pattern string
  // in str1, cnt1. Find the 1st occurrence of pattern in source or return -1.

  // For larger pattern and source we use a simplified Boyer Moore algorithm.
  // With a small pattern and source we use linear scan.

  if (icnt1 == -1) {
    // Runtime dispatch between linear scan, Boyer-Moore, and the linear stub.
    sub(result_tmp, cnt2, cnt1);
    cmp(cnt1, (u1)8); // Use Linear Scan if cnt1 < 8 || cnt1 >= 256
    br(LT, LINEARSEARCH);
    dup(v0, T16B, cnt1); // done in separate FPU pipeline. Almost no penalty
    subs(zr, cnt1, 256);
    lsr(tmp1, cnt2, 2);
    ccmp(cnt1, tmp1, 0b0000, LT); // Source must be 4 * pattern for BM
    br(GE, LINEARSTUB);
  }

// The Boyer Moore algorithm is based on the description here:-
//
// http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm
//
// This describes an algorithm with 2 shift rules. The 'Bad Character' rule
// and the 'Good Suffix' rule.
//
// These rules are essentially heuristics for how far we can shift the
// pattern along the search string.
//
// The implementation here uses the 'Bad Character' rule only because of the
// complexity of initialisation for the 'Good Suffix' rule.
//
// This is also known as the Boyer-Moore-Horspool algorithm:-
//
// http://en.wikipedia.org/wiki/Boyer-Moore-Horspool_algorithm
//
// This particular implementation has few java-specific optimizations.
//
// #define ASIZE 256
//
//    int bm(unsigned char *x, int m, unsigned char *y, int n) {
//       int i, j;
//       unsigned c;
//       unsigned char bc[ASIZE];
//
//       /* Preprocessing */
//       for (i = 0; i < ASIZE; ++i)
//          bc[i] = m;
//       for (i = 0; i < m - 1; ) {
//          c = x[i];
//          ++i;
//          // c < 256 for Latin1 string, so, no need for branch
//          #ifdef PATTERN_STRING_IS_LATIN1
//          bc[c] = m - i;
//          #else
//          if (c < ASIZE) bc[c] = m - i;
//          #endif
//       }
//
//       /* Searching */
//       j = 0;
//       while (j <= n - m) {
//          c = y[i+j];
//          if (x[m-1] == c)
//            for (i = m - 2; i >= 0 && x[i] == y[i + j]; --i);
//          if (i < 0) return j;
//          // c < 256 for Latin1 string, so, no need for branch
//          #ifdef SOURCE_STRING_IS_LATIN1
//          // LL case: (c< 256) always true. Remove branch
//          j += bc[y[j+m-1]];
//          #endif
//          #ifndef PATTERN_STRING_IS_UTF
//          // UU case: need if (c<ASIZE) check. Skip 1 character if not.
//          if (c < ASIZE)
//            j += bc[y[j+m-1]];
//          else
//            j += 1
//          #endif
//          #ifdef PATTERN_IS_LATIN1_AND_SOURCE_IS_UTF
//          // UL case: need if (c<ASIZE) check. Skip <pattern length> if not.
//          if (c < ASIZE)
//            j += bc[y[j+m-1]];
//          else
//            j += m
//          #endif
//       }
//    }

  if (icnt1 == -1) {
    Label BCLOOP, BCSKIP, BMLOOPSTR2, BMLOOPSTR1, BMSKIP, BMADV, BMMATCH,
        BMLOOPSTR1_LASTCMP, BMLOOPSTR1_CMP, BMLOOPSTR1_AFTER_LOAD, BM_INIT_LOOP;
    Register cnt1end = tmp2;
    Register str2end = cnt2;
    Register skipch = tmp2;

    // str1 length is >=8, so, we can read at least 1 register for cases when
    // UTF->Latin1 conversion is not needed(8 LL or 4UU) and half register for
    // UL case. We'll re-read last character in inner pre-loop code to have
    // single outer pre-loop load
    const int firstStep = isL ? 7 : 3;

    const int ASIZE = 256;
    const int STORED_BYTES = 32; // amount of bytes stored per instruction
    // Allocate bc[ASIZE] on the stack; v0 was pre-filled with cnt1 above, so
    // this initializes every table entry to the pattern length.
    sub(sp, sp, ASIZE);
    mov(tmp5, ASIZE/STORED_BYTES); // loop iterations
    mov(ch1, sp);
    BIND(BM_INIT_LOOP);
      stpq(v0, v0, Address(post(ch1, STORED_BYTES)));
      subs(tmp5, tmp5, 1);
      br(GT, BM_INIT_LOOP);

    // Preprocessing: record, for each pattern char except the last, its
    // shift distance (distance from the char to the pattern end).
    sub(cnt1tmp, cnt1, 1);
    mov(tmp5, str2);
    add(str2end, str2, result_tmp, LSL, str2_chr_shift);
    sub(ch2, cnt1, 1);
    mov(tmp3, str1);
    BIND(BCLOOP);
      (this->*str1_load_1chr)(ch1, Address(post(tmp3, str1_chr_size)));
      if (!str1_isL) {
        // UTF-16 chars >= 256 have no bad-character table slot; skip them.
        subs(zr, ch1, ASIZE);
        br(HS, BCSKIP);
      }
      strb(ch2, Address(sp, ch1));
      BIND(BCSKIP);
      subs(ch2, ch2, 1);
      br(GT, BCLOOP);

    // Load the tail of the pattern into tmp6 for register-wide comparison.
    add(tmp6, str1, cnt1, LSL, str1_chr_shift); // address after str1
    if (str1_isL == str2_isL) {
      // load last 8 bytes (8LL/4UU symbols)
      ldr(tmp6, Address(tmp6, -wordSize));
    } else {
      ldrw(tmp6, Address(tmp6, -wordSize/2)); // load last 4 bytes(4 symbols)
      // convert Latin1 to UTF. We'll have to wait until load completed, but
      // it's still faster than per-character loads+checks
      lsr(tmp3, tmp6, BitsPerByte * (wordSize/2 - str1_chr_size)); // str1[N-1]
      ubfx(ch1, tmp6, 8, 8); // str1[N-2]
      ubfx(ch2, tmp6, 16, 8); // str1[N-3]
      andr(tmp6, tmp6, 0xFF); // str1[N-4]
      orr(ch2, ch1, ch2, LSL, 16);
      orr(tmp6, tmp6, tmp3, LSL, 48);
      orr(tmp6, tmp6, ch2, LSL, 16);
    }
    // Outer search loop: test the source char aligned with the pattern's
    // last char; on mismatch, skip ahead via the bad-character table.
    BIND(BMLOOPSTR2);
      (this->*str2_load_1chr)(skipch, Address(str2, cnt1tmp, Address::lsl(str2_chr_shift)));
      sub(cnt1tmp, cnt1tmp, firstStep); // cnt1tmp is positive here, because cnt1 >= 8
      if (str1_isL == str2_isL) {
        // re-init tmp3. It's for free because it's executed in parallel with
        // load above. Alternative is to initialize it before loop, but it'll
        // affect performance on in-order systems with 2 or more ld/st pipelines
        lsr(tmp3, tmp6, BitsPerByte * (wordSize - str1_chr_size));
      }
      if (!isL) { // UU/UL case
        lsl(ch2, cnt1tmp, 1); // offset in bytes
      }
      cmp(tmp3, skipch);
      br(NE, BMSKIP);
      ldr(ch2, Address(str2, isL ? cnt1tmp : ch2));
      mov(ch1, tmp6);
      if (isL) {
        b(BMLOOPSTR1_AFTER_LOAD);
      } else {
        sub(cnt1tmp, cnt1tmp, 1); // no need to branch for UU/UL case. cnt1 >= 8
        b(BMLOOPSTR1_CMP);
      }
    // Inner loop: verify the remaining pattern chars right-to-left.
    BIND(BMLOOPSTR1);
      (this->*str1_load_1chr)(ch1, Address(str1, cnt1tmp, Address::lsl(str1_chr_shift)));
      (this->*str2_load_1chr)(ch2, Address(str2, cnt1tmp, Address::lsl(str2_chr_shift)));
    BIND(BMLOOPSTR1_AFTER_LOAD);
      subs(cnt1tmp, cnt1tmp, 1);
      br(LT, BMLOOPSTR1_LASTCMP);
    BIND(BMLOOPSTR1_CMP);
      cmp(ch1, ch2);
      br(EQ, BMLOOPSTR1);
    BIND(BMSKIP);
      if (!isL) {
        // if we've met UTF symbol while searching Latin1 pattern, then we can
        // skip cnt1 symbols
        if (str1_isL != str2_isL) {
          mov(result_tmp, cnt1);
        } else {
          mov(result_tmp, 1);
        }
        subs(zr, skipch, ASIZE);
        br(HS, BMADV);
      }
      ldrb(result_tmp, Address(sp, skipch)); // load skip distance
    BIND(BMADV);
      sub(cnt1tmp, cnt1, 1);
      // Advance the search window by the skip distance; loop while in range.
      add(str2, str2, result_tmp, LSL, str2_chr_shift);
      cmp(str2, str2end);
      br(LE, BMLOOPSTR2);
      add(sp, sp, ASIZE); // deallocate bad-character table
      b(NOMATCH);
    BIND(BMLOOPSTR1_LASTCMP);
      cmp(ch1, ch2);
      br(NE, BMSKIP);
    BIND(BMMATCH);
      // Match: index = (current window start - original str2), in chars.
      sub(result, str2, tmp5);
      if (!str2_isL) lsr(result, result, 1);
      add(sp, sp, ASIZE); // deallocate bad-character table
      b(DONE);

    BIND(LINEARSTUB);
    cmp(cnt1, (u1)16); // small patterns still should be handled by simple algorithm
    br(LT, LINEAR_MEDIUM);
    mov(result, zr);
    // Tail-call the pre-generated linear-search stub for this encoding pair.
    RuntimeAddress stub = NULL;
    if (isL) {
      stub = RuntimeAddress(StubRoutines::aarch64::string_indexof_linear_ll());
      assert(stub.target() != NULL, "string_indexof_linear_ll stub has not been generated");
    } else if (str1_isL) {
      stub = RuntimeAddress(StubRoutines::aarch64::string_indexof_linear_ul());
      assert(stub.target() != NULL, "string_indexof_linear_ul stub has not been generated");
    } else {
      stub = RuntimeAddress(StubRoutines::aarch64::string_indexof_linear_uu());
      assert(stub.target() != NULL, "string_indexof_linear_uu stub has not been generated");
    }
    trampoline_call(stub);
    b(DONE);
  }

  BIND(LINEARSEARCH);
  {
    Label DO1, DO2, DO3;

    Register str2tmp = tmp2;
    Register first = tmp3;

    if (icnt1 == -1)
    {
      Label DOSHORT, FIRST_LOOP, STR2_NEXT, STR1_LOOP, STR1_NEXT;

      cmp(cnt1, u1(str1_isL == str2_isL ? 4 : 2));
      br(LT, DOSHORT);
    // Generic scan: search for the pattern's first char in the source and,
    // on a hit, verify the remaining chars.
    BIND(LINEAR_MEDIUM);
      (this->*str1_load_1chr)(first, Address(str1));
      lea(str1, Address(str1, cnt1, Address::lsl(str1_chr_shift)));
      sub(cnt1_neg, zr, cnt1, LSL, str1_chr_shift);
      lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));
      sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);

    BIND(FIRST_LOOP);
      (this->*str2_load_1chr)(ch2, Address(str2, cnt2_neg));
      cmp(first, ch2);
      br(EQ, STR1_LOOP);
    BIND(STR2_NEXT);
      adds(cnt2_neg, cnt2_neg, str2_chr_size);
      br(LE, FIRST_LOOP);
      b(NOMATCH);

    BIND(STR1_LOOP);
      adds(cnt1tmp, cnt1_neg, str1_chr_size);
      add(cnt2tmp, cnt2_neg, str2_chr_size);
      br(GE, MATCH);

    BIND(STR1_NEXT);
      (this->*str1_load_1chr)(ch1, Address(str1, cnt1tmp));
      (this->*str2_load_1chr)(ch2, Address(str2, cnt2tmp));
      cmp(ch1, ch2);
      br(NE, STR2_NEXT);
      adds(cnt1tmp, cnt1tmp, str1_chr_size);
      add(cnt2tmp, cnt2tmp, str2_chr_size);
      br(LT, STR1_NEXT);
      b(MATCH);

    BIND(DOSHORT);
      if (str1_isL == str2_isL) {
        cmp(cnt1, (u1)2);
        br(LT, DO1);
        br(GT, DO3);
      }
    }

    if (icnt1 == 4) {
      // Specialization: 4-char pattern compared with one register-wide load.
      Label CH1_LOOP;

      (this->*load_4chr)(ch1, str1);
      sub(result_tmp, cnt2, 4);
      lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));
      sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);

    BIND(CH1_LOOP);
      (this->*load_4chr)(ch2, Address(str2, cnt2_neg));
      cmp(ch1, ch2);
      br(EQ, MATCH);
      adds(cnt2_neg, cnt2_neg, str2_chr_size);
      br(LE, CH1_LOOP);
      b(NOMATCH);
    }

    if ((icnt1 == -1 && str1_isL == str2_isL) || icnt1 == 2) {
      // Specialization: 2-char pattern, one load per comparison.
      Label CH1_LOOP;

      BIND(DO2);
      (this->*load_2chr)(ch1, str1);
      if (icnt1 == 2) {
        sub(result_tmp, cnt2, 2);
      }
      lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));
      sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);
    BIND(CH1_LOOP);
      (this->*load_2chr)(ch2, Address(str2, cnt2_neg));
      cmp(ch1, ch2);
      br(EQ, MATCH);
      adds(cnt2_neg, cnt2_neg, str2_chr_size);
      br(LE, CH1_LOOP);
      b(NOMATCH);
    }

    if ((icnt1 == -1 && str1_isL == str2_isL) || icnt1 == 3) {
      // Specialization: 3-char pattern = 2-char load + 1-char check.
      Label FIRST_LOOP, STR2_NEXT, STR1_LOOP;

      BIND(DO3);
      (this->*load_2chr)(first, str1);
      (this->*str1_load_1chr)(ch1, Address(str1, 2*str1_chr_size));
      if (icnt1 == 3) {
        sub(result_tmp, cnt2, 3);
      }
      lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));
      sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);
    BIND(FIRST_LOOP);
      (this->*load_2chr)(ch2, Address(str2, cnt2_neg));
      cmpw(first, ch2);
      br(EQ, STR1_LOOP);
    BIND(STR2_NEXT);
      adds(cnt2_neg, cnt2_neg, str2_chr_size);
      br(LE, FIRST_LOOP);
      b(NOMATCH);

    BIND(STR1_LOOP);
      add(cnt2tmp, cnt2_neg, 2*str2_chr_size);
      (this->*str2_load_1chr)(ch2, Address(str2, cnt2tmp));
      cmp(ch1, ch2);
      br(NE, STR2_NEXT);
      b(MATCH);
    }

    if (icnt1 == -1 || icnt1 == 1) {
      // Specialization: single-char pattern; SWAR scan 8 bytes at a time.
      Label CH1_LOOP, HAS_ZERO, DO1_SHORT, DO1_LOOP;

      BIND(DO1);
      (this->*str1_load_1chr)(ch1, str1);
      cmp(cnt2, (u1)8);
      br(LT, DO1_SHORT);

      sub(result_tmp, cnt2, 8/str2_chr_size);
      sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);
      mov(tmp3, str2_isL ? 0x0101010101010101 : 0x0001000100010001);
      lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));

      // Replicate the char into every lane of ch1.
      if (str2_isL) {
        orr(ch1, ch1, ch1, LSL, 8);
      }
      orr(ch1, ch1, ch1, LSL, 16);
      orr(ch1, ch1, ch1, LSL, 32);
    BIND(CH1_LOOP);
      // XOR makes matching lanes zero; detect them with the classic
      // (x - lane_ones) & ~x per-lane high-bit trick.
      ldr(ch2, Address(str2, cnt2_neg));
      eor(ch2, ch1, ch2);
      sub(tmp1, ch2, tmp3);
      orr(tmp2, ch2, str2_isL ? 0x7f7f7f7f7f7f7f7f : 0x7fff7fff7fff7fff);
      bics(tmp1, tmp1, tmp2);
      br(NE, HAS_ZERO);
      adds(cnt2_neg, cnt2_neg, 8);
      br(LT, CH1_LOOP);

      // Tail: run one final (possibly overlapping) window ending at the end.
      cmp(cnt2_neg, (u1)8);
      mov(cnt2_neg, 0);
      br(LT, CH1_LOOP);
      b(NOMATCH);

    BIND(HAS_ZERO);
      // Index of the lowest set lane flag gives the char offset.
      rev(tmp1, tmp1);
      clz(tmp1, tmp1);
      add(cnt2_neg, cnt2_neg, tmp1, LSR, 3);
      b(MATCH);

    BIND(DO1_SHORT);
      mov(result_tmp, cnt2);
      lea(str2, Address(str2, cnt2, Address::lsl(str2_chr_shift)));
      sub(cnt2_neg, zr, cnt2, LSL, str2_chr_shift);
    BIND(DO1_LOOP);
      (this->*str2_load_1chr)(ch2, Address(str2, cnt2_neg));
      cmpw(ch1, ch2);
      br(EQ, MATCH);
      adds(cnt2_neg, cnt2_neg, str2_chr_size);
      br(LT, DO1_LOOP);
    }
  }
  BIND(NOMATCH);
    mov(result, -1);
    b(DONE);
  BIND(MATCH);
    add(result, result_tmp, cnt2_neg, ASR, str2_chr_shift);
  BIND(DONE);
}
909
910 typedef void (MacroAssembler::* chr_insn)(Register Rt, const Address &adr);
911 typedef void (MacroAssembler::* uxt_insn)(Register Rd, Register Rn);
912
// Intrinsic for StringUTF16 indexOf(char): find the first occurrence of the
// 16-bit character 'ch' in the UTF-16 string at 'str1' of length 'cnt1'
// characters. On exit 'result' holds the character index of the first match,
// or -1 if there is none. Clobbers str1, cnt1 (aliased as cnt1_neg), ch,
// tmp1-tmp3 and rscratch1/rscratch2.
void C2_MacroAssembler::string_indexof_char(Register str1, Register cnt1,
                                            Register ch, Register result,
                                            Register tmp1, Register tmp2, Register tmp3)
{
  Label CH1_LOOP, HAS_ZERO, DO1_SHORT, DO1_LOOP, MATCH, NOMATCH, DONE;
  Register cnt1_neg = cnt1;       // reused as a negative byte offset from the string end
  Register ch1 = rscratch1;
  Register result_tmp = rscratch2;

  // An empty string never matches.
  cbz(cnt1, NOMATCH);

  // Fewer than 4 chars: the 8-bytes-at-a-time loop does not pay off.
  cmp(cnt1, (u1)4);
  br(LT, DO1_SHORT);

  // Replicate the 16-bit search char into all four halfword lanes of 'ch'.
  orr(ch, ch, ch, LSL, 16);
  orr(ch, ch, ch, LSL, 32);

  // Point str1 at (end - 8 bytes); iterate with negative offset cnt1_neg
  // running up towards zero. result_tmp keeps the char index of that base.
  sub(cnt1, cnt1, 4);
  mov(result_tmp, cnt1);
  lea(str1, Address(str1, cnt1, Address::uxtw(1)));
  sub(cnt1_neg, zr, cnt1, LSL, 1);

  // SWAR constant 0x0001 per halfword lane, used with the classic
  // (x - 0x0001..) & ~(x | 0x7fff..) trick to detect a zero halfword.
  mov(tmp3, 0x0001000100010001);

  BIND(CH1_LOOP);
    // Load 4 chars; XOR zeroes exactly the lanes that equal 'ch'.
    ldr(ch1, Address(str1, cnt1_neg));
    eor(ch1, ch, ch1);
    sub(tmp1, ch1, tmp3);
    orr(tmp2, ch1, 0x7fff7fff7fff7fff);
    bics(tmp1, tmp1, tmp2);       // non-zero iff some halfword lane was zero
    br(NE, HAS_ZERO);
    adds(cnt1_neg, cnt1_neg, 8);
    br(LT, CH1_LOOP);

    // Loop overshot the end by less than 8 bytes: re-run it once on the
    // final (possibly overlapping) 8-byte word at offset 0, i.e. end - 8.
    cmp(cnt1_neg, (u1)8);
    mov(cnt1_neg, 0);
    br(LT, CH1_LOOP);
    b(NOMATCH);

  BIND(HAS_ZERO);
    // Locate the lowest-addressed matching lane: byte-reverse so the first
    // match becomes the most significant set bits, count leading zeros, and
    // convert the bit count to a byte offset (LSR 3).
    rev(tmp1, tmp1);
    clz(tmp1, tmp1);
    add(cnt1_neg, cnt1_neg, tmp1, LSR, 3);
    b(MATCH);

  BIND(DO1_SHORT);
    // Short-string path: straightforward halfword-at-a-time compare.
    mov(result_tmp, cnt1);
    lea(str1, Address(str1, cnt1, Address::uxtw(1)));
    sub(cnt1_neg, zr, cnt1, LSL, 1);
  BIND(DO1_LOOP);
    (void)0; // (no-op placeholder removed) -- see loop body below
    ldrh(ch1, Address(str1, cnt1_neg));
    cmpw(ch, ch1);
    br(EQ, MATCH);
    adds(cnt1_neg, cnt1_neg, 2);
    br(LT, DO1_LOOP);
  BIND(NOMATCH);
    mov(result, -1);
    b(DONE);
  BIND(MATCH);
    // Char index = base char index + (byte offset from base) / 2.
    add(result, result_tmp, cnt1_neg, ASR, 1);
  BIND(DONE);
}
975
// Intrinsic for StringLatin1 indexOf(char): find the first occurrence of the
// byte-sized character 'ch' in the Latin1 string at 'str1' of length 'cnt1'
// bytes. On exit 'result' holds the index of the first match, or -1 if there
// is none. Structure mirrors string_indexof_char above, with byte lanes
// instead of halfword lanes (so 8 chars per 64-bit word instead of 4).
// Clobbers str1, cnt1 (aliased as cnt1_neg), ch, tmp1-tmp3, rscratch1/2.
void C2_MacroAssembler::stringL_indexof_char(Register str1, Register cnt1,
                                            Register ch, Register result,
                                            Register tmp1, Register tmp2, Register tmp3)
{
  Label CH1_LOOP, HAS_ZERO, DO1_SHORT, DO1_LOOP, MATCH, NOMATCH, DONE;
  Register cnt1_neg = cnt1;       // reused as a negative byte offset from the string end
  Register ch1 = rscratch1;
  Register result_tmp = rscratch2;

  // An empty string never matches.
  cbz(cnt1, NOMATCH);

  // Fewer than 8 bytes: use the simple byte loop.
  cmp(cnt1, (u1)8);
  br(LT, DO1_SHORT);

  // Replicate the search byte into all eight byte lanes of 'ch'.
  orr(ch, ch, ch, LSL, 8);
  orr(ch, ch, ch, LSL, 16);
  orr(ch, ch, ch, LSL, 32);

  // Point str1 at (end - 8); iterate with a negative offset towards zero.
  sub(cnt1, cnt1, 8);
  mov(result_tmp, cnt1);
  lea(str1, Address(str1, cnt1));
  sub(cnt1_neg, zr, cnt1);

  // SWAR constant 0x01 per byte lane for the has-zero-byte trick.
  mov(tmp3, 0x0101010101010101);

  BIND(CH1_LOOP);
    // Load 8 bytes; XOR zeroes exactly the lanes that equal 'ch'.
    ldr(ch1, Address(str1, cnt1_neg));
    eor(ch1, ch, ch1);
    sub(tmp1, ch1, tmp3);
    orr(tmp2, ch1, 0x7f7f7f7f7f7f7f7f);
    bics(tmp1, tmp1, tmp2);       // non-zero iff some byte lane was zero
    br(NE, HAS_ZERO);
    adds(cnt1_neg, cnt1_neg, 8);
    br(LT, CH1_LOOP);

    // Loop overshot the end by less than 8 bytes: re-run once on the final
    // (possibly overlapping) word at end - 8.
    cmp(cnt1_neg, (u1)8);
    mov(cnt1_neg, 0);
    br(LT, CH1_LOOP);
    b(NOMATCH);

  BIND(HAS_ZERO);
    // Byte-reverse + clz finds the lowest-addressed matching lane; LSR 3
    // converts the bit position to a byte offset.
    rev(tmp1, tmp1);
    clz(tmp1, tmp1);
    add(cnt1_neg, cnt1_neg, tmp1, LSR, 3);
    b(MATCH);

  BIND(DO1_SHORT);
    // Short-string path: byte-at-a-time compare.
    mov(result_tmp, cnt1);
    lea(str1, Address(str1, cnt1));
    sub(cnt1_neg, zr, cnt1);
  BIND(DO1_LOOP);
    ldrb(ch1, Address(str1, cnt1_neg));
    cmp(ch, ch1);
    br(EQ, MATCH);
    adds(cnt1_neg, cnt1_neg, 1);
    br(LT, DO1_LOOP);
  BIND(NOMATCH);
    mov(result, -1);
    b(DONE);
  BIND(MATCH);
    // Byte index == char index for Latin1; no shift needed.
    add(result, result_tmp, cnt1_neg);
  BIND(DONE);
}
1039
1040 // Compare strings.
1041 void C2_MacroAssembler::string_compare(Register str1, Register str2,
1042 Register cnt1, Register cnt2, Register result, Register tmp1, Register tmp2,
1043 FloatRegister vtmp1, FloatRegister vtmp2, FloatRegister vtmp3, int ae) {
1044 Label DONE, SHORT_LOOP, SHORT_STRING, SHORT_LAST, TAIL, STUB,
1045 DIFF, NEXT_WORD, SHORT_LOOP_TAIL, SHORT_LAST2, SHORT_LAST_INIT,
1046 SHORT_LOOP_START, TAIL_CHECK;
1047
1048 bool isLL = ae == StrIntrinsicNode::LL;
1049 bool isLU = ae == StrIntrinsicNode::LU;
1050 bool isUL = ae == StrIntrinsicNode::UL;
1051
1052 // The stub threshold for LL strings is: 72 (64 + 8) chars
1053 // UU: 36 chars, or 72 bytes (valid for the 64-byte large loop with prefetch)
1054 // LU/UL: 24 chars, or 48 bytes (valid for the 16-character loop at least)
1055 const u1 stub_threshold = isLL ? 72 : ((isLU || isUL) ? 24 : 36);
1056
1057 bool str1_isL = isLL || isLU;
1058 bool str2_isL = isLL || isUL;
1059
1060 int str1_chr_shift = str1_isL ? 0 : 1;
1061 int str2_chr_shift = str2_isL ? 0 : 1;
1062 int str1_chr_size = str1_isL ? 1 : 2;
1063 int str2_chr_size = str2_isL ? 1 : 2;
1064 int minCharsInWord = isLL ? wordSize : wordSize/2;
1065
1066 FloatRegister vtmpZ = vtmp1, vtmp = vtmp2;
1067 chr_insn str1_load_chr = str1_isL ? (chr_insn)&MacroAssembler::ldrb :
1068 (chr_insn)&MacroAssembler::ldrh;
1069 chr_insn str2_load_chr = str2_isL ? (chr_insn)&MacroAssembler::ldrb :
1070 (chr_insn)&MacroAssembler::ldrh;
1071 uxt_insn ext_chr = isLL ? (uxt_insn)&MacroAssembler::uxtbw :
1072 (uxt_insn)&MacroAssembler::uxthw;
1073
1074 BLOCK_COMMENT("string_compare {");
1075
1076 // Bizzarely, the counts are passed in bytes, regardless of whether they
1077 // are L or U strings, however the result is always in characters.
1078 if (!str1_isL) asrw(cnt1, cnt1, 1);
1079 if (!str2_isL) asrw(cnt2, cnt2, 1);
1080
1081 // Compute the minimum of the string lengths and save the difference.
1082 subsw(result, cnt1, cnt2);
1083 cselw(cnt2, cnt1, cnt2, Assembler::LE); // min
1084
1085 // A very short string
1086 cmpw(cnt2, minCharsInWord);
1087 br(Assembler::LE, SHORT_STRING);
1088
1089 // Compare longwords
1090 // load first parts of strings and finish initialization while loading
1091 {
1092 if (str1_isL == str2_isL) { // LL or UU
1093 ldr(tmp1, Address(str1));
1094 cmp(str1, str2);
1095 br(Assembler::EQ, DONE);
1096 ldr(tmp2, Address(str2));
1097 cmp(cnt2, stub_threshold);
1098 br(GE, STUB);
1099 subsw(cnt2, cnt2, minCharsInWord);
1100 br(EQ, TAIL_CHECK);
1101 lea(str2, Address(str2, cnt2, Address::uxtw(str2_chr_shift)));
1102 lea(str1, Address(str1, cnt2, Address::uxtw(str1_chr_shift)));
1103 sub(cnt2, zr, cnt2, LSL, str2_chr_shift);
1104 } else if (isLU) {
1105 ldrs(vtmp, Address(str1));
1106 ldr(tmp2, Address(str2));
1107 cmp(cnt2, stub_threshold);
1108 br(GE, STUB);
1109 subw(cnt2, cnt2, 4);
1110 eor(vtmpZ, T16B, vtmpZ, vtmpZ);
1111 lea(str1, Address(str1, cnt2, Address::uxtw(str1_chr_shift)));
1112 lea(str2, Address(str2, cnt2, Address::uxtw(str2_chr_shift)));
1113 zip1(vtmp, T8B, vtmp, vtmpZ);
1114 sub(cnt1, zr, cnt2, LSL, str1_chr_shift);
1115 sub(cnt2, zr, cnt2, LSL, str2_chr_shift);
1116 add(cnt1, cnt1, 4);
1117 fmovd(tmp1, vtmp);
1118 } else { // UL case
1119 ldr(tmp1, Address(str1));
1120 ldrs(vtmp, Address(str2));
1121 cmp(cnt2, stub_threshold);
1122 br(GE, STUB);
1123 subw(cnt2, cnt2, 4);
1124 lea(str1, Address(str1, cnt2, Address::uxtw(str1_chr_shift)));
1125 eor(vtmpZ, T16B, vtmpZ, vtmpZ);
1126 lea(str2, Address(str2, cnt2, Address::uxtw(str2_chr_shift)));
1127 sub(cnt1, zr, cnt2, LSL, str1_chr_shift);
1128 zip1(vtmp, T8B, vtmp, vtmpZ);
1129 sub(cnt2, zr, cnt2, LSL, str2_chr_shift);
1130 add(cnt1, cnt1, 8);
1131 fmovd(tmp2, vtmp);
1132 }
1133 adds(cnt2, cnt2, isUL ? 4 : 8);
1134 br(GE, TAIL);
1135 eor(rscratch2, tmp1, tmp2);
1136 cbnz(rscratch2, DIFF);
1137 // main loop
1138 bind(NEXT_WORD);
1139 if (str1_isL == str2_isL) {
1140 ldr(tmp1, Address(str1, cnt2));
1141 ldr(tmp2, Address(str2, cnt2));
1142 adds(cnt2, cnt2, 8);
1143 } else if (isLU) {
1144 ldrs(vtmp, Address(str1, cnt1));
1145 ldr(tmp2, Address(str2, cnt2));
1146 add(cnt1, cnt1, 4);
1147 zip1(vtmp, T8B, vtmp, vtmpZ);
1148 fmovd(tmp1, vtmp);
1149 adds(cnt2, cnt2, 8);
1150 } else { // UL
1151 ldrs(vtmp, Address(str2, cnt2));
1152 ldr(tmp1, Address(str1, cnt1));
1153 zip1(vtmp, T8B, vtmp, vtmpZ);
1154 add(cnt1, cnt1, 8);
1155 fmovd(tmp2, vtmp);
1156 adds(cnt2, cnt2, 4);
1157 }
1158 br(GE, TAIL);
1159
1160 eor(rscratch2, tmp1, tmp2);
1161 cbz(rscratch2, NEXT_WORD);
1162 b(DIFF);
1163 bind(TAIL);
1164 eor(rscratch2, tmp1, tmp2);
1165 cbnz(rscratch2, DIFF);
1166 // Last longword. In the case where length == 4 we compare the
1167 // same longword twice, but that's still faster than another
1168 // conditional branch.
1169 if (str1_isL == str2_isL) {
1170 ldr(tmp1, Address(str1));
1171 ldr(tmp2, Address(str2));
1172 } else if (isLU) {
1173 ldrs(vtmp, Address(str1));
1174 ldr(tmp2, Address(str2));
1175 zip1(vtmp, T8B, vtmp, vtmpZ);
1176 fmovd(tmp1, vtmp);
1177 } else { // UL
1178 ldrs(vtmp, Address(str2));
1179 ldr(tmp1, Address(str1));
1180 zip1(vtmp, T8B, vtmp, vtmpZ);
1181 fmovd(tmp2, vtmp);
1182 }
1183 bind(TAIL_CHECK);
1184 eor(rscratch2, tmp1, tmp2);
1185 cbz(rscratch2, DONE);
1186
1187 // Find the first different characters in the longwords and
1188 // compute their difference.
1189 bind(DIFF);
1190 rev(rscratch2, rscratch2);
1191 clz(rscratch2, rscratch2);
1192 andr(rscratch2, rscratch2, isLL ? -8 : -16);
1193 lsrv(tmp1, tmp1, rscratch2);
1194 (this->*ext_chr)(tmp1, tmp1);
1195 lsrv(tmp2, tmp2, rscratch2);
1196 (this->*ext_chr)(tmp2, tmp2);
1197 subw(result, tmp1, tmp2);
1198 b(DONE);
1199 }
1200
1201 bind(STUB);
1202 RuntimeAddress stub = NULL;
1203 switch(ae) {
1204 case StrIntrinsicNode::LL:
1205 stub = RuntimeAddress(StubRoutines::aarch64::compare_long_string_LL());
1206 break;
1207 case StrIntrinsicNode::UU:
1208 stub = RuntimeAddress(StubRoutines::aarch64::compare_long_string_UU());
1209 break;
1210 case StrIntrinsicNode::LU:
1211 stub = RuntimeAddress(StubRoutines::aarch64::compare_long_string_LU());
1212 break;
1213 case StrIntrinsicNode::UL:
1214 stub = RuntimeAddress(StubRoutines::aarch64::compare_long_string_UL());
1215 break;
1216 default:
1217 ShouldNotReachHere();
1218 }
1219 assert(stub.target() != NULL, "compare_long_string stub has not been generated");
1220 trampoline_call(stub);
1221 b(DONE);
1222
1223 bind(SHORT_STRING);
1224 // Is the minimum length zero?
1225 cbz(cnt2, DONE);
1226 // arrange code to do most branches while loading and loading next characters
1227 // while comparing previous
1228 (this->*str1_load_chr)(tmp1, Address(post(str1, str1_chr_size)));
1229 subs(cnt2, cnt2, 1);
1230 br(EQ, SHORT_LAST_INIT);
1231 (this->*str2_load_chr)(cnt1, Address(post(str2, str2_chr_size)));
1232 b(SHORT_LOOP_START);
1233 bind(SHORT_LOOP);
1234 subs(cnt2, cnt2, 1);
1235 br(EQ, SHORT_LAST);
1236 bind(SHORT_LOOP_START);
1237 (this->*str1_load_chr)(tmp2, Address(post(str1, str1_chr_size)));
1238 (this->*str2_load_chr)(rscratch1, Address(post(str2, str2_chr_size)));
1239 cmp(tmp1, cnt1);
1240 br(NE, SHORT_LOOP_TAIL);
1241 subs(cnt2, cnt2, 1);
1242 br(EQ, SHORT_LAST2);
1243 (this->*str1_load_chr)(tmp1, Address(post(str1, str1_chr_size)));
1244 (this->*str2_load_chr)(cnt1, Address(post(str2, str2_chr_size)));
1245 cmp(tmp2, rscratch1);
1246 br(EQ, SHORT_LOOP);
1247 sub(result, tmp2, rscratch1);
1248 b(DONE);
1249 bind(SHORT_LOOP_TAIL);
1250 sub(result, tmp1, cnt1);
1251 b(DONE);
1252 bind(SHORT_LAST2);
1253 cmp(tmp2, rscratch1);
1254 br(EQ, DONE);
1255 sub(result, tmp2, rscratch1);
1256
1257 b(DONE);
1258 bind(SHORT_LAST_INIT);
1259 (this->*str2_load_chr)(cnt1, Address(post(str2, str2_chr_size)));
1260 bind(SHORT_LAST);
1261 cmp(tmp1, cnt1);
1262 br(EQ, DONE);
1263 sub(result, tmp1, cnt1);
1264
1265 bind(DONE);
1266
1267 BLOCK_COMMENT("} string_compare");
1268 }
1269
1270 void C2_MacroAssembler::neon_compare(FloatRegister dst, BasicType bt, FloatRegister src1,
1271 FloatRegister src2, int cond, bool isQ) {
1272 SIMD_Arrangement size = esize2arrangement(type2aelembytes(bt), isQ);
1273 if (bt == T_FLOAT || bt == T_DOUBLE) {
1274 switch (cond) {
1275 case BoolTest::eq: fcmeq(dst, size, src1, src2); break;
1276 case BoolTest::ne: {
1277 fcmeq(dst, size, src1, src2);
1278 notr(dst, T16B, dst);
1279 break;
1280 }
1281 case BoolTest::ge: fcmge(dst, size, src1, src2); break;
1282 case BoolTest::gt: fcmgt(dst, size, src1, src2); break;
1283 case BoolTest::le: fcmge(dst, size, src2, src1); break;
1284 case BoolTest::lt: fcmgt(dst, size, src2, src1); break;
1285 default:
1286 assert(false, "unsupported");
1287 ShouldNotReachHere();
1288 }
1289 } else {
1290 switch (cond) {
1291 case BoolTest::eq: cmeq(dst, size, src1, src2); break;
1292 case BoolTest::ne: {
1293 cmeq(dst, size, src1, src2);
1294 notr(dst, T16B, dst);
1295 break;
1296 }
1297 case BoolTest::ge: cmge(dst, size, src1, src2); break;
1298 case BoolTest::gt: cmgt(dst, size, src1, src2); break;
1299 case BoolTest::le: cmge(dst, size, src2, src1); break;
1300 case BoolTest::lt: cmgt(dst, size, src2, src1); break;
1301 case BoolTest::uge: cmhs(dst, size, src1, src2); break;
1302 case BoolTest::ugt: cmhi(dst, size, src1, src2); break;
1303 case BoolTest::ult: cmhi(dst, size, src2, src1); break;
1304 case BoolTest::ule: cmhs(dst, size, src2, src1); break;
1305 default:
1306 assert(false, "unsupported");
1307 ShouldNotReachHere();
1308 }
1309 }
1310 }
1311
// Load the object's narrow klass into dst when klass information lives in
// the mark word (compact object headers). The klass-field address is
// (obj [+ index << scale] + disp); we translate it into the mark-word
// address and shift the loaded mark word by markWord::klass_shift. If the
// mark word has monitor_value set (header displaced by a monitor), control
// is diverted to an out-of-line C2LoadNKlassStub registered with the C2
// output phase; presumably the stub recovers the real header — see
// C2LoadNKlassStub for the slow path.
void C2_MacroAssembler::load_nklass_compact(Register dst, Register obj, Register index, int scale, int disp) {
  // Slow-path stub, allocated in the compile arena and emitted after the
  // method body by the output phase.
  C2LoadNKlassStub* stub = new (Compile::current()->comp_arena()) C2LoadNKlassStub(dst);
  Compile::current()->output()->add_stub(stub);

  // Note: Don't clobber obj anywhere in that method!

  // The incoming address is pointing into obj-start + klass_offset_in_bytes. We need to extract
  // obj-start, so that we can load from the object's mark-word instead. Usually the address
  // comes as obj-start in obj and klass_offset_in_bytes in disp. However, sometimes C2
  // emits code that pre-computes obj-start + klass_offset_in_bytes into a register, and
  // then passes that register as obj and 0 in disp. The following code extracts the base
  // and offset to load the mark-word.
  int offset = oopDesc::mark_offset_in_bytes() + disp - oopDesc::klass_offset_in_bytes();
  if (index == noreg) {
    ldr(dst, Address(obj, offset));
  } else {
    // Materialize the scaled-index address first, then apply the offset.
    lea(dst, Address(obj, index, Address::lsl(scale)));
    ldr(dst, Address(dst, offset));
  }
  // NOTE: We can't use tbnz here, because the target is sometimes too far away
  // and cannot be encoded.
  tst(dst, markWord::monitor_value);
  br(Assembler::NE, stub->entry());
  bind(stub->continuation());
  // Extract the narrow klass from the upper bits of the mark word.
  lsr(dst, dst, markWord::klass_shift);
}