1 /*
2 * Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
3 * Copyright (c) 2018, 2022, Red Hat, Inc. All rights reserved.
4 * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
5 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 *
7 * This code is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License version 2 only, as
9 * published by the Free Software Foundation.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 *
25 */
26
27 #include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp"
28 #include "gc/shenandoah/mode/shenandoahMode.hpp"
29 #include "gc/shenandoah/shenandoahBarrierSet.hpp"
30 #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
31 #include "gc/shenandoah/shenandoahForwarding.hpp"
32 #include "gc/shenandoah/shenandoahHeap.inline.hpp"
33 #include "gc/shenandoah/shenandoahHeapRegion.hpp"
34 #include "gc/shenandoah/shenandoahRuntime.hpp"
35 #include "gc/shenandoah/shenandoahThreadLocalData.hpp"
36 #include "interpreter/interp_masm.hpp"
37 #include "interpreter/interpreter.hpp"
38 #include "runtime/javaThread.hpp"
39 #include "runtime/sharedRuntime.hpp"
40 #ifdef COMPILER1
41 #include "c1/c1_LIRAssembler.hpp"
42 #include "c1/c1_MacroAssembler.hpp"
43 #include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp"
44 #endif
45 #ifdef COMPILER2
46 #include "gc/shenandoah/c2/shenandoahBarrierSetC2.hpp"
47 #include "opto/output.hpp"
48 #endif
49
50 #define __ masm->
51
52 void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
53 Register src, Register dst, Register count, RegSet saved_regs) {
54 if (is_oop) {
55 bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;
56 if ((ShenandoahSATBBarrier && !dest_uninitialized) || ShenandoahLoadRefBarrier) {
57
58 Label done;
59
60 // Avoid calling runtime if count == 0
61 __ cbz(count, done);
62
63 // Is GC active?
64 assert(!saved_regs.contains(rscratch1), "Sanity: about to clobber rscratch1");
65 assert(!saved_regs.contains(rscratch2), "Sanity: about to clobber rscratch2");
66 Address gc_state(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
67 __ ldrb(rscratch1, gc_state);
68 if (ShenandoahSATBBarrier && dest_uninitialized) {
69 __ tbz(rscratch1, ShenandoahHeap::HAS_FORWARDED_BITPOS, done);
70 } else {
71 __ mov(rscratch2, ShenandoahHeap::HAS_FORWARDED | ShenandoahHeap::MARKING);
72 __ tst(rscratch1, rscratch2);
73 __ br(Assembler::EQ, done);
74 }
75
76 __ push_call_clobbered_registers();
77 // If arguments are not in proper places, shuffle them.
78 // Doing this via the stack is the most straight-forward way to avoid
79 // accidentally smashing any register.
80 if (c_rarg0 != src || c_rarg1 != dst || c_rarg2 != count) {
81 __ push(RegSet::of(src), sp);
82 __ push(RegSet::of(dst), sp);
83 __ push(RegSet::of(count), sp);
84 __ pop(RegSet::of(c_rarg2), sp);
85 __ pop(RegSet::of(c_rarg1), sp);
86 __ pop(RegSet::of(c_rarg0), sp);
87 }
88 address target = nullptr;
89 if (UseCompressedOops) {
90 target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop);
91 } else {
92 target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop);
93 }
94 __ call_VM_leaf(target, 3);
95 __ pop_call_clobbered_registers();
96 __ bind(done);
97 }
98 }
99 }
100
101 void ShenandoahBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
102 Register start, Register count, Register tmp) {
103 if (ShenandoahCardBarrier && is_oop) {
104 gen_write_ref_array_post_barrier(masm, decorators, start, count, tmp);
105 }
106 }
107
108 void ShenandoahBarrierSetAssembler::satb_barrier(MacroAssembler* masm,
109 Register obj,
110 Register pre_val,
111 Register thread,
112 Register tmp1,
113 Register tmp2) {
114 assert(ShenandoahSATBBarrier, "Should be checked by caller");
115 assert(thread == rthread, "must be");
116
117 Label done;
118 Label runtime;
119
120 assert_different_registers(obj, pre_val, tmp1, tmp2);
121 assert(pre_val != noreg && tmp1 != noreg && tmp2 != noreg, "expecting a register");
122
123 Address index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
124 Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
125
126 // Is marking active?
127 Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
128 __ ldrb(tmp1, gc_state);
129 __ tbz(tmp1, ShenandoahHeap::MARKING_BITPOS, done);
130
131 // Do we need to load the previous value?
132 if (obj != noreg) {
133 if (UseCompressedOops) {
134 __ ldrw(pre_val, Address(obj, 0));
135 __ decode_heap_oop(pre_val);
136 } else {
137 __ ldr(pre_val, Address(obj, 0));
138 }
139 }
140
141 // Is the previous value null?
142 __ cbz(pre_val, done);
143
144 // Can we store original value in the thread's buffer?
145 // Is index == 0?
146 // (The index field is typed as size_t.)
147
148 __ ldr(tmp1, index); // tmp := *index_adr
149 __ cbz(tmp1, runtime); // tmp == 0?
150 // If yes, goto runtime
151
152 __ sub(tmp1, tmp1, wordSize); // tmp := tmp - wordSize
153 __ str(tmp1, index); // *index_adr := tmp
154 __ ldr(tmp2, buffer);
155 __ add(tmp1, tmp1, tmp2); // tmp := tmp + *buffer_adr
156
157 // Record the previous value
158 __ str(pre_val, Address(tmp1, 0));
159 __ b(done);
160
161 __ bind(runtime);
162
163 // Slow-path call
164 __ enter(/* strip_ret_addr = */ true);
165 __ push_call_clobbered_registers();
166 if (c_rarg0 != pre_val) {
167 __ mov(c_rarg0, pre_val);
168 }
169 // Calling with super_call_VM_leaf with c_rarg0 bypasses interpreter checks and avoids any moves.
170 __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_barrier_pre), c_rarg0);
171 __ pop_call_clobbered_registers();
172 __ leave();
173
174 __ bind(done);
175 }
176
177 void ShenandoahBarrierSetAssembler::resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp) {
178 assert(ShenandoahLoadRefBarrier || ShenandoahCASBarrier, "Should be enabled");
179 Label is_null;
180 __ cbz(dst, is_null);
181 resolve_forward_pointer_not_null(masm, dst, tmp);
182 __ bind(is_null);
183 }
184
185 // IMPORTANT: This must preserve all registers, even rscratch1 and rscratch2, except those explicitly
186 // passed in.
187 void ShenandoahBarrierSetAssembler::resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp) {
188 assert(ShenandoahLoadRefBarrier || ShenandoahCASBarrier, "Should be enabled");
189 // The below loads the mark word, checks if the lowest two bits are
190 // set, and if so, clear the lowest two bits and copy the result
191 // to dst. Otherwise it leaves dst alone.
192 // Implementing this is surprisingly awkward. I do it here by:
193 // - Inverting the mark word
194 // - Test lowest two bits == 0
195 // - If so, set the lowest two bits
196 // - Invert the result back, and copy to dst
197
198 bool borrow_reg = (tmp == noreg);
199 if (borrow_reg) {
200 // No free registers available. Make one useful.
201 tmp = rscratch1;
202 if (tmp == dst) {
203 tmp = rscratch2;
204 }
205 __ push(RegSet::of(tmp), sp);
206 }
207
208 assert_different_registers(tmp, dst);
209
210 Label done;
211 __ ldr(tmp, Address(dst, oopDesc::mark_offset_in_bytes()));
212 __ eon(tmp, tmp, zr);
213 __ ands(zr, tmp, markWord::lock_mask_in_place);
214 __ br(Assembler::NE, done);
215 __ orr(tmp, tmp, markWord::marked_value);
216 __ eon(dst, tmp, zr);
217 __ bind(done);
218
219 if (borrow_reg) {
220 __ pop(RegSet::of(tmp), sp);
221 }
222 }
223
224 void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr, DecoratorSet decorators) {
225 assert(ShenandoahLoadRefBarrier, "Should be enabled");
226 assert(dst != rscratch2, "need rscratch2");
227 assert_different_registers(load_addr.base(), load_addr.index(), rscratch1, rscratch2);
228
229 bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators);
230 bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators);
231 bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators);
232 bool is_native = ShenandoahBarrierSet::is_native_access(decorators);
233 bool is_narrow = UseCompressedOops && !is_native;
234
235 Label heap_stable, not_cset;
236 Address gc_state(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
237 __ ldrb(rscratch2, gc_state);
238
239 // Check for heap stability
240 if (is_strong) {
241 __ tbz(rscratch2, ShenandoahHeap::HAS_FORWARDED_BITPOS, heap_stable);
242 } else {
243 Label lrb;
244 __ tbnz(rscratch2, ShenandoahHeap::WEAK_ROOTS_BITPOS, lrb);
245 __ tbz(rscratch2, ShenandoahHeap::HAS_FORWARDED_BITPOS, heap_stable);
246 __ bind(lrb);
247 }
248
249 // use r1 for load address
250 Register result_dst = dst;
251 if (dst == r1) {
252 __ mov(rscratch1, dst);
253 dst = rscratch1;
254 }
255
256 // Save r0 and r1, unless it is an output register
257 RegSet to_save = RegSet::of(r0, r1) - result_dst;
258 __ push(to_save, sp);
259 __ lea(r1, load_addr);
260 __ mov(r0, dst);
261
262 // Test for in-cset
263 if (is_strong) {
264 if (AOTCodeCache::is_on_for_dump()) {
265 __ lea(rscratch2, ExternalAddress(AOTRuntimeConstants::cset_base_address()));
266 __ ldr(rscratch2, Address(rscratch2));
267 __ lea(rscratch1, ExternalAddress(AOTRuntimeConstants::grain_shift_address()));
268 __ ldrw(rscratch1, Address(rscratch1));
269 __ lsrv(rscratch1, r0, rscratch1);
270 } else {
271 __ mov(rscratch2, ShenandoahHeap::in_cset_fast_test_addr());
272 __ lsr(rscratch1, r0, ShenandoahHeapRegion::region_size_bytes_shift_jint());
273 }
274 __ ldrb(rscratch2, Address(rscratch2, rscratch1));
275 __ tbz(rscratch2, 0, not_cset);
276 }
277
278 // Slow-path call
279 __ enter(/* strip_ret_addr = */ true);
280 __ push_call_clobbered_registers();
281 address target = nullptr;
282 if (is_strong) {
283 if (is_narrow) {
284 target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong_narrow);
285 } else {
286 target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong);
287 }
288 } else if (is_weak) {
289 if (is_narrow) {
290 target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow);
291 } else {
292 target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak);
293 }
294 } else {
295 assert(is_phantom, "only remaining strength");
296 assert(!is_narrow, "phantom access cannot be narrow");
297 target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom);
298 }
299 // Calling with super_call_VM_leaf with c_rarg0/1 bypasses interpreter checks and avoids any moves.
300 __ super_call_VM_leaf(target, c_rarg0, c_rarg1);
301 __ mov(rscratch1, r0);
302 __ pop_call_clobbered_registers();
303 __ mov(r0, rscratch1);
304 __ leave();
305
306 __ bind(not_cset);
307
308 __ mov(result_dst, r0);
309 __ pop(to_save, sp);
310
311 __ bind(heap_stable);
312 }
313
314 //
315 // Arguments:
316 //
317 // Inputs:
318 // src: oop location to load from, might be clobbered
319 //
320 // Output:
321 // dst: oop loaded from src location
322 //
323 // Kill:
324 // rscratch1 (scratch reg)
325 //
326 // Alias:
327 // dst: rscratch1 (might use rscratch1 as temporary output register to avoid clobbering src)
328 //
329 void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
330 Register dst, Address src, Register tmp1, Register tmp2) {
331 // 1: non-reference load, no additional barrier is needed
332 if (!is_reference_type(type)) {
333 BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp2);
334 return;
335 }
336
337 // 2: load a reference from src location and apply LRB if needed
338 if (ShenandoahBarrierSet::need_load_reference_barrier(decorators, type)) {
339 Register result_dst = dst;
340
341 // Preserve src location for LRB
342 if (dst == src.base() || dst == src.index()) {
343 dst = rscratch1;
344 }
345 assert_different_registers(dst, src.base(), src.index());
346
347 BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp2);
348
349 load_reference_barrier(masm, dst, src, decorators);
350
351 if (dst != result_dst) {
352 __ mov(result_dst, dst);
353 dst = result_dst;
354 }
355 } else {
356 BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp2);
357 }
358
359 // 3: apply keep-alive barrier if needed
360 if (ShenandoahBarrierSet::need_keep_alive_barrier(decorators, type)) {
361 satb_barrier(masm /* masm */,
362 noreg /* obj */,
363 dst /* pre_val */,
364 rthread /* thread */,
365 tmp1 /* tmp1 */,
366 tmp2 /* tmp2 */);
367 }
368 }
369
370 void ShenandoahBarrierSetAssembler::card_barrier(MacroAssembler* masm, Register obj) {
371 assert(ShenandoahCardBarrier, "Should have been checked by caller");
372
373 __ lsr(obj, obj, CardTable::card_shift());
374
375 assert(CardTable::dirty_card_val() == 0, "must be");
376
377 Address curr_ct_holder_addr(rthread, in_bytes(ShenandoahThreadLocalData::card_table_offset()));
378 __ ldr(rscratch1, curr_ct_holder_addr);
379
380 if (UseCondCardMark) {
381 Label L_already_dirty;
382 __ ldrb(rscratch2, Address(obj, rscratch1));
383 __ cbz(rscratch2, L_already_dirty);
384 __ strb(zr, Address(obj, rscratch1));
385 __ bind(L_already_dirty);
386 } else {
387 __ strb(zr, Address(obj, rscratch1));
388 }
389 }
390
391 void ShenandoahBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
392 Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) {
393 // 1: non-reference types require no barriers
394 if (!is_reference_type(type)) {
395 BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2, tmp3);
396 return;
397 }
398
399 // Flatten object address right away for simplicity: likely needed by barriers
400 if (dst.index() == noreg && dst.offset() == 0) {
401 if (dst.base() != tmp3) {
402 __ mov(tmp3, dst.base());
403 }
404 } else {
405 __ lea(tmp3, dst);
406 }
407
408 // 2: pre-barrier: SATB needs the previous value
409 if (ShenandoahBarrierSet::need_satb_barrier(decorators, type)) {
410 satb_barrier(masm,
411 tmp3 /* obj */,
412 tmp2 /* pre_val */,
413 rthread /* thread */,
414 tmp1 /* tmp */,
415 rscratch1 /* tmp2 */);
416 }
417
418 // Store!
419 BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), val, noreg, noreg, noreg);
420
421 // 3: post-barrier: card barrier needs store address
422 bool storing_non_null = (val != noreg);
423 if (ShenandoahBarrierSet::need_card_barrier(decorators, type) && storing_non_null) {
424 card_barrier(masm, tmp3);
425 }
426 }
427
428 void ShenandoahBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
429 Register obj, Register tmp, Label& slowpath) {
430 Label done;
431 // Resolve jobject
432 BarrierSetAssembler::try_resolve_jobject_in_native(masm, jni_env, obj, tmp, slowpath);
433
434 // Check for null.
435 __ cbz(obj, done);
436
437 assert(obj != rscratch2, "need rscratch2");
438 Address gc_state(jni_env, ShenandoahThreadLocalData::gc_state_offset() - JavaThread::jni_environment_offset());
439 __ lea(rscratch2, gc_state);
440 __ ldrb(rscratch2, Address(rscratch2));
441
442 // Check for heap in evacuation phase
443 __ tbnz(rscratch2, ShenandoahHeap::EVACUATION_BITPOS, slowpath);
444
445 __ bind(done);
446 }
447
448 void ShenandoahBarrierSetAssembler::try_peek_weak_handle_in_nmethod(MacroAssembler* masm, Register weak_handle, Register obj,
449 Register tmp, Label& slow_path) {
450 assert_different_registers(weak_handle, tmp, noreg);
451 assert_different_registers(obj, tmp, noreg);
452
453 Label done;
454
455 // Peek weak handle using the standard implementation.
456 BarrierSetAssembler::try_peek_weak_handle_in_nmethod(masm, weak_handle, obj, tmp, slow_path);
457
458 // Check if the reference is null, and if it is, take the fast path.
459 __ cbz(obj, done);
460
461 Address gc_state(rthread, ShenandoahThreadLocalData::gc_state_offset());
462 __ lea(tmp, gc_state);
463 __ ldrb(tmp, __ legitimize_address(gc_state, 1, tmp));
464
465 // Check if the heap is under weak-reference/roots processing, in
466 // which case we need to take the slow path.
467 __ tbnz(tmp, ShenandoahHeap::WEAK_ROOTS_BITPOS, slow_path);
468 __ bind(done);
469 }
470
471 // Special Shenandoah CAS implementation that handles false negatives due
472 // to concurrent evacuation. The service is more complex than a
473 // traditional CAS operation because the CAS operation is intended to
474 // succeed if the reference at addr exactly matches expected or if the
475 // reference at addr holds a pointer to a from-space object that has
476 // been relocated to the location named by expected. There are two
477 // races that must be addressed:
478 // a) A parallel thread may mutate the contents of addr so that it points
479 // to a different object. In this case, the CAS operation should fail.
480 // b) A parallel thread may heal the contents of addr, replacing a
481 // from-space pointer held in addr with the to-space pointer
482 // representing the new location of the object.
483 // Upon entry to cmpxchg_oop, it is assured that new_val equals null
484 // or it refers to an object that is not being evacuated out of
485 // from-space, or it refers to the to-space version of an object that
486 // is being evacuated out of from-space.
487 //
488 // By default the value held in the result register following execution
489 // of the generated code sequence is 0 to indicate failure of CAS,
490 // non-zero to indicate success. If is_cae, the result is the value most
491 // recently fetched from addr rather than a boolean success indicator.
492 //
493 // Clobbers rscratch1, rscratch2
494 void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm,
495 Register addr,
496 Register expected,
497 Register new_val,
498 bool acquire, bool release,
499 bool is_cae,
500 Register result) {
501 Register tmp1 = rscratch1;
502 Register tmp2 = rscratch2;
503 bool is_narrow = UseCompressedOops;
504 Assembler::operand_size size = is_narrow ? Assembler::word : Assembler::xword;
505
506 assert_different_registers(addr, expected, tmp1, tmp2);
507 assert_different_registers(addr, new_val, tmp1, tmp2);
508
509 Label step4, done;
510
511 // There are two ways to reach this label. Initial entry into the
512 // cmpxchg_oop code expansion starts at step1 (which is equivalent
513 // to label step4). Additionally, in the rare case that four steps
514 // are required to perform the requested operation, the fourth step
515 // is the same as the first. On a second pass through step 1,
516 // control may flow through step 2 on its way to failure. It will
517 // not flow from step 2 to step 3 since we are assured that the
518 // memory at addr no longer holds a from-space pointer.
519 //
520 // The comments that immediately follow the step4 label apply only
521 // to the case in which control reaches this label by branch from
522 // step 3.
523
524 __ bind (step4);
525
526 // Step 4. CAS has failed because the value most recently fetched
527 // from addr is no longer the from-space pointer held in tmp2. If a
528 // different thread replaced the in-memory value with its equivalent
529 // to-space pointer, then CAS may still be able to succeed. The
530 // value held in the expected register has not changed.
531 //
532 // It is extremely rare we reach this point. For this reason, the
533 // implementation opts for smaller rather than potentially faster
534 // code. Ultimately, smaller code for this rare case most likely
535 // delivers higher overall throughput by enabling improved icache
536 // performance.
537
538 // Step 1. Fast-path.
539 //
540 // Try to CAS with given arguments. If successful, then we are done.
541 //
542 // No label required for step 1.
543
544 __ cmpxchg(addr, expected, new_val, size, acquire, release, false, tmp2);
545 // EQ flag set iff success. tmp2 holds value fetched.
546
547 // If expected equals null but tmp2 does not equal null, the
548 // following branches to done to report failure of CAS. If both
549 // expected and tmp2 equal null, the following branches to done to
550 // report success of CAS. There's no need for a special test of
551 // expected equal to null.
552
553 __ br(Assembler::EQ, done);
554 // if CAS failed, fall through to step 2
555
556 // Step 2. CAS has failed because the value held at addr does not
557 // match expected. This may be a false negative because the value fetched
558 // from addr (now held in tmp2) may be a from-space pointer to the
559 // original copy of same object referenced by to-space pointer expected.
560 //
561 // To resolve this, it suffices to find the forward pointer associated
562 // with fetched value. If this matches expected, retry CAS with new
563 // parameters. If this mismatches, then we have a legitimate
564 // failure, and we're done.
565 //
566 // No need for step2 label.
567
568 // overwrite tmp1 with from-space pointer fetched from memory
569 __ mov(tmp1, tmp2);
570
571 if (is_narrow) {
572 // Decode tmp1 in order to resolve its forward pointer
573 __ decode_heap_oop(tmp1, tmp1);
574 }
575 resolve_forward_pointer(masm, tmp1);
576 // Encode tmp1 to compare against expected.
577 __ encode_heap_oop(tmp1, tmp1);
578
579 // Does forwarded value of fetched from-space pointer match original
580 // value of expected? If tmp1 holds null, this comparison will fail
581 // because we know from step1 that expected is not null. There is
582 // no need for a separate test for tmp1 (the value originally held
583 // in memory) equal to null.
584 __ cmp(tmp1, expected);
585
586 // If not, then the failure was legitimate and we're done.
587 // Branching to done with NE condition denotes failure.
588 __ br(Assembler::NE, done);
589
590 // Fall through to step 3. No need for step3 label.
591
592 // Step 3. We've confirmed that the value originally held in memory
593 // (now held in tmp2) pointed to from-space version of original
594 // expected value. Try the CAS again with the from-space expected
595 // value. If it now succeeds, we're good.
596 //
597 // Note: tmp2 holds encoded from-space pointer that matches to-space
598 // object residing at expected. tmp2 is the new "expected".
599
600 // Note that macro implementation of __cmpxchg cannot use same register
601 // tmp2 for result and expected since it overwrites result before it
602 // compares result with expected.
603 __ cmpxchg(addr, tmp2, new_val, size, acquire, release, false, noreg);
604 // EQ flag set iff success. tmp2 holds value fetched, tmp1 (rscratch1) clobbered.
605
606 // If fetched value did not equal the new expected, this could
607 // still be a false negative because some other thread may have
608 // newly overwritten the memory value with its to-space equivalent.
609 __ br(Assembler::NE, step4);
610
611 if (is_cae) {
612 // We're falling through to done to indicate success. Success
613 // with is_cae is denoted by returning the value of expected as
614 // result.
615 __ mov(tmp2, expected);
616 }
617
618 __ bind(done);
619 // At entry to done, the Z (EQ) flag is on iff if the CAS
620 // operation was successful. Additionally, if is_cae, tmp2 holds
621 // the value most recently fetched from addr. In this case, success
622 // is denoted by tmp2 matching expected.
623
624 if (is_cae) {
625 __ mov(result, tmp2);
626 } else {
627 __ cset(result, Assembler::EQ);
628 }
629 }
630
631 void ShenandoahBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
632 Register start, Register count, Register scratch) {
633 assert(ShenandoahCardBarrier, "Should have been checked by caller");
634
635 Label L_loop, L_done;
636 const Register end = count;
637
638 // Zero count? Nothing to do.
639 __ cbz(count, L_done);
640
641 // end = start + count << LogBytesPerHeapOop
642 // last element address to make inclusive
643 __ lea(end, Address(start, count, Address::lsl(LogBytesPerHeapOop)));
644 __ sub(end, end, BytesPerHeapOop);
645 __ lsr(start, start, CardTable::card_shift());
646 __ lsr(end, end, CardTable::card_shift());
647
648 // number of bytes to copy
649 __ sub(count, end, start);
650
651 Address curr_ct_holder_addr(rthread, in_bytes(ShenandoahThreadLocalData::card_table_offset()));
652 __ ldr(scratch, curr_ct_holder_addr);
653 __ add(start, start, scratch);
654 __ bind(L_loop);
655 __ strb(zr, Address(start, count));
656 __ subs(count, count, 1);
657 __ br(Assembler::GE, L_loop);
658 __ bind(L_done);
659 }
660
661 #undef __
662
663 #ifdef COMPILER1
664
665 #define __ ce->masm()->
666
667 void ShenandoahBarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub) {
668 ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
669 // At this point we know that marking is in progress.
670 // If do_load() is true then we have to emit the
671 // load of the previous value; otherwise it has already
672 // been loaded into _pre_val.
673
674 __ bind(*stub->entry());
675
676 assert(stub->pre_val()->is_register(), "Precondition.");
677
678 Register pre_val_reg = stub->pre_val()->as_register();
679
680 if (stub->do_load()) {
681 ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/);
682 }
683 __ cbz(pre_val_reg, *stub->continuation());
684 ce->store_parameter(stub->pre_val()->as_register(), 0);
685 __ far_call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin()));
686 __ b(*stub->continuation());
687 }
688
689 void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub) {
690 ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
691 __ bind(*stub->entry());
692
693 DecoratorSet decorators = stub->decorators();
694 bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators);
695 bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators);
696 bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators);
697 bool is_native = ShenandoahBarrierSet::is_native_access(decorators);
698
699 Register obj = stub->obj()->as_register();
700 Register res = stub->result()->as_register();
701 Register addr = stub->addr()->as_pointer_register();
702 Register tmp1 = stub->tmp1()->as_register();
703 Register tmp2 = stub->tmp2()->as_register();
704
705 assert(res == r0, "result must arrive in r0");
706
707 if (res != obj) {
708 __ mov(res, obj);
709 }
710
711 if (is_strong) {
712 // Check for object in cset.
713 if (AOTCodeCache::is_on_for_dump()) {
714 __ lea(tmp2, ExternalAddress(AOTRuntimeConstants::cset_base_address()));
715 __ ldr(tmp2, Address(tmp2));
716 __ lea(tmp1, ExternalAddress(AOTRuntimeConstants::grain_shift_address()));
717 __ ldrw(tmp1, Address(tmp1));
718 __ lsrv(tmp1, res, tmp1);
719 } else {
720 __ mov(tmp2, ShenandoahHeap::in_cset_fast_test_addr());
721 __ lsr(tmp1, res, ShenandoahHeapRegion::region_size_bytes_shift_jint());
722 }
723 __ ldrb(tmp2, Address(tmp2, tmp1));
724 __ cbz(tmp2, *stub->continuation());
725 }
726
727 ce->store_parameter(res, 0);
728 ce->store_parameter(addr, 1);
729 if (is_strong) {
730 if (is_native) {
731 __ far_call(RuntimeAddress(bs->load_reference_barrier_strong_native_rt_code_blob()->code_begin()));
732 } else {
733 __ far_call(RuntimeAddress(bs->load_reference_barrier_strong_rt_code_blob()->code_begin()));
734 }
735 } else if (is_weak) {
736 __ far_call(RuntimeAddress(bs->load_reference_barrier_weak_rt_code_blob()->code_begin()));
737 } else {
738 assert(is_phantom, "only remaining strength");
739 __ far_call(RuntimeAddress(bs->load_reference_barrier_phantom_rt_code_blob()->code_begin()));
740 }
741
742 __ b(*stub->continuation());
743 }
744
745 #undef __
746
747 #define __ sasm->
748
749 void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
750 __ prologue("shenandoah_pre_barrier", false);
751
752 // arg0 : previous value of memory
753
754 BarrierSet* bs = BarrierSet::barrier_set();
755
756 const Register pre_val = r0;
757 const Register thread = rthread;
758 const Register tmp = rscratch1;
759
760 Address queue_index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
761 Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
762
763 Label done;
764 Label runtime;
765
766 // Is marking still active?
767 Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
768 __ ldrb(tmp, gc_state);
769 __ tbz(tmp, ShenandoahHeap::MARKING_BITPOS, done);
770
771 // Can we store original value in the thread's buffer?
772 __ ldr(tmp, queue_index);
773 __ cbz(tmp, runtime);
774
775 __ sub(tmp, tmp, wordSize);
776 __ str(tmp, queue_index);
777 __ ldr(rscratch2, buffer);
778 __ add(tmp, tmp, rscratch2);
779 __ load_parameter(0, rscratch2);
780 __ str(rscratch2, Address(tmp, 0));
781 __ b(done);
782
783 __ bind(runtime);
784 __ push_call_clobbered_registers();
785 __ load_parameter(0, pre_val);
786 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_barrier_pre), pre_val);
787 __ pop_call_clobbered_registers();
788 __ bind(done);
789
790 __ epilogue();
791 }
792
793 void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, DecoratorSet decorators) {
794 __ prologue("shenandoah_load_reference_barrier", false);
795 // arg0 : object to be resolved
796
797 __ push_call_clobbered_registers();
798 __ load_parameter(0, r0);
799 __ load_parameter(1, r1);
800
801 bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators);
802 bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators);
803 bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators);
804 bool is_native = ShenandoahBarrierSet::is_native_access(decorators);
805 if (is_strong) {
806 if (is_native) {
807 __ lea(lr, RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong)));
808 } else {
809 if (UseCompressedOops) {
810 __ lea(lr, RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong_narrow)));
811 } else {
812 __ lea(lr, RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong)));
813 }
814 }
815 } else if (is_weak) {
816 assert(!is_native, "weak must not be called off-heap");
817 if (UseCompressedOops) {
818 __ lea(lr, RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow)));
819 } else {
820 __ lea(lr, RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak)));
821 }
822 } else {
823 assert(is_phantom, "only remaining strength");
824 assert(is_native, "phantom must only be called off-heap");
825 __ lea(lr, RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom)));
826 }
827 __ blr(lr);
828 __ mov(rscratch1, r0);
829 __ pop_call_clobbered_registers();
830 __ mov(r0, rscratch1);
831
832 __ epilogue();
833 }
834
835 #undef __
836
837 #endif // COMPILER1
838
839 #ifdef COMPILER2
840 #undef __
841 #define __ masm.
842
843 int ShenandoahBarrierStubC2::available_gp_registers() {
844 return Register::number_of_registers;
845 }
846
847 bool ShenandoahBarrierStubC2::is_special_register(Register r) {
848 return R18_RESERVED_ONLY(r == r18_tls ||)
849 r == rfp || r == sp || r == lr ||
850 r == rheapbase || r == rthread ||
851 r == rscratch1 || r == rscratch2;
852 }
853
854 void ShenandoahBarrierStubC2::enter_if_gc_state(MacroAssembler& masm, const char test_state, Register tmp) {
855 Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
856
857 Address gc_state_fast(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(test_state)));
858 if (_needs_far_jump) {
859 __ ldrb(rscratch1, gc_state_fast);
860 __ cbz(rscratch1, *continuation());
861 __ b(*entry());
862 } else {
863 __ ldrb(rscratch1, gc_state_fast);
864 __ cbnz(rscratch1, *entry());
865 }
866
867 // This is were the slowpath stub will return to or the code above will
868 // jump to if the checks are false
869 __ bind(*continuation());
870 }
871
872 #undef __
873 #define __ masm->
874
875 void ShenandoahBarrierSetAssembler::compare_and_set_c2(const MachNode* node, MacroAssembler* masm, Register res, Register addr,
876 Register oldval, Register newval, Register tmp, bool exchange, bool narrow, bool weak, bool acquire) {
877 Assembler::operand_size op_size = narrow ? Assembler::word : Assembler::xword;
878
879 // Pre-barrier covers several things:
880 // a. Avoids false positives from CAS encountering to-space memory values.
881 // b. Satisfies the need for LRB for the CAE result.
882 // c. Records old value for the sake of SATB.
883 //
884 // (a) and (b) are covered because load barrier does memory location fixup.
885 // (c) is covered by KA on the current memory value.
886 if (ShenandoahBarrierStubC2::needs_slow_barrier(node)) {
887 ShenandoahBarrierStubC2* const stub = ShenandoahBarrierStubC2::create(node, tmp, addr, narrow, /* do_load: */ true);
888 char check = 0;
889 check |= ShenandoahBarrierStubC2::needs_keep_alive_barrier(node) ? ShenandoahHeap::MARKING : 0;
890 check |= ShenandoahBarrierStubC2::needs_load_ref_barrier(node) ? ShenandoahHeap::HAS_FORWARDED : 0;
891 assert(!ShenandoahBarrierStubC2::needs_load_ref_barrier_weak(node), "Not supported for CAS");
892 stub->enter_if_gc_state(*masm, check);
893 }
894
895 // CAS!
896 __ cmpxchg(addr, oldval, newval, op_size, acquire, /* release */ true, weak, exchange ? res : noreg);
897
898 // If we need a boolean result out of CAS, set the flag appropriately and promote the result.
899 if (!exchange) {
900 assert(res != noreg, "need result register");
901 __ cset(res, Assembler::EQ);
902 }
903
904 // Post-barrier deals with card updates.
905 card_barrier_c2(node, masm, Address(addr, 0));
906 }
907
908 void ShenandoahBarrierSetAssembler::get_and_set_c2(const MachNode* node, MacroAssembler* masm, Register preval,
909 Register newval, Register addr, Register tmp, bool is_acquire) {
910 bool is_narrow = node->bottom_type()->isa_narrowoop();
911
912 // Pre-barrier covers several things:
913 // a. Satisfies the need for LRB for the GAS result.
914 // b. Records old value for the sake of SATB.
915 //
916 // (a) is covered because load barrier does memory location fixup.
917 // (b) is covered by KA on the current memory value.
918 if (ShenandoahBarrierStubC2::needs_slow_barrier(node)) {
919 ShenandoahBarrierStubC2* const stub = ShenandoahBarrierStubC2::create(node, tmp, addr, is_narrow, /* do_load: */ true);
920 char check = 0;
921 check |= ShenandoahBarrierStubC2::needs_keep_alive_barrier(node) ? ShenandoahHeap::MARKING : 0;
922 check |= ShenandoahBarrierStubC2::needs_load_ref_barrier(node) ? ShenandoahHeap::HAS_FORWARDED : 0;
923 assert(!ShenandoahBarrierStubC2::needs_load_ref_barrier_weak(node), "Not supported for GAS");
924 stub->enter_if_gc_state(*masm, check);
925 }
926
927 if (is_narrow) {
928 if (is_acquire) {
929 __ atomic_xchgalw(preval, newval, addr);
930 } else {
931 __ atomic_xchgw(preval, newval, addr);
932 }
933 } else {
934 if (is_acquire) {
935 __ atomic_xchgal(preval, newval, addr);
936 } else {
937 __ atomic_xchg(preval, newval, addr);
938 }
939 }
940
941 // Post-barrier deals with card updates.
942 card_barrier_c2(node, masm, Address(addr, 0));
943 }
944
945 void ShenandoahBarrierSetAssembler::store_c2(const MachNode* node, MacroAssembler* masm, Address dst, bool dst_narrow,
946 Register src, bool src_narrow, Register tmp, bool is_volatile) {
947
948 // Pre-barrier: SATB, keep-alive the current memory value.
949 if (ShenandoahBarrierStubC2::needs_slow_barrier(node)) {
950 assert(!ShenandoahBarrierStubC2::needs_load_ref_barrier(node), "Should not be required for stores");
951 ShenandoahBarrierStubC2* const stub = ShenandoahBarrierStubC2::create(node, tmp, dst, dst_narrow, /* do_load: */ true);
952 stub->enter_if_gc_state(*masm, ShenandoahHeap::MARKING);
953 }
954
955 // Do the actual store
956 if (dst_narrow) {
957 if (!src_narrow) {
958 // Need to encode into rscratch, because we cannot clobber src.
959 if (ShenandoahBarrierStubC2::maybe_null(node)) {
960 __ encode_heap_oop(rscratch1, src);
961 } else {
962 __ encode_heap_oop_not_null(rscratch1, src);
963 }
964 src = rscratch1;
965 }
966
967 if (is_volatile) {
968 __ stlrw(src, dst.base());
969 } else {
970 __ strw(src, dst);
971 }
972 } else {
973 if (is_volatile) {
974 __ stlr(src, dst.base());
975 } else {
976 __ str(src, dst);
977 }
978 }
979
980 // Post-barrier: card updates.
981 card_barrier_c2(node, masm, dst);
982 }
983
984 void ShenandoahBarrierSetAssembler::load_c2(const MachNode* node, MacroAssembler* masm, Register dst, Address src, bool is_narrow, bool is_acquire) {
985 // Do the actual load. This load is the candidate for implicit null check, and MUST come first.
986 if (is_narrow) {
987 if (is_acquire) {
988 __ ldarw(dst, src.base());
989 } else {
990 __ ldrw(dst, src);
991 }
992 } else {
993 if (is_acquire) {
994 __ ldar(dst, src.base());
995 } else {
996 __ ldr(dst, src);
997 }
998 }
999
1000 // Post-barrier: LRB / KA / weak-root processing.
1001 if (ShenandoahBarrierStubC2::needs_slow_barrier(node)) {
1002 ShenandoahBarrierStubC2* const stub = ShenandoahBarrierStubC2::create(node, dst, src, is_narrow, /* do_load: */ false);
1003 char check = 0;
1004 check |= ShenandoahBarrierStubC2::needs_keep_alive_barrier(node) ? ShenandoahHeap::MARKING : 0;
1005 check |= ShenandoahBarrierStubC2::needs_load_ref_barrier(node) ? ShenandoahHeap::HAS_FORWARDED : 0;
1006 check |= ShenandoahBarrierStubC2::needs_load_ref_barrier_weak(node) ? ShenandoahHeap::WEAK_ROOTS : 0;
1007 stub->enter_if_gc_state(*masm, check);
1008 }
1009 }
1010
1011 void ShenandoahBarrierSetAssembler::card_barrier_c2(const MachNode* node, MacroAssembler* masm, Address address) {
1012 if (!ShenandoahBarrierStubC2::needs_card_barrier(node)) {
1013 return;
1014 }
1015
1016 assert(CardTable::dirty_card_val() == 0, "must be");
1017 Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
1018
1019 // rscratch1 = card table base (holder)
1020 Address curr_ct_holder_addr(rthread, in_bytes(ShenandoahThreadLocalData::card_table_offset()));
1021 __ ldr(rscratch1, curr_ct_holder_addr);
1022
1023 // rscratch2 = effective address
1024 __ lea(rscratch2, address);
1025
1026 // rscratch2 = &card_table[ addr >> CardTable::card_shift() ] ; card index
1027 __ add(rscratch2, rscratch1, rscratch2, Assembler::LSR, CardTable::card_shift());
1028
1029 if (UseCondCardMark) {
1030 Label L_already_dirty;
1031 __ ldrb(rscratch1, Address(rscratch2));
1032 __ cbz(rscratch1, L_already_dirty);
1033 __ strb(zr, Address(rscratch2));
1034 __ bind(L_already_dirty);
1035 } else {
1036 __ strb(zr, Address(rscratch2));
1037 }
1038 }
1039 #undef __
1040 #define __ masm.
1041
1042 void ShenandoahBarrierStubC2::post_init() {
1043 // If we are in scratch emit mode we assume worst case,
1044 // and force the use of trampolines
1045 PhaseOutput* const output = Compile::current()->output();
1046 if (output->in_scratch_emit_size()) {
1047 _needs_far_jump = true;
1048 return;
1049 }
1050
1051 // TODO: how correct is this? factor out this into a method.
1052 const int code_size = output->buffer_sizing_data()->_code +
1053 output->buffer_sizing_data()->_stub +
1054 output->buffer_sizing_data()->_reloc;
1055 _needs_far_jump = code_size >= (int)(1*M);
1056 }
1057
1058 void ShenandoahBarrierStubC2::emit_code(MacroAssembler& masm) {
1059 Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
1060 assert(_needs_keep_alive_barrier || _needs_load_ref_barrier, "Why are you here?");
1061
1062 __ bind(*entry());
1063
1064 // If we need to load ourselves, do it here.
1065 if (_do_load) {
1066 if (_narrow) {
1067 __ ldrw(_obj, _addr);
1068 } else {
1069 __ ldr(_obj, _addr);
1070 }
1071 }
1072
1073 // If the object is null, there is no point in applying barriers.
1074 maybe_far_jump_if_zero(masm, _obj, continuation());
1075
1076 // We need to make sure that loads done by callers survive across slow-path calls.
1077 // For self-loads, we need to care about the case when both KA and LRB are enabled (rare).
1078 if (!_do_load || (_needs_keep_alive_barrier && _needs_load_ref_barrier)) {
1079 preserve(_obj);
1080 }
1081
1082 // Go for barriers. Barriers can return straight to continuation, as long
1083 // as another barrier is not needed and we can reach the fastpath.
1084 if (_needs_keep_alive_barrier && _needs_load_ref_barrier) {
1085 keepalive(masm, nullptr);
1086 lrb(masm, continuation());
1087 } else if (_needs_keep_alive_barrier) {
1088 keepalive(masm, continuation());
1089 } else if (_needs_load_ref_barrier) {
1090 lrb(masm, continuation());
1091 } else {
1092 ShouldNotReachHere();
1093 }
1094 }
1095
1096 void ShenandoahBarrierStubC2::maybe_far_jump_if_zero(MacroAssembler& masm, Register reg, Label* L_done) {
1097 if (_needs_far_jump) {
1098 Label L_short_jump;
1099 __ cbnz(reg, L_short_jump);
1100 __ b(*L_done);
1101 __ bind(L_short_jump);
1102 } else {
1103 __ cbz(reg, *L_done);
1104 }
1105 }
1106
1107 void ShenandoahBarrierStubC2::keepalive(MacroAssembler& masm, Label* L_done) {
1108 Address index(rthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
1109 Address buffer(rthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
1110
1111 Label L_through, L_slowpath;
1112
1113 Register tmp1 = rscratch1;
1114 Register tmp2 = rscratch2;
1115 assert_different_registers(tmp1, tmp2, _obj, _addr.base(), _addr.index());
1116
1117 // If another barrier is enabled as well, do a runtime check for a specific barrier.
1118 if (_needs_load_ref_barrier) {
1119 Address gc_state_fast(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(ShenandoahHeap::MARKING)));
1120 __ ldrb(tmp1, gc_state_fast);
1121 if (L_done != nullptr) {
1122 maybe_far_jump_if_zero(masm, tmp1, L_done);
1123 } else {
1124 __ cbz(tmp1, L_through);
1125 }
1126 }
1127
1128 // Fast-path: put object into buffer.
1129 // If buffer is already full, go slow.
1130 __ ldr(tmp1, index);
1131 __ cbz(tmp1, L_slowpath);
1132 __ sub(tmp1, tmp1, wordSize);
1133 __ str(tmp1, index);
1134 __ ldr(tmp2, buffer);
1135
1136 // If object is narrow, we need to unpack it before inserting into buffer,
1137 // and pack it back. We can skip the unpack if we know that object is not preserved.
1138 if (_narrow) {
1139 __ decode_heap_oop_not_null(_obj);
1140 }
1141 __ str(_obj, Address(tmp2, tmp1));
1142 if (_narrow && is_preserved(_obj)) {
1143 __ encode_heap_oop_not_null(_obj);
1144 }
1145
1146 // Fast-path exits here.
1147 if (L_done != nullptr) {
1148 __ b(*L_done);
1149 } else {
1150 __ b(L_through);
1151 }
1152
1153 // Slow-path: call runtime to handle.
1154 __ bind(L_slowpath);
1155
1156 // The Load match rule in the .ad file may have legitimized the load address
1157 // using a TEMP register and in that case we need to explicitly preserve them
1158 // here because the RA does not consider TEMP as live-in, of course.
1159 if (_needs_load_ref_barrier) {
1160 preserve(_addr.base());
1161 preserve(_addr.index());
1162 }
1163
1164 {
1165 SaveLiveRegisters slr(&masm, this);
1166
1167 // Go to runtime and handle the rest there.
1168 __ mov(c_rarg0, _obj);
1169 __ mov(lr, keepalive_runtime_entry_addr());
1170 __ blr(lr);
1171 }
1172
1173 if (L_done != nullptr) {
1174 __ b(*L_done);
1175 } else {
1176 __ bind(L_through);
1177 }
1178 }
1179
1180 void ShenandoahBarrierStubC2::lrb(MacroAssembler& masm, Label* L_done) {
1181 assert(L_done != nullptr, "Must be set");
1182
1183 Label L_slow;
1184
1185 Register tmp1 = rscratch1;
1186 Register tmp2 = rscratch2;
1187 assert_different_registers(tmp1, tmp2, _obj, _addr.base(), _addr.index());
1188
1189 // If another barrier is enabled as well, do a runtime check for a specific barrier.
1190 if (_needs_keep_alive_barrier) {
1191 char state_to_check = ShenandoahHeap::HAS_FORWARDED | (_needs_load_ref_weak_barrier ? ShenandoahHeap::WEAK_ROOTS : 0);
1192 Address gc_state_fast(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(state_to_check)));
1193 __ ldrb(tmp1, gc_state_fast);
1194 maybe_far_jump_if_zero(masm, tmp1, L_done);
1195 }
1196
1197 // If weak references are being processed, weak/phantom loads need to go slow,
1198 // regardless of their cset status.
1199 if (_needs_load_ref_weak_barrier) {
1200 Address gc_state_fast(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(ShenandoahHeap::WEAK_ROOTS)));
1201 __ ldrb(tmp1, gc_state_fast);
1202 __ cbnz(tmp1, L_slow);
1203 }
1204
1205 // Cset-check. Fall-through to slow if in collection set.
1206 if (_narrow) {
1207 __ decode_heap_oop_not_null(tmp2, _obj);
1208 } else {
1209 tmp2 = _obj;
1210 }
1211 __ mov(tmp1, ShenandoahHeap::in_cset_fast_test_addr());
1212 __ add(tmp1, tmp1, tmp2, Assembler::LSR, ShenandoahHeapRegion::region_size_bytes_shift_jint());
1213 __ ldrb(tmp1, Address(tmp1, 0));
1214 maybe_far_jump_if_zero(masm, tmp1, L_done);
1215
1216 // Slow path
1217 __ bind(L_slow);
1218
1219 // Obj is the result, need to temporarily stop preserving it.
1220 bool is_obj_preserved = is_preserved(_obj);
1221 if (is_obj_preserved) {
1222 dont_preserve(_obj);
1223 }
1224 {
1225 SaveLiveRegisters slr(&masm, this);
1226
1227 // Shuffle in the arguments. The end result should be:
1228 // c_rarg0 <-- obj
1229 // c_rarg1 <-- lea(addr)
1230 if (c_rarg0 == _obj) {
1231 __ lea(c_rarg1, _addr);
1232 } else if (c_rarg1 == _obj) {
1233 // Set up arguments in reverse, and then flip them
1234 __ lea(c_rarg0, _addr);
1235 // flip them
1236 __ mov(tmp1, c_rarg0);
1237 __ mov(c_rarg0, c_rarg1);
1238 __ mov(c_rarg1, tmp1);
1239 } else {
1240 assert_different_registers(c_rarg1, _obj);
1241 __ lea(c_rarg1, _addr);
1242 __ mov(c_rarg0, _obj);
1243 }
1244
1245 // Go to runtime and handle the rest there.
1246 __ mov(lr, lrb_runtime_entry_addr());
1247 __ blr(lr);
1248
1249 // Save the result where needed.
1250 if (_obj != r0) {
1251 __ mov(_obj, r0);
1252 }
1253 }
1254 if (is_obj_preserved) {
1255 preserve(_obj);
1256 }
1257
1258 __ b(*L_done);
1259 }
1260
1261 #undef __
1262 #define __ masm->
1263
1264 #endif // COMPILER2