/*
 * Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2018, 2022, Red Hat, Inc. All rights reserved.
 * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "code/aotCodeCache.hpp"
#include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp"
#include "gc/shenandoah/mode/shenandoahMode.hpp"
#include "gc/shenandoah/shenandoahBarrierSet.hpp"
#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
#include "gc/shenandoah/shenandoahForwarding.hpp"
#include "gc/shenandoah/shenandoahHeap.inline.hpp"
#include "gc/shenandoah/shenandoahHeapRegion.hpp"
#include "gc/shenandoah/shenandoahRuntime.hpp"
#include "gc/shenandoah/shenandoahThreadLocalData.hpp"
#include "interpreter/interp_masm.hpp"
#include "interpreter/interpreter.hpp"
#include "runtime/javaThread.hpp"
#include "runtime/sharedRuntime.hpp"
#ifdef COMPILER1
#include "c1/c1_LIRAssembler.hpp"
#include "c1/c1_MacroAssembler.hpp"
#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp"
#endif

#define __ masm->

void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
                                                       Register src, Register dst, Register count, RegSet saved_regs) {
  if (is_oop) {
    bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;
    if ((ShenandoahSATBBarrier && !dest_uninitialized) || ShenandoahLoadRefBarrier) {

      Label done;

      // Avoid calling runtime if count == 0
      __ cbz(count, done);

      // Is GC active?
      assert(!saved_regs.contains(rscratch1), "Sanity: about to clobber rscratch1");
      assert(!saved_regs.contains(rscratch2), "Sanity: about to clobber rscratch2");
      Address gc_state(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
      __ ldrb(rscratch1, gc_state);
      if (ShenandoahSATBBarrier && dest_uninitialized) {
        __ tbz(rscratch1, ShenandoahHeap::HAS_FORWARDED_BITPOS, done);
      } else {
        __ mov(rscratch2, ShenandoahHeap::HAS_FORWARDED | ShenandoahHeap::MARKING);
        __ tst(rscratch1, rscratch2);
        __ br(Assembler::EQ, done);
      }

      __ push_call_clobbered_registers();
      // If arguments are not in proper places, shuffle them.
      // Doing this via the stack is the most straight-forward way to avoid
      // accidentally smashing any register.
      if (c_rarg0 != src || c_rarg1 != dst || c_rarg2 != count) {
        __ push(RegSet::of(src), sp);
        __ push(RegSet::of(dst), sp);
        __ push(RegSet::of(count), sp);
        __ pop(RegSet::of(c_rarg2), sp);
        __ pop(RegSet::of(c_rarg1), sp);
        __ pop(RegSet::of(c_rarg0), sp);
      }
      address target = nullptr;
      if (UseCompressedOops) {
        target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop);
      } else {
        target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop);
      }
      __ call_VM_leaf(target, 3);
      __ pop_call_clobbered_registers();
      __ bind(done);
    }
  }
}

void ShenandoahBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
                                                       Register start, Register count, Register tmp) {
  if (ShenandoahCardBarrier && is_oop) {
    gen_write_ref_array_post_barrier(masm, decorators, start, count, tmp);
  }
}

void ShenandoahBarrierSetAssembler::satb_barrier(MacroAssembler* masm,
                                                 Register obj,
                                                 Register pre_val,
                                                 Register thread,
                                                 Register tmp1,
                                                 Register tmp2) {
  assert(ShenandoahSATBBarrier, "Should be checked by caller");
  assert(thread == rthread, "must be");

  Label done;
  Label runtime;

  assert_different_registers(obj, pre_val, tmp1, tmp2);
  assert(pre_val != noreg && tmp1 != noreg && tmp2 != noreg, "expecting a register");

  Address index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
  Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));

  // Is marking active?
  Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
  __ ldrb(tmp1, gc_state);
  __ tbz(tmp1, ShenandoahHeap::MARKING_BITPOS, done);

  // Do we need to load the previous value?
  if (obj != noreg) {
    if (UseCompressedOops) {
      __ ldrw(pre_val, Address(obj, 0));
      __ decode_heap_oop(pre_val);
    } else {
      __ ldr(pre_val, Address(obj, 0));
    }
  }

  // Is the previous value null?
  __ cbz(pre_val, done);

  // Can we store original value in the thread's buffer?
  // Is index == 0?
  // (The index field is typed as size_t.)
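  // Note: the SATB buffer is filled from its end towards its start. The index
  // is a byte offset that counts down, so index == 0 means the buffer is full
  // and the runtime slow path must handle this value.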

  __ ldr(tmp1, index);                     // tmp := *index_adr
  __ cbz(tmp1, runtime);                   // tmp == 0?
                                           // If yes, goto runtime

  __ sub(tmp1, tmp1, wordSize);            // tmp := tmp - wordSize
  __ str(tmp1, index);                     // *index_adr := tmp
  __ ldr(tmp2, buffer);
  __ add(tmp1, tmp1, tmp2);                // tmp := tmp + *buffer_adr

  // Record the previous value
  __ str(pre_val, Address(tmp1, 0));
  __ b(done);

  __ bind(runtime);

  // Slow-path call
  __ enter(/* strip_ret_addr = */ true);
  __ push_call_clobbered_registers();
  if (c_rarg0 != pre_val) {
    __ mov(c_rarg0, pre_val);
  }
  // Calling super_call_VM_leaf with c_rarg0 bypasses the interpreter's call_VM_leaf checks and avoids any extra moves.
  __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_barrier_pre), c_rarg0);
  __ pop_call_clobbered_registers();
  __ leave();

  __ bind(done);
}

void ShenandoahBarrierSetAssembler::resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp) {
  assert(ShenandoahLoadRefBarrier || ShenandoahCASBarrier, "Should be enabled");
  Label is_null;
  __ cbz(dst, is_null);
  resolve_forward_pointer_not_null(masm, dst, tmp);
  __ bind(is_null);
}

// IMPORTANT: This must preserve all registers, even rscratch1 and rscratch2, except those explicitly
// passed in.
void ShenandoahBarrierSetAssembler::resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp) {
  assert(ShenandoahLoadRefBarrier || ShenandoahCASBarrier, "Should be enabled");
  // The below loads the mark word, checks if the lowest two bits are
  // set, and if so, clears the lowest two bits and copies the result
  // to dst. Otherwise it leaves dst alone.
  // Implementing this is surprisingly awkward. I do it here by:
  // - Inverting the mark word
  // - Test lowest two bits == 0
  // - If so, set the lowest two bits
  // - Invert the result back, and copy to dst
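  //
  // For example, a forwarded object has mark word <forwardee> | 0b11:
  // inverting it gives low bits 0b00, the ands sets EQ, we set the low bits
  // of the inverted value and invert back, leaving dst = <forwardee> with
  // the low two bits cleared. A non-forwarded mark word fails the test and
  // dst is left untouched.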

  bool borrow_reg = (tmp == noreg);
  if (borrow_reg) {
    // No free registers available. Make one useful.
    tmp = rscratch1;
    if (tmp == dst) {
      tmp = rscratch2;
    }
    __ push(RegSet::of(tmp), sp);
  }

  assert_different_registers(tmp, dst);

  Label done;
  __ ldr(tmp, Address(dst, oopDesc::mark_offset_in_bytes()));
  __ eon(tmp, tmp, zr);
  __ ands(zr, tmp, markWord::lock_mask_in_place);
  __ br(Assembler::NE, done);
  __ orr(tmp, tmp, markWord::marked_value);
  __ eon(dst, tmp, zr);
  __ bind(done);

  if (borrow_reg) {
    __ pop(RegSet::of(tmp), sp);
  }
}

void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr, DecoratorSet decorators) {
  assert(ShenandoahLoadRefBarrier, "Should be enabled");
  assert(dst != rscratch2, "need rscratch2");
  assert_different_registers(load_addr.base(), load_addr.index(), rscratch1, rscratch2);

  bool is_strong  = ShenandoahBarrierSet::is_strong_access(decorators);
  bool is_weak    = ShenandoahBarrierSet::is_weak_access(decorators);
  bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators);
  bool is_native  = ShenandoahBarrierSet::is_native_access(decorators);
  bool is_narrow  = UseCompressedOops && !is_native;
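  // Off-heap (native) reference slots always hold uncompressed oops, so the
  // narrow entry points are only used for in-heap loads with compressed oops.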

  Label heap_stable, not_cset;
  Address gc_state(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
  __ ldrb(rscratch2, gc_state);

  // Check for heap stability
  if (is_strong) {
    __ tbz(rscratch2, ShenandoahHeap::HAS_FORWARDED_BITPOS, heap_stable);
  } else {
    Label lrb;
    __ tbnz(rscratch2, ShenandoahHeap::WEAK_ROOTS_BITPOS, lrb);
    __ tbz(rscratch2, ShenandoahHeap::HAS_FORWARDED_BITPOS, heap_stable);
    __ bind(lrb);
  }

  // use r1 for load address
  Register result_dst = dst;
  if (dst == r1) {
    __ mov(rscratch1, dst);
    dst = rscratch1;
  }

  // Save r0 and r1, unless it is an output register
  RegSet to_save = RegSet::of(r0, r1) - result_dst;
  __ push(to_save, sp);
  __ lea(r1, load_addr);
  __ mov(r0, dst);

  // Test for in-cset
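  // The cset fast-test table has one byte per heap region, indexed by
  // (object address >> region size shift); a set low bit means the region is
  // in the collection set and the slow path must run.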
  if (is_strong) {
    if (AOTCodeCache::is_on_for_dump()) {
      __ lea(rscratch2, ExternalAddress(AOTRuntimeConstants::cset_base_address()));
      __ ldr(rscratch2, Address(rscratch2));
      __ lea(rscratch1, ExternalAddress(AOTRuntimeConstants::grain_shift_address()));
      __ ldrw(rscratch1, Address(rscratch1));
      __ lsrv(rscratch1, r0, rscratch1);
    } else {
      __ mov(rscratch2, ShenandoahHeap::in_cset_fast_test_addr());
      __ lsr(rscratch1, r0, ShenandoahHeapRegion::region_size_bytes_shift_jint());
    }
    __ ldrb(rscratch2, Address(rscratch2, rscratch1));
    __ tbz(rscratch2, 0, not_cset);
  }

  // Slow-path call
  __ enter(/* strip_ret_addr = */ true);
  __ push_call_clobbered_registers();
  address target = nullptr;
  if (is_strong) {
    if (is_narrow) {
      target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong_narrow);
    } else {
      target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong);
    }
  } else if (is_weak) {
    if (is_narrow) {
      target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow);
    } else {
      target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak);
    }
  } else {
    assert(is_phantom, "only remaining strength");
    assert(!is_narrow, "phantom access cannot be narrow");
    target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom);
  }
  // Calling super_call_VM_leaf with c_rarg0/1 bypasses the interpreter's call_VM_leaf checks and avoids any extra moves.
  __ super_call_VM_leaf(target, c_rarg0, c_rarg1);
  __ mov(rscratch1, r0);
  __ pop_call_clobbered_registers();
  __ mov(r0, rscratch1);
  __ leave();

  __ bind(not_cset);

  __ mov(result_dst, r0);
  __ pop(to_save, sp);

  __ bind(heap_stable);
}

//
// Arguments:
//
// Inputs:
//   src: oop location to load from, might be clobbered
//
// Output:
//   dst: oop loaded from src location
//
// Kill:
//   rscratch1 (scratch reg)
//
// Alias:
//   dst: rscratch1 (might use rscratch1 as temporary output register to avoid clobbering src)
//
void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                            Register dst, Address src, Register tmp1, Register tmp2) {
  // 1: non-reference load, no additional barrier is needed
  if (!is_reference_type(type)) {
    BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp2);
    return;
  }

  // 2: load a reference from src location and apply LRB if needed
  if (ShenandoahBarrierSet::need_load_reference_barrier(decorators, type)) {
    Register result_dst = dst;

    // Preserve src location for LRB
    if (dst == src.base() || dst == src.index()) {
      dst = rscratch1;
    }
    assert_different_registers(dst, src.base(), src.index());

    BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp2);

    load_reference_barrier(masm, dst, src, decorators);

    if (dst != result_dst) {
      __ mov(result_dst, dst);
      dst = result_dst;
    }
  } else {
    BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp2);
  }

  // 3: apply keep-alive barrier if needed
  if (ShenandoahBarrierSet::need_keep_alive_barrier(decorators, type)) {
    satb_barrier(masm /* masm */,
                 noreg /* obj */,
                 dst /* pre_val */,
                 rthread /* thread */,
                 tmp1 /* tmp1 */,
                 tmp2 /* tmp2 */);
  }
}

void ShenandoahBarrierSetAssembler::card_barrier(MacroAssembler* masm, Register obj) {
  assert(ShenandoahCardBarrier, "Should have been checked by caller");

  __ lsr(obj, obj, CardTable::card_shift());

  assert(CardTable::dirty_card_val() == 0, "must be");
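  // Since the dirty card value is zero, a card can be dirtied by storing zr
  // directly; no constant needs to be materialized.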

  Address curr_ct_holder_addr(rthread, in_bytes(ShenandoahThreadLocalData::card_table_offset()));
  __ ldr(rscratch1, curr_ct_holder_addr);

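  // With UseCondCardMark, first check whether the card is already dirty and
  // skip the store if so, trading a load for reduced write traffic on cards
  // that are dirtied repeatedly.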
  if (UseCondCardMark) {
    Label L_already_dirty;
    __ ldrb(rscratch2, Address(obj, rscratch1));
    __ cbz(rscratch2, L_already_dirty);
    __ strb(zr, Address(obj, rscratch1));
    __ bind(L_already_dirty);
  } else {
    __ strb(zr, Address(obj, rscratch1));
  }
}

void ShenandoahBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                             Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) {
  // 1: non-reference types require no barriers
  if (!is_reference_type(type)) {
    BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2, tmp3);
    return;
  }

  // Flatten object address right away for simplicity: likely needed by barriers
  if (dst.index() == noreg && dst.offset() == 0) {
    if (dst.base() != tmp3) {
      __ mov(tmp3, dst.base());
    }
  } else {
    __ lea(tmp3, dst);
  }

  // 2: pre-barrier: SATB needs the previous value
  if (ShenandoahBarrierSet::need_satb_barrier(decorators, type)) {
    satb_barrier(masm,
                 tmp3 /* obj */,
                 tmp2 /* pre_val */,
                 rthread /* thread */,
                 tmp1 /* tmp */,
                 rscratch1 /* tmp2 */);
  }

  // Store!
  BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), val, noreg, noreg, noreg);

  // 3: post-barrier: card barrier needs store address
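  // A null store (val == noreg) cannot create a reference the remembered set
  // needs to track, so the card mark is skipped in that case.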
  bool storing_non_null = (val != noreg);
  if (ShenandoahBarrierSet::need_card_barrier(decorators, type) && storing_non_null) {
    card_barrier(masm, tmp3);
  }
}

void ShenandoahBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
                                                                  Register obj, Register tmp, Label& slowpath) {
  Label done;
  // Resolve jobject
  BarrierSetAssembler::try_resolve_jobject_in_native(masm, jni_env, obj, tmp, slowpath);

  // Check for null.
  __ cbz(obj, done);

  assert(obj != rscratch2, "need rscratch2");
  Address gc_state(jni_env, ShenandoahThreadLocalData::gc_state_offset() - JavaThread::jni_environment_offset());
  __ lea(rscratch2, gc_state);
  __ ldrb(rscratch2, Address(rscratch2));

  // Check for heap in evacuation phase
  __ tbnz(rscratch2, ShenandoahHeap::EVACUATION_BITPOS, slowpath);

  __ bind(done);
}

void ShenandoahBarrierSetAssembler::try_peek_weak_handle_in_nmethod(MacroAssembler* masm, Register weak_handle, Register obj,
                                                                    Register tmp, Label& slow_path) {
  assert_different_registers(weak_handle, tmp, noreg);
  assert_different_registers(obj, tmp, noreg);

  Label done;

  // Peek weak handle using the standard implementation.
  BarrierSetAssembler::try_peek_weak_handle_in_nmethod(masm, weak_handle, obj, tmp, slow_path);

  // Check if the reference is null, and if it is, take the fast path.
  __ cbz(obj, done);

  Address gc_state(rthread, ShenandoahThreadLocalData::gc_state_offset());
  __ lea(tmp, gc_state);
  __ ldrb(tmp, __ legitimize_address(gc_state, 1, tmp));

  // Check if the heap is under weak-reference/roots processing, in
  // which case we need to take the slow path.
  __ tbnz(tmp, ShenandoahHeap::WEAK_ROOTS_BITPOS, slow_path);
  __ bind(done);
}

// Special Shenandoah CAS implementation that handles false negatives due
// to concurrent evacuation. The service is more complex than a
// traditional CAS operation because the CAS operation is intended to
// succeed if the reference at addr exactly matches expected or if the
// reference at addr holds a pointer to a from-space object that has
// been relocated to the location named by expected. There are two
// races that must be addressed:
//  a) A parallel thread may mutate the contents of addr so that it points
//     to a different object. In this case, the CAS operation should fail.
//  b) A parallel thread may heal the contents of addr, replacing a
//     from-space pointer held in addr with the to-space pointer
//     representing the new location of the object.
// Upon entry to cmpxchg_oop, it is assured that new_val equals null
// or it refers to an object that is not being evacuated out of
// from-space, or it refers to the to-space version of an object that
// is being evacuated out of from-space.
//
// By default the value held in the result register following execution
// of the generated code sequence is 0 to indicate failure of CAS,
// non-zero to indicate success. If is_cae, the result is the value most
// recently fetched from addr rather than a boolean success indicator.
//
// Clobbers rscratch1, rscratch2
void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm,
                                                Register addr,
                                                Register expected,
                                                Register new_val,
                                                bool acquire, bool release,
                                                bool is_cae,
                                                Register result) {
  Register tmp1 = rscratch1;
  Register tmp2 = rscratch2;
  bool is_narrow = UseCompressedOops;
  Assembler::operand_size size = is_narrow ? Assembler::word : Assembler::xword;
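  // With compressed oops the reference in memory is 32 bits wide, so the CAS
  // below (and the old value it fetches into tmp2) operates on a word;
  // otherwise on a full 64-bit xword.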

  assert_different_registers(addr, expected, tmp1, tmp2);
  assert_different_registers(addr, new_val, tmp1, tmp2);

  Label step4, done;

  // There are two ways to reach this label. Initial entry into the
  // cmpxchg_oop code expansion starts at step1 (which is equivalent
  // to label step4). Additionally, in the rare case that four steps
  // are required to perform the requested operation, the fourth step
  // is the same as the first. On a second pass through step 1,
  // control may flow through step 2 on its way to failure. It will
  // not flow from step 2 to step 3 since we are assured that the
  // memory at addr no longer holds a from-space pointer.
  //
  // The comments that immediately follow the step4 label apply only
  // to the case in which control reaches this label by branch from
  // step 3.

  __ bind (step4);

  // Step 4. CAS has failed because the value most recently fetched
  // from addr is no longer the from-space pointer held in tmp2. If a
  // different thread replaced the in-memory value with its equivalent
  // to-space pointer, then CAS may still be able to succeed. The
  // value held in the expected register has not changed.
  //
  // It is extremely rare we reach this point. For this reason, the
  // implementation opts for smaller rather than potentially faster
  // code. Ultimately, smaller code for this rare case most likely
  // delivers higher overall throughput by enabling improved icache
  // performance.

  // Step 1. Fast-path.
  //
  // Try to CAS with given arguments. If successful, then we are done.
  //
  // No label required for step 1.

  __ cmpxchg(addr, expected, new_val, size, acquire, release, false, tmp2);
  // EQ flag set iff success. tmp2 holds value fetched.

  // If expected equals null but tmp2 does not equal null, the
  // following branches to done to report failure of CAS. If both
  // expected and tmp2 equal null, the following branches to done to
  // report success of CAS. There's no need for a special test of
  // expected equal to null.

  __ br(Assembler::EQ, done);
  // if CAS failed, fall through to step 2

  // Step 2. CAS has failed because the value held at addr does not
  // match expected. This may be a false negative because the value fetched
  // from addr (now held in tmp2) may be a from-space pointer to the
  // original copy of same object referenced by to-space pointer expected.
  //
  // To resolve this, it suffices to find the forward pointer associated
  // with fetched value. If this matches expected, retry CAS with new
  // parameters. If this mismatches, then we have a legitimate
  // failure, and we're done.
  //
  // No need for step2 label.

  // overwrite tmp1 with from-space pointer fetched from memory
  __ mov(tmp1, tmp2);

  if (is_narrow) {
    // Decode tmp1 in order to resolve its forward pointer
    __ decode_heap_oop(tmp1, tmp1);
    resolve_forward_pointer(masm, tmp1);
    // Encode tmp1 to compare against expected.
    __ encode_heap_oop(tmp1, tmp1);
  } else {
    resolve_forward_pointer(masm, tmp1);
  }

  // Does forwarded value of fetched from-space pointer match original
  // value of expected? If tmp1 holds null, this comparison will fail
  // because we know from step1 that expected is not null. There is
  // no need for a separate test for tmp1 (the value originally held
  // in memory) equal to null.
  __ cmp(tmp1, expected);

  // If not, then the failure was legitimate and we're done.
  // Branching to done with NE condition denotes failure.
  __ br(Assembler::NE, done);

  // Fall through to step 3. No need for step3 label.

  // Step 3. We've confirmed that the value originally held in memory
  // (now held in tmp2) pointed to from-space version of original
  // expected value. Try the CAS again with the from-space expected
  // value. If it now succeeds, we're good.
  //
  // Note: tmp2 holds encoded from-space pointer that matches to-space
  // object residing at expected. tmp2 is the new "expected".

  // Note that macro implementation of __cmpxchg cannot use same register
  // tmp2 for result and expected since it overwrites result before it
  // compares result with expected.
  __ cmpxchg(addr, tmp2, new_val, size, acquire, release, false, noreg);
  // EQ flag set iff success. tmp2 holds value fetched, tmp1 (rscratch1) clobbered.

  // If fetched value did not equal the new expected, this could
  // still be a false negative because some other thread may have
  // newly overwritten the memory value with its to-space equivalent.
  __ br(Assembler::NE, step4);

  if (is_cae) {
    // We're falling through to done to indicate success. Success
    // with is_cae is denoted by returning the value of expected as
    // result.
    __ mov(tmp2, expected);
  }

  __ bind(done);
  // At entry to done, the Z (EQ) flag is on iff the CAS
  // operation was successful. Additionally, if is_cae, tmp2 holds
  // the value most recently fetched from addr. In this case, success
  // is denoted by tmp2 matching expected.

  if (is_cae) {
    __ mov(result, tmp2);
  } else {
    __ cset(result, Assembler::EQ);
  }
}

void ShenandoahBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
                                                                     Register start, Register count, Register scratch) {
  assert(ShenandoahCardBarrier, "Should have been checked by caller");

  Label L_loop, L_done;
  const Register end = count;

  // Zero count? Nothing to do.
  __ cbz(count, L_done);

  // end = start + count << LogBytesPerHeapOop
  // last element address to make inclusive
  __ lea(end, Address(start, count, Address::lsl(LogBytesPerHeapOop)));
  __ sub(end, end, BytesPerHeapOop);
  __ lsr(start, start, CardTable::card_shift());
  __ lsr(end, end, CardTable::card_shift());

  // offset of the last card relative to the first; each card is one byte
  __ sub(count, end, start);

  Address curr_ct_holder_addr(rthread, in_bytes(ShenandoahThreadLocalData::card_table_offset()));
  __ ldr(scratch, curr_ct_holder_addr);
  __ add(start, start, scratch);
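  // Dirty the cards for [start, end] by storing zero bytes, walking backwards
  // from the last card (offset 'count') down to the first.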
  __ bind(L_loop);
  __ strb(zr, Address(start, count));
  __ subs(count, count, 1);
  __ br(Assembler::GE, L_loop);
  __ bind(L_done);
}

#undef __

#ifdef COMPILER1

#define __ ce->masm()->

void ShenandoahBarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub) {
  ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
  // At this point we know that marking is in progress.
  // If do_load() is true then we have to emit the
  // load of the previous value; otherwise it has already
  // been loaded into _pre_val.

  __ bind(*stub->entry());

  assert(stub->pre_val()->is_register(), "Precondition.");

  Register pre_val_reg = stub->pre_val()->as_register();

  if (stub->do_load()) {
    ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/);
  }
  __ cbz(pre_val_reg, *stub->continuation());
  ce->store_parameter(stub->pre_val()->as_register(), 0);
  __ far_call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin()));
  __ b(*stub->continuation());
}

void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub) {
  ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
  __ bind(*stub->entry());

  DecoratorSet decorators = stub->decorators();
  bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators);
  bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators);
  bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators);
  bool is_native = ShenandoahBarrierSet::is_native_access(decorators);

  Register obj = stub->obj()->as_register();
  Register res = stub->result()->as_register();
  Register addr = stub->addr()->as_pointer_register();
  Register tmp1 = stub->tmp1()->as_register();
  Register tmp2 = stub->tmp2()->as_register();

  assert(res == r0, "result must arrive in r0");

  if (res != obj) {
    __ mov(res, obj);
  }

  if (is_strong) {
    // Check for object in cset.
    if (AOTCodeCache::is_on_for_dump()) {
      __ lea(tmp2, ExternalAddress(AOTRuntimeConstants::cset_base_address()));
      __ ldr(tmp2, Address(tmp2));
      __ lea(tmp1, ExternalAddress(AOTRuntimeConstants::grain_shift_address()));
      __ ldrw(tmp1, Address(tmp1));
      __ lsrv(tmp1, res, tmp1);
    } else {
      __ mov(tmp2, ShenandoahHeap::in_cset_fast_test_addr());
      __ lsr(tmp1, res, ShenandoahHeapRegion::region_size_bytes_shift_jint());
    }
    __ ldrb(tmp2, Address(tmp2, tmp1));
    __ cbz(tmp2, *stub->continuation());
  }

  ce->store_parameter(res, 0);
  ce->store_parameter(addr, 1);
  if (is_strong) {
    if (is_native) {
      __ far_call(RuntimeAddress(bs->load_reference_barrier_strong_native_rt_code_blob()->code_begin()));
    } else {
      __ far_call(RuntimeAddress(bs->load_reference_barrier_strong_rt_code_blob()->code_begin()));
    }
  } else if (is_weak) {
    __ far_call(RuntimeAddress(bs->load_reference_barrier_weak_rt_code_blob()->code_begin()));
  } else {
    assert(is_phantom, "only remaining strength");
    __ far_call(RuntimeAddress(bs->load_reference_barrier_phantom_rt_code_blob()->code_begin()));
  }

  __ b(*stub->continuation());
}

#undef __

#define __ sasm->

void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
  __ prologue("shenandoah_pre_barrier", false);

  // arg0 : previous value of memory

  BarrierSet* bs = BarrierSet::barrier_set();

  const Register pre_val = r0;
  const Register thread = rthread;
  const Register tmp = rscratch1;

  Address queue_index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
  Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));

  Label done;
  Label runtime;

  // Is marking still active?
  Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
  __ ldrb(tmp, gc_state);
  __ tbz(tmp, ShenandoahHeap::MARKING_BITPOS, done);

  // Can we store original value in the thread's buffer?
  __ ldr(tmp, queue_index);
  __ cbz(tmp, runtime);

  __ sub(tmp, tmp, wordSize);
  __ str(tmp, queue_index);
  __ ldr(rscratch2, buffer);
  __ add(tmp, tmp, rscratch2);
  __ load_parameter(0, rscratch2);
  __ str(rscratch2, Address(tmp, 0));
  __ b(done);

  __ bind(runtime);
  __ push_call_clobbered_registers();
  __ load_parameter(0, pre_val);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_barrier_pre), pre_val);
  __ pop_call_clobbered_registers();
  __ bind(done);

  __ epilogue();
}

void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, DecoratorSet decorators) {
  __ prologue("shenandoah_load_reference_barrier", false);
  // arg0 : object to be resolved

  __ push_call_clobbered_registers();
  __ load_parameter(0, r0);
  __ load_parameter(1, r1);

  bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators);
  bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators);
  bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators);
  bool is_native = ShenandoahBarrierSet::is_native_access(decorators);
  if (is_strong) {
    if (is_native) {
      __ lea(lr, RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong)));
    } else {
      if (UseCompressedOops) {
        __ lea(lr, RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong_narrow)));
      } else {
        __ lea(lr, RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong)));
      }
    }
  } else if (is_weak) {
    assert(!is_native, "weak must not be called off-heap");
    if (UseCompressedOops) {
      __ lea(lr, RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow)));
    } else {
      __ lea(lr, RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak)));
    }
  } else {
    assert(is_phantom, "only remaining strength");
    assert(is_native, "phantom must only be called off-heap");
    __ lea(lr, RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom)));
  }
  __ blr(lr);
  __ mov(rscratch1, r0);
  __ pop_call_clobbered_registers();
  __ mov(r0, rscratch1);

  __ epilogue();
}

#undef __

#endif // COMPILER1