1 /*
2 * Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
3 * Copyright (c) 2018, 2022, Red Hat, Inc. All rights reserved.
4 * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
5 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 *
7 * This code is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License version 2 only, as
9 * published by the Free Software Foundation.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 *
25 */
26
27 #include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp"
28 #include "gc/shenandoah/mode/shenandoahMode.hpp"
29 #include "gc/shenandoah/shenandoahBarrierSet.hpp"
30 #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
31 #include "gc/shenandoah/shenandoahHeap.inline.hpp"
32 #include "gc/shenandoah/shenandoahHeapRegion.hpp"
33 #include "gc/shenandoah/shenandoahRuntime.hpp"
34 #include "gc/shenandoah/shenandoahThreadLocalData.hpp"
35 #include "interpreter/interp_masm.hpp"
36 #include "interpreter/interpreter.hpp"
37 #include "runtime/javaThread.hpp"
38 #include "runtime/sharedRuntime.hpp"
39 #ifdef COMPILER1
40 #include "c1/c1_LIRAssembler.hpp"
41 #include "c1/c1_MacroAssembler.hpp"
42 #include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp"
43 #endif
44 #ifdef COMPILER2
45 #include "gc/shenandoah/c2/shenandoahBarrierSetC2.hpp"
46 #include "opto/output.hpp"
47 #endif
48
49 #define __ masm->
50
51 void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
52 Register src, Register dst, Register count, RegSet saved_regs) {
53 if (is_oop) {
54 bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;
55 if ((ShenandoahSATBBarrier && !dest_uninitialized) || ShenandoahLoadRefBarrier) {
56
57 Label done;
58
59 // Avoid calling runtime if count == 0
60 __ cbz(count, done);
61
62 // Is GC active?
63 assert(!saved_regs.contains(rscratch1), "Sanity: about to clobber rscratch1");
64 assert(!saved_regs.contains(rscratch2), "Sanity: about to clobber rscratch2");
65 Address gc_state(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
66 __ ldrb(rscratch1, gc_state);
67 if (ShenandoahSATBBarrier && dest_uninitialized) {
68 __ tbz(rscratch1, ShenandoahHeap::HAS_FORWARDED_BITPOS, done);
69 } else {
70 __ mov(rscratch2, ShenandoahHeap::HAS_FORWARDED | ShenandoahHeap::MARKING);
71 __ tst(rscratch1, rscratch2);
72 __ br(Assembler::EQ, done);
73 }
74
75 __ push_call_clobbered_registers();
76 // If arguments are not in proper places, shuffle them.
77 // Doing this via the stack is the most straight-forward way to avoid
78 // accidentally smashing any register.
79 if (c_rarg0 != src || c_rarg1 != dst || c_rarg2 != count) {
80 __ push(RegSet::of(src), sp);
81 __ push(RegSet::of(dst), sp);
82 __ push(RegSet::of(count), sp);
83 __ pop(RegSet::of(c_rarg2), sp);
84 __ pop(RegSet::of(c_rarg1), sp);
85 __ pop(RegSet::of(c_rarg0), sp);
86 }
87 address target = nullptr;
88 if (UseCompressedOops) {
89 target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop);
90 } else {
91 target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop);
92 }
93 __ call_VM_leaf(target, 3);
94 __ pop_call_clobbered_registers();
95 __ bind(done);
96 }
97 }
98 }
99
100 void ShenandoahBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
101 Register start, Register count, Register tmp) {
102 if (ShenandoahCardBarrier && is_oop) {
103 gen_write_ref_array_post_barrier(masm, decorators, start, count, tmp);
104 }
105 }
106
107 void ShenandoahBarrierSetAssembler::satb_barrier(MacroAssembler* masm,
108 Register obj,
109 Register pre_val,
110 Register thread,
111 Register tmp1,
112 Register tmp2) {
113 assert(ShenandoahSATBBarrier, "Should be checked by caller");
114 assert(thread == rthread, "must be");
115
116 Label done;
117 Label runtime;
118
119 assert_different_registers(obj, pre_val, tmp1, tmp2);
120 assert(pre_val != noreg && tmp1 != noreg && tmp2 != noreg, "expecting a register");
121
122 Address index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
123 Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
124
125 // Is marking active?
126 Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
127 __ ldrb(tmp1, gc_state);
128 __ tbz(tmp1, ShenandoahHeap::MARKING_BITPOS, done);
129
130 // Do we need to load the previous value?
131 if (obj != noreg) {
132 if (UseCompressedOops) {
133 __ ldrw(pre_val, Address(obj, 0));
134 __ decode_heap_oop(pre_val);
135 } else {
136 __ ldr(pre_val, Address(obj, 0));
137 }
138 }
139
140 // Is the previous value null?
141 __ cbz(pre_val, done);
142
143 // Can we store original value in the thread's buffer?
144 // Is index == 0?
145 // (The index field is typed as size_t.)
146
147 __ ldr(tmp1, index); // tmp := *index_adr
148 __ cbz(tmp1, runtime); // tmp == 0?
149 // If yes, goto runtime
150
151 __ sub(tmp1, tmp1, wordSize); // tmp := tmp - wordSize
152 __ str(tmp1, index); // *index_adr := tmp
153 __ ldr(tmp2, buffer);
154 __ add(tmp1, tmp1, tmp2); // tmp := tmp + *buffer_adr
155
156 // Record the previous value
157 __ str(pre_val, Address(tmp1, 0));
158 __ b(done);
159
160 __ bind(runtime);
161
162 // Slow-path call
163 __ enter(/* strip_ret_addr = */ true);
164 __ push_call_clobbered_registers();
165 if (c_rarg0 != pre_val) {
166 __ mov(c_rarg0, pre_val);
167 }
168 // Calling with super_call_VM_leaf with c_rarg0 bypasses interpreter checks and avoids any moves.
169 __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_barrier_pre), c_rarg0);
170 __ pop_call_clobbered_registers();
171 __ leave();
172
173 __ bind(done);
174 }
175
176 void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr, DecoratorSet decorators) {
177 assert(ShenandoahLoadRefBarrier, "Should be enabled");
178 assert(dst != rscratch2, "need rscratch2");
179 assert_different_registers(load_addr.base(), load_addr.index(), rscratch1, rscratch2);
180
181 bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators);
182 bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators);
183 bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators);
184 bool is_native = ShenandoahBarrierSet::is_native_access(decorators);
185 bool is_narrow = UseCompressedOops && !is_native;
186
187 Label heap_stable, not_cset;
188 Address gc_state(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
189 __ ldrb(rscratch2, gc_state);
190
191 // Check for heap stability
192 if (is_strong) {
193 __ tbz(rscratch2, ShenandoahHeap::HAS_FORWARDED_BITPOS, heap_stable);
194 } else {
195 Label lrb;
196 __ tbnz(rscratch2, ShenandoahHeap::WEAK_ROOTS_BITPOS, lrb);
197 __ tbz(rscratch2, ShenandoahHeap::HAS_FORWARDED_BITPOS, heap_stable);
198 __ bind(lrb);
199 }
200
201 // use r1 for load address
202 Register result_dst = dst;
203 if (dst == r1) {
204 __ mov(rscratch1, dst);
205 dst = rscratch1;
206 }
207
208 // Save r0 and r1, unless it is an output register
209 RegSet to_save = RegSet::of(r0, r1) - result_dst;
210 __ push(to_save, sp);
211 __ lea(r1, load_addr);
212 __ mov(r0, dst);
213
214 // Test for in-cset
215 if (is_strong) {
216 if (AOTCodeCache::is_on_for_dump()) {
217 __ lea(rscratch2, ExternalAddress(AOTRuntimeConstants::cset_base_address()));
218 __ ldr(rscratch2, Address(rscratch2));
219 __ lea(rscratch1, ExternalAddress(AOTRuntimeConstants::grain_shift_address()));
220 __ ldrw(rscratch1, Address(rscratch1));
221 __ lsrv(rscratch1, r0, rscratch1);
222 } else {
223 __ mov(rscratch2, ShenandoahHeap::in_cset_fast_test_addr());
224 __ lsr(rscratch1, r0, ShenandoahHeapRegion::region_size_bytes_shift_jint());
225 }
226 __ ldrb(rscratch2, Address(rscratch2, rscratch1));
227 __ tbz(rscratch2, 0, not_cset);
228 }
229
230 // Slow-path call
231 __ enter(/* strip_ret_addr = */ true);
232 __ push_call_clobbered_registers();
233 address target = nullptr;
234 if (is_strong) {
235 if (is_narrow) {
236 target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong_narrow);
237 } else {
238 target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong);
239 }
240 } else if (is_weak) {
241 if (is_narrow) {
242 target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow);
243 } else {
244 target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak);
245 }
246 } else {
247 assert(is_phantom, "only remaining strength");
248 assert(!is_narrow, "phantom access cannot be narrow");
249 target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom);
250 }
251 // Calling with super_call_VM_leaf with c_rarg0/1 bypasses interpreter checks and avoids any moves.
252 __ super_call_VM_leaf(target, c_rarg0, c_rarg1);
253 __ mov(rscratch1, r0);
254 __ pop_call_clobbered_registers();
255 __ mov(r0, rscratch1);
256 __ leave();
257
258 __ bind(not_cset);
259
260 __ mov(result_dst, r0);
261 __ pop(to_save, sp);
262
263 __ bind(heap_stable);
264 }
265
266 //
267 // Arguments:
268 //
269 // Inputs:
270 // src: oop location to load from, might be clobbered
271 //
272 // Output:
273 // dst: oop loaded from src location
274 //
275 // Kill:
276 // rscratch1 (scratch reg)
277 //
278 // Alias:
279 // dst: rscratch1 (might use rscratch1 as temporary output register to avoid clobbering src)
280 //
281 void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
282 Register dst, Address src, Register tmp1, Register tmp2) {
283 // 1: non-reference load, no additional barrier is needed
284 if (!is_reference_type(type)) {
285 BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp2);
286 return;
287 }
288
289 // 2: load a reference from src location and apply LRB if needed
290 if (ShenandoahBarrierSet::need_load_reference_barrier(decorators, type)) {
291 Register result_dst = dst;
292
293 // Preserve src location for LRB
294 if (dst == src.base() || dst == src.index()) {
295 dst = rscratch1;
296 }
297 assert_different_registers(dst, src.base(), src.index());
298
299 BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp2);
300
301 load_reference_barrier(masm, dst, src, decorators);
302
303 if (dst != result_dst) {
304 __ mov(result_dst, dst);
305 dst = result_dst;
306 }
307 } else {
308 BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp2);
309 }
310
311 // 3: apply keep-alive barrier if needed
312 if (ShenandoahBarrierSet::need_keep_alive_barrier(decorators, type)) {
313 satb_barrier(masm /* masm */,
314 noreg /* obj */,
315 dst /* pre_val */,
316 rthread /* thread */,
317 tmp1 /* tmp1 */,
318 tmp2 /* tmp2 */);
319 }
320 }
321
322 void ShenandoahBarrierSetAssembler::card_barrier(MacroAssembler* masm, Register obj) {
323 assert(ShenandoahCardBarrier, "Should have been checked by caller");
324
325 __ lsr(obj, obj, CardTable::card_shift());
326
327 assert(CardTable::dirty_card_val() == 0, "must be");
328
329 Address curr_ct_holder_addr(rthread, in_bytes(ShenandoahThreadLocalData::card_table_offset()));
330 __ ldr(rscratch1, curr_ct_holder_addr);
331
332 if (UseCondCardMark) {
333 Label L_already_dirty;
334 __ ldrb(rscratch2, Address(obj, rscratch1));
335 __ cbz(rscratch2, L_already_dirty);
336 __ strb(zr, Address(obj, rscratch1));
337 __ bind(L_already_dirty);
338 } else {
339 __ strb(zr, Address(obj, rscratch1));
340 }
341 }
342
343 void ShenandoahBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
344 Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) {
345 // 1: non-reference types require no barriers
346 if (!is_reference_type(type)) {
347 BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2, tmp3);
348 return;
349 }
350
351 // Flatten object address right away for simplicity: likely needed by barriers
352 if (dst.index() == noreg && dst.offset() == 0) {
353 if (dst.base() != tmp3) {
354 __ mov(tmp3, dst.base());
355 }
356 } else {
357 __ lea(tmp3, dst);
358 }
359
360 // 2: pre-barrier: SATB needs the previous value
361 if (ShenandoahBarrierSet::need_satb_barrier(decorators, type)) {
362 satb_barrier(masm,
363 tmp3 /* obj */,
364 tmp2 /* pre_val */,
365 rthread /* thread */,
366 tmp1 /* tmp */,
367 rscratch1 /* tmp2 */);
368 }
369
370 // Store!
371 BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), val, noreg, noreg, noreg);
372
373 // 3: post-barrier: card barrier needs store address
374 bool storing_non_null = (val != noreg);
375 if (ShenandoahBarrierSet::need_card_barrier(decorators, type) && storing_non_null) {
376 card_barrier(masm, tmp3);
377 }
378 }
379
380 void ShenandoahBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
381 Register obj, Register tmp, Label& slowpath) {
382 Label done;
383 // Resolve jobject
384 BarrierSetAssembler::try_resolve_jobject_in_native(masm, jni_env, obj, tmp, slowpath);
385
386 // Check for null.
387 __ cbz(obj, done);
388
389 assert(obj != rscratch2, "need rscratch2");
390 Address gc_state(jni_env, ShenandoahThreadLocalData::gc_state_offset() - JavaThread::jni_environment_offset());
391 __ lea(rscratch2, gc_state);
392 __ ldrb(rscratch2, Address(rscratch2));
393
394 // Check for heap in evacuation phase
395 __ tbnz(rscratch2, ShenandoahHeap::EVACUATION_BITPOS, slowpath);
396
397 __ bind(done);
398 }
399
400 void ShenandoahBarrierSetAssembler::try_peek_weak_handle_in_nmethod(MacroAssembler* masm, Register weak_handle, Register obj,
401 Register tmp, Label& slow_path) {
402 assert_different_registers(weak_handle, tmp, noreg);
403 assert_different_registers(obj, tmp, noreg);
404
405 Label done;
406
407 // Peek weak handle using the standard implementation.
408 BarrierSetAssembler::try_peek_weak_handle_in_nmethod(masm, weak_handle, obj, tmp, slow_path);
409
410 // Check if the reference is null, and if it is, take the fast path.
411 __ cbz(obj, done);
412
413 Address gc_state(rthread, ShenandoahThreadLocalData::gc_state_offset());
414 __ lea(tmp, gc_state);
415 __ ldrb(tmp, __ legitimize_address(gc_state, 1, tmp));
416
417 // Check if the heap is under weak-reference/roots processing, in
418 // which case we need to take the slow path.
419 __ tbnz(tmp, ShenandoahHeap::WEAK_ROOTS_BITPOS, slow_path);
420 __ bind(done);
421 }
422
423 void ShenandoahBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
424 Register start, Register count, Register scratch) {
425 assert(ShenandoahCardBarrier, "Should have been checked by caller");
426
427 Label L_loop, L_done;
428 const Register end = count;
429
430 // Zero count? Nothing to do.
431 __ cbz(count, L_done);
432
433 // end = start + count << LogBytesPerHeapOop
434 // last element address to make inclusive
435 __ lea(end, Address(start, count, Address::lsl(LogBytesPerHeapOop)));
436 __ sub(end, end, BytesPerHeapOop);
437 __ lsr(start, start, CardTable::card_shift());
438 __ lsr(end, end, CardTable::card_shift());
439
440 // number of bytes to copy
441 __ sub(count, end, start);
442
443 Address curr_ct_holder_addr(rthread, in_bytes(ShenandoahThreadLocalData::card_table_offset()));
444 __ ldr(scratch, curr_ct_holder_addr);
445 __ add(start, start, scratch);
446 __ bind(L_loop);
447 __ strb(zr, Address(start, count));
448 __ subs(count, count, 1);
449 __ br(Assembler::GE, L_loop);
450 __ bind(L_done);
451 }
452
453 #undef __
454
455 #ifdef COMPILER1
456
457 #define __ ce->masm()->
458
459 void ShenandoahBarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub) {
460 ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
461 // At this point we know that marking is in progress.
462 // If do_load() is true then we have to emit the
463 // load of the previous value; otherwise it has already
464 // been loaded into _pre_val.
465
466 __ bind(*stub->entry());
467
468 assert(stub->pre_val()->is_register(), "Precondition.");
469
470 Register pre_val_reg = stub->pre_val()->as_register();
471
472 if (stub->do_load()) {
473 ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/);
474 }
475 __ cbz(pre_val_reg, *stub->continuation());
476 ce->store_parameter(stub->pre_val()->as_register(), 0);
477 __ far_call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin()));
478 __ b(*stub->continuation());
479 }
480
481 void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub) {
482 ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
483 __ bind(*stub->entry());
484
485 DecoratorSet decorators = stub->decorators();
486 bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators);
487 bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators);
488 bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators);
489 bool is_native = ShenandoahBarrierSet::is_native_access(decorators);
490
491 Register obj = stub->obj()->as_register();
492 Register res = stub->result()->as_register();
493 Register addr = stub->addr()->as_pointer_register();
494 Register tmp1 = stub->tmp1()->as_register();
495 Register tmp2 = stub->tmp2()->as_register();
496
497 assert(res == r0, "result must arrive in r0");
498
499 if (res != obj) {
500 __ mov(res, obj);
501 }
502
503 if (is_strong) {
504 // Check for object in cset.
505 if (AOTCodeCache::is_on_for_dump()) {
506 __ lea(tmp2, ExternalAddress(AOTRuntimeConstants::cset_base_address()));
507 __ ldr(tmp2, Address(tmp2));
508 __ lea(tmp1, ExternalAddress(AOTRuntimeConstants::grain_shift_address()));
509 __ ldrw(tmp1, Address(tmp1));
510 __ lsrv(tmp1, res, tmp1);
511 } else {
512 __ mov(tmp2, ShenandoahHeap::in_cset_fast_test_addr());
513 __ lsr(tmp1, res, ShenandoahHeapRegion::region_size_bytes_shift_jint());
514 }
515 __ ldrb(tmp2, Address(tmp2, tmp1));
516 __ cbz(tmp2, *stub->continuation());
517 }
518
519 ce->store_parameter(res, 0);
520 ce->store_parameter(addr, 1);
521 if (is_strong) {
522 if (is_native) {
523 __ far_call(RuntimeAddress(bs->load_reference_barrier_strong_native_rt_code_blob()->code_begin()));
524 } else {
525 __ far_call(RuntimeAddress(bs->load_reference_barrier_strong_rt_code_blob()->code_begin()));
526 }
527 } else if (is_weak) {
528 __ far_call(RuntimeAddress(bs->load_reference_barrier_weak_rt_code_blob()->code_begin()));
529 } else {
530 assert(is_phantom, "only remaining strength");
531 __ far_call(RuntimeAddress(bs->load_reference_barrier_phantom_rt_code_blob()->code_begin()));
532 }
533
534 __ b(*stub->continuation());
535 }
536
537 #undef __
538
539 #define __ sasm->
540
541 void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
542 __ prologue("shenandoah_pre_barrier", false);
543
544 // arg0 : previous value of memory
545
546 BarrierSet* bs = BarrierSet::barrier_set();
547
548 const Register pre_val = r0;
549 const Register thread = rthread;
550 const Register tmp = rscratch1;
551
552 Address queue_index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
553 Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
554
555 Label done;
556 Label runtime;
557
558 // Is marking still active?
559 Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
560 __ ldrb(tmp, gc_state);
561 __ tbz(tmp, ShenandoahHeap::MARKING_BITPOS, done);
562
563 // Can we store original value in the thread's buffer?
564 __ ldr(tmp, queue_index);
565 __ cbz(tmp, runtime);
566
567 __ sub(tmp, tmp, wordSize);
568 __ str(tmp, queue_index);
569 __ ldr(rscratch2, buffer);
570 __ add(tmp, tmp, rscratch2);
571 __ load_parameter(0, rscratch2);
572 __ str(rscratch2, Address(tmp, 0));
573 __ b(done);
574
575 __ bind(runtime);
576 __ push_call_clobbered_registers();
577 __ load_parameter(0, pre_val);
578 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_barrier_pre), pre_val);
579 __ pop_call_clobbered_registers();
580 __ bind(done);
581
582 __ epilogue();
583 }
584
585 void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, DecoratorSet decorators) {
586 __ prologue("shenandoah_load_reference_barrier", false);
587 // arg0 : object to be resolved
588
589 __ push_call_clobbered_registers();
590 __ load_parameter(0, r0);
591 __ load_parameter(1, r1);
592
593 bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators);
594 bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators);
595 bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators);
596 bool is_native = ShenandoahBarrierSet::is_native_access(decorators);
597 if (is_strong) {
598 if (is_native) {
599 __ lea(lr, RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong)));
600 } else {
601 if (UseCompressedOops) {
602 __ lea(lr, RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong_narrow)));
603 } else {
604 __ lea(lr, RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong)));
605 }
606 }
607 } else if (is_weak) {
608 assert(!is_native, "weak must not be called off-heap");
609 if (UseCompressedOops) {
610 __ lea(lr, RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow)));
611 } else {
612 __ lea(lr, RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak)));
613 }
614 } else {
615 assert(is_phantom, "only remaining strength");
616 assert(is_native, "phantom must only be called off-heap");
617 __ lea(lr, RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom)));
618 }
619 __ blr(lr);
620 __ mov(rscratch1, r0);
621 __ pop_call_clobbered_registers();
622 __ mov(r0, rscratch1);
623
624 __ epilogue();
625 }
626
627 #undef __
628
629 #endif // COMPILER1
630
631 #ifdef COMPILER2
632
633 #undef __
634 #define __ masm->
635
636
637 void ShenandoahBarrierSetAssembler::load_c2(const MachNode* node, MacroAssembler* masm, Register dst, Address src, Register tmp1, Register tmp2, bool is_narrow, bool is_acquire) {
638 // Do the actual load. This load is the candidate for implicit null check, and MUST come first.
639 if (is_narrow) {
640 if (is_acquire) {
641 assert(src.getMode() == Address::base_plus_offset && src.offset() == 0,
642 "is_acquire path requires address to be base-only");
643 __ ldarw(dst, src.base());
644 } else {
645 __ ldrw(dst, src);
646 }
647 } else {
648 if (is_acquire) {
649 assert(src.getMode() == Address::base_plus_offset && src.offset() == 0,
650 "is_acquire path requires address to be base-only");
651 __ ldar(dst, src.base());
652 } else {
653 __ ldr(dst, src);
654 }
655 }
656
657 ShenandoahBarrierStubC2::load_post(masm, node, dst, src, tmp1, tmp2, is_narrow);
658 }
659
660 void ShenandoahBarrierSetAssembler::store_c2(const MachNode* node, MacroAssembler* masm, Address dst, bool dst_narrow,
661 Register src, bool src_narrow, Register tmp1, Register tmp2, Register tmp3, bool is_volatile) {
662
663 ShenandoahBarrierStubC2::store_pre(masm, node, dst, tmp1, tmp2, tmp3, dst_narrow);
664
665 // Do the actual store
666 if (dst_narrow) {
667 if (!src_narrow) {
668 // Need to encode into rscratch, because we cannot clobber src.
669 if ((node->barrier_data() & ShenandoahBitNotNull) == 0) {
670 __ encode_heap_oop(tmp2, src);
671 } else {
672 __ encode_heap_oop_not_null(tmp2, src);
673 }
674 src = tmp2;
675 }
676
677 if (is_volatile) {
678 assert(dst.getMode() == Address::base_plus_offset && dst.offset() == 0,
679 "is_acquire path requires address to be base-only");
680 __ stlrw(src, dst.base());
681 } else {
682 __ strw(src, dst);
683 }
684 } else {
685 if (is_volatile) {
686 assert(dst.getMode() == Address::base_plus_offset && dst.offset() == 0,
687 "is_acquire path requires address to be base-only");
688 __ stlr(src, dst.base());
689 } else {
690 __ str(src, dst);
691 }
692 }
693
694 ShenandoahBarrierStubC2::store_post(masm, node, dst, tmp2, tmp3);
695 }
696
697 void ShenandoahBarrierSetAssembler::compare_and_set_c2(const MachNode* node, MacroAssembler* masm, Register res, Register addr,
698 Register oldval, Register newval, Register tmp1, Register tmp2, Register tmp3, bool exchange, bool narrow, bool weak, bool acquire) {
699 Assembler::operand_size op_size = narrow ? Assembler::word : Assembler::xword;
700
701 ShenandoahBarrierStubC2::load_store_pre(masm, node, addr, tmp1, tmp2, tmp3, narrow);
702
703 // CAS!
704 __ cmpxchg(addr, oldval, newval, op_size, acquire, /* release */ true, weak, exchange ? res : noreg);
705
706 // If we need a boolean result out of CAS, set the flag appropriately and promote the result.
707 if (!exchange) {
708 assert(res != noreg, "need result register");
709 __ cset(res, Assembler::EQ);
710 }
711
712 ShenandoahBarrierStubC2::load_store_post(masm, node, Address(addr, 0), tmp2, tmp3);
713 }
714
715 void ShenandoahBarrierSetAssembler::get_and_set_c2(const MachNode* node, MacroAssembler* masm, Register preval,
716 Register newval, Register addr, Register tmp1, Register tmp2, Register tmp3, bool is_acquire) {
717 bool is_narrow = node->bottom_type()->isa_narrowoop();
718
719 ShenandoahBarrierStubC2::load_store_pre(masm, node, addr, tmp1, tmp2, tmp3, is_narrow);
720
721 if (is_narrow) {
722 if (is_acquire) {
723 __ atomic_xchgalw(preval, newval, addr);
724 } else {
725 __ atomic_xchgw(preval, newval, addr);
726 }
727 } else {
728 if (is_acquire) {
729 __ atomic_xchgal(preval, newval, addr);
730 } else {
731 __ atomic_xchg(preval, newval, addr);
732 }
733 }
734
735 ShenandoahBarrierStubC2::load_store_post(masm, node, Address(addr, 0), tmp2, tmp3);
736 }
737
738 #undef __
739 #define __ masm.
740
741 void ShenandoahBarrierStubC2::cardtable(MacroAssembler& masm, Address address, Register tmp1, Register tmp2) {
742 assert(CardTable::dirty_card_val() == 0, "must be");
743 Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
744
745 // tmp1 = card table base (holder)
746 Address curr_ct_holder_addr(rthread, in_bytes(ShenandoahThreadLocalData::card_table_offset()));
747 __ ldr(tmp1, curr_ct_holder_addr);
748
749 // tmp2 = effective address
750 __ lea(tmp2, address);
751
752 // tmp2 = &card_table[ addr >> CardTable::card_shift() ] ; card index
753 __ add(tmp2, tmp1, tmp2, Assembler::LSR, CardTable::card_shift());
754
755 if (UseCondCardMark) {
756 Label L_already_dirty;
757 __ ldrb(tmp1, Address(tmp2));
758 __ cbz(tmp1, L_already_dirty);
759 __ strb(zr, Address(tmp2));
760 __ bind(L_already_dirty);
761 } else {
762 __ strb(zr, Address(tmp2));
763 }
764 }
765
766 void ShenandoahBarrierStubC2::enter_if_gc_state(MacroAssembler& masm, const char test_state, Register tmp) {
767 Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
768 PhaseOutput* const output = Compile::current()->output();
769 Address gc_state_fast(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(test_state)));
770
771 // We piggyback on scratch_emit_size mode to compute the slowpath stub size.
772 // We'll use that information to decide whether we need a far jump to the
773 // stub entry point or not. In scratch_emit_size mode we don't bind entry()
774 // because otherwise it will be rebound when we later emit the instructions
775 // for real.
776 if (_needs_far_jump) {
777 __ ldrb(tmp, gc_state_fast);
778 __ cbz(tmp, *continuation());
779 __ b(output->in_scratch_emit_size() ? *continuation() : *entry());
780 } else {
781 __ ldrb(tmp, gc_state_fast);
782 __ cbnz(tmp, output->in_scratch_emit_size() ? *continuation() : *entry());
783 }
784
785 // This is were the slowpath stub will return to or the code above will
786 // jump to if the checks are false
787 __ bind(*continuation());
788 }
789
790 void ShenandoahBarrierStubC2::emit_code(MacroAssembler& masm) {
791 Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
792 assert(_needs_keep_alive_barrier || _needs_load_ref_barrier, "Why are you here?");
793 PhaseOutput* const output = Compile::current()->output();
794
795 // We piggyback on scratch_emit_size mode to compute the slowpath stub size.
796 // We'll use that information to decide whether we need a far jump to the
797 // stub entry point or not. In scratch_emit_size mode we don't bind entry()
798 // because otherwise it will be rebound when we later emit the instructions
799 // for real.
800 if (!output->in_scratch_emit_size()) {
801 __ bind(*entry());
802 }
803
804 // If we need to load ourselves, do it here.
805 if (_do_load) {
806 if (_narrow) {
807 __ ldrw(_obj, _addr);
808 } else {
809 __ ldr(_obj, _addr);
810 }
811 }
812
813 // If the object is null, there is no point in applying barriers.
814 maybe_far_jump_if_zero(masm, _obj);
815
816 // We need to make sure that loads done by callers survive across slow-path calls.
817 // For self-loads, we need to care about the case when both KA and LRB are enabled (rare).
818 bool needs_both_barriers = _needs_keep_alive_barrier && _needs_load_ref_barrier;
819 if (!_do_load || needs_both_barriers) {
820 preserve(_obj);
821 }
822
823 // Go for barriers. Barriers can return straight to continuation, as long
824 // as another barrier is not needed and we can reach the fastpath.
825 if (needs_both_barriers) {
826 // The Load match rule in the .ad file may have legitimized the load
827 // address using a TEMP register and in that case we need to explicitly
828 // preserve them here, because the RA does not consider TEMP as live-in,
829 // and the KA runtime call may clobber them and cause a crash on the
830 // subsequent LRB stub.
831 if (_addr.base() != noreg) {
832 preserve(_addr.base());
833 }
834 if (_addr.index() != noreg) {
835 preserve(_addr.index());
836 }
837 keepalive(masm, nullptr);
838 lrb(masm);
839 } else if (_needs_keep_alive_barrier) {
840 keepalive(masm, continuation());
841 } else if (_needs_load_ref_barrier) {
842 lrb(masm);
843 } else {
844 ShouldNotReachHere();
845 }
846 }
847
848 void ShenandoahBarrierStubC2::maybe_far_jump_if_zero(MacroAssembler& masm, Register reg) {
849 if (_needs_far_jump) {
850 Label L_short_jump;
851 __ cbnz(reg, L_short_jump);
852 __ b(*continuation());
853 __ bind(L_short_jump);
854 } else {
855 __ cbz(reg, *continuation());
856 }
857 }
858
859 void ShenandoahBarrierStubC2::keepalive(MacroAssembler& masm, Label* L_done) {
860 Address gcstate(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(ShenandoahHeap::MARKING)));
861 Address index(rthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
862 Address buffer(rthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
863 Label L_through, L_slowpath;
864
865 // If another barrier is enabled as well, do a runtime check for a specific barrier.
866 if (_needs_load_ref_barrier) {
867 assert(L_done == nullptr, "L_done is always null when _needs_load_ref_barrier is true");
868 __ ldrb(_tmp1, gcstate);
869 __ cbz(_tmp1, L_through);
870 }
871
872 // Fast-path: put object into buffer.
873 // If buffer is already full, go slow.
874 __ ldr(_tmp1, index);
875 __ cbz(_tmp1, L_slowpath);
876 __ sub(_tmp1, _tmp1, wordSize);
877 __ str(_tmp1, index);
878 __ ldr(_tmp2, buffer);
879
880 // Store the object in queue.
881 // If object is narrow, we need to decode it before inserting.
882 if (_narrow) {
883 __ add(_tmp2, _tmp2, _tmp1);
884 __ decode_heap_oop_not_null(_tmp1, _obj);
885 __ str(_tmp1, Address(_tmp2));
886 } else {
887 // Buffer is 64-bit address, must be in base register.
888 __ str(_obj, Address(_tmp2, _tmp1));
889 }
890
891 // Fast-path exits here.
892 if (L_done != nullptr) {
893 __ b(*L_done);
894 } else {
895 __ b(L_through);
896 }
897
898 // Slow-path: call runtime to handle.
899 __ bind(L_slowpath);
900
901 {
902 SaveLiveRegisters slr(&masm, this);
903
904 // Go to runtime and handle the rest there.
905 __ mov(c_rarg0, _obj);
906 __ lea(lr, RuntimeAddress(keepalive_runtime_entry_addr()));
907 __ blr(lr);
908 }
909 if (L_done != nullptr) {
910 __ b(*L_done);
911 } else {
912 __ bind(L_through);
913 }
914 }
915
916 void ShenandoahBarrierStubC2::lrb(MacroAssembler& masm) {
917 Label L_slow;
918
919 // If another barrier is enabled as well, do a runtime check for a specific barrier.
920 if (_needs_keep_alive_barrier) {
921 char state_to_check = ShenandoahHeap::HAS_FORWARDED | (_needs_load_ref_weak_barrier ? ShenandoahHeap::WEAK_ROOTS : 0);
922 Address gc_state_fast(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(state_to_check)));
923 __ ldrb(_tmp1, gc_state_fast);
924 maybe_far_jump_if_zero(masm, _tmp1);
925 }
926
927 // If weak references are being processed, weak/phantom loads need to go slow,
928 // regardless of their cset status.
929 if (_needs_load_ref_weak_barrier) {
930 Address gc_state_fast(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(ShenandoahHeap::WEAK_ROOTS)));
931 __ ldrb(_tmp1, gc_state_fast);
932 __ cbnz(_tmp1, L_slow);
933 }
934
935 // Cset-check. Fall-through to slow if in collection set.
936 bool is_aot = AOTCodeCache::is_on_for_dump();
937 if (!is_aot) {
938 __ mov(_tmp1, ShenandoahHeap::in_cset_fast_test_addr());
939 if (_narrow) {
940 __ decode_heap_oop_not_null(_tmp2, _obj);
941 __ add(_tmp1, _tmp1, _tmp2, Assembler::LSR, ShenandoahHeapRegion::region_size_bytes_shift_jint());
942 } else {
943 __ add(_tmp1, _tmp1, _obj, Assembler::LSR, ShenandoahHeapRegion::region_size_bytes_shift_jint());
944 }
945 } else {
946 // Generating AOT code, pull the cset bitmap and region shift from AOT table.
947 if (_narrow) {
948 __ decode_heap_oop_not_null(_tmp1, _obj);
949 } else {
950 __ mov(_tmp1, _obj);
951 }
952 __ lea(_tmp2, ExternalAddress(AOTRuntimeConstants::grain_shift_address()));
953 __ ldrw(_tmp2, Address(_tmp2));
954 __ lsrv(_tmp2, _tmp1, _tmp2);
955 __ lea(_tmp1, ExternalAddress(AOTRuntimeConstants::cset_base_address()));
956 __ ldr(_tmp1, Address(_tmp1));
957 __ add(_tmp1, _tmp1, _tmp2);
958 }
959 __ ldrb(_tmp1, Address(_tmp1, 0));
960 maybe_far_jump_if_zero(masm, _tmp1);
961
962 // Slow path
963 __ bind(L_slow);
964
965 // Obj is the result, need to temporarily stop preserving it.
966 bool is_obj_preserved = is_preserved(_obj);
967 if (is_obj_preserved) {
968 dont_preserve(_obj);
969 }
970 {
971 SaveLiveRegisters slr(&masm, this);
972
973 // Shuffle in the arguments. The end result should be:
974 // c_rarg0 <-- obj
975 // c_rarg1 <-- lea(addr)
976 if (c_rarg0 == _obj) {
977 __ lea(c_rarg1, _addr);
978 } else if (c_rarg1 == _obj) {
979 __ mov(_tmp1, c_rarg1);
980 __ lea(c_rarg1, _addr);
981 __ mov(c_rarg0, _tmp1);
982 } else {
983 assert_different_registers(c_rarg1, _obj);
984 __ lea(c_rarg1, _addr);
985 __ mov(c_rarg0, _obj);
986 }
987
988 // Go to runtime and handle the rest there.
989 __ lea(lr, RuntimeAddress(lrb_runtime_entry_addr()));
990 __ blr(lr);
991
992 // Save the result where needed. Narrow entries return narrowOop (32 bits)
993 // and AAPCS does not guarantee the upper 32 bits of x0 are zero.
994 if (_narrow) {
995 __ movw(_obj, r0);
996 } else if (_obj != r0) {
997 __ mov(_obj, r0);
998 }
999 }
1000 if (is_obj_preserved) {
1001 preserve(_obj);
1002 }
1003
1004 __ b(*continuation());
1005 }
1006
1007 int ShenandoahBarrierStubC2::available_gp_registers() {
1008 Unimplemented(); // Not used
1009 return 0;
1010 }
1011
1012 bool ShenandoahBarrierStubC2::is_special_register(Register r) {
1013 Unimplemented(); // Not used
1014 return true;
1015 }
1016
1017 static ShenandoahBarrierSetC2State* barrier_set_state() {
1018 return reinterpret_cast<ShenandoahBarrierSetC2State*>(Compile::current()->barrier_set_state());
1019 }
1020
1021 static int get_stub_size(ShenandoahBarrierStubC2* stub) {
1022 PhaseOutput* const output = Compile::current()->output();
1023 assert(output->in_scratch_emit_size(), "only used when in scratch_emit_size.");
1024 BufferBlob* const blob = output->scratch_buffer_blob();
1025 CodeBuffer cb(blob->content_begin(), (address)output->scratch_locs_memory() - blob->content_begin());
1026 MacroAssembler masm(&cb);
1027 stub->emit_code(masm);
1028 return cb.insts_size();
1029 }
1030
1031 void ShenandoahBarrierStubC2::post_init() {
1032 // If we are in scratch emit mode we assume worst case, and force the use of
1033 // far branches.
1034 PhaseOutput* const output = Compile::current()->output();
1035 ShenandoahBarrierSetC2State* state = barrier_set_state();
1036 if (output->in_scratch_emit_size()) {
1037 state->inc_stubs_current_total_size(get_stub_size(this));
1038 _needs_far_jump = true;
1039 return;
1040 }
1041
1042 // The logic implemented in this stub only uses short jumps (cbz, cbnz) if
1043 // the aggregation of all relevant code sections of a method is less than 1MB
1044 // - 2KB. We could be more aggressive and try and compute the distance
1045 // between the fastpath branch and the stub entry but in practice not many
1046 // methods reach the 1MB size.
1047 const BufferSizingData* sizing = output->buffer_sizing_data();
1048 const int code_size = sizing->_code + state->stubs_current_total_size();
1049
1050 // Maximum backward range is 1M. Maximum forward reach is 1M - 4bytes.
1051 // Subtract 2K to be ultra conservative.
1052 const int cond_branch_max_reach = (int)(1*M - 2*K);
1053 _needs_far_jump = code_size >= cond_branch_max_reach;
1054 }
1055
1056 #endif // COMPILER2