1 /*
2 * Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
3 * Copyright (c) 2018, 2022, Red Hat, Inc. All rights reserved.
4 * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
5 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 *
7 * This code is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License version 2 only, as
9 * published by the Free Software Foundation.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 *
25 */
26
27 #include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp"
28 #include "gc/shenandoah/mode/shenandoahMode.hpp"
29 #include "gc/shenandoah/shenandoahBarrierSet.hpp"
30 #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
31 #include "gc/shenandoah/shenandoahHeap.inline.hpp"
32 #include "gc/shenandoah/shenandoahHeapRegion.hpp"
33 #include "gc/shenandoah/shenandoahNMethod.inline.hpp"
34 #include "gc/shenandoah/shenandoahRuntime.hpp"
35 #include "gc/shenandoah/shenandoahThreadLocalData.hpp"
36 #include "interpreter/interp_masm.hpp"
37 #include "interpreter/interpreter.hpp"
38 #include "nativeInst_aarch64.hpp"
39 #include "runtime/javaThread.hpp"
40 #include "runtime/sharedRuntime.hpp"
41 #ifdef COMPILER1
42 #include "c1/c1_LIRAssembler.hpp"
43 #include "c1/c1_MacroAssembler.hpp"
44 #include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp"
45 #endif
46 #ifdef COMPILER2
47 #include "gc/shenandoah/c2/shenandoahBarrierSetC2.hpp"
48 #include "opto/output.hpp"
49 #endif
50
51 #define __ masm->
52
53 void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
54 Register src, Register dst, Register count, RegSet saved_regs) {
55 if (is_oop) {
56 bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;
57 if ((ShenandoahSATBBarrier && !dest_uninitialized) || ShenandoahLoadRefBarrier) {
58
59 Label done;
60
61 // Avoid calling runtime if count == 0
62 __ cbz(count, done);
63
64 // Is GC active?
65 assert(!saved_regs.contains(rscratch1), "Sanity: about to clobber rscratch1");
66 assert(!saved_regs.contains(rscratch2), "Sanity: about to clobber rscratch2");
67 Address gc_state(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
68 __ ldrb(rscratch1, gc_state);
69 if (ShenandoahSATBBarrier && dest_uninitialized) {
70 __ tbz(rscratch1, ShenandoahHeap::HAS_FORWARDED_BITPOS, done);
71 } else {
72 __ mov(rscratch2, ShenandoahHeap::HAS_FORWARDED | ShenandoahHeap::MARKING);
73 __ tst(rscratch1, rscratch2);
74 __ br(Assembler::EQ, done);
75 }
76
77 __ push_call_clobbered_registers();
78 // If arguments are not in proper places, shuffle them.
79 // Doing this via the stack is the most straight-forward way to avoid
80 // accidentally smashing any register.
81 if (c_rarg0 != src || c_rarg1 != dst || c_rarg2 != count) {
82 __ push(RegSet::of(src), sp);
83 __ push(RegSet::of(dst), sp);
84 __ push(RegSet::of(count), sp);
85 __ pop(RegSet::of(c_rarg2), sp);
86 __ pop(RegSet::of(c_rarg1), sp);
87 __ pop(RegSet::of(c_rarg0), sp);
88 }
89 address target = nullptr;
90 if (UseCompressedOops) {
91 target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop);
92 } else {
93 target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop);
94 }
95 __ call_VM_leaf(target, 3);
96 __ pop_call_clobbered_registers();
97 __ bind(done);
98 }
99 }
100 }
101
102 void ShenandoahBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
103 Register start, Register count, Register tmp) {
104 if (ShenandoahCardBarrier && is_oop) {
105 gen_write_ref_array_post_barrier(masm, decorators, start, count, tmp);
106 }
107 }
108
109 void ShenandoahBarrierSetAssembler::satb_barrier(MacroAssembler* masm,
110 Register obj,
111 Register pre_val,
112 Register thread,
113 Register tmp1,
114 Register tmp2) {
115 assert(ShenandoahSATBBarrier, "Should be checked by caller");
116 assert(thread == rthread, "must be");
117
118 Label done;
119 Label runtime;
120
121 assert_different_registers(obj, pre_val, tmp1, tmp2);
122 assert(pre_val != noreg && tmp1 != noreg && tmp2 != noreg, "expecting a register");
123
124 Address index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
125 Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
126
127 // Is marking active?
128 Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
129 __ ldrb(tmp1, gc_state);
130 __ tbz(tmp1, ShenandoahHeap::MARKING_BITPOS, done);
131
132 // Do we need to load the previous value?
133 if (obj != noreg) {
134 if (UseCompressedOops) {
135 __ ldrw(pre_val, Address(obj, 0));
136 __ decode_heap_oop(pre_val);
137 } else {
138 __ ldr(pre_val, Address(obj, 0));
139 }
140 }
141
142 // Is the previous value null?
143 __ cbz(pre_val, done);
144
145 // Can we store original value in the thread's buffer?
146 // Is index == 0?
147 // (The index field is typed as size_t.)
148
149 __ ldr(tmp1, index); // tmp := *index_adr
150 __ cbz(tmp1, runtime); // tmp == 0?
151 // If yes, goto runtime
152
153 __ sub(tmp1, tmp1, wordSize); // tmp := tmp - wordSize
154 __ str(tmp1, index); // *index_adr := tmp
155 __ ldr(tmp2, buffer);
156 __ add(tmp1, tmp1, tmp2); // tmp := tmp + *buffer_adr
157
158 // Record the previous value
159 __ str(pre_val, Address(tmp1, 0));
160 __ b(done);
161
162 __ bind(runtime);
163
164 // Slow-path call
165 __ enter(/* strip_ret_addr = */ true);
166 __ push_call_clobbered_registers();
167 if (c_rarg0 != pre_val) {
168 __ mov(c_rarg0, pre_val);
169 }
170 // Calling with super_call_VM_leaf with c_rarg0 bypasses interpreter checks and avoids any moves.
171 __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_barrier_pre), c_rarg0);
172 __ pop_call_clobbered_registers();
173 __ leave();
174
175 __ bind(done);
176 }
177
// Emits the load-reference barrier (LRB) for a reference that has already
// been loaded into dst from load_addr.
//
//   dst:        register holding the loaded reference; receives the barrier
//               result. Must not be rscratch2.
//   load_addr:  address the reference was loaded from; its base and index
//               must differ from rscratch1 and rscratch2.
//   decorators: access decorators; they select strong/weak/phantom handling
//               and whether the access is native.
//
// The generated code clobbers rscratch1 and rscratch2. r0 and r1 are used
// for the runtime arguments and are saved/restored around the barrier unless
// one of them is the output register.
void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr, DecoratorSet decorators) {
  assert(ShenandoahLoadRefBarrier, "Should be enabled");
  assert(dst != rscratch2, "need rscratch2");
  assert_different_registers(load_addr.base(), load_addr.index(), rscratch1, rscratch2);

  bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators);
  bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators);
  bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators);
  bool is_native = ShenandoahBarrierSet::is_native_access(decorators);
  // Narrow (compressed) runtime entries are only chosen for non-native accesses.
  bool is_narrow = UseCompressedOops && !is_native;

  Label heap_stable, not_cset;
  Address gc_state(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
  __ ldrb(rscratch2, gc_state);

  // Check for heap stability
  if (is_strong) {
    // Strong access: skip the barrier unless HAS_FORWARDED is set.
    __ tbz(rscratch2, ShenandoahHeap::HAS_FORWARDED_BITPOS, heap_stable);
  } else {
    // Non-strong access: the barrier also runs whenever WEAK_ROOTS is set.
    Label lrb;
    __ tbnz(rscratch2, ShenandoahHeap::WEAK_ROOTS_BITPOS, lrb);
    __ tbz(rscratch2, ShenandoahHeap::HAS_FORWARDED_BITPOS, heap_stable);
    __ bind(lrb);
  }

  // use r1 for load address
  // If dst is r1, move the loaded value to rscratch1 first so that r1 can
  // be overwritten with the load address below.
  Register result_dst = dst;
  if (dst == r1) {
    __ mov(rscratch1, dst);
    dst = rscratch1;
  }

  // Save r0 and r1, unless it is an output register
  RegSet to_save = RegSet::of(r0, r1) - result_dst;
  __ push(to_save, sp);
  __ lea(r1, load_addr);
  __ mov(r0, dst);

  // Test for in-cset
  // Only strong accesses take this shortcut: index the byte table by
  // (object address >> region shift) and skip the call when bit 0 is clear.
  if (is_strong) {
    if (AOTCodeCache::is_on_for_dump()) {
      // When dumping AOT code, the table base and the shift are loaded from
      // memory at run time rather than embedded as immediates.
      __ lea(rscratch2, ExternalAddress(AOTRuntimeConstants::cset_base_address()));
      __ ldr(rscratch2, Address(rscratch2));
      __ lea(rscratch1, ExternalAddress(AOTRuntimeConstants::grain_shift_address()));
      __ ldrw(rscratch1, Address(rscratch1));
      __ lsrv(rscratch1, r0, rscratch1);
    } else {
      __ mov(rscratch2, ShenandoahHeap::in_cset_fast_test_addr());
      __ lsr(rscratch1, r0, ShenandoahHeapRegion::region_size_bytes_shift_jint());
    }
    __ ldrb(rscratch2, Address(rscratch2, rscratch1));
    __ tbz(rscratch2, 0, not_cset);
  }

  // Slow-path call
  __ enter(/* strip_ret_addr = */ true);
  __ push_call_clobbered_registers();
  // Pick the runtime entry that matches reference strength and oop width.
  address target = nullptr;
  if (is_strong) {
    if (is_narrow) {
      target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong_narrow);
    } else {
      target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong);
    }
  } else if (is_weak) {
    if (is_narrow) {
      target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow);
    } else {
      target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak);
    }
  } else {
    assert(is_phantom, "only remaining strength");
    assert(!is_narrow, "phantom access cannot be narrow");
    target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom);
  }
  // Calling with super_call_VM_leaf with c_rarg0/1 bypasses interpreter checks and avoids any moves.
  __ super_call_VM_leaf(target, c_rarg0, c_rarg1);
  // Carry the call result in rscratch1 across the register restore, then put
  // it back in r0 (presumably because the restore would otherwise overwrite r0).
  __ mov(rscratch1, r0);
  __ pop_call_clobbered_registers();
  __ mov(r0, rscratch1);
  __ leave();

  __ bind(not_cset);

  // r0 holds the (possibly updated) reference; move it to the caller's
  // output register, then restore whichever of r0/r1 was saved.
  __ mov(result_dst, r0);
  __ pop(to_save, sp);

  __ bind(heap_stable);
}
267
268 //
269 // Arguments:
270 //
271 // Inputs:
272 // src: oop location to load from, might be clobbered
273 //
274 // Output:
275 // dst: oop loaded from src location
276 //
277 // Kill:
278 // rscratch1 (scratch reg)
279 //
280 // Alias:
281 // dst: rscratch1 (might use rscratch1 as temporary output register to avoid clobbering src)
282 //
283 void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
284 Register dst, Address src, Register tmp1, Register tmp2) {
285 // 1: non-reference load, no additional barrier is needed
286 if (!is_reference_type(type)) {
287 BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp2);
288 return;
289 }
290
291 // 2: load a reference from src location and apply LRB if needed
292 if (ShenandoahBarrierSet::need_load_reference_barrier(decorators, type)) {
293 Register result_dst = dst;
294
295 // Preserve src location for LRB
296 if (dst == src.base() || dst == src.index()) {
297 dst = rscratch1;
298 }
299 assert_different_registers(dst, src.base(), src.index());
300
301 BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp2);
302
303 load_reference_barrier(masm, dst, src, decorators);
304
305 if (dst != result_dst) {
306 __ mov(result_dst, dst);
307 dst = result_dst;
308 }
309 } else {
310 BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp2);
311 }
312
313 // 3: apply keep-alive barrier if needed
314 if (ShenandoahBarrierSet::need_keep_alive_barrier(decorators, type)) {
315 satb_barrier(masm /* masm */,
316 noreg /* obj */,
317 dst /* pre_val */,
318 rthread /* thread */,
319 tmp1 /* tmp1 */,
320 tmp2 /* tmp2 */);
321 }
322 }
323
324 void ShenandoahBarrierSetAssembler::card_barrier(MacroAssembler* masm, Register obj) {
325 assert(ShenandoahCardBarrier, "Should have been checked by caller");
326
327 __ lsr(obj, obj, CardTable::card_shift());
328
329 assert(CardTable::dirty_card_val() == 0, "must be");
330
331 Address curr_ct_holder_addr(rthread, in_bytes(ShenandoahThreadLocalData::card_table_offset()));
332 __ ldr(rscratch1, curr_ct_holder_addr);
333
334 if (UseCondCardMark) {
335 Label L_already_dirty;
336 __ ldrb(rscratch2, Address(obj, rscratch1));
337 __ cbz(rscratch2, L_already_dirty);
338 __ strb(zr, Address(obj, rscratch1));
339 __ bind(L_already_dirty);
340 } else {
341 __ strb(zr, Address(obj, rscratch1));
342 }
343 }
344
345 void ShenandoahBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
346 Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) {
347 // 1: non-reference types require no barriers
348 if (!is_reference_type(type)) {
349 BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2, tmp3);
350 return;
351 }
352
353 // Flatten object address right away for simplicity: likely needed by barriers
354 if (dst.index() == noreg && dst.offset() == 0) {
355 if (dst.base() != tmp3) {
356 __ mov(tmp3, dst.base());
357 }
358 } else {
359 __ lea(tmp3, dst);
360 }
361
362 // 2: pre-barrier: SATB needs the previous value
363 if (ShenandoahBarrierSet::need_satb_barrier(decorators, type)) {
364 satb_barrier(masm,
365 tmp3 /* obj */,
366 tmp2 /* pre_val */,
367 rthread /* thread */,
368 tmp1 /* tmp */,
369 rscratch1 /* tmp2 */);
370 }
371
372 // Store!
373 BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), val, noreg, noreg, noreg);
374
375 // 3: post-barrier: card barrier needs store address
376 bool storing_non_null = (val != noreg);
377 if (ShenandoahBarrierSet::need_card_barrier(decorators, type) && storing_non_null) {
378 card_barrier(masm, tmp3);
379 }
380 }
381
382 void ShenandoahBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
383 Register obj, Register tmp, Label& slowpath) {
384 Label done;
385 // Resolve jobject
386 BarrierSetAssembler::try_resolve_jobject_in_native(masm, jni_env, obj, tmp, slowpath);
387
388 // Check for null.
389 __ cbz(obj, done);
390
391 assert(obj != rscratch2, "need rscratch2");
392 Address gc_state(jni_env, ShenandoahThreadLocalData::gc_state_offset() - JavaThread::jni_environment_offset());
393 __ lea(rscratch2, gc_state);
394 __ ldrb(rscratch2, Address(rscratch2));
395
396 // Check for heap in evacuation phase
397 __ tbnz(rscratch2, ShenandoahHeap::EVACUATION_BITPOS, slowpath);
398
399 __ bind(done);
400 }
401
402 void ShenandoahBarrierSetAssembler::try_peek_weak_handle_in_nmethod(MacroAssembler* masm, Register weak_handle, Register obj,
403 Register tmp, Label& slow_path) {
404 assert_different_registers(weak_handle, tmp, noreg);
405 assert_different_registers(obj, tmp, noreg);
406
407 Label done;
408
409 // Peek weak handle using the standard implementation.
410 BarrierSetAssembler::try_peek_weak_handle_in_nmethod(masm, weak_handle, obj, tmp, slow_path);
411
412 // Check if the reference is null, and if it is, take the fast path.
413 __ cbz(obj, done);
414
415 Address gc_state(rthread, ShenandoahThreadLocalData::gc_state_offset());
416 __ lea(tmp, gc_state);
417 __ ldrb(tmp, __ legitimize_address(gc_state, 1, tmp));
418
419 // Check if the heap is under weak-reference/roots processing, in
420 // which case we need to take the slow path.
421 __ tbnz(tmp, ShenandoahHeap::WEAK_ROOTS_BITPOS, slow_path);
422 __ bind(done);
423 }
424
425 void ShenandoahBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
426 Register start, Register count, Register scratch) {
427 assert(ShenandoahCardBarrier, "Should have been checked by caller");
428
429 Label L_loop, L_done;
430 const Register end = count;
431
432 // Zero count? Nothing to do.
433 __ cbz(count, L_done);
434
435 // end = start + count << LogBytesPerHeapOop
436 // last element address to make inclusive
437 __ lea(end, Address(start, count, Address::lsl(LogBytesPerHeapOop)));
438 __ sub(end, end, BytesPerHeapOop);
439 __ lsr(start, start, CardTable::card_shift());
440 __ lsr(end, end, CardTable::card_shift());
441
442 // number of bytes to copy
443 __ sub(count, end, start);
444
445 Address curr_ct_holder_addr(rthread, in_bytes(ShenandoahThreadLocalData::card_table_offset()));
446 __ ldr(scratch, curr_ct_holder_addr);
447 __ add(start, start, scratch);
448 __ bind(L_loop);
449 __ strb(zr, Address(start, count));
450 __ subs(count, count, 1);
451 __ br(Assembler::GE, L_loop);
452 __ bind(L_done);
453 }
454
455 #undef __
456
457 #ifdef COMPILER1
458
459 #define __ ce->masm()->
460
461 void ShenandoahBarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub) {
462 ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
463 // At this point we know that marking is in progress.
464 // If do_load() is true then we have to emit the
465 // load of the previous value; otherwise it has already
466 // been loaded into _pre_val.
467
468 __ bind(*stub->entry());
469
470 assert(stub->pre_val()->is_register(), "Precondition.");
471
472 Register pre_val_reg = stub->pre_val()->as_register();
473
474 if (stub->do_load()) {
475 ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/);
476 }
477 __ cbz(pre_val_reg, *stub->continuation());
478 ce->store_parameter(stub->pre_val()->as_register(), 0);
479 __ far_call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin()));
480 __ b(*stub->continuation());
481 }
482
483 void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub) {
484 ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
485 __ bind(*stub->entry());
486
487 DecoratorSet decorators = stub->decorators();
488 bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators);
489 bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators);
490 bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators);
491 bool is_native = ShenandoahBarrierSet::is_native_access(decorators);
492
493 Register obj = stub->obj()->as_register();
494 Register res = stub->result()->as_register();
495 Register addr = stub->addr()->as_pointer_register();
496 Register tmp1 = stub->tmp1()->as_register();
497 Register tmp2 = stub->tmp2()->as_register();
498
499 assert(res == r0, "result must arrive in r0");
500
501 if (res != obj) {
502 __ mov(res, obj);
503 }
504
505 if (is_strong) {
506 // Check for object in cset.
507 if (AOTCodeCache::is_on_for_dump()) {
508 __ lea(tmp2, ExternalAddress(AOTRuntimeConstants::cset_base_address()));
509 __ ldr(tmp2, Address(tmp2));
510 __ lea(tmp1, ExternalAddress(AOTRuntimeConstants::grain_shift_address()));
511 __ ldrw(tmp1, Address(tmp1));
512 __ lsrv(tmp1, res, tmp1);
513 } else {
514 __ mov(tmp2, ShenandoahHeap::in_cset_fast_test_addr());
515 __ lsr(tmp1, res, ShenandoahHeapRegion::region_size_bytes_shift_jint());
516 }
517 __ ldrb(tmp2, Address(tmp2, tmp1));
518 __ cbz(tmp2, *stub->continuation());
519 }
520
521 ce->store_parameter(res, 0);
522 ce->store_parameter(addr, 1);
523 if (is_strong) {
524 if (is_native) {
525 __ far_call(RuntimeAddress(bs->load_reference_barrier_strong_native_rt_code_blob()->code_begin()));
526 } else {
527 __ far_call(RuntimeAddress(bs->load_reference_barrier_strong_rt_code_blob()->code_begin()));
528 }
529 } else if (is_weak) {
530 __ far_call(RuntimeAddress(bs->load_reference_barrier_weak_rt_code_blob()->code_begin()));
531 } else {
532 assert(is_phantom, "only remaining strength");
533 __ far_call(RuntimeAddress(bs->load_reference_barrier_phantom_rt_code_blob()->code_begin()));
534 }
535
536 __ b(*stub->continuation());
537 }
538
539 #undef __
540
541 #define __ sasm->
542
543 void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
544 __ prologue("shenandoah_pre_barrier", false);
545
546 // arg0 : previous value of memory
547
548 BarrierSet* bs = BarrierSet::barrier_set();
549
550 const Register pre_val = r0;
551 const Register thread = rthread;
552 const Register tmp = rscratch1;
553
554 Address queue_index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
555 Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
556
557 Label done;
558 Label runtime;
559
560 // Is marking still active?
561 Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
562 __ ldrb(tmp, gc_state);
563 __ tbz(tmp, ShenandoahHeap::MARKING_BITPOS, done);
564
565 // Can we store original value in the thread's buffer?
566 __ ldr(tmp, queue_index);
567 __ cbz(tmp, runtime);
568
569 __ sub(tmp, tmp, wordSize);
570 __ str(tmp, queue_index);
571 __ ldr(rscratch2, buffer);
572 __ add(tmp, tmp, rscratch2);
573 __ load_parameter(0, rscratch2);
574 __ str(rscratch2, Address(tmp, 0));
575 __ b(done);
576
577 __ bind(runtime);
578 __ push_call_clobbered_registers();
579 __ load_parameter(0, pre_val);
580 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_barrier_pre), pre_val);
581 __ pop_call_clobbered_registers();
582 __ bind(done);
583
584 __ epilogue();
585 }
586
587 void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, DecoratorSet decorators) {
588 __ prologue("shenandoah_load_reference_barrier", false);
589 // arg0 : object to be resolved
590
591 __ push_call_clobbered_registers();
592 __ load_parameter(0, r0);
593 __ load_parameter(1, r1);
594
595 bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators);
596 bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators);
597 bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators);
598 bool is_native = ShenandoahBarrierSet::is_native_access(decorators);
599 if (is_strong) {
600 if (is_native) {
601 __ lea(lr, RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong)));
602 } else {
603 if (UseCompressedOops) {
604 __ lea(lr, RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong_narrow)));
605 } else {
606 __ lea(lr, RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong)));
607 }
608 }
609 } else if (is_weak) {
610 assert(!is_native, "weak must not be called off-heap");
611 if (UseCompressedOops) {
612 __ lea(lr, RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow)));
613 } else {
614 __ lea(lr, RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak)));
615 }
616 } else {
617 assert(is_phantom, "only remaining strength");
618 assert(is_native, "phantom must only be called off-heap");
619 __ lea(lr, RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom)));
620 }
621 __ blr(lr);
622 __ mov(rscratch1, r0);
623 __ pop_call_clobbered_registers();
624 __ mov(r0, rscratch1);
625
626 __ epilogue();
627 }
628
629 #undef __
630
631 #endif // COMPILER1
632
633 #ifdef COMPILER2
634
635 #undef __
636 #define __ masm->
637
638
639 void ShenandoahBarrierSetAssembler::load_c2(const MachNode* node, MacroAssembler* masm, Register dst, Address src, Register tmp1, Register tmp2, bool is_narrow, bool is_acquire) {
640 // Do the actual load. This load is the candidate for implicit null check, and MUST come first.
641 if (is_narrow) {
642 if (is_acquire) {
643 assert(src.getMode() == Address::base_plus_offset && src.offset() == 0,
644 "is_acquire path requires address to be base-only");
645 __ ldarw(dst, src.base());
646 } else {
647 __ ldrw(dst, src);
648 }
649 } else {
650 if (is_acquire) {
651 assert(src.getMode() == Address::base_plus_offset && src.offset() == 0,
652 "is_acquire path requires address to be base-only");
653 __ ldar(dst, src.base());
654 } else {
655 __ ldr(dst, src);
656 }
657 }
658
659 ShenandoahBarrierStubC2::load_post(masm, node, dst, src, tmp1, tmp2, is_narrow);
660 }
661
662 void ShenandoahBarrierSetAssembler::store_c2(const MachNode* node, MacroAssembler* masm, Address dst, bool dst_narrow,
663 Register src, bool src_narrow, Register tmp1, Register tmp2, Register tmp3, bool is_volatile) {
664
665 ShenandoahBarrierStubC2::store_pre(masm, node, dst, tmp1, tmp2, tmp3, dst_narrow);
666
667 // Do the actual store
668 if (dst_narrow) {
669 if (!src_narrow) {
670 // Need to encode into rscratch, because we cannot clobber src.
671 if ((node->barrier_data() & ShenandoahBitNotNull) == 0) {
672 __ encode_heap_oop(tmp2, src);
673 } else {
674 __ encode_heap_oop_not_null(tmp2, src);
675 }
676 src = tmp2;
677 }
678
679 if (is_volatile) {
680 assert(dst.getMode() == Address::base_plus_offset && dst.offset() == 0,
681 "is_acquire path requires address to be base-only");
682 __ stlrw(src, dst.base());
683 } else {
684 __ strw(src, dst);
685 }
686 } else {
687 if (is_volatile) {
688 assert(dst.getMode() == Address::base_plus_offset && dst.offset() == 0,
689 "is_acquire path requires address to be base-only");
690 __ stlr(src, dst.base());
691 } else {
692 __ str(src, dst);
693 }
694 }
695
696 ShenandoahBarrierStubC2::store_post(masm, node, dst, tmp2, tmp3);
697 }
698
699 void ShenandoahBarrierSetAssembler::compare_and_set_c2(const MachNode* node, MacroAssembler* masm, Register res, Register addr,
700 Register oldval, Register newval, Register tmp1, Register tmp2, Register tmp3, bool exchange, bool narrow, bool weak, bool acquire) {
701 Assembler::operand_size op_size = narrow ? Assembler::word : Assembler::xword;
702
703 ShenandoahBarrierStubC2::load_store_pre(masm, node, addr, tmp1, tmp2, tmp3, narrow);
704
705 // CAS!
706 __ cmpxchg(addr, oldval, newval, op_size, acquire, /* release */ true, weak, exchange ? res : noreg);
707
708 // If we need a boolean result out of CAS, set the flag appropriately and promote the result.
709 if (!exchange) {
710 assert(res != noreg, "need result register");
711 __ cset(res, Assembler::EQ);
712 }
713
714 ShenandoahBarrierStubC2::load_store_post(masm, node, Address(addr, 0), tmp2, tmp3);
715 }
716
717 void ShenandoahBarrierSetAssembler::get_and_set_c2(const MachNode* node, MacroAssembler* masm, Register preval,
718 Register newval, Register addr, Register tmp1, Register tmp2, Register tmp3, bool is_acquire) {
719 bool is_narrow = node->bottom_type()->isa_narrowoop();
720
721 ShenandoahBarrierStubC2::load_store_pre(masm, node, addr, tmp1, tmp2, tmp3, is_narrow);
722
723 if (is_narrow) {
724 if (is_acquire) {
725 __ atomic_xchgalw(preval, newval, addr);
726 } else {
727 __ atomic_xchgw(preval, newval, addr);
728 }
729 } else {
730 if (is_acquire) {
731 __ atomic_xchgal(preval, newval, addr);
732 } else {
733 __ atomic_xchg(preval, newval, addr);
734 }
735 }
736
737 ShenandoahBarrierStubC2::load_store_post(masm, node, Address(addr, 0), tmp2, tmp3);
738 }
739
740 #undef __
741 #define __ masm.
742
743 void ShenandoahBarrierStubC2::cardtable(MacroAssembler& masm, Address address, Register tmp1, Register tmp2) {
744 assert(CardTable::dirty_card_val() == 0, "must be");
745 Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
746
747 // tmp1 = card table base (holder)
748 Address curr_ct_holder_addr(rthread, in_bytes(ShenandoahThreadLocalData::card_table_offset()));
749 __ ldr(tmp1, curr_ct_holder_addr);
750
751 // tmp2 = effective address
752 __ lea(tmp2, address);
753
754 // tmp2 = &card_table[ addr >> CardTable::card_shift() ] ; card index
755 __ add(tmp2, tmp1, tmp2, Assembler::LSR, CardTable::card_shift());
756
757 if (UseCondCardMark) {
758 Label L_already_dirty;
759 __ ldrb(tmp1, Address(tmp2));
760 __ cbz(tmp1, L_already_dirty);
761 __ strb(zr, Address(tmp2));
762 __ bind(L_already_dirty);
763 } else {
764 __ strb(zr, Address(tmp2));
765 }
766 }
767
768 void ShenandoahBarrierStubC2::enter_if_gc_state(MacroAssembler& masm, const char test_state, Register tmp) {
769 Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
770 PhaseOutput* const output = Compile::current()->output();
771
772 // Emit the unconditional branch in the first version of the method.
773 // Let the rest of runtime figure out how to manage it.
774 if (output->in_scratch_emit_size()) {
775 // We piggyback on scratch_emit_size mode to compute the slowpath stub size.
776 // We'll use that information to decide whether we need a far jump to the
777 // stub entry point or not. In scratch_emit_size mode we don't bind entry()
778 // because otherwise it will be rebound when we later emit the instructions
779 // for real.
780 __ nop();
781 } else {
782 __ relocate(barrier_Relocation::spec(), ShenandoahNMethod::gc_state_to_reloc(test_state));
783 __ b(*entry());
784 }
785
786 // This is were the slowpath stub will return to or the code above will
787 // jump to if the checks are false
788 __ bind(*continuation());
789 }
790
791 address ShenandoahBarrierSetAssembler::parse_stub_address(address pc) {
792 NativeInstruction* ni = nativeInstruction_at(pc);
793 assert(ni->is_jump(), "Initial code version: GC barrier fastpath must be a jump");
794 NativeJump* jmp = nativeJump_at(pc);
795 return jmp->jump_destination();
796 }
797
798 static bool is_nop(address pc) {
799 if (*(pc + 0) != 0x1F) return false;
800 if (*(pc + 1) != 0x20) return false;
801 if (*(pc + 2) != 0x03) return false;
802 if (*(pc + 3) != 0xD5) return false;
803 return true;
804 }
805
806 static void insert_nop(address pc) {
807 *reinterpret_cast<int32_t*>(pc) = 0xD503201F;
808 assert(is_nop(pc), "Should be");
809 ICache::invalidate_range(pc, 4);
810 }
811
812 static void check_at(bool cond, address pc, const char* msg) {
813 assert(cond, "%s: at PC " PTR_FORMAT ": %02x%02x%02x%02x",
814 msg, p2i(pc), *(pc + 0), *(pc + 1), *(pc + 2), *(pc + 3));
815 }
816
817 bool ShenandoahBarrierSetAssembler::is_active(address pc) {
818 NativeInstruction* ni = nativeInstruction_at(pc);
819 return ni->is_jump();
820 }
821
822 void ShenandoahBarrierSetAssembler::patch_branch_to_nop(address pc) {
823 NativeInstruction* ni = nativeInstruction_at(pc);
824 if (ni->is_jump()) {
825 insert_nop(pc);
826 } else {
827 check_at(is_nop(pc), pc, "Should already be nop");
828 }
829 }
830
831 void ShenandoahBarrierSetAssembler::patch_nop_to_branch(address pc, address stub_addr) {
832 NativeInstruction* ni = nativeInstruction_at(pc);
833 if (is_nop(pc)) {
834 NativeJump::insert(pc, stub_addr);
835 } else {
836 check_at(ni->is_jump(), pc, "Should already be jump");
837 check_at(nativeJump_at(pc)->jump_destination() == stub_addr, pc, "Jump should be to the same address");
838 }
839 }
840
// Emits the out-of-line slow-path code of this C2 barrier stub.
//
// The stub must need at least one of the keep-alive (KA) and load-reference
// (LRB) barriers. In order, the emitted code:
//   1. binds entry() (skipped in scratch_emit_size mode),
//   2. loads the object itself when _do_load is set,
//   3. leaves for the continuation when the object is null,
//   4. runs the required barrier(s); when both are needed, KA runs first
//      and LRB second.
void ShenandoahBarrierStubC2::emit_code(MacroAssembler& masm) {
  Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
  assert(_needs_keep_alive_barrier || _needs_load_ref_barrier, "Why are you here?");
  PhaseOutput* const output = Compile::current()->output();

  // We piggyback on scratch_emit_size mode to compute the slowpath stub size.
  // We'll use that information to decide whether we need a far jump to the
  // stub entry point or not. In scratch_emit_size mode we don't bind entry()
  // because otherwise it will be rebound when we later emit the instructions
  // for real.
  if (!output->in_scratch_emit_size()) {
    __ bind(*entry());
  }

  // If we need to load ourselves, do it here.
  if (_do_load) {
    // Narrow fields are read with the 32-bit load.
    if (_narrow) {
      __ ldrw(_obj, _addr);
    } else {
      __ ldr(_obj, _addr);
    }
  }

  // If the object is null, there is no point in applying barriers.
  maybe_far_jump_if_zero(masm, _obj);

  // We need to make sure that loads done by callers survive across slow-path calls.
  // For self-loads, we need to care about the case when both KA and LRB are enabled (rare).
  bool needs_both_barriers = _needs_keep_alive_barrier && _needs_load_ref_barrier;
  if (!_do_load || needs_both_barriers) {
    preserve(_obj);
  }

  // Go for barriers. Barriers can return straight to continuation, as long
  // as another barrier is not needed and we can reach the fastpath.
  if (needs_both_barriers) {
    // The Load match rule in the .ad file may have legitimized the load
    // address using a TEMP register and in that case we need to explicitly
    // preserve them here, because the RA does not consider TEMP as live-in,
    // and the KA runtime call may clobber them and cause a crash on the
    // subsequent LRB stub.
    if (_addr.base() != noreg) {
      preserve(_addr.base());
    }
    if (_addr.index() != noreg) {
      preserve(_addr.index());
    }
    // KA is given no continuation label here because LRB follows it.
    keepalive(masm, nullptr);
    lrb(masm);
  } else if (_needs_keep_alive_barrier) {
    // KA alone is handed the continuation label.
    keepalive(masm, continuation());
  } else if (_needs_load_ref_barrier) {
    lrb(masm);
  } else {
    // Excluded by the assert at the top.
    ShouldNotReachHere();
  }
}
898
899 void ShenandoahBarrierStubC2::maybe_far_jump_if_zero(MacroAssembler& masm, Register reg) {
900 if (_needs_far_jump) {
901 Label L_short_jump;
902 __ cbnz(reg, L_short_jump);
903 __ b(*continuation());
904 __ bind(L_short_jump);
905 } else {
906 __ cbz(reg, *continuation());
907 }
908 }
909
910 void ShenandoahBarrierStubC2::keepalive(MacroAssembler& masm, Label* L_done) {
911 Address index(rthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
912 Address buffer(rthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
913 Label L_through, L_slowpath;
914
915 // If another barrier is enabled as well, do a check for a specific barrier.
916 if (_needs_load_ref_barrier) {
917 assert(L_done == nullptr, "Should be");
918 // Emit the unconditional branch in the first version of the method.
919 // Let the rest of runtime figure out how to manage it.
920 // TODO: We could have spared the over-jump if patching knew we need the inverse branch.
921 char state_to_check = ShenandoahHeap::MARKING;
922 Label L_over;
923 __ relocate(barrier_Relocation::spec(), ShenandoahNMethod::gc_state_to_reloc(state_to_check));
924 __ b(L_over);
925 __ b(L_through);
926 __ bind(L_over);
927 }
928
929 // Fast-path: put object into buffer.
930 // If buffer is already full, go slow.
931 __ ldr(_tmp1, index);
932 __ cbz(_tmp1, L_slowpath);
933 __ sub(_tmp1, _tmp1, wordSize);
934 __ str(_tmp1, index);
935 __ ldr(_tmp2, buffer);
936
937 // Store the object in queue.
938 // If object is narrow, we need to decode it before inserting.
939 if (_narrow) {
940 __ add(_tmp2, _tmp2, _tmp1);
941 __ decode_heap_oop_not_null(_tmp1, _obj);
942 __ str(_tmp1, Address(_tmp2));
943 } else {
944 // Buffer is 64-bit address, must be in base register.
945 __ str(_obj, Address(_tmp2, _tmp1));
946 }
947
948 // Fast-path exits here.
949 if (L_done != nullptr) {
950 __ b(*L_done);
951 } else {
952 __ b(L_through);
953 }
954
955 // Slow-path: call runtime to handle.
956 __ bind(L_slowpath);
957
958 {
959 SaveLiveRegisters slr(&masm, this);
960
961 // Go to runtime and handle the rest there.
962 __ mov(c_rarg0, _obj);
963 __ lea(lr, RuntimeAddress(keepalive_runtime_entry_addr()));
964 __ blr(lr);
965 }
966 if (L_done != nullptr) {
967 __ b(*L_done);
968 } else {
969 __ bind(L_through);
970 }
971 }
972
973 void ShenandoahBarrierStubC2::lrb(MacroAssembler& masm) {
974 Label L_slow;
975
976 // If weak references are being processed, weak/phantom loads need to go slow,
977 // regardless of their cset status.
978 if (_needs_load_ref_weak_barrier) {
979 char state_to_check = ShenandoahHeap::WEAK_ROOTS;
980 __ relocate(barrier_Relocation::spec(), ShenandoahNMethod::gc_state_to_reloc(state_to_check));
981 __ b(L_slow);
982 }
983
984 if (_needs_keep_alive_barrier) {
985 // Emit the unconditional branch in the first version of the method.
986 // Let the rest of runtime figure out how to manage it.
987 // TODO: We could have spared the over-jump if patching knew we need the inverse branch.
988 char state_to_check = ShenandoahHeap::HAS_FORWARDED | (_needs_load_ref_weak_barrier ? ShenandoahHeap::WEAK_ROOTS : 0);
989 Label L_over;
990 __ relocate(barrier_Relocation::spec(), ShenandoahNMethod::gc_state_to_reloc(state_to_check));
991 __ b(L_over);
992 __ b(*continuation());
993 __ bind(L_over);
994 }
995
996 // Cset-check. Fall-through to slow if in collection set.
997 bool is_aot = AOTCodeCache::is_on_for_dump();
998 if (!is_aot) {
999 __ mov(_tmp1, ShenandoahHeap::in_cset_fast_test_addr());
1000 if (_narrow) {
1001 __ decode_heap_oop_not_null(_tmp2, _obj);
1002 __ add(_tmp1, _tmp1, _tmp2, Assembler::LSR, ShenandoahHeapRegion::region_size_bytes_shift_jint());
1003 } else {
1004 __ add(_tmp1, _tmp1, _obj, Assembler::LSR, ShenandoahHeapRegion::region_size_bytes_shift_jint());
1005 }
1006 } else {
1007 // Generating AOT code, pull the cset bitmap and region shift from AOT table.
1008 if (_narrow) {
1009 __ decode_heap_oop_not_null(_tmp1, _obj);
1010 } else {
1011 __ mov(_tmp1, _obj);
1012 }
1013 __ lea(_tmp2, ExternalAddress(AOTRuntimeConstants::grain_shift_address()));
1014 __ ldrw(_tmp2, Address(_tmp2));
1015 __ lsrv(_tmp2, _tmp1, _tmp2);
1016 __ lea(_tmp1, ExternalAddress(AOTRuntimeConstants::cset_base_address()));
1017 __ ldr(_tmp1, Address(_tmp1));
1018 __ add(_tmp1, _tmp1, _tmp2);
1019 }
1020 __ ldrb(_tmp1, Address(_tmp1, 0));
1021 maybe_far_jump_if_zero(masm, _tmp1);
1022
1023 // Slow path
1024 __ bind(L_slow);
1025
1026 // Obj is the result, need to temporarily stop preserving it.
1027 bool is_obj_preserved = is_preserved(_obj);
1028 if (is_obj_preserved) {
1029 dont_preserve(_obj);
1030 }
1031 {
1032 SaveLiveRegisters slr(&masm, this);
1033
1034 // Shuffle in the arguments. The end result should be:
1035 // c_rarg0 <-- obj
1036 // c_rarg1 <-- lea(addr)
1037 if (c_rarg0 == _obj) {
1038 __ lea(c_rarg1, _addr);
1039 } else if (c_rarg1 == _obj) {
1040 __ mov(_tmp1, c_rarg1);
1041 __ lea(c_rarg1, _addr);
1042 __ mov(c_rarg0, _tmp1);
1043 } else {
1044 assert_different_registers(c_rarg1, _obj);
1045 __ lea(c_rarg1, _addr);
1046 __ mov(c_rarg0, _obj);
1047 }
1048
1049 // Go to runtime and handle the rest there.
1050 __ lea(lr, RuntimeAddress(lrb_runtime_entry_addr()));
1051 __ blr(lr);
1052
1053 // Save the result where needed. Narrow entries return narrowOop (32 bits)
1054 // and AAPCS does not guarantee the upper 32 bits of x0 are zero.
1055 if (_narrow) {
1056 __ movw(_obj, r0);
1057 } else if (_obj != r0) {
1058 __ mov(_obj, r0);
1059 }
1060 }
1061 if (is_obj_preserved) {
1062 preserve(_obj);
1063 }
1064
1065 __ b(*continuation());
1066 }
1067
1068 int ShenandoahBarrierStubC2::available_gp_registers() {
1069 Unimplemented(); // Not used
1070 return 0;
1071 }
1072
1073 bool ShenandoahBarrierStubC2::is_special_register(Register r) {
1074 Unimplemented(); // Not used
1075 return true;
1076 }
1077
1078 static ShenandoahBarrierSetC2State* barrier_set_state() {
1079 return reinterpret_cast<ShenandoahBarrierSetC2State*>(Compile::current()->barrier_set_state());
1080 }
1081
1082 static int get_stub_size(ShenandoahBarrierStubC2* stub) {
1083 PhaseOutput* const output = Compile::current()->output();
1084 assert(output->in_scratch_emit_size(), "only used when in scratch_emit_size.");
1085 BufferBlob* const blob = output->scratch_buffer_blob();
1086 CodeBuffer cb(blob->content_begin(), (address)output->scratch_locs_memory() - blob->content_begin());
1087 MacroAssembler masm(&cb);
1088 stub->emit_code(masm);
1089 return cb.insts_size();
1090 }
1091
1092 void ShenandoahBarrierStubC2::post_init() {
1093 // If we are in scratch emit mode we assume worst case, and force the use of
1094 // far branches.
1095 PhaseOutput* const output = Compile::current()->output();
1096 ShenandoahBarrierSetC2State* state = barrier_set_state();
1097 if (output->in_scratch_emit_size()) {
1098 state->inc_stubs_current_total_size(get_stub_size(this));
1099 _needs_far_jump = true;
1100 return;
1101 }
1102
1103 // The logic implemented in this stub only uses short jumps (cbz, cbnz) if
1104 // the aggregation of all relevant code sections of a method is less than 1MB
1105 // - 2KB. We could be more aggressive and try and compute the distance
1106 // between the fastpath branch and the stub entry but in practice not many
1107 // methods reach the 1MB size.
1108 const BufferSizingData* sizing = output->buffer_sizing_data();
1109 const int code_size = sizing->_code + state->stubs_current_total_size();
1110
1111 // Maximum backward range is 1M. Maximum forward reach is 1M - 4bytes.
1112 // Subtract 2K to be ultra conservative.
1113 const int cond_branch_max_reach = (int)(1*M - 2*K);
1114 _needs_far_jump = code_size >= cond_branch_max_reach;
1115 }
1116
1117 #endif // COMPILER2