1 /*
2 * Copyright (c) 2018, 2022, Red Hat, Inc. All rights reserved.
3 * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 *
6 * This code is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 only, as
8 * published by the Free Software Foundation.
9 *
10 * This code is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * version 2 for more details (a copy is included in the LICENSE file that
14 * accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License version
17 * 2 along with this work; if not, write to the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19 *
20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21 * or visit www.oracle.com if you need additional information or have any
22 * questions.
23 *
24 */
25
26 #include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp"
27 #include "gc/shenandoah/mode/shenandoahMode.hpp"
28 #include "gc/shenandoah/shenandoahBarrierSet.hpp"
29 #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
30 #include "gc/shenandoah/shenandoahForwarding.hpp"
31 #include "gc/shenandoah/shenandoahHeap.inline.hpp"
32 #include "gc/shenandoah/shenandoahHeapRegion.hpp"
33 #include "gc/shenandoah/shenandoahRuntime.hpp"
34 #include "gc/shenandoah/shenandoahThreadLocalData.hpp"
35 #include "interpreter/interp_masm.hpp"
36 #include "interpreter/interpreter.hpp"
37 #include "runtime/javaThread.hpp"
38 #include "runtime/sharedRuntime.hpp"
39 #ifdef COMPILER1
40 #include "c1/c1_LIRAssembler.hpp"
41 #include "c1/c1_MacroAssembler.hpp"
42 #include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp"
43 #endif
44 #ifdef COMPILER2
45 #include "gc/shenandoah/c2/shenandoahBarrierSetC2.hpp"
46 #endif
47
48 #define __ masm->
49
50 void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
51 Register src, Register dst, Register count, RegSet saved_regs) {
52 if (is_oop) {
53 bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;
54 if ((ShenandoahSATBBarrier && !dest_uninitialized) || ShenandoahLoadRefBarrier) {
55
56 Label L_done;
57
58 // Avoid calling runtime if count == 0
59 __ cbz(count, L_done);
60
61 // Is GC active?
62 Address gc_state(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
63 __ ldrb(rscratch1, gc_state);
64 if (ShenandoahSATBBarrier && dest_uninitialized) {
65 __ tbz(rscratch1, ShenandoahHeap::HAS_FORWARDED_BITPOS, L_done);
66 } else {
67 __ mov(rscratch2, ShenandoahHeap::HAS_FORWARDED | ShenandoahHeap::MARKING);
68 __ tst(rscratch1, rscratch2);
69 __ br(Assembler::EQ, L_done);
70 }
71
72 __ push(saved_regs, sp);
73 if (UseCompressedOops) {
74 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop), src, dst, count);
75 } else {
76 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop), src, dst, count);
77 }
78 __ pop(saved_regs, sp);
79 __ bind(L_done);
80 }
81 }
82 }
83
84 void ShenandoahBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
85 Register start, Register count, Register tmp) {
86 if (ShenandoahCardBarrier && is_oop) {
87 gen_write_ref_array_post_barrier(masm, decorators, start, count, tmp);
88 }
89 }
90
// SATB (snapshot-at-the-beginning) pre-write barrier.
//
// While marking is active, records the previous value of a reference field
// into the thread-local SATB queue, so that concurrent marking does not lose
// objects the mutator overwrites.
//
// obj:         address of the field about to be stored to; if noreg, pre_val
//              is assumed to already hold the previous value.
// pre_val:     receives (or already holds) the previous value; only non-null
//              values are enqueued.
// thread:      must be rthread.
// tmp1, tmp2:  scratch registers, clobbered.
// tosca_live:  r0 is live and must be preserved across the runtime call.
// expand_call: emit the runtime call without the last_sp check (see the
//              comment at the runtime-call site below).
void ShenandoahBarrierSetAssembler::satb_barrier(MacroAssembler* masm,
                                                 Register obj,
                                                 Register pre_val,
                                                 Register thread,
                                                 Register tmp1,
                                                 Register tmp2,
                                                 bool tosca_live,
                                                 bool expand_call) {
  assert(ShenandoahSATBBarrier, "Should be checked by caller");

  // If expand_call is true then we expand the call_VM_leaf macro
  // directly to skip generating the check by
  // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.

  assert(thread == rthread, "must be");

  Label L_done;
  Label L_runtime;

  assert_different_registers(obj, pre_val, tmp1, tmp2);
  assert(pre_val != noreg && tmp1 != noreg && tmp2 != noreg, "expecting a register");

  Address index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
  Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));

  // Is marking active? The barrier is only needed while the MARKING bit is set.
  Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
  __ ldrb(tmp1, gc_state);
  __ tbz(tmp1, ShenandoahHeap::MARKING_BITPOS, L_done);

  // Do we need to load the previous value?
  if (obj != noreg) {
    __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW);
  }

  // Is the previous value null? Null values need no recording.
  __ cbz(pre_val, L_done);

  // Can we store original value in the thread's buffer?
  // Is index == 0?
  // (The index field is typed as size_t.)

  __ ldr(tmp1, index);                      // tmp := *index_adr
  __ cbz(tmp1, L_runtime);                  // tmp == 0? Buffer is full:
  // If yes, goto runtime to flush it and enqueue there

  __ sub(tmp1, tmp1, wordSize);             // tmp := tmp - wordSize (index counts down)
  __ str(tmp1, index);                      // *index_adr := tmp
  __ ldr(tmp2, buffer);
  __ add(tmp1, tmp1, tmp2);                 // tmp := tmp + *buffer_adr (slot address)

  // Record the previous value
  __ str(pre_val, Address(tmp1, 0));
  __ b(L_done);

  __ bind(L_runtime);
  // save the live input values
  RegSet saved = RegSet::of(pre_val);
  if (tosca_live) saved += RegSet::of(r0);
  if (obj != noreg) saved += RegSet::of(obj);

  __ push(saved, sp);

  // Calling the runtime using the regular call_VM_leaf mechanism generates
  // code (generated by InterpreterMacroAssember::call_VM_leaf_base)
  // that checks that the *(rfp+frame::interpreter_frame_last_sp) == nullptr.
  //
  // If we are generating the pre-barrier without a frame (e.g. in the
  // intrinsified Reference.get() routine) then rfp might be pointing to
  // the caller frame and so this check will most likely fail at runtime.
  //
  // Expanding the call directly bypasses the generation of the check.
  // So when we do not have a full interpreter frame on the stack
  // expand_call should be passed true.

  if (expand_call) {
    assert(pre_val != c_rarg1, "smashed arg");
    __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_barrier_pre), pre_val);
  } else {
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_barrier_pre), pre_val);
  }

  __ pop(saved, sp);

  __ bind(L_done);
}
177
178 void ShenandoahBarrierSetAssembler::resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp) {
179 assert(ShenandoahLoadRefBarrier || ShenandoahCASBarrier, "Should be enabled");
180 Label L_is_null;
181 __ cbz(dst, L_is_null);
182 resolve_forward_pointer_not_null(masm, dst, tmp);
183 __ bind(L_is_null);
184 }
185
// IMPORTANT: This must preserve all registers, even rscratch1 and rscratch2, except those explicitly
// passed in.
//
// If dst refers to a forwarded object (both low mark-word tag bits set),
// replace dst with the forwardee, i.e. the mark word with the tag bits
// cleared; otherwise leave dst unchanged.
void ShenandoahBarrierSetAssembler::resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp) {
  assert(ShenandoahLoadRefBarrier || ShenandoahCASBarrier, "Should be enabled");
  // The below loads the mark word, checks if the lowest two bits are
  // set, and if so, clear the lowest two bits and copy the result
  // to dst. Otherwise it leaves dst alone.
  // Implementing this is surprisingly awkward. I do it here by:
  // - Inverting the mark word
  // - Test lowest two bits == 0
  // - If so, set the lowest two bits
  // - Invert the result back, and copy to dst

  bool borrow_reg = (tmp == noreg);
  if (borrow_reg) {
    // No free registers available. Make one useful.
    tmp = rscratch1;
    if (tmp == dst) {
      tmp = rscratch2;
    }
    // Borrowed scratch must be restored below (see IMPORTANT note above).
    __ push(RegSet::of(tmp), sp);
  }

  assert_different_registers(tmp, dst);

  Label L_done;
  __ ldr(tmp, Address(dst, oopDesc::mark_offset_in_bytes()));
  __ eon(tmp, tmp, zr);                           // tmp := ~mark
  __ ands(zr, tmp, markWord::lock_mask_in_place); // any inverted tag bit set => not forwarded
  __ br(Assembler::NE, L_done);
  __ orr(tmp, tmp, markWord::marked_value);       // set tag bits in the inverted word ...
  __ eon(dst, tmp, zr);                           // ... so re-inverting clears them: dst := forwardee
  __ bind(L_done);

  if (borrow_reg) {
    __ pop(RegSet::of(tmp), sp);
  }
}
224
// Load-reference barrier: given a freshly loaded reference in dst (loaded
// from load_addr), make sure dst ends up holding the to-space copy of the
// object. Fast path skips everything while the heap is stable; for strong
// accesses it additionally skips objects outside the collection set. The
// slow path calls the ShenandoahRuntime entry matching the access strength.
// Clobbers rscratch1/rscratch2; r0/r1 are preserved unless one is the result.
void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr, DecoratorSet decorators) {
  assert(ShenandoahLoadRefBarrier, "Should be enabled");
  assert(dst != rscratch2, "need rscratch2");
  assert_different_registers(load_addr.base(), load_addr.index(), rscratch1, rscratch2);

  bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators);
  bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators);
  bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators);
  bool is_native = ShenandoahBarrierSet::is_native_access(decorators);
  // Native accesses always hold uncompressed oops, regardless of UseCompressedOops.
  bool is_narrow = UseCompressedOops && !is_native;

  Label L_heap_stable, L_not_cset;
  __ enter(/*strip_ret_addr*/true);
  Address gc_state(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
  __ ldrb(rscratch2, gc_state);

  // Check for heap stability
  if (is_strong) {
    // Strong refs only need the barrier while forwarded objects may exist.
    __ tbz(rscratch2, ShenandoahHeap::HAS_FORWARDED_BITPOS, L_heap_stable);
  } else {
    // Weak/phantom refs also need processing while weak roots are being
    // handled, even if no objects are forwarded.
    Label L_lrb;
    __ tbnz(rscratch2, ShenandoahHeap::WEAK_ROOTS_BITPOS, L_lrb);
    __ tbz(rscratch2, ShenandoahHeap::HAS_FORWARDED_BITPOS, L_heap_stable);
    __ bind(L_lrb);
  }

  // use r1 for load address, so move dst out of the way if it occupies r1
  Register result_dst = dst;
  if (dst == r1) {
    __ mov(rscratch1, dst);
    dst = rscratch1;
  }

  // Save r0 and r1, unless it is an output register
  RegSet to_save = RegSet::of(r0, r1) - result_dst;
  __ push(to_save, sp);
  __ lea(r1, load_addr);
  __ mov(r0, dst);

  // Test for in-cset: index the in-cset byte map by region number;
  // a set bit 0 means the object's region is in the collection set.
  if (is_strong) {
    __ mov(rscratch2, ShenandoahHeap::in_cset_fast_test_addr());
    __ lsr(rscratch1, r0, ShenandoahHeapRegion::region_size_bytes_shift_jint());
    __ ldrb(rscratch2, Address(rscratch2, rscratch1));
    __ tbz(rscratch2, 0, L_not_cset);
  }

  __ push_call_clobbered_registers();
  if (is_strong) {
    if (is_narrow) {
      __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong_narrow));
    } else {
      __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong));
    }
  } else if (is_weak) {
    if (is_narrow) {
      __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow));
    } else {
      __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak));
    }
  } else {
    assert(is_phantom, "only remaining strength");
    assert(!is_narrow, "phantom access cannot be narrow");
    // AOT saved adapters need relocation for this call.
    __ lea(lr, RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom)));
  }
  __ blr(lr);
  // Stash the runtime result (r0) across the register restore.
  __ mov(rscratch1, r0);
  __ pop_call_clobbered_registers();
  __ mov(r0, rscratch1);

  __ bind(L_not_cset);

  __ mov(result_dst, r0);
  __ pop(to_save, sp);

  __ bind(L_heap_stable);
  __ leave();
}
304
//
// Load a value of the given BasicType from src into dst, applying the
// load-reference barrier and/or the keep-alive (SATB) barrier as demanded
// by the decorators. Non-reference loads get no extra barriers.
//
// Arguments:
//
// Inputs:
// src: oop location to load from, might be clobbered
//
// Output:
// dst: oop loaded from src location
//
// Kill:
// rscratch1 (scratch reg)
//
// Alias:
// dst: rscratch1 (might use rscratch1 as temporary output register to avoid clobbering src)
//
void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                            Register dst, Address src, Register tmp1, Register tmp2) {
  // 1: non-reference load, no additional barrier is needed
  if (!is_reference_type(type)) {
    BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp2);
    return;
  }

  // 2: load a reference from src location and apply LRB if needed
  if (ShenandoahBarrierSet::need_load_reference_barrier(decorators, type)) {
    Register result_dst = dst;

    // Preserve src location for LRB: the barrier needs the original address,
    // so do not load into a register src's addressing depends on.
    if (dst == src.base() || dst == src.index()) {
      dst = rscratch1;
    }
    assert_different_registers(dst, src.base(), src.index());

    BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp2);

    load_reference_barrier(masm, dst, src, decorators);

    // Move the (possibly healed) reference into the caller-visible register.
    if (dst != result_dst) {
      __ mov(result_dst, dst);
      dst = result_dst;
    }
  } else {
    BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp2);
  }

  // 3: apply keep-alive barrier if needed: enqueue the loaded reference via
  // the SATB barrier so concurrent marking does not lose it. expand_call is
  // true because this may run without a full interpreter frame (see
  // satb_barrier).
  if (ShenandoahBarrierSet::need_keep_alive_barrier(decorators, type)) {
    __ enter(/*strip_ret_addr*/true);
    __ push_call_clobbered_registers();
    satb_barrier(masm /* masm */,
                 noreg /* obj */,
                 dst /* pre_val */,
                 rthread /* thread */,
                 tmp1 /* tmp1 */,
                 tmp2 /* tmp2 */,
                 true /* tosca_live */,
                 true /* expand_call */);
    __ pop_call_clobbered_registers();
    __ leave();
  }
}
366
367 void ShenandoahBarrierSetAssembler::card_barrier(MacroAssembler* masm, Register obj) {
368 assert(ShenandoahCardBarrier, "Should have been checked by caller");
369
370 __ lsr(obj, obj, CardTable::card_shift());
371
372 assert(CardTable::dirty_card_val() == 0, "must be");
373
374 Address curr_ct_holder_addr(rthread, in_bytes(ShenandoahThreadLocalData::card_table_offset()));
375 __ ldr(rscratch1, curr_ct_holder_addr);
376
377 if (UseCondCardMark) {
378 Label L_already_dirty;
379 __ ldrb(rscratch2, Address(obj, rscratch1));
380 __ cbz(rscratch2, L_already_dirty);
381 __ strb(zr, Address(obj, rscratch1));
382 __ bind(L_already_dirty);
383 } else {
384 __ strb(zr, Address(obj, rscratch1));
385 }
386 }
387
// Store a value to a field, wrapping the store with the Shenandoah barriers
// required by the decorators and type:
//   pre  - SATB barrier recording the previous field value (while marking),
//   post - card-marking barrier for the store address.
// tmp3 receives the flattened destination address and is clobbered; the SATB
// barrier additionally clobbers tmp1, tmp2, and rscratch1 when emitted.
void ShenandoahBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                             Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) {
  // 1: non-reference types require no barriers
  if (!is_reference_type(type)) {
    BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2, tmp3);
    return;
  }

  // Flatten object address right away for simplicity: likely needed by barriers
  if (dst.index() == noreg && dst.offset() == 0) {
    if (dst.base() != tmp3) {
      __ mov(tmp3, dst.base());
    }
  } else {
    __ lea(tmp3, dst);
  }

  // val == noreg encodes storing null (see uses below).
  bool storing_non_null = (val != noreg);

  // 2: pre-barrier: SATB needs the previous value
  if (ShenandoahBarrierSet::need_satb_barrier(decorators, type)) {
    satb_barrier(masm,
                 tmp3 /* obj */,
                 tmp2 /* pre_val */,
                 rthread /* thread */,
                 tmp1 /* tmp */,
                 rscratch1 /* tmp2 */,
                 storing_non_null /* tosca_live */,
                 false /* expand_call */);
  }

  // Store!
  BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), val, noreg, noreg, noreg);

  // 3: post-barrier: card barrier needs store address.
  // A null store creates no cross-region reference, so no card mark is needed.
  if (ShenandoahBarrierSet::need_card_barrier(decorators, type) && storing_non_null) {
    card_barrier(masm, tmp3);
  }
}
427
428 void ShenandoahBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
429 Register obj, Register tmp, Label& L_slowpath) {
430 Label L_done;
431 // Resolve jobject
432 BarrierSetAssembler::try_resolve_jobject_in_native(masm, jni_env, obj, tmp, L_slowpath);
433
434 // Check for null.
435 __ cbz(obj, L_done);
436
437 assert(obj != rscratch2, "need rscratch2");
438 Address gc_state(jni_env, ShenandoahThreadLocalData::gc_state_offset() - JavaThread::jni_environment_offset());
439 __ lea(rscratch2, gc_state);
440 __ ldrb(rscratch2, Address(rscratch2));
441
442 // Check for heap in evacuation phase
443 __ tbnz(rscratch2, ShenandoahHeap::EVACUATION_BITPOS, L_slowpath);
444
445 __ bind(L_done);
446 }
447
// Special Shenandoah CAS implementation that handles false negatives due
// to concurrent evacuation. The service is more complex than a
// traditional CAS operation because the CAS operation is intended to
// succeed if the reference at addr exactly matches expected or if the
// reference at addr holds a pointer to a from-space object that has
// been relocated to the location named by expected. There are two
// races that must be addressed:
// a) A parallel thread may mutate the contents of addr so that it points
// to a different object. In this case, the CAS operation should fail.
// b) A parallel thread may heal the contents of addr, replacing a
// from-space pointer held in addr with the to-space pointer
// representing the new location of the object.
// Upon entry to cmpxchg_oop, it is assured that new_val equals null
// or it refers to an object that is not being evacuated out of
// from-space, or it refers to the to-space version of an object that
// is being evacuated out of from-space.
//
// By default the value held in the result register following execution
// of the generated code sequence is 0 to indicate failure of CAS,
// non-zero to indicate success. If is_cae, the result is the value most
// recently fetched from addr rather than a boolean success indicator.
//
// Clobbers rscratch1, rscratch2
void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm,
                                                Register addr,
                                                Register expected,
                                                Register new_val,
                                                bool acquire, bool release,
                                                bool is_cae,
                                                Register result) {
  Register tmp1 = rscratch1;
  Register tmp2 = rscratch2;
  bool is_narrow = UseCompressedOops;
  Assembler::operand_size size = is_narrow ? Assembler::word : Assembler::xword;

  assert_different_registers(addr, expected, tmp1, tmp2);
  assert_different_registers(addr, new_val, tmp1, tmp2);

  Label L_step4, L_done;

  // There are two ways to reach this label. Initial entry into the
  // cmpxchg_oop code expansion starts at step1 (which is equivalent
  // to label step4). Additionally, in the rare case that four steps
  // are required to perform the requested operation, the fourth step
  // is the same as the first. On a second pass through step 1,
  // control may flow through step 2 on its way to failure. It will
  // not flow from step 2 to step 3 since we are assured that the
  // memory at addr no longer holds a from-space pointer.
  //
  // The comments that immediately follow the step4 label apply only
  // to the case in which control reaches this label by branch from
  // step 3.

  __ bind (L_step4);

  // Step 4. CAS has failed because the value most recently fetched
  // from addr is no longer the from-space pointer held in tmp2. If a
  // different thread replaced the in-memory value with its equivalent
  // to-space pointer, then CAS may still be able to succeed. The
  // value held in the expected register has not changed.
  //
  // It is extremely rare we reach this point. For this reason, the
  // implementation opts for smaller rather than potentially faster
  // code. Ultimately, smaller code for this rare case most likely
  // delivers higher overall throughput by enabling improved icache
  // performance.

  // Step 1. Fast-path.
  //
  // Try to CAS with given arguments. If successful, then we are done.
  //
  // No label required for step 1.

  __ cmpxchg(addr, expected, new_val, size, acquire, release, false, tmp2);
  // EQ flag set iff success. tmp2 holds value fetched.

  // If expected equals null but tmp2 does not equal null, the
  // following branches to done to report failure of CAS. If both
  // expected and tmp2 equal null, the following branches to done to
  // report success of CAS. There's no need for a special test of
  // expected equal to null.

  __ br(Assembler::EQ, L_done);
  // if CAS failed, fall through to step 2

  // Step 2. CAS has failed because the value held at addr does not
  // match expected. This may be a false negative because the value fetched
  // from addr (now held in tmp2) may be a from-space pointer to the
  // original copy of same object referenced by to-space pointer expected.
  //
  // To resolve this, it suffices to find the forward pointer associated
  // with fetched value. If this matches expected, retry CAS with new
  // parameters. If this mismatches, then we have a legitimate
  // failure, and we're done.
  //
  // No need for step2 label.

  // overwrite tmp1 with from-space pointer fetched from memory
  __ mov(tmp1, tmp2);

  if (is_narrow) {
    // Decode tmp1 in order to resolve its forward pointer
    __ decode_heap_oop(tmp1, tmp1);
  }
  resolve_forward_pointer(masm, tmp1);
  // Encode tmp1 to compare against expected.
  __ encode_heap_oop(tmp1, tmp1);

  // Does forwarded value of fetched from-space pointer match original
  // value of expected? If tmp1 holds null, this comparison will fail
  // because we know from step1 that expected is not null. There is
  // no need for a separate test for tmp1 (the value originally held
  // in memory) equal to null.
  __ cmp(tmp1, expected);

  // If not, then the failure was legitimate and we're done.
  // Branching to done with NE condition denotes failure.
  __ br(Assembler::NE, L_done);

  // Fall through to step 3. No need for step3 label.

  // Step 3. We've confirmed that the value originally held in memory
  // (now held in tmp2) pointed to from-space version of original
  // expected value. Try the CAS again with the from-space expected
  // value. If it now succeeds, we're good.
  //
  // Note: tmp2 holds encoded from-space pointer that matches to-space
  // object residing at expected. tmp2 is the new "expected".

  // Note that macro implementation of __cmpxchg cannot use same register
  // tmp2 for result and expected since it overwrites result before it
  // compares result with expected.
  __ cmpxchg(addr, tmp2, new_val, size, acquire, release, false, noreg);
  // EQ flag set iff success. tmp2 holds value fetched, tmp1 (rscratch1) clobbered.

  // If fetched value did not equal the new expected, this could
  // still be a false negative because some other thread may have
  // newly overwritten the memory value with its to-space equivalent.
  __ br(Assembler::NE, L_step4);

  if (is_cae) {
    // We're falling through to done to indicate success. Success
    // with is_cae is denoted by returning the value of expected as
    // result.
    __ mov(tmp2, expected);
  }

  __ bind(L_done);
  // At entry to done, the Z (EQ) flag is on iff the CAS
  // operation was successful. Additionally, if is_cae, tmp2 holds
  // the value most recently fetched from addr. In this case, success
  // is denoted by tmp2 matching expected.

  if (is_cae) {
    __ mov(result, tmp2);
  } else {
    __ cset(result, Assembler::EQ);
  }
}
607
608 #ifdef COMPILER2
// Emit the GC-state fast-path check for a C2 barrier: load the thread-local
// gc_state byte and branch to slow_stub->entry() when any bit in test_state
// is set; otherwise fall through to slow_stub->continuation(). Clobbers
// rscratch. test_state may only combine MARKING, HAS_FORWARDED and
// WEAK_ROOTS (asserted below).
void ShenandoahBarrierSetAssembler::gc_state_check_c2(MacroAssembler* masm, Register rscratch, const unsigned char test_state, BarrierStubC2* slow_stub) {
  if (ShenandoahGCStateCheckRemove) {
    // Unrealistic: remove all barrier fastpath checks.
  } else if (ShenandoahGCStateCheckHotpatch) {
    // In the ideal world, we would hot-patch the branch to slow stub with a single
    // (unconditional) jump or nop, based on our current GC state.
    // FIXME: we may need more than one nop. to discuss.
    __ nop();
  } else {
#ifdef ASSERT
    const unsigned char allowed = (unsigned char)(ShenandoahHeap::MARKING | ShenandoahHeap::HAS_FORWARDED | ShenandoahHeap::WEAK_ROOTS);
    const unsigned char only_valid_flags = test_state & (unsigned char) ~allowed;
    assert(test_state > 0x0, "Invalid test_state asked: %x", test_state);
    assert(only_valid_flags == 0x0, "Invalid test_state asked: %x", test_state);
#endif

    Label L_short_branch;

    // Single-bit masks can be tested with one tbz. Multi-bit masks use
    // ands/cbz, but the WEAK_ROOTS bit has to be tested separately (see the
    // comment in the final branch below).
    bool one_bit = (test_state & (test_state - 1)) == 0;
    char no_weak_set = (test_state & (~ShenandoahHeap::WEAK_ROOTS));

    Address gcs_addr(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
    __ ldrb(rscratch, gcs_addr);

    // if only one bit is required then we can always use tbz
    if (one_bit) {
      // NOTE(review): __builtin_ctz is a gcc/clang builtin — presumably all
      // toolchains building this port provide it; confirm for new compilers.
      int bit = __builtin_ctz((unsigned)test_state);
      __ tbz(rscratch, bit, *slow_stub->continuation());
    } else if (no_weak_set == test_state) {
      __ ands(rscratch, rscratch, test_state);
      __ cbz(rscratch, *slow_stub->continuation());
    } else {
      // One single 'ands' isn't possible because weak is set, making the
      // immediate pattern invalid. One single tbz/tbnz doesn't work because we
      // have 2 or more bits set.
      //
      // We'll tackle this by breaking the problem in two parts. First we only
      // check for weak_roots and then we check for the other flags using
      // 'ands' without the weak bit set.
      __ tbnz(rscratch, ShenandoahHeap::WEAK_ROOTS_BITPOS, L_short_branch);

      // We cleared the weak bit earlier on
      __ ands(rscratch, rscratch, no_weak_set);
      __ cbz(rscratch, *slow_stub->continuation());
    }

    __ bind(L_short_branch);
    __ b(*slow_stub->entry());

    // This is where the stub will return to or the code above will jump to if
    // the checks are false
    __ bind(*slow_stub->continuation());
  }
}
663
/**
 * The logic implemented here relies on certain flags being on specific
 * positions of the GCState. Also note that all pointer values in register are
 * guaranteed to be 'to-space' addresses. The algorithm is as follows. If the
 * CAS succeeds:
 * - 'res' will be set to 1.
 * - We need to check the SATB flag (index 1 of GCState). If the flag is active
 *   we need to store 'oldval' in the buffer.
 * - We wrote 'newval' to 'addr', therefore we need to mark the corresponding
 *   card in the card table for 'addr' as dirty.
 * If the CAS failed:
 * - 'res' will be set to 0.
 * - If the GCState FORWARDING bit (index 0 of GCState) is set we'll need to
 *   retry the CAS, because the failure may be because the value in 'addr' is
 *   the (outdated) 'from-space' version of 'expected'. The retry is done in a
 *   stub. If the retry succeeds then we need to perform the same steps as
 *   described above for a successful CAS.
 * - If the FORWARDING bit is clear there is nothing else to do.
 */
void ShenandoahBarrierSetAssembler::cae_c2(const MachNode* node, MacroAssembler* masm, Register res, Register addr, Register oldval, Register newval, bool exchange, bool maybe_null, bool narrow, bool acquire, bool release, bool weak) {
  Assembler::operand_size op_size = narrow ? Assembler::word : Assembler::xword;

  // Issue cmpxchg first, res will have the failure witness if CAS fails.
  // For compare-and-exchange the fetched value goes to res; for boolean
  // compare-and-swap it goes to rscratch2.
  __ cmpxchg(addr, oldval, newval, op_size, acquire, release, weak, exchange ? res : rscratch2);

  // First CAS attempt. If successful, then we are done.
  // EQ flag set iff success.
  __ cset(exchange ? rscratch2 : res, Assembler::EQ);

  if (!ShenandoahSkipBarriers && (ShenandoahCASBarrierStubC2::needs_barrier(node) || ShenandoahStoreBarrierStubC2::needs_card_barrier(node))) {
    Assembler::InlineSkippedInstructionsCounter skip_counter(masm);

    if (ShenandoahCASBarrierStubC2::needs_barrier(node)) {
      ShenandoahCASBarrierStubC2* stub = ShenandoahCASBarrierStubC2::create(node, addr, oldval, newval, res, narrow, exchange, maybe_null, acquire, release, weak);

      // Enter the stub when marking (SATB handling) and/or when forwarded
      // objects may exist (CAS retry with the from-space value); see the
      // comment block above.
      char check = 0;
      check |= ShenandoahLoadBarrierStubC2::needs_keep_alive_barrier(node) ? ShenandoahHeap::MARKING : 0;
      check |= ShenandoahLoadBarrierStubC2::needs_load_ref_barrier(node) ? ShenandoahHeap::HAS_FORWARDED : 0;
      gc_state_check_c2(masm, rscratch1, check, stub);
    }

    if (ShenandoahStoreBarrierStubC2::needs_card_barrier(node)) {
      if (exchange) {
        // For CAE, derive a success flag: fetched value (res) == oldval.
        __ cmp(res, oldval);
        __ cset(rscratch2, Assembler::EQ);
      }
      card_barrier_c2(node, masm, addr, exchange ? rscratch2 : res);
    }
  }
}
714
715 void ShenandoahBarrierSetAssembler::get_and_set_c2(const MachNode* node, MacroAssembler* masm, Register preval, Register newval, Register addr, bool acquire) {
716 if (node->bottom_type()->isa_narrowoop()) {
717 if (acquire) {
718 __ atomic_xchgalw(preval, newval, addr);
719 } else {
720 __ atomic_xchgw(preval, newval, addr);
721 }
722 } else {
723 if (acquire) {
724 __ atomic_xchgal(preval, newval, addr);
725 } else {
726 __ atomic_xchg(preval, newval, addr);
727 }
728 }
729
730 if (!ShenandoahSkipBarriers && (ShenandoahLoadBarrierStubC2::needs_barrier(node) || ShenandoahStoreBarrierStubC2::needs_card_barrier(node))) {
731 Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
732
733 if (ShenandoahLoadBarrierStubC2::needs_barrier(node)) {
734 ShenandoahLoadBarrierStubC2* const stub = ShenandoahLoadBarrierStubC2::create(node, preval, addr);
735
736 char check = 0;
737 check |= ShenandoahLoadBarrierStubC2::needs_keep_alive_barrier(node) ? ShenandoahHeap::MARKING : 0;
738 check |= ShenandoahLoadBarrierStubC2::needs_load_ref_barrier(node) ? ShenandoahHeap::HAS_FORWARDED : 0;
739 check |= ShenandoahLoadBarrierStubC2::needs_load_ref_barrier_weak(node) ? ShenandoahHeap::WEAK_ROOTS : 0;
740 gc_state_check_c2(masm, rscratch1, check, stub);
741 }
742
743 if (ShenandoahStoreBarrierStubC2::needs_card_barrier(node)) {
744 card_barrier_c2(node, masm, addr, noreg);
745 }
746 }
747 }
748
// C2 oop store: emit the pre-store barriers (SATB keep-alive check and/or
// card mark, per the node's barrier data), then the store itself. Handles
// narrow/wide destination and source, and volatile ordering via stlr(w).
// May clobber rscratch1 when the source needs encoding.
void ShenandoahBarrierSetAssembler::store_c2(const MachNode* node, MacroAssembler* masm,
                                             Register addr, bool dst_narrow,
                                             Register src, bool src_narrow,
                                             bool is_volatile) {

  // Emit barrier if needed
  if (!ShenandoahSkipBarriers && ShenandoahStoreBarrierStubC2::needs_barrier(node)) {
    Assembler::InlineSkippedInstructionsCounter skip_counter(masm);

    if (ShenandoahStoreBarrierStubC2::needs_keep_alive_barrier(node)) {
      ShenandoahStoreBarrierStubC2* const stub = ShenandoahStoreBarrierStubC2::create(node, addr, dst_narrow);

      // Only the MARKING bit matters for the SATB pre-store barrier.
      gc_state_check_c2(masm, rscratch1, ShenandoahHeap::MARKING, stub);
    }

    if (ShenandoahStoreBarrierStubC2::needs_card_barrier(node)) {
      card_barrier_c2(node, masm, addr, noreg);
    }
  }

  // Do the actual store
  if (dst_narrow) {
    if (!src_narrow) {
      // Need to encode into rscratch, because we cannot clobber src.
      // TODO: Maybe there is a matcher way to test that src is unused after this?
      __ mov(rscratch1, src);
      if (ShenandoahStoreBarrierStubC2::src_not_null(node)) {
        __ encode_heap_oop_not_null(rscratch1);
      } else {
        __ encode_heap_oop(rscratch1);
      }
      src = rscratch1;
    }

    if (is_volatile) {
      __ stlrw(src, addr);
    } else {
      __ strw(src, addr);
    }
  } else {
    if (is_volatile) {
      __ stlr(src, addr);
    } else {
      __ str(src, addr);
    }
  }
}
796
// C2 oop load: perform the (possibly acquiring, possibly narrow) load into
// dst, then emit the GC-state check that dispatches to the load barrier stub
// while marking / forwarding / weak-root processing is active.
void ShenandoahBarrierSetAssembler::load_c2(const MachNode* node, MacroAssembler* masm,
                                            Register dst, Register addr, bool acquire) {
  if (node->bottom_type()->isa_narrowoop()) {
    if (acquire) {
      __ ldarw(dst, addr);
    } else {
      __ ldrw(dst, addr);
    }
  } else {
    if (acquire) {
      __ ldar(dst, addr);
    } else {
      __ ldr(dst, addr);
    }
  }

  if (!ShenandoahSkipBarriers && ShenandoahLoadBarrierStubC2::needs_barrier(node)) {
    Assembler::InlineSkippedInstructionsCounter skip_counter(masm);

    ShenandoahLoadBarrierStubC2* const stub = ShenandoahLoadBarrierStubC2::create(node, dst, addr);
    // NOTE(review): preserve()/dont_preserve() presumably control which
    // registers the stub saves — addr is kept live, dst is the stub's
    // output; confirm against the stub implementation.
    stub->preserve(addr);
    stub->dont_preserve(dst);

    // Accumulate the gc-state bits this load cares about.
    char check = 0;
    check |= ShenandoahLoadBarrierStubC2::needs_keep_alive_barrier(node) ? ShenandoahHeap::MARKING : 0;
    check |= ShenandoahLoadBarrierStubC2::needs_load_ref_barrier(node) ? ShenandoahHeap::HAS_FORWARDED : 0;
    check |= ShenandoahLoadBarrierStubC2::needs_load_ref_barrier_weak(node) ? ShenandoahHeap::WEAK_ROOTS : 0;
    gc_state_check_c2(masm, rscratch1, check, stub);
  }
}
827
// Emits the inline card-mark for a C2 oop store at addr: computes the card
// index, adds the thread-local card-table base, and stores the dirty value
// (0). If cond is a real register, the mark is skipped when cond is zero.
// No-op unless the node's barrier data carries ShenandoahBitCardMark.
// Clobbers rscratch1 and rscratch2.
void ShenandoahBarrierSetAssembler::card_barrier_c2(const MachNode* node, MacroAssembler* masm, Register addr, Register cond) {
  if ((node->barrier_data() & ShenandoahBitCardMark) == 0) {
    return;
  }

  // The code below stores zr to dirty a card, so the dirty value must be 0.
  assert(CardTable::dirty_card_val() == 0, "must be");
  Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
  Label L_skip;

  // Optional guard: only mark the card when cond is non-zero.
  if (cond != noreg) {
    __ cbz(cond, L_skip);
  }

  // rscratch2 = addr >> CardTable::card_shift()
  __ lsr(rscratch2, addr, CardTable::card_shift());

  // rscratch1 = card table base (holder)
  Address curr_ct_holder_addr(rthread, in_bytes(ShenandoahThreadLocalData::card_table_offset()));
  __ ldr(rscratch1, curr_ct_holder_addr);

  // rscratch2 = &card_table[card_index]
  __ add(rscratch2, rscratch1, rscratch2);

  if (UseCondCardMark) {
    // Only write when the card is not already dirty, to avoid redundant
    // stores to hot cards.
    Label L_already_dirty;
    __ ldrb(rscratch1, Address(rscratch2));
    __ cbz(rscratch1, L_already_dirty);
    __ strb(zr, Address(rscratch2));
    __ bind(L_already_dirty);
  } else {
    __ strb(zr, Address(rscratch2));
  }
  __ bind(L_skip);
}
862 #undef __
863 #define __ masm.
864
// Out-of-line slow path for a C2 oop store: obtains the previous value at
// the store address and SATB-enqueues it (skipping null). Entered from the
// inline gc-state check when marking is active; returns to continuation().
void ShenandoahStoreBarrierStubC2::emit_code(MacroAssembler& masm) {
  Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
  __ bind(*entry());

  Label L_done;

  // We'll use "_addr_reg" register as third scratch register
  assert(_addr_reg != noreg, "should be");
  RegSet saved = RegSet::of(_addr_reg);
  Register rscratch3 = _addr_reg;
  __ push(saved, sp);

  // Do we need to load the previous value?
  // NOTE(review): the assert above guarantees _addr_reg != noreg, which
  // makes the else branch below unreachable (in debug builds at least).
  // Confirm whether this condition was meant to test something else, e.g.
  // whether the previous value was already supplied in a register.
  if (_addr_reg != noreg) {
    // Load the previous value from the store address; a null previous value
    // needs no SATB enqueue.
    __ load_heap_oop(rscratch3, Address(rscratch3, 0), noreg, noreg, AS_RAW);
    // FIXME: We can merge this on the load above
    __ cbz(rscratch3, L_done);
  } else {
    if (_dst_narrow) {
      __ decode_heap_oop(rscratch3, &L_done);
    } else {
      __ cbz(rscratch3, L_done);
    }
  }

  satb(&masm, this, rscratch1, rscratch2, rscratch3, &L_done);

  __ bind(L_done);
  __ pop(saved, sp);
  __ b(*continuation());
}
896
// Out-of-line slow path for a C2 oop load: applies the SATB (keep-alive)
// barrier and/or the load-reference barrier to the freshly loaded value in
// _dst. Each part re-checks its gc-state bit so only the currently active
// barrier runs. Narrow values are decoded on entry and re-encoded on exit,
// so both barriers operate on full-width oops.
void ShenandoahLoadBarrierStubC2::emit_code(MacroAssembler& masm) {
  Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);

  __ bind(*entry());

  Label L_lrb;

  if (_narrow) {
    if (_maybe_null) {
      // Decode; branches to L_lrb (skipping the SATB enqueue) for null.
      __ decode_heap_oop(_dst, &L_lrb);
    } else {
      __ decode_heap_oop_not_null(_dst);
    }
  } else {
    // Null values skip the SATB enqueue.
    __ cbz(_dst, L_lrb);
  }

  { // SATB
    // Re-check MARKING: the inline check may have entered the stub for a
    // different reason (e.g. HAS_FORWARDED).
    Address gcs_addr(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
    __ ldrb(rscratch1, gcs_addr);
    __ tbz(rscratch1, ShenandoahHeap::MARKING_BITPOS, L_lrb);

    // _dst still holds the value after the SATB runtime call, so it must be
    // preserved across it.
    preserve(_dst);
    satb(&masm, this, rscratch1, rscratch2, _dst, &L_lrb);
  }

  __ bind(L_lrb); { // LRB
    Label L_lrb_end;

    if ((_node->barrier_data() & ShenandoahBitStrong) != 0) {
      // Strong accesses only need the LRB while there are forwarded objects;
      // weak/phantom accesses go to lrb() unconditionally.
      Address gcs_addr(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
      __ ldrb(rscratch1, gcs_addr);
      __ tbz(rscratch1, ShenandoahHeap::HAS_FORWARDED_BITPOS, L_lrb_end);
    }

    // The LRB overwrites _dst with the healed oop, so it need not be saved
    // across the runtime call.
    dont_preserve(_dst);
    lrb(&masm, this, _dst, _addr_reg, &L_lrb_end, _narrow);

    __ bind(L_lrb_end);
  }

  // Restore the representation the inline code expects.
  if (_narrow) {
    if (_maybe_null) {
      __ encode_heap_oop(_dst);
    } else {
      __ encode_heap_oop_not_null(_dst);
    }
  }

  __ b(*continuation());
}
948
949 void ShenandoahCASBarrierStubC2::emit_code(MacroAssembler& masm) {
950 Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
951
952 __ bind(*entry());
953
954 // Non-strong references should always go to runtime. We do not expect
955 // CASes over non-strong locations.
956 assert((_node->barrier_data() & ShenandoahBitStrong) != 0, "Only strong references for CASes");
957
958 Label L_final;
959 Label L_succeded;
960
961 // check if first CAS succeded, if it did we just need to write to SATB
962 __ cbnz(_cae ? rscratch2 : _result, L_succeded);
963
964
965 // LRB + CAS Retry
966 // First CAS attempt did not succeed. Execute LRB on 'addr' and retry CAS.
967 if (!_cae) {
968 __ mov(_result, rscratch2);
969 }
970
971 // [Compressed] failure witness is in _result. Decode it and check
972 // if it is in collection set.
973 if (_narrow) {
974 __ decode_heap_oop(_result);
975 }
976
977 lrb(&masm, this, _result, _addr_reg, &L_final, _narrow);
978
979 __ bind(L_final);
980
981 Assembler::operand_size size = _narrow ? Assembler::word : Assembler::xword;
982 __ cmpxchg(_addr_reg, _expected, _new_val, size, _acquire, _release, _weak, _result);
983
984 if (!_cae) {
985 __ cset(_result, Assembler::EQ);
986 }
987 // If the retry did not succeed skip SATB
988 __ br(Assembler::NE, *continuation());
989
990
991
992
993 // SATB
994 __ bind(L_succeded);
995 Label short_branch;
996 Label L_done;
997
998 Address gcs_addr(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
999 __ ldrb(rscratch1, gcs_addr);
1000 __ tbnz(rscratch1, ShenandoahHeap::MARKING_BITPOS, short_branch);
1001 __ b(*continuation());
1002 __ bind(short_branch);
1003
1004 // We'll use "_addr_reg" register as third scratch register
1005 assert(_addr_reg != noreg, "should be");
1006 RegSet saved = RegSet::of(_addr_reg);
1007 Register rscratch3 = _addr_reg;
1008 __ push(saved, sp);
1009
1010 if (_narrow) {
1011 __ decode_heap_oop(rscratch3, _expected, &L_done);
1012 } else {
1013 __ mov(rscratch3, _expected);
1014 __ cbz(rscratch3, L_done);
1015 }
1016
1017 satb(&masm, this, rscratch1, rscratch2, rscratch3, &L_done);
1018
1019 __ bind(L_done);
1020 __ pop(saved, sp);
1021
1022 __ b(*continuation());
1023 }
1024
// Emits the SATB enqueue for the previous value held in scratch3: pushes it
// onto the thread-local SATB mark queue, falling back to a runtime call
// (ShenandoahRuntime::write_barrier_pre) when the queue is full. Branches
// to L_done on the fast path; the runtime path falls through. Clobbers
// scratch1 and scratch2.
void ShenandoahBarrierStubC2::satb(MacroAssembler* masm, ShenandoahBarrierStubC2* stub, Register scratch1, Register scratch2, Register scratch3, Label* L_done) {
  Address index(rthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
  Address buffer(rthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
  Label L_runtime;

  // If buffer is full, call into runtime. (The index counts down in bytes;
  // zero means no space left.)
  masm->ldr(scratch1, index);
  masm->cbz(scratch1, L_runtime);

  // The buffer is not full, store value into it.
  masm->sub(scratch1, scratch1, wordSize);
  masm->str(scratch1, index);
  masm->ldr(scratch2, buffer);
  masm->str(scratch3, Address(scratch2, scratch1));
  masm->b(*L_done);

  // Runtime call
  masm->bind(L_runtime);
  {
    // Save only the registers the stub reports as live across the call.
    SaveLiveRegisters save_registers(masm, stub);
    masm->mov(c_rarg0, scratch3);
    masm->mov(scratch1, CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_barrier_pre));
    masm->blr(scratch1);
  }
}
1050
// Emits the load-reference-barrier slow path for obj (a decoded, non-null
// oop). Strong accesses first run the in-cset fast test and branch to
// L_done when the object is outside the collection set; weak/phantom
// accesses always call into the runtime. The healed oop is returned in r0
// and copied back into obj. Clobbers rscratch1 and rscratch2.
void ShenandoahBarrierStubC2::lrb(MacroAssembler* masm, ShenandoahBarrierStubC2* stub, Register obj, Register addr, Label* L_done, bool narrow) {
  // Weak/phantom loads always need to go to runtime, otherwise check for
  // object in cset.
  if ((_node->barrier_data() & ShenandoahBitStrong) != 0) {
    // The in-cset table is one byte per region, indexed by region number.
    masm->mov(rscratch2, ShenandoahHeap::in_cset_fast_test_addr());
    masm->lsr(rscratch1, obj, ShenandoahHeapRegion::region_size_bytes_shift_jint());
    masm->ldrb(rscratch2, Address(rscratch2, rscratch1));
    masm->cbz(rscratch2, *L_done);
  }

  {
    SaveLiveRegisters save_registers(masm, stub);
    assert(obj != addr, "sanity address and obj can't be the same.");
    assert(c_rarg0 != addr, "need to be separate registers, otherwise we override data.");
    assert(c_rarg1 != obj, "sanity");

    masm->mov(c_rarg0, obj);
    masm->mov(c_rarg1, addr);

    // Select the runtime entry matching the access strength and the slot
    // width (narrow vs. wide).
    // NOTE(review): if none of the strength bits is set, rscratch1 is left
    // uninitialized for the blr below — presumably exactly one bit is always
    // set; confirm, or consider a trailing ShouldNotReachHere-style guard.
    if (narrow) {
      if ((_node->barrier_data() & ShenandoahBitStrong) != 0) {
        masm->mov(rscratch1, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong_narrow));
      } else if ((_node->barrier_data() & ShenandoahBitWeak) != 0) {
        masm->mov(rscratch1, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow));
      } else if ((_node->barrier_data() & ShenandoahBitPhantom) != 0) {
        masm->mov(rscratch1, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom_narrow));
      }
    } else {
      if ((_node->barrier_data() & ShenandoahBitStrong) != 0) {
        masm->mov(rscratch1, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong));
      } else if ((_node->barrier_data() & ShenandoahBitWeak) != 0) {
        masm->mov(rscratch1, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak));
      } else if ((_node->barrier_data() & ShenandoahBitPhantom) != 0) {
        masm->mov(rscratch1, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom));
      }
    }
    masm->blr(rscratch1);
    // The runtime returns the healed oop in r0.
    masm->mov(obj, r0);
  }
}
1091
1092 #undef __
1093 #define __ masm->
1094 #endif // COMPILER2
1095
// Post-barrier for an oop array copy: dirties every card covering the range
// [start, start + count * BytesPerHeapOop). start and count are consumed
// (reused as the first card address and a descending card-offset counter);
// scratch is clobbered.
void ShenandoahBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
                                                                     Register start, Register count, Register scratch) {
  assert(ShenandoahCardBarrier, "Should have been checked by caller");

  Label L_loop, L_done;
  const Register end = count;

  // Zero count? Nothing to do.
  __ cbz(count, L_done);

  // end = start + count << LogBytesPerHeapOop
  // last element address to make inclusive
  __ lea(end, Address(start, count, Address::lsl(LogBytesPerHeapOop)));
  __ sub(end, end, BytesPerHeapOop);
  // Convert both bounds to card indices.
  __ lsr(start, start, CardTable::card_shift());
  __ lsr(end, end, CardTable::card_shift());

  // number of bytes to copy
  __ sub(count, end, start);

  // Card table base is read from the thread-local holder slot.
  Address curr_ct_holder_addr(rthread, in_bytes(ShenandoahThreadLocalData::card_table_offset()));
  __ ldr(scratch, curr_ct_holder_addr);
  __ add(start, start, scratch);
  __ bind(L_loop);
  // Store the dirty value (0) to each card, from the last offset down to 0
  // inclusive.
  __ strb(zr, Address(start, count));
  __ subs(count, count, 1);
  __ br(Assembler::GE, L_loop);
  __ bind(L_done);
}
1125
1126 #undef __
1127
1128 #ifdef COMPILER1
1129
1130 #define __ ce->masm()->
1131
// C1 slow path for the SATB pre-barrier stub: (re)loads the previous value
// if the stub requests it, skips the enqueue for null, and otherwise passes
// the previous value as parameter 0 to the shared pre-barrier runtime blob.
void ShenandoahBarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub) {
  ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
  // At this point we know that marking is in progress.
  // If do_load() is true then we have to emit the
  // load of the previous value; otherwise it has already
  // been loaded into _pre_val.

  __ bind(*stub->entry());

  assert(stub->pre_val()->is_register(), "Precondition.");

  Register pre_val_reg = stub->pre_val()->as_register();

  if (stub->do_load()) {
    ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/);
  }
  // A null previous value needs no SATB enqueue.
  __ cbz(pre_val_reg, *stub->continuation());
  ce->store_parameter(stub->pre_val()->as_register(), 0);
  __ far_call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin()));
  __ b(*stub->continuation());
}
1153
// C1 slow path for the load-reference barrier stub: moves the loaded object
// into the result register (which must be r0, matching the runtime blobs'
// return convention), runs the in-cset fast test for strong accesses, and
// dispatches to the runtime blob matching the access strength/nativeness.
void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub) {
  ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
  __ bind(*stub->entry());

  DecoratorSet decorators = stub->decorators();
  bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators);
  bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators);
  bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators);
  bool is_native = ShenandoahBarrierSet::is_native_access(decorators);

  Register obj = stub->obj()->as_register();
  Register res = stub->result()->as_register();
  Register addr = stub->addr()->as_pointer_register();
  Register tmp1 = stub->tmp1()->as_register();
  Register tmp2 = stub->tmp2()->as_register();

  assert(res == r0, "result must arrive in r0");

  if (res != obj) {
    __ mov(res, obj);
  }

  if (is_strong) {
    // Check for object in cset: objects outside the collection set need no
    // healing, so return immediately. Weak/phantom accesses always call in.
    __ mov(tmp2, ShenandoahHeap::in_cset_fast_test_addr());
    __ lsr(tmp1, res, ShenandoahHeapRegion::region_size_bytes_shift_jint());
    __ ldrb(tmp2, Address(tmp2, tmp1));
    __ cbz(tmp2, *stub->continuation());
  }

  // Parameters for the runtime blob: 0 = object to heal, 1 = slot address.
  ce->store_parameter(res, 0);
  ce->store_parameter(addr, 1);
  if (is_strong) {
    if (is_native) {
      __ far_call(RuntimeAddress(bs->load_reference_barrier_strong_native_rt_code_blob()->code_begin()));
    } else {
      __ far_call(RuntimeAddress(bs->load_reference_barrier_strong_rt_code_blob()->code_begin()));
    }
  } else if (is_weak) {
    __ far_call(RuntimeAddress(bs->load_reference_barrier_weak_rt_code_blob()->code_begin()));
  } else {
    assert(is_phantom, "only remaining strength");
    __ far_call(RuntimeAddress(bs->load_reference_barrier_phantom_rt_code_blob()->code_begin()));
  }

  __ b(*stub->continuation());
}
1201
1202 #undef __
1203
1204 #define __ sasm->
1205
// Generates the shared C1 runtime blob for the SATB pre-barrier: re-checks
// that marking is still active, then enqueues the previous value (stub
// parameter 0) on the thread-local SATB queue, calling into
// ShenandoahRuntime::write_barrier_pre when the queue is full.
void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
  __ prologue("shenandoah_pre_barrier", false);

  // arg0 : previous value of memory

  BarrierSet* bs = BarrierSet::barrier_set();

  const Register pre_val = r0;
  const Register thread = rthread;
  const Register tmp = rscratch1;

  Address queue_index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
  Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));

  Label L_done;
  Label L_runtime;

  // Is marking still active?
  Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
  __ ldrb(tmp, gc_state);
  __ tbz(tmp, ShenandoahHeap::MARKING_BITPOS, L_done);

  // Can we store original value in the thread's buffer?
  // (Queue index counts down in bytes; zero means the buffer is full.)
  __ ldr(tmp, queue_index);
  __ cbz(tmp, L_runtime);

  __ sub(tmp, tmp, wordSize);
  __ str(tmp, queue_index);
  __ ldr(rscratch2, buffer);
  __ add(tmp, tmp, rscratch2);
  __ load_parameter(0, rscratch2);
  __ str(rscratch2, Address(tmp, 0));
  __ b(L_done);

  // Slow path: hand the previous value to the runtime.
  __ bind(L_runtime);
  __ push_call_clobbered_registers();
  __ load_parameter(0, pre_val);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_barrier_pre), pre_val);
  __ pop_call_clobbered_registers();
  __ bind(L_done);

  __ epilogue();
}
1249
// Generates a C1 runtime blob for the load-reference barrier with the given
// access decorators: loads the object (parameter 0) and slot address
// (parameter 1), calls the matching ShenandoahRuntime LRB entry, and
// returns the healed oop in r0.
void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, DecoratorSet decorators) {
  __ prologue("shenandoah_load_reference_barrier", false);
  // arg0 : object to be resolved

  __ push_call_clobbered_registers();
  __ load_parameter(0, r0);
  __ load_parameter(1, r1);

  // Select the runtime entry by strength; the *_narrow variants are used for
  // on-heap slots when compressed oops are enabled. Native (off-heap)
  // accesses take the non-narrow entry — off-heap slots presumably hold
  // uncompressed oops.
  bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators);
  bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators);
  bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators);
  bool is_native = ShenandoahBarrierSet::is_native_access(decorators);
  if (is_strong) {
    if (is_native) {
      __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong));
    } else {
      if (UseCompressedOops) {
        __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong_narrow));
      } else {
        __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong));
      }
    }
  } else if (is_weak) {
    assert(!is_native, "weak must not be called off-heap");
    if (UseCompressedOops) {
      __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow));
    } else {
      __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak));
    }
  } else {
    assert(is_phantom, "only remaining strength");
    assert(is_native, "phantom must only be called off-heap");
    __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom));
  }
  __ blr(lr);
  // Stash the result in rscratch1 so it survives the register restore, then
  // move it back into r0 for the caller.
  __ mov(rscratch1, r0);
  __ pop_call_clobbered_registers();
  __ mov(r0, rscratch1);

  __ epilogue();
}
1291
1292 #undef __
1293
1294 #endif // COMPILER1