24 */
25
26 #include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp"
27 #include "gc/shenandoah/mode/shenandoahMode.hpp"
28 #include "gc/shenandoah/shenandoahBarrierSet.hpp"
29 #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
30 #include "gc/shenandoah/shenandoahForwarding.hpp"
31 #include "gc/shenandoah/shenandoahHeap.inline.hpp"
32 #include "gc/shenandoah/shenandoahHeapRegion.hpp"
33 #include "gc/shenandoah/shenandoahRuntime.hpp"
34 #include "gc/shenandoah/shenandoahThreadLocalData.hpp"
35 #include "interpreter/interp_masm.hpp"
36 #include "interpreter/interpreter.hpp"
37 #include "runtime/javaThread.hpp"
38 #include "runtime/sharedRuntime.hpp"
39 #ifdef COMPILER1
40 #include "c1/c1_LIRAssembler.hpp"
41 #include "c1/c1_MacroAssembler.hpp"
42 #include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp"
43 #endif
44
45 #define __ masm->
46
// Pre-barrier for an oop arraycopy. When a GC cycle is active (marking and/or
// evacuation, depending on which barriers are enabled), calls into the runtime
// so the copied range can be processed before the copy proceeds. No-op for
// non-oop copies, and skipped entirely when count == 0.
//   src, dst, count - arraycopy arguments, passed through to the runtime call
//   saved_regs      - caller-live registers preserved across the runtime call
47 void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
48 Register src, Register dst, Register count, RegSet saved_regs) {
49 if (is_oop) {
50 bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;
51 if ((ShenandoahSATBBarrier && !dest_uninitialized) || ShenandoahLoadRefBarrier) {
52
53 Label done;
54
55 // Avoid calling runtime if count == 0
56 __ cbz(count, done);
57
58 // Is GC active?
59 Address gc_state(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
60 __ ldrb(rscratch1, gc_state);
61 if (ShenandoahSATBBarrier && dest_uninitialized) {
// Destination is uninitialized: no old values to SATB-enqueue, so only
// the evacuation (HAS_FORWARDED) case needs the slow path.
62 __ tbz(rscratch1, ShenandoahHeap::HAS_FORWARDED_BITPOS, done);
63 } else {
// Take the slow path when either marking or evacuation is in progress.
64 __ mov(rscratch2, ShenandoahHeap::HAS_FORWARDED | ShenandoahHeap::MARKING);
65 __ tst(rscratch1, rscratch2);
66 __ br(Assembler::EQ, done);
67 }
68
69 __ push(saved_regs, sp);
70 if (UseCompressedOops) {
71 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop), src, dst, count);
72 } else {
73 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop), src, dst, count);
74 }
75 __ pop(saved_regs, sp);
76 __ bind(done);
77 }
78 }
79 }
80
// Post-barrier for an oop arraycopy: dirty the card table entries covering the
// destination range [start, start + count) when the card barrier is enabled
// (generational Shenandoah). tmp is a scratch register for the card loop.
81 void ShenandoahBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
82 Register start, Register count, Register tmp) {
83 if (ShenandoahCardBarrier && is_oop) {
84 gen_write_ref_array_post_barrier(masm, decorators, start, count, tmp);
85 }
86 }
87
// SATB (snapshot-at-the-beginning) pre-write barrier. When marking is active,
// enqueues the previous value of the field about to be overwritten into the
// thread-local SATB queue; calls into the runtime when the queue is full.
//   obj      - address of the field (noreg if the caller already loaded the
//              previous value into pre_val)
//   pre_val  - receives/holds the previous value; preserved across the call
//   thread   - must be rthread
//   tmp1/2   - scratch registers, clobbered
//   tosca_live   - r0 holds a live value and must be saved around the runtime call
//   expand_call  - expand call_VM_leaf inline (see comment below)
88 void ShenandoahBarrierSetAssembler::satb_barrier(MacroAssembler* masm,
89 Register obj,
90 Register pre_val,
91 Register thread,
92 Register tmp1,
93 Register tmp2,
94 bool tosca_live,
95 bool expand_call) {
96 assert(ShenandoahSATBBarrier, "Should be checked by caller");
97
98 // If expand_call is true then we expand the call_VM_leaf macro
99 // directly to skip generating the check by
100 // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.
101
102 assert(thread == rthread, "must be");
103
104 Label done;
105 Label runtime;
106
107 assert_different_registers(obj, pre_val, tmp1, tmp2);
108 assert(pre_val != noreg && tmp1 != noreg && tmp2 != noreg, "expecting a register");
109
110 Address index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
111 Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
112
113 // Is marking active?
114 Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
115 __ ldrb(tmp1, gc_state);
116 __ tbz(tmp1, ShenandoahHeap::MARKING_BITPOS, done);
117
118 // Do we need to load the previous value?
119 if (obj != noreg) {
120 __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW);
121 }
122
123 // Is the previous value null?
124 __ cbz(pre_val, done);
125
126 // Can we store original value in the thread's buffer?
127 // Is index == 0?
128 // (The index field is typed as size_t.)
129
130 __ ldr(tmp1, index); // tmp := *index_adr
131 __ cbz(tmp1, runtime); // tmp == 0?
132 // If yes, goto runtime
133
// Queue grows downward: decrement index, then store at buffer + index.
134 __ sub(tmp1, tmp1, wordSize); // tmp := tmp - wordSize
135 __ str(tmp1, index); // *index_adr := tmp
136 __ ldr(tmp2, buffer);
137 __ add(tmp1, tmp1, tmp2); // tmp := tmp + *buffer_adr
138
139 // Record the previous value
140 __ str(pre_val, Address(tmp1, 0));
141 __ b(done);
142
143 __ bind(runtime);
144 // save the live input values
145 RegSet saved = RegSet::of(pre_val);
146 if (tosca_live) saved += RegSet::of(r0);
147 if (obj != noreg) saved += RegSet::of(obj);
148
149 __ push(saved, sp);
150
151 // Calling the runtime using the regular call_VM_leaf mechanism generates
152 // code (generated by InterpreterMacroAssembler::call_VM_leaf_base)
153 // that checks that the *(rfp+frame::interpreter_frame_last_sp) == nullptr.
154 //
155 // If we are generating the pre-barrier without a frame (e.g. in the
156 // intrinsified Reference.get() routine) then rfp might be pointing to
157 // the caller frame and so this check will most likely fail at runtime.
158 //
159 // Expanding the call directly bypasses the generation of the check.
160 // So when we do not have a full interpreter frame on the stack
161 // expand_call should be passed true.
162
163 if (expand_call) {
164 assert(pre_val != c_rarg1, "smashed arg");
165 __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_barrier_pre), pre_val);
166 } else {
167 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_barrier_pre), pre_val);
168 }
169
170 __ pop(saved, sp);
171
172 __ bind(done);
173 }
174
// Null-tolerant wrapper around resolve_forward_pointer_not_null: if dst holds
// a forwarded from-space oop, replace it with the to-space copy; null and
// non-forwarded oops are left unchanged.
175 void ShenandoahBarrierSetAssembler::resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp) {
176 assert(ShenandoahLoadRefBarrier || ShenandoahCASBarrier, "Should be enabled");
177 Label is_null;
178 __ cbz(dst, is_null);
179 resolve_forward_pointer_not_null(masm, dst, tmp);
180 __ bind(is_null);
181 }
182
183 // IMPORTANT: This must preserve all registers, even rscratch1 and rscratch2, except those explicitly
184 // passed in.
// If the (non-null) oop in dst is forwarded — i.e. its mark word has both low
// (lock) bits set — replace dst with the forwarding pointer extracted from the
// mark word; otherwise leave dst untouched. tmp may be noreg, in which case a
// scratch register is borrowed and saved/restored on the stack.
185 void ShenandoahBarrierSetAssembler::resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp) {
186 assert(ShenandoahLoadRefBarrier || ShenandoahCASBarrier, "Should be enabled");
187 // The below loads the mark word, checks if the lowest two bits are
188 // set, and if so, clear the lowest two bits and copy the result
189 // to dst. Otherwise it leaves dst alone.
190 // Implementing this is surprisingly awkward. I do it here by:
191 // - Inverting the mark word
192 // - Test lowest two bits == 0
193 // - If so, set the lowest two bits
194 // - Invert the result back, and copy to dst
195
196 bool borrow_reg = (tmp == noreg);
197 if (borrow_reg) {
198 // No free registers available. Make one useful.
199 tmp = rscratch1;
200 if (tmp == dst) {
201 tmp = rscratch2;
202 }
203 __ push(RegSet::of(tmp), sp);
204 }
205
206 assert_different_registers(tmp, dst);
207
208 Label done;
209 __ ldr(tmp, Address(dst, oopDesc::mark_offset_in_bytes()));
// eon with zr == bitwise NOT.
210 __ eon(tmp, tmp, zr);
211 __ ands(zr, tmp, markWord::lock_mask_in_place);
212 __ br(Assembler::NE, done);
213 __ orr(tmp, tmp, markWord::marked_value);
// Invert back; the set low bits become cleared, yielding the forwardee.
214 __ eon(dst, tmp, zr);
215 __ bind(done);
216
217 if (borrow_reg) {
218 __ pop(RegSet::of(tmp), sp);
219 }
220 }
221
// Load-reference barrier: after loading an oop into dst from load_addr, ensure
// dst refers to the to-space copy. Fast paths: heap stable (no forwarding /
// no weak-root processing relevant for this access strength), or (strong only)
// the loaded oop is not in the collection set. Slow path calls the matching
// ShenandoahRuntime LRB entry point with (oop, load address).
// Clobbers rscratch1/rscratch2; dst must not be rscratch2.
222 void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr, DecoratorSet decorators) {
223 assert(ShenandoahLoadRefBarrier, "Should be enabled");
224 assert(dst != rscratch2, "need rscratch2");
225 assert_different_registers(load_addr.base(), load_addr.index(), rscratch1, rscratch2);
226
227 bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators);
228 bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators);
229 bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators);
230 bool is_native = ShenandoahBarrierSet::is_native_access(decorators);
// Native accesses always use uncompressed oops, even with UseCompressedOops.
231 bool is_narrow = UseCompressedOops && !is_native;
232
233 Label heap_stable, not_cset;
234 __ enter(/*strip_ret_addr*/true);
235 Address gc_state(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
236 __ ldrb(rscratch2, gc_state);
237
238 // Check for heap stability
239 if (is_strong) {
240 __ tbz(rscratch2, ShenandoahHeap::HAS_FORWARDED_BITPOS, heap_stable);
241 } else {
// Weak/phantom accesses must also run the barrier while weak roots are
// being processed, even if nothing is forwarded yet.
242 Label lrb;
243 __ tbnz(rscratch2, ShenandoahHeap::WEAK_ROOTS_BITPOS, lrb);
244 __ tbz(rscratch2, ShenandoahHeap::HAS_FORWARDED_BITPOS, heap_stable);
245 __ bind(lrb);
246 }
247
248 // use r1 for load address
249 Register result_dst = dst;
250 if (dst == r1) {
// dst would collide with the load-address argument register; relocate it.
251 __ mov(rscratch1, dst);
252 dst = rscratch1;
253 }
254
255 // Save r0 and r1, unless it is an output register
256 RegSet to_save = RegSet::of(r0, r1) - result_dst;
257 __ push(to_save, sp);
258 __ lea(r1, load_addr);
259 __ mov(r0, dst);
260
261 // Test for in-cset
262 if (is_strong) {
// Index the in-cset byte map by region number (oop >> region-size shift).
263 __ mov(rscratch2, ShenandoahHeap::in_cset_fast_test_addr());
264 __ lsr(rscratch1, r0, ShenandoahHeapRegion::region_size_bytes_shift_jint());
265 __ ldrb(rscratch2, Address(rscratch2, rscratch1));
266 __ tbz(rscratch2, 0, not_cset);
267 }
268
269 __ push_call_clobbered_registers();
270 if (is_strong) {
271 if (is_narrow) {
272 __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong_narrow));
273 } else {
274 __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong));
275 }
276 } else if (is_weak) {
277 if (is_narrow) {
278 __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow));
279 } else {
280 __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak));
281 }
282 } else {
283 assert(is_phantom, "only remaining strength");
284 assert(!is_narrow, "phantom access cannot be narrow");
285 // AOT saved adapters need relocation for this call.
286 __ lea(lr, RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom)));
287 }
288 __ blr(lr);
// Stash the result in rscratch1 while the clobbered registers are restored.
289 __ mov(rscratch1, r0);
290 __ pop_call_clobbered_registers();
291 __ mov(r0, rscratch1);
292
293 __ bind(not_cset);
294
295 __ mov(result_dst, r0);
296 __ pop(to_save, sp);
297
298 __ bind(heap_stable);
299 __ leave();
300 }
301
302 //
303 // Arguments:
304 //
305 // Inputs:
306 // src: oop location to load from, might be clobbered
307 //
308 // Output:
309 // dst: oop loaded from src location
310 //
311 // Kill:
312 // rscratch1 (scratch reg)
313 //
314 // Alias:
315 // dst: rscratch1 (might use rscratch1 as temporary output register to avoid clobbering src)
316 //
317 void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
318 Register dst, Address src, Register tmp1, Register tmp2) {
406 satb_barrier(masm,
407 tmp3 /* obj */,
408 tmp2 /* pre_val */,
409 rthread /* thread */,
410 tmp1 /* tmp */,
411 rscratch1 /* tmp2 */,
412 storing_non_null /* tosca_live */,
413 false /* expand_call */);
414 }
415
416 // Store!
417 BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), val, noreg, noreg, noreg);
418
419 // 3: post-barrier: card barrier needs store address
420 if (ShenandoahBarrierSet::need_card_barrier(decorators, type) && storing_non_null) {
421 card_barrier(masm, tmp3);
422 }
423 }
424
// Resolve a jobject from native code. Delegates the actual resolution to the
// generic BarrierSetAssembler, then branches to slowpath when the heap is in
// the evacuation phase (the resolved oop might be from-space and needs the
// full barrier). gc_state is addressed relative to jni_env because rthread
// may not be set up in native wrappers.
425 void ShenandoahBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
426 Register obj, Register tmp, Label& slowpath) {
427 Label done;
428 // Resolve jobject
429 BarrierSetAssembler::try_resolve_jobject_in_native(masm, jni_env, obj, tmp, slowpath);
430
431 // Check for null.
432 __ cbz(obj, done);
433
434 assert(obj != rscratch2, "need rscratch2");
435 Address gc_state(jni_env, ShenandoahThreadLocalData::gc_state_offset() - JavaThread::jni_environment_offset());
436 __ lea(rscratch2, gc_state);
437 __ ldrb(rscratch2, Address(rscratch2));
438
439 // Check for heap in evacuation phase
440 __ tbnz(rscratch2, ShenandoahHeap::EVACUATION_BITPOS, slowpath);
441
442 __ bind(done);
443 }
444
445 // Special Shenandoah CAS implementation that handles false negatives due
446 // to concurrent evacuation. The service is more complex than a
447 // traditional CAS operation because the CAS operation is intended to
448 // succeed if the reference at addr exactly matches expected or if the
449 // reference at addr holds a pointer to a from-space object that has
450 // been relocated to the location named by expected. There are two
451 // races that must be addressed:
452 // a) A parallel thread may mutate the contents of addr so that it points
453 // to a different object. In this case, the CAS operation should fail.
454 // b) A parallel thread may heal the contents of addr, replacing a
455 // from-space pointer held in addr with the to-space pointer
456 // representing the new location of the object.
457 // Upon entry to cmpxchg_oop, it is assured that new_val equals null
458 // or it refers to an object that is not being evacuated out of
459 // from-space, or it refers to the to-space version of an object that
460 // is being evacuated out of from-space.
461 //
462 // By default the value held in the result register following execution
463 // of the generated code sequence is 0 to indicate failure of CAS,
464 // non-zero to indicate success. If is_cae, the result is the value most
465 // recently fetched from addr rather than a boolean success indicator.
466 //
467 // Clobbers rscratch1, rscratch2
// Operand width follows UseCompressedOops (32-bit narrow oops vs 64-bit).
// acquire/release select the memory-ordering variant of the CAS.
468 void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm,
469 Register addr,
470 Register expected,
471 Register new_val,
472 bool acquire, bool release,
473 bool is_cae,
474 Register result) {
475 Register tmp1 = rscratch1;
476 Register tmp2 = rscratch2;
477 bool is_narrow = UseCompressedOops;
478 Assembler::operand_size size = is_narrow ? Assembler::word : Assembler::xword;
479
480 assert_different_registers(addr, expected, tmp1, tmp2);
481 assert_different_registers(addr, new_val, tmp1, tmp2);
482
483 Label step4, done;
484
485 // There are two ways to reach this label. Initial entry into the
486 // cmpxchg_oop code expansion starts at step1 (which is equivalent
487 // to label step4). Additionally, in the rare case that four steps
488 // are required to perform the requested operation, the fourth step
489 // is the same as the first. On a second pass through step 1,
490 // control may flow through step 2 on its way to failure. It will
491 // not flow from step 2 to step 3 since we are assured that the
492 // memory at addr no longer holds a from-space pointer.
493 //
494 // The comments that immediately follow the step4 label apply only
495 // to the case in which control reaches this label by branch from
496 // step 3.
497
498 __ bind (step4);
499
500 // Step 4. CAS has failed because the value most recently fetched
501 // from addr is no longer the from-space pointer held in tmp2. If a
502 // different thread replaced the in-memory value with its equivalent
503 // to-space pointer, then CAS may still be able to succeed. The
504 // value held in the expected register has not changed.
505 //
506 // It is extremely rare we reach this point. For this reason, the
507 // implementation opts for smaller rather than potentially faster
508 // code. Ultimately, smaller code for this rare case most likely
509 // delivers higher overall throughput by enabling improved icache
510 // performance.
511
512 // Step 1. Fast-path.
513 //
514 // Try to CAS with given arguments. If successful, then we are done.
515 //
516 // No label required for step 1.
517
518 __ cmpxchg(addr, expected, new_val, size, acquire, release, false, tmp2);
519 // EQ flag set iff success. tmp2 holds value fetched.
520
521 // If expected equals null but tmp2 does not equal null, the
522 // following branches to done to report failure of CAS. If both
523 // expected and tmp2 equal null, the following branches to done to
524 // report success of CAS. There's no need for a special test of
525 // expected equal to null.
526
527 __ br(Assembler::EQ, done);
528 // if CAS failed, fall through to step 2
529
530 // Step 2. CAS has failed because the value held at addr does not
531 // match expected. This may be a false negative because the value fetched
532 // from addr (now held in tmp2) may be a from-space pointer to the
533 // original copy of same object referenced by to-space pointer expected.
534 //
535 // To resolve this, it suffices to find the forward pointer associated
536 // with fetched value. If this matches expected, retry CAS with new
537 // parameters. If this mismatches, then we have a legitimate
538 // failure, and we're done.
539 //
540 // No need for step2 label.
541
542 // overwrite tmp1 with from-space pointer fetched from memory
543 __ mov(tmp1, tmp2);
544
545 if (is_narrow) {
546 // Decode tmp1 in order to resolve its forward pointer
547 __ decode_heap_oop(tmp1, tmp1);
548 }
549 resolve_forward_pointer(masm, tmp1);
550 // Encode tmp1 to compare against expected.
551 __ encode_heap_oop(tmp1, tmp1);
552
553 // Does forwarded value of fetched from-space pointer match original
554 // value of expected? If tmp1 holds null, this comparison will fail
555 // because we know from step1 that expected is not null. There is
556 // no need for a separate test for tmp1 (the value originally held
557 // in memory) equal to null.
558 __ cmp(tmp1, expected);
559
560 // If not, then the failure was legitimate and we're done.
561 // Branching to done with NE condition denotes failure.
562 __ br(Assembler::NE, done);
563
564 // Fall through to step 3. No need for step3 label.
565
566 // Step 3. We've confirmed that the value originally held in memory
567 // (now held in tmp2) pointed to from-space version of original
568 // expected value. Try the CAS again with the from-space expected
569 // value. If it now succeeds, we're good.
570 //
571 // Note: tmp2 holds encoded from-space pointer that matches to-space
572 // object residing at expected. tmp2 is the new "expected".
573
574 // Note that macro implementation of __cmpxchg cannot use same register
575 // tmp2 for result and expected since it overwrites result before it
576 // compares result with expected.
577 __ cmpxchg(addr, tmp2, new_val, size, acquire, release, false, noreg);
578 // EQ flag set iff success. tmp2 holds value fetched, tmp1 (rscratch1) clobbered.
579
580 // If fetched value did not equal the new expected, this could
581 // still be a false negative because some other thread may have
582 // newly overwritten the memory value with its to-space equivalent.
583 __ br(Assembler::NE, step4);
584
585 if (is_cae) {
586 // We're falling through to done to indicate success. Success
587 // with is_cae is denoted by returning the value of expected as
588 // result.
589 __ mov(tmp2, expected);
590 }
591
592 __ bind(done);
593 // At entry to done, the Z (EQ) flag is on iff if the CAS
594 // operation was successful. Additionally, if is_cae, tmp2 holds
595 // the value most recently fetched from addr. In this case, success
596 // is denoted by tmp2 matching expected.
597
598 if (is_cae) {
599 __ mov(result, tmp2);
600 } else {
601 __ cset(result, Assembler::EQ);
602 }
603 }
604
605 void ShenandoahBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
606 Register start, Register count, Register scratch) {
607 assert(ShenandoahCardBarrier, "Should have been checked by caller");
608
609 Label L_loop, L_done;
610 const Register end = count;
611
612 // Zero count? Nothing to do.
613 __ cbz(count, L_done);
614
615 // end = start + count << LogBytesPerHeapOop
616 // last element address to make inclusive
617 __ lea(end, Address(start, count, Address::lsl(LogBytesPerHeapOop)));
618 __ sub(end, end, BytesPerHeapOop);
619 __ lsr(start, start, CardTable::card_shift());
620 __ lsr(end, end, CardTable::card_shift());
621
622 // number of bytes to copy
623 __ sub(count, end, start);
624
709 }
710
711 #undef __
712
713 #define __ sasm->
714
// C1 slow-path stub for the SATB pre-write barrier. Called from compiled C1
// code with arg0 = the previous field value; enqueues it into the current
// thread's SATB queue, or (if the queue is full, or marking has since become
// inactive for the enqueue fast path) calls ShenandoahRuntime::write_barrier_pre.
// Preserves all call-clobbered registers around the runtime call.
715 void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
716 __ prologue("shenandoah_pre_barrier", false);
717
718 // arg0 : previous value of memory
719
// Fixed register assignments for this stub.
722 const Register pre_val = r0;
723 const Register thread = rthread;
724 const Register tmp = rscratch1;
725
726 Address queue_index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
727 Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
728
729 Label done;
730 Label runtime;
731
732 // Is marking still active?
733 Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
734 __ ldrb(tmp, gc_state);
735 __ tbz(tmp, ShenandoahHeap::MARKING_BITPOS, done);
736
737 // Can we store original value in the thread's buffer?
738 __ ldr(tmp, queue_index);
739 __ cbz(tmp, runtime);
740
// Queue grows downward: decrement index, then store at buffer + index.
741 __ sub(tmp, tmp, wordSize);
742 __ str(tmp, queue_index);
743 __ ldr(rscratch2, buffer);
744 __ add(tmp, tmp, rscratch2);
745 __ load_parameter(0, rscratch2);
746 __ str(rscratch2, Address(tmp, 0));
747 __ b(done);
748
749 __ bind(runtime);
750 __ push_call_clobbered_registers();
751 __ load_parameter(0, pre_val);
752 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_barrier_pre), pre_val);
753 __ pop_call_clobbered_registers();
754 __ bind(done);
755
756 __ epilogue();
757 }
758
759 void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, DecoratorSet decorators) {
760 __ prologue("shenandoah_load_reference_barrier", false);
761 // arg0 : object to be resolved
762
763 __ push_call_clobbered_registers();
764 __ load_parameter(0, r0);
765 __ load_parameter(1, r1);
766
767 bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators);
768 bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators);
769 bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators);
770 bool is_native = ShenandoahBarrierSet::is_native_access(decorators);
771 if (is_strong) {
772 if (is_native) {
773 __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong));
774 } else {
|
24 */
25
26 #include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp"
27 #include "gc/shenandoah/mode/shenandoahMode.hpp"
28 #include "gc/shenandoah/shenandoahBarrierSet.hpp"
29 #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
30 #include "gc/shenandoah/shenandoahForwarding.hpp"
31 #include "gc/shenandoah/shenandoahHeap.inline.hpp"
32 #include "gc/shenandoah/shenandoahHeapRegion.hpp"
33 #include "gc/shenandoah/shenandoahRuntime.hpp"
34 #include "gc/shenandoah/shenandoahThreadLocalData.hpp"
35 #include "interpreter/interp_masm.hpp"
36 #include "interpreter/interpreter.hpp"
37 #include "runtime/javaThread.hpp"
38 #include "runtime/sharedRuntime.hpp"
39 #ifdef COMPILER1
40 #include "c1/c1_LIRAssembler.hpp"
41 #include "c1/c1_MacroAssembler.hpp"
42 #include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp"
43 #endif
44 #ifdef COMPILER2
45 #include "gc/shenandoah/c2/shenandoahBarrierSetC2.hpp"
46 #endif
47
48 #define __ masm->
49
// Pre-barrier for an oop arraycopy. When a GC cycle is active (marking and/or
// evacuation, depending on which barriers are enabled), calls into the runtime
// so the copied range can be processed before the copy proceeds. No-op for
// non-oop copies, and skipped entirely when count == 0.
//   src, dst, count - arraycopy arguments, passed through to the runtime call
//   saved_regs      - caller-live registers preserved across the runtime call
50 void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
51 Register src, Register dst, Register count, RegSet saved_regs) {
52 if (is_oop) {
53 bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;
54 if ((ShenandoahSATBBarrier && !dest_uninitialized) || ShenandoahLoadRefBarrier) {
55
56 Label L_done;
57
58 // Avoid calling runtime if count == 0
59 __ cbz(count, L_done);
60
61 // Is GC active?
62 Address gc_state(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
63 __ ldrb(rscratch1, gc_state);
64 if (ShenandoahSATBBarrier && dest_uninitialized) {
// Destination is uninitialized: no old values to SATB-enqueue, so only
// the evacuation (HAS_FORWARDED) case needs the slow path.
65 __ tbz(rscratch1, ShenandoahHeap::HAS_FORWARDED_BITPOS, L_done);
66 } else {
// Take the slow path when either marking or evacuation is in progress.
67 __ mov(rscratch2, ShenandoahHeap::HAS_FORWARDED | ShenandoahHeap::MARKING);
68 __ tst(rscratch1, rscratch2);
69 __ br(Assembler::EQ, L_done);
70 }
71
72 __ push(saved_regs, sp);
73 if (UseCompressedOops) {
74 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop), src, dst, count);
75 } else {
76 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop), src, dst, count);
77 }
78 __ pop(saved_regs, sp);
79 __ bind(L_done);
80 }
81 }
82 }
83
// Post-barrier for an oop arraycopy: dirty the card table entries covering the
// destination range [start, start + count) when the card barrier is enabled
// (generational Shenandoah). tmp is a scratch register for the card loop.
84 void ShenandoahBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
85 Register start, Register count, Register tmp) {
86 if (ShenandoahCardBarrier && is_oop) {
87 gen_write_ref_array_post_barrier(masm, decorators, start, count, tmp);
88 }
89 }
90
// SATB (snapshot-at-the-beginning) pre-write barrier. When marking is active,
// enqueues the previous value of the field about to be overwritten into the
// thread-local SATB queue; calls into the runtime when the queue is full.
//   obj      - address of the field (noreg if the caller already loaded the
//              previous value into pre_val)
//   pre_val  - receives/holds the previous value; preserved across the call
//   thread   - must be rthread
//   tmp1/2   - scratch registers, clobbered
//   tosca_live   - r0 holds a live value and must be saved around the runtime call
//   expand_call  - expand call_VM_leaf inline (see comment below)
91 void ShenandoahBarrierSetAssembler::satb_barrier(MacroAssembler* masm,
92 Register obj,
93 Register pre_val,
94 Register thread,
95 Register tmp1,
96 Register tmp2,
97 bool tosca_live,
98 bool expand_call) {
99 assert(ShenandoahSATBBarrier, "Should be checked by caller");
100
101 // If expand_call is true then we expand the call_VM_leaf macro
102 // directly to skip generating the check by
103 // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.
104
105 assert(thread == rthread, "must be");
106
107 Label L_done;
108 Label L_runtime;
109
110 assert_different_registers(obj, pre_val, tmp1, tmp2);
111 assert(pre_val != noreg && tmp1 != noreg && tmp2 != noreg, "expecting a register");
112
113 Address index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
114 Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
115
116 // Is marking active?
117 Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
118 __ ldrb(tmp1, gc_state);
119 __ tbz(tmp1, ShenandoahHeap::MARKING_BITPOS, L_done);
120
121 // Do we need to load the previous value?
122 if (obj != noreg) {
123 __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW);
124 }
125
126 // Is the previous value null?
127 __ cbz(pre_val, L_done);
128
129 // Can we store original value in the thread's buffer?
130 // Is index == 0?
131 // (The index field is typed as size_t.)
132
133 __ ldr(tmp1, index); // tmp := *index_adr
134 __ cbz(tmp1, L_runtime); // tmp == 0?
135 // If yes, goto runtime
136
// Queue grows downward: decrement index, then store at buffer + index.
137 __ sub(tmp1, tmp1, wordSize); // tmp := tmp - wordSize
138 __ str(tmp1, index); // *index_adr := tmp
139 __ ldr(tmp2, buffer);
140 __ add(tmp1, tmp1, tmp2); // tmp := tmp + *buffer_adr
141
142 // Record the previous value
143 __ str(pre_val, Address(tmp1, 0));
144 __ b(L_done);
145
146 __ bind(L_runtime);
147 // save the live input values
148 RegSet saved = RegSet::of(pre_val);
149 if (tosca_live) saved += RegSet::of(r0);
150 if (obj != noreg) saved += RegSet::of(obj);
151
152 __ push(saved, sp);
153
154 // Calling the runtime using the regular call_VM_leaf mechanism generates
155 // code (generated by InterpreterMacroAssembler::call_VM_leaf_base)
156 // that checks that the *(rfp+frame::interpreter_frame_last_sp) == nullptr.
157 //
158 // If we are generating the pre-barrier without a frame (e.g. in the
159 // intrinsified Reference.get() routine) then rfp might be pointing to
160 // the caller frame and so this check will most likely fail at runtime.
161 //
162 // Expanding the call directly bypasses the generation of the check.
163 // So when we do not have a full interpreter frame on the stack
164 // expand_call should be passed true.
165
166 if (expand_call) {
167 assert(pre_val != c_rarg1, "smashed arg");
168 __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_barrier_pre), pre_val);
169 } else {
170 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_barrier_pre), pre_val);
171 }
172
173 __ pop(saved, sp);
174
175 __ bind(L_done);
176 }
177
// Null-tolerant wrapper around resolve_forward_pointer_not_null: if dst holds
// a forwarded from-space oop, replace it with the to-space copy; null and
// non-forwarded oops are left unchanged.
178 void ShenandoahBarrierSetAssembler::resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp) {
179 assert(ShenandoahLoadRefBarrier || ShenandoahCASBarrier, "Should be enabled");
180 Label L_is_null;
181 __ cbz(dst, L_is_null);
182 resolve_forward_pointer_not_null(masm, dst, tmp);
183 __ bind(L_is_null);
184 }
185
186 // IMPORTANT: This must preserve all registers, even rscratch1 and rscratch2, except those explicitly
187 // passed in.
// If the (non-null) oop in dst is forwarded — i.e. its mark word has both low
// (lock) bits set — replace dst with the forwarding pointer extracted from the
// mark word; otherwise leave dst untouched. tmp may be noreg, in which case a
// scratch register is borrowed and saved/restored on the stack.
188 void ShenandoahBarrierSetAssembler::resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp) {
189 assert(ShenandoahLoadRefBarrier || ShenandoahCASBarrier, "Should be enabled");
190 // The below loads the mark word, checks if the lowest two bits are
191 // set, and if so, clear the lowest two bits and copy the result
192 // to dst. Otherwise it leaves dst alone.
193 // Implementing this is surprisingly awkward. I do it here by:
194 // - Inverting the mark word
195 // - Test lowest two bits == 0
196 // - If so, set the lowest two bits
197 // - Invert the result back, and copy to dst
198
199 bool borrow_reg = (tmp == noreg);
200 if (borrow_reg) {
201 // No free registers available. Make one useful.
202 tmp = rscratch1;
203 if (tmp == dst) {
204 tmp = rscratch2;
205 }
206 __ push(RegSet::of(tmp), sp);
207 }
208
209 assert_different_registers(tmp, dst);
210
211 Label L_done;
212 __ ldr(tmp, Address(dst, oopDesc::mark_offset_in_bytes()));
// eon with zr == bitwise NOT.
213 __ eon(tmp, tmp, zr);
214 __ ands(zr, tmp, markWord::lock_mask_in_place);
215 __ br(Assembler::NE, L_done);
216 __ orr(tmp, tmp, markWord::marked_value);
// Invert back; the set low bits become cleared, yielding the forwardee.
217 __ eon(dst, tmp, zr);
218 __ bind(L_done);
219
220 if (borrow_reg) {
221 __ pop(RegSet::of(tmp), sp);
222 }
223 }
224
// Load-reference barrier: after loading an oop into dst from load_addr, ensure
// dst refers to the to-space copy. Fast paths: heap stable (no forwarding /
// no weak-root processing relevant for this access strength), or (strong only)
// the loaded oop is not in the collection set. Slow path calls the matching
// ShenandoahRuntime LRB entry point with (oop, load address).
// Clobbers rscratch1/rscratch2; dst must not be rscratch2.
225 void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr, DecoratorSet decorators) {
226 assert(ShenandoahLoadRefBarrier, "Should be enabled");
227 assert(dst != rscratch2, "need rscratch2");
228 assert_different_registers(load_addr.base(), load_addr.index(), rscratch1, rscratch2);
229
230 bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators);
231 bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators);
232 bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators);
233 bool is_native = ShenandoahBarrierSet::is_native_access(decorators);
// Native accesses always use uncompressed oops, even with UseCompressedOops.
234 bool is_narrow = UseCompressedOops && !is_native;
235
236 Label L_heap_stable, L_not_cset;
237 __ enter(/*strip_ret_addr*/true);
238 Address gc_state(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
239 __ ldrb(rscratch2, gc_state);
240
241 // Check for heap stability
242 if (is_strong) {
243 __ tbz(rscratch2, ShenandoahHeap::HAS_FORWARDED_BITPOS, L_heap_stable);
244 } else {
// Weak/phantom accesses must also run the barrier while weak roots are
// being processed, even if nothing is forwarded yet.
245 Label L_lrb;
246 __ tbnz(rscratch2, ShenandoahHeap::WEAK_ROOTS_BITPOS, L_lrb);
247 __ tbz(rscratch2, ShenandoahHeap::HAS_FORWARDED_BITPOS, L_heap_stable);
248 __ bind(L_lrb);
249 }
250
251 // use r1 for load address
252 Register result_dst = dst;
253 if (dst == r1) {
// dst would collide with the load-address argument register; relocate it.
254 __ mov(rscratch1, dst);
255 dst = rscratch1;
256 }
257
258 // Save r0 and r1, unless it is an output register
259 RegSet to_save = RegSet::of(r0, r1) - result_dst;
260 __ push(to_save, sp);
261 __ lea(r1, load_addr);
262 __ mov(r0, dst);
263
264 // Test for in-cset
265 if (is_strong) {
// Index the in-cset byte map by region number (oop >> region-size shift).
266 __ mov(rscratch2, ShenandoahHeap::in_cset_fast_test_addr());
267 __ lsr(rscratch1, r0, ShenandoahHeapRegion::region_size_bytes_shift_jint());
268 __ ldrb(rscratch2, Address(rscratch2, rscratch1));
269 __ tbz(rscratch2, 0, L_not_cset);
270 }
271
272 __ push_call_clobbered_registers();
273 if (is_strong) {
274 if (is_narrow) {
275 __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong_narrow));
276 } else {
277 __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong));
278 }
279 } else if (is_weak) {
280 if (is_narrow) {
281 __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow));
282 } else {
283 __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak));
284 }
285 } else {
286 assert(is_phantom, "only remaining strength");
287 assert(!is_narrow, "phantom access cannot be narrow");
288 // AOT saved adapters need relocation for this call.
289 __ lea(lr, RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom)));
290 }
291 __ blr(lr);
// Stash the result in rscratch1 while the clobbered registers are restored.
292 __ mov(rscratch1, r0);
293 __ pop_call_clobbered_registers();
294 __ mov(r0, rscratch1);
295
296 __ bind(L_not_cset);
297
298 __ mov(result_dst, r0);
299 __ pop(to_save, sp);
300
301 __ bind(L_heap_stable);
302 __ leave();
303 }
304
305 //
306 // Arguments:
307 //
308 // Inputs:
309 // src: oop location to load from, might be clobbered
310 //
311 // Output:
312 // dst: oop loaded from src location
313 //
314 // Kill:
315 // rscratch1 (scratch reg)
316 //
317 // Alias:
318 // dst: rscratch1 (might use rscratch1 as temporary output register to avoid clobbering src)
319 //
320 void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
321 Register dst, Address src, Register tmp1, Register tmp2) {
409 satb_barrier(masm,
410 tmp3 /* obj */,
411 tmp2 /* pre_val */,
412 rthread /* thread */,
413 tmp1 /* tmp */,
414 rscratch1 /* tmp2 */,
415 storing_non_null /* tosca_live */,
416 false /* expand_call */);
417 }
418
419 // Store!
420 BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), val, noreg, noreg, noreg);
421
422 // 3: post-barrier: card barrier needs store address
423 if (ShenandoahBarrierSet::need_card_barrier(decorators, type) && storing_non_null) {
424 card_barrier(masm, tmp3);
425 }
426 }
427
428 void ShenandoahBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
429 Register obj, Register tmp, Label& L_slowpath) {
430 Label L_done;
431 // Resolve jobject
432 BarrierSetAssembler::try_resolve_jobject_in_native(masm, jni_env, obj, tmp, L_slowpath);
433
434 // Check for null.
435 __ cbz(obj, L_done);
436
437 assert(obj != rscratch2, "need rscratch2");
438 Address gc_state(jni_env, ShenandoahThreadLocalData::gc_state_offset() - JavaThread::jni_environment_offset());
439 __ lea(rscratch2, gc_state);
440 __ ldrb(rscratch2, Address(rscratch2));
441
442 // Check for heap in evacuation phase
443 __ tbnz(rscratch2, ShenandoahHeap::EVACUATION_BITPOS, L_slowpath);
444
445 __ bind(L_done);
446 }
447
448 // Special Shenandoah CAS implementation that handles false negatives due
449 // to concurrent evacuation. The service is more complex than a
450 // traditional CAS operation because the CAS operation is intended to
451 // succeed if the reference at addr exactly matches expected or if the
452 // reference at addr holds a pointer to a from-space object that has
453 // been relocated to the location named by expected. There are two
454 // races that must be addressed:
455 // a) A parallel thread may mutate the contents of addr so that it points
456 // to a different object. In this case, the CAS operation should fail.
457 // b) A parallel thread may heal the contents of addr, replacing a
458 // from-space pointer held in addr with the to-space pointer
459 // representing the new location of the object.
460 // Upon entry to cmpxchg_oop, it is assured that new_val equals null
461 // or it refers to an object that is not being evacuated out of
462 // from-space, or it refers to the to-space version of an object that
463 // is being evacuated out of from-space.
464 //
465 // By default the value held in the result register following execution
466 // of the generated code sequence is 0 to indicate failure of CAS,
467 // non-zero to indicate success. If is_cae, the result is the value most
468 // recently fetched from addr rather than a boolean success indicator.
469 //
470 // Clobbers rscratch1, rscratch2
// Emits the four-step Shenandoah-aware CAS sequence described in the comment
// above. tmp1/tmp2 alias rscratch1/rscratch2, which are clobbered.
void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm,
                                                Register addr,
                                                Register expected,
                                                Register new_val,
                                                bool acquire, bool release,
                                                bool is_cae,
                                                Register result) {
  Register tmp1 = rscratch1;
  Register tmp2 = rscratch2;
  bool is_narrow = UseCompressedOops;
  Assembler::operand_size size = is_narrow ? Assembler::word : Assembler::xword;

  assert_different_registers(addr, expected, tmp1, tmp2);
  assert_different_registers(addr, new_val, tmp1, tmp2);

  Label L_step4, L_done;

  // There are two ways to reach this label. Initial entry into the
  // cmpxchg_oop code expansion starts at step1 (which is equivalent
  // to label step4). Additionally, in the rare case that four steps
  // are required to perform the requested operation, the fourth step
  // is the same as the first. On a second pass through step 1,
  // control may flow through step 2 on its way to failure. It will
  // not flow from step 2 to step 3 since we are assured that the
  // memory at addr no longer holds a from-space pointer.
  //
  // The comments that immediately follow the step4 label apply only
  // to the case in which control reaches this label by branch from
  // step 3.

  __ bind (L_step4);

  // Step 4. CAS has failed because the value most recently fetched
  // from addr is no longer the from-space pointer held in tmp2. If a
  // different thread replaced the in-memory value with its equivalent
  // to-space pointer, then CAS may still be able to succeed. The
  // value held in the expected register has not changed.
  //
  // It is extremely rare we reach this point. For this reason, the
  // implementation opts for smaller rather than potentially faster
  // code. Ultimately, smaller code for this rare case most likely
  // delivers higher overall throughput by enabling improved icache
  // performance.

  // Step 1. Fast-path.
  //
  // Try to CAS with given arguments. If successful, then we are done.
  //
  // No label required for step 1.

  __ cmpxchg(addr, expected, new_val, size, acquire, release, false, tmp2);
  // EQ flag set iff success. tmp2 holds value fetched.

  // If expected equals null but tmp2 does not equal null, the
  // following branches to done to report failure of CAS. If both
  // expected and tmp2 equal null, the following branches to done to
  // report success of CAS. There's no need for a special test of
  // expected equal to null.

  __ br(Assembler::EQ, L_done);
  // if CAS failed, fall through to step 2

  // Step 2. CAS has failed because the value held at addr does not
  // match expected. This may be a false negative because the value fetched
  // from addr (now held in tmp2) may be a from-space pointer to the
  // original copy of same object referenced by to-space pointer expected.
  //
  // To resolve this, it suffices to find the forward pointer associated
  // with fetched value. If this matches expected, retry CAS with new
  // parameters. If this mismatches, then we have a legitimate
  // failure, and we're done.
  //
  // No need for step2 label.

  // overwrite tmp1 with from-space pointer fetched from memory
  __ mov(tmp1, tmp2);

  if (is_narrow) {
    // Decode tmp1 in order to resolve its forward pointer
    __ decode_heap_oop(tmp1, tmp1);
  }
  resolve_forward_pointer(masm, tmp1);
  // Encode tmp1 to compare against expected.
  __ encode_heap_oop(tmp1, tmp1);

  // Does forwarded value of fetched from-space pointer match original
  // value of expected? If tmp1 holds null, this comparison will fail
  // because we know from step1 that expected is not null. There is
  // no need for a separate test for tmp1 (the value originally held
  // in memory) equal to null.
  __ cmp(tmp1, expected);

  // If not, then the failure was legitimate and we're done.
  // Branching to done with NE condition denotes failure.
  __ br(Assembler::NE, L_done);

  // Fall through to step 3. No need for step3 label.

  // Step 3. We've confirmed that the value originally held in memory
  // (now held in tmp2) pointed to from-space version of original
  // expected value. Try the CAS again with the from-space expected
  // value. If it now succeeds, we're good.
  //
  // Note: tmp2 holds encoded from-space pointer that matches to-space
  // object residing at expected. tmp2 is the new "expected".

  // Note that macro implementation of __cmpxchg cannot use same register
  // tmp2 for result and expected since it overwrites result before it
  // compares result with expected.
  __ cmpxchg(addr, tmp2, new_val, size, acquire, release, false, noreg);
  // EQ flag set iff success. tmp2 holds value fetched, tmp1 (rscratch1) clobbered.

  // If fetched value did not equal the new expected, this could
  // still be a false negative because some other thread may have
  // newly overwritten the memory value with its to-space equivalent.
  __ br(Assembler::NE, L_step4);

  if (is_cae) {
    // We're falling through to done to indicate success. Success
    // with is_cae is denoted by returning the value of expected as
    // result.
    __ mov(tmp2, expected);
  }

  __ bind(L_done);
  // At entry to done, the Z (EQ) flag is on iff the CAS
  // operation was successful. Additionally, if is_cae, tmp2 holds
  // the value most recently fetched from addr. In this case, success
  // is denoted by tmp2 matching expected.

  if (is_cae) {
    __ mov(result, tmp2);
  } else {
    __ cset(result, Assembler::EQ);
  }
}
607
608 #ifdef COMPILER2
// Emits the inline fast-path gc-state check for C2 barriers: loads the
// thread-local gc-state byte and branches to slow_stub->entry() when any bit
// of test_state is set, otherwise to slow_stub->continuation() (which is
// bound at the end of this sequence). 'rscratch' is clobbered.
void ShenandoahBarrierSetAssembler::gc_state_check_c2(MacroAssembler* masm, Register rscratch, const unsigned char test_state, BarrierStubC2* slow_stub) {
  if (ShenandoahGCStateCheckRemove) {
    // Unrealistic: remove all barrier fastpath checks.
  } else if (ShenandoahGCStateCheckHotpatch) {
    // In the ideal world, we would hot-patch the branch to slow stub with a single
    // (unconditional) jump or nop, based on our current GC state.
    // FIXME: we may need more than one nop. to discuss.
    __ nop();
  } else {
#ifdef ASSERT
    // NOTE(review): despite its name, only_valid_flags holds the bits of
    // test_state *outside* the allowed set; the assert requires it to be zero.
    const unsigned char allowed = (unsigned char)(ShenandoahHeap::MARKING | ShenandoahHeap::HAS_FORWARDED | ShenandoahHeap::WEAK_ROOTS);
    const unsigned char only_valid_flags = test_state & (unsigned char) ~allowed;
    assert(test_state > 0x0, "Invalid test_state asked: %x", test_state);
    assert(only_valid_flags == 0x0, "Invalid test_state asked: %x", test_state);
#endif

    Label L_short_branch;

    // A power-of-two mask can be tested with a single tbz.
    bool one_bit = (test_state & (test_state - 1)) == 0;
    // test_state with the WEAK_ROOTS bit cleared; see the multi-bit case below.
    char no_weak_set = (test_state & (~ShenandoahHeap::WEAK_ROOTS));

    Address gcs_addr(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
    __ ldrb(rscratch, gcs_addr);

    // if only one bit is required then we can always use tbz
    if (one_bit) {
      // NOTE(review): __builtin_ctz is a GCC/Clang builtin; HotSpot normally
      // uses a portability wrapper (count_trailing_zeros) — confirm intended.
      int bit = __builtin_ctz((unsigned)test_state);
      __ tbz(rscratch, bit, *slow_stub->continuation());
    } else if (no_weak_set == test_state) {
      __ ands(rscratch, rscratch, test_state);
      __ cbz(rscratch, *slow_stub->continuation());
    } else {
      // One single 'ands' isn't possible because weak is set, making the
      // immediate pattern invalid. One single tbz/tbnz doesn't work because we
      // have 2 or more bits set.
      //
      // We'll tackle this by breaking the problem in two parts. First we only
      // check for weak_roots and then we check for the other flags using
      // 'ands' without the weak bit set.
      __ tbnz(rscratch, ShenandoahHeap::WEAK_ROOTS_BITPOS, L_short_branch);

      // We cleared the weak bit earlier on
      __ ands(rscratch, rscratch, no_weak_set);
      __ cbz(rscratch, *slow_stub->continuation());
    }

    // tb(n)z/cbz have a limited branch range; branch locally, then reach the
    // (possibly distant) stub entry with a full-range 'b'.
    __ bind(L_short_branch);
    __ b(*slow_stub->entry());

    // This is where the stub will return to, or where the code above jumps
    // when none of the requested gc-state bits are set.
    __ bind(*slow_stub->continuation());
  }
}
663
/**
 * The logic implemented here relies on certain flags being at specific
 * positions of the GCState. Also note that all pointer values in registers are
 * guaranteed to be 'to-space' addresses. The algorithm is as follows. If the
 * CAS succeeds:
 * - 'res' will be set to 1.
 * - We need to check the SATB flag (index 1 of GCState). If the flag is active
 *   we need to store 'oldval' in the buffer.
 * - We wrote 'newval' to 'addr', therefore we need to mark the corresponding
 *   card in the card table for 'addr' as dirty.
 * If the CAS fails:
 * - 'res' will be set to 0.
 * - If the GCState FORWARDING bit (index 0 of GCState) is set we'll need to
 *   retry the CAS, because the failure may be because the value in 'addr' is
 *   the (outdated) 'from-space' version of 'expected'. The retry is done in a
 *   stub. If the retry succeeds, we then need to perform the same steps
 *   described above for a successful CAS.
 * - If the FORWARDING bit is clear there is nothing else to do.
 */
// See the algorithm description in the comment above. When 'exchange' is
// true, 'res' receives the fetched value and rscratch2 carries the success
// flag; otherwise 'res' carries the success flag and rscratch2 the witness.
void ShenandoahBarrierSetAssembler::cae_c2(const MachNode* node, MacroAssembler* masm, Register res, Register addr, Register oldval, Register newval, bool exchange, bool maybe_null, bool narrow, bool acquire, bool release, bool weak) {
  Assembler::operand_size op_size = narrow ? Assembler::word : Assembler::xword;

  // Issue cmpxchg first, res will have the failure witness if CAS fails
  __ cmpxchg(addr, oldval, newval, op_size, acquire, release, weak, exchange ? res : rscratch2);

  // First CAS attempt. If successful, then we are done.
  // EQ flag set iff success.
  __ cset(exchange ? rscratch2 : res, Assembler::EQ);

  if (!ShenandoahSkipBarriers && (ShenandoahCASBarrierStubC2::needs_barrier(node) || ShenandoahStoreBarrierStubC2::needs_card_barrier(node))) {
    Assembler::InlineSkippedInstructionsCounter skip_counter(masm);

    if (ShenandoahCASBarrierStubC2::needs_barrier(node)) {
      ShenandoahCASBarrierStubC2* stub = ShenandoahCASBarrierStubC2::create(node, addr, oldval, newval, res, narrow, exchange, maybe_null, acquire, release, weak);

      // Mask of gc-state bits that require taking the slow-path stub.
      char check = 0;
      check |= ShenandoahLoadBarrierStubC2::needs_keep_alive_barrier(node) ? ShenandoahHeap::MARKING : 0;
      check |= ShenandoahLoadBarrierStubC2::needs_load_ref_barrier(node) ? ShenandoahHeap::HAS_FORWARDED : 0;
      gc_state_check_c2(masm, rscratch1, check, stub);
    }

    if (ShenandoahStoreBarrierStubC2::needs_card_barrier(node)) {
      if (exchange) {
        // For exchange, res holds the fetched value; recompute the success
        // flag into rscratch2 so the card mark is conditional on success.
        __ cmp(res, oldval);
        __ cset(rscratch2, Assembler::EQ);
      }
      card_barrier_c2(node, masm, addr, exchange ? rscratch2 : res);
    }
  }
}
714
715 void ShenandoahBarrierSetAssembler::get_and_set_c2(const MachNode* node, MacroAssembler* masm, Register preval, Register newval, Register addr, bool acquire) {
716 if (node->bottom_type()->isa_narrowoop()) {
717 if (acquire) {
718 __ atomic_xchgalw(preval, newval, addr);
719 } else {
720 __ atomic_xchgw(preval, newval, addr);
721 }
722 } else {
723 if (acquire) {
724 __ atomic_xchgal(preval, newval, addr);
725 } else {
726 __ atomic_xchg(preval, newval, addr);
727 }
728 }
729
730 if (!ShenandoahSkipBarriers && (ShenandoahLoadBarrierStubC2::needs_barrier(node) || ShenandoahStoreBarrierStubC2::needs_card_barrier(node))) {
731 Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
732
733 if (ShenandoahLoadBarrierStubC2::needs_barrier(node)) {
734 ShenandoahLoadBarrierStubC2* const stub = ShenandoahLoadBarrierStubC2::create(node, preval, addr);
735
736 char check = 0;
737 check |= ShenandoahLoadBarrierStubC2::needs_keep_alive_barrier(node) ? ShenandoahHeap::MARKING : 0;
738 check |= ShenandoahLoadBarrierStubC2::needs_load_ref_barrier(node) ? ShenandoahHeap::HAS_FORWARDED : 0;
739 check |= ShenandoahLoadBarrierStubC2::needs_load_ref_barrier_weak(node) ? ShenandoahHeap::WEAK_ROOTS : 0;
740 gc_state_check_c2(masm, rscratch1, check, stub);
741 }
742
743 if (ShenandoahStoreBarrierStubC2::needs_card_barrier(node)) {
744 card_barrier_c2(node, masm, addr, noreg);
745 }
746 }
747 }
748
// Emits a C2 oop store preceded by the required Shenandoah barriers.
//   addr/dst_narrow: destination address register / whether the slot is a
//                    compressed-oop slot
//   src/src_narrow : value register / whether the value is already compressed
//   is_volatile    : emit a store-release instead of a plain store
void ShenandoahBarrierSetAssembler::store_c2(const MachNode* node, MacroAssembler* masm,
                                             Register addr, bool dst_narrow,
                                             Register src, bool src_narrow,
                                             bool is_volatile) {

  // Emit barrier if needed
  if (!ShenandoahSkipBarriers && ShenandoahStoreBarrierStubC2::needs_barrier(node)) {
    Assembler::InlineSkippedInstructionsCounter skip_counter(masm);

    if (ShenandoahStoreBarrierStubC2::needs_keep_alive_barrier(node)) {
      // SATB pre-barrier: only needed while marking is active.
      ShenandoahStoreBarrierStubC2* const stub = ShenandoahStoreBarrierStubC2::create(node, addr, dst_narrow);

      gc_state_check_c2(masm, rscratch1, ShenandoahHeap::MARKING, stub);
    }

    if (ShenandoahStoreBarrierStubC2::needs_card_barrier(node)) {
      // NOTE(review): the card is dirtied before the store instruction is
      // emitted — presumably safe because the card remains dirty across the
      // store; confirm against the card-scanning protocol.
      card_barrier_c2(node, masm, addr, noreg);
    }
  }

  // Do the actual store
  if (dst_narrow) {
    if (!src_narrow) {
      // Need to encode into rscratch, because we cannot clobber src.
      // TODO: Maybe there is a matcher way to test that src is unused after this?
      __ mov(rscratch1, src);
      if (ShenandoahStoreBarrierStubC2::src_not_null(node)) {
        __ encode_heap_oop_not_null(rscratch1);
      } else {
        __ encode_heap_oop(rscratch1);
      }
      src = rscratch1;
    }

    if (is_volatile) {
      __ stlrw(src, addr);
    } else {
      __ strw(src, addr);
    }
  } else {
    // NOTE(review): this path stores src as a full word without decoding —
    // assumes src is never compressed when the destination slot is wide;
    // confirm the matcher guarantees this.
    if (is_volatile) {
      __ stlr(src, addr);
    } else {
      __ str(src, addr);
    }
  }
}
796
// Emits a C2 oop load followed, when required, by the inline gc-state check
// that dispatches to the out-of-line Shenandoah load barrier stub.
void ShenandoahBarrierSetAssembler::load_c2(const MachNode* node, MacroAssembler* masm,
                                            Register dst, Register addr, bool acquire) {
  // Plain or acquiring load, narrow or wide, per the node's type.
  if (node->bottom_type()->isa_narrowoop()) {
    if (acquire) {
      __ ldarw(dst, addr);
    } else {
      __ ldrw(dst, addr);
    }
  } else {
    if (acquire) {
      __ ldar(dst, addr);
    } else {
      __ ldr(dst, addr);
    }
  }

  if (!ShenandoahSkipBarriers && ShenandoahLoadBarrierStubC2::needs_barrier(node)) {
    Assembler::InlineSkippedInstructionsCounter skip_counter(masm);

    ShenandoahLoadBarrierStubC2* const stub = ShenandoahLoadBarrierStubC2::create(node, dst, addr);
    // Keep addr live across the stub; dst is rewritten by the barrier result
    // (presumably these drive the stub's SaveLiveRegisters — confirm).
    stub->preserve(addr);
    stub->dont_preserve(dst);

    // Mask of gc-state bits whose being set requires taking the stub.
    char check = 0;
    check |= ShenandoahLoadBarrierStubC2::needs_keep_alive_barrier(node) ? ShenandoahHeap::MARKING : 0;
    check |= ShenandoahLoadBarrierStubC2::needs_load_ref_barrier(node) ? ShenandoahHeap::HAS_FORWARDED : 0;
    check |= ShenandoahLoadBarrierStubC2::needs_load_ref_barrier_weak(node) ? ShenandoahHeap::WEAK_ROOTS : 0;
    gc_state_check_c2(masm, rscratch1, check, stub);
  }
}
827
828 void ShenandoahBarrierSetAssembler::card_barrier_c2(const MachNode* node, MacroAssembler* masm, Register addr, Register cond) {
829 if ((node->barrier_data() & ShenandoahBitCardMark) == 0) {
830 return;
831 }
832
833 assert(CardTable::dirty_card_val() == 0, "must be");
834 Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
835 Label L_skip;
836
837 if (cond != noreg) {
838 __ cbz(cond, L_skip);
839 }
840
841 // rscratch2 = addr >> CardTable::card_shift()
842 __ lsr(rscratch2, addr, CardTable::card_shift());
843
844 // rscratch1 = card table base (holder)
845 Address curr_ct_holder_addr(rthread, in_bytes(ShenandoahThreadLocalData::card_table_offset()));
846 __ ldr(rscratch1, curr_ct_holder_addr);
847
848 // rscratch2 = &card_table[card_index]
849 __ add(rscratch2, rscratch1, rscratch2);
850
851 if (UseCondCardMark) {
852 Label L_already_dirty;
853 __ ldrb(rscratch1, Address(rscratch2));
854 __ cbz(rscratch1, L_already_dirty);
855 __ strb(zr, Address(rscratch2));
856 __ bind(L_already_dirty);
857 } else {
858 __ strb(zr, Address(rscratch2));
859 }
860 __ bind(L_skip);
861 }
862 #undef __
863 #define __ masm.
864
// Out-of-line SATB (pre-write) slow path for store_c2: loads the previous
// value from the destination address and enqueues it in the SATB buffer.
void ShenandoahStoreBarrierStubC2::emit_code(MacroAssembler& masm) {
  Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
  __ bind(*entry());

  Label L_done;

  // We'll use "_addr_reg" register as third scratch register
  assert(_addr_reg != noreg, "should be");
  RegSet saved = RegSet::of(_addr_reg);
  Register rscratch3 = _addr_reg;
  __ push(saved, sp);

  // Do we need to load the previous value?
  // NOTE(review): the assert above guarantees _addr_reg != noreg, so this
  // condition is always true and the else branch below is unreachable dead
  // code — it would also read rscratch3 before anything is loaded into it.
  // Confirm the intended condition and remove the dead branch.
  if (_addr_reg != noreg) {
    __ load_heap_oop(rscratch3, Address(rscratch3, 0), noreg, noreg, AS_RAW);
    // FIXME: We can merge this on the load above
    __ cbz(rscratch3, L_done);
  } else {
    if (_dst_narrow) {
      __ decode_heap_oop(rscratch3, &L_done);
    } else {
      __ cbz(rscratch3, L_done);
    }
  }

  // Enqueue the (non-null) previous value; null values never need the SATB.
  satb(&masm, this, rscratch1, rscratch2, rscratch3, &L_done);

  __ bind(L_done);
  __ pop(saved, sp);
  __ b(*continuation());
}
896
// Out-of-line slow path for load_c2: decode/null-check the loaded value, run
// the SATB (keep-alive) barrier while marking is active, then the
// load-reference barrier, re-encode if narrow, and return to the inline code.
void ShenandoahLoadBarrierStubC2::emit_code(MacroAssembler& masm) {
  Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);

  __ bind(*entry());

  Label L_lrb;

  // Work on the decoded oop; a null load needs neither SATB nor LRB,
  // so skip straight to the (no-op) LRB section.
  if (_narrow) {
    if (_maybe_null) {
      __ decode_heap_oop(_dst, &L_lrb);
    } else {
      __ decode_heap_oop_not_null(_dst);
    }
  } else {
    __ cbz(_dst, L_lrb);
  }

  { // SATB
    // Only enqueue the loaded value while marking is active.
    Address gcs_addr(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
    __ ldrb(rscratch1, gcs_addr);
    __ tbz(rscratch1, ShenandoahHeap::MARKING_BITPOS, L_lrb);

    // Keep _dst live across the SATB runtime call (presumably drives
    // SaveLiveRegisters in satb() — confirm).
    preserve(_dst);
    satb(&masm, this, rscratch1, rscratch2, _dst, &L_lrb);
  }

  __ bind(L_lrb); { // LRB
    Label L_lrb_end;

    // Strong references only need the LRB while the heap has forwarded
    // objects; weak/phantom references always take the call in lrb().
    if ((_node->barrier_data() & ShenandoahBitStrong) != 0) {
      Address gcs_addr(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
      __ ldrb(rscratch1, gcs_addr);
      __ tbz(rscratch1, ShenandoahHeap::HAS_FORWARDED_BITPOS, L_lrb_end);
    }

    // _dst is overwritten by the LRB result, so it need not be saved.
    dont_preserve(_dst);
    lrb(&masm, this, _dst, _addr_reg, &L_lrb_end, _narrow);

    __ bind(L_lrb_end);
  }

  // Restore the representation the inline code expects.
  if (_narrow) {
    if (_maybe_null) {
      __ encode_heap_oop(_dst);
    } else {
      __ encode_heap_oop_not_null(_dst);
    }
  }

  __ b(*continuation());
}
948
// Out-of-line slow path for cae_c2: on CAS failure, resolves the fetched
// from-space witness through the LRB and retries the CAS once; on success
// (first try or retry), enqueues 'expected' in the SATB buffer while marking.
void ShenandoahCASBarrierStubC2::emit_code(MacroAssembler& masm) {
  Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);

  __ bind(*entry());

  // Non-strong references should always go to runtime. We do not expect
  // CASes over non-strong locations.
  assert((_node->barrier_data() & ShenandoahBitStrong) != 0, "Only strong references for CASes");

  Label L_final;
  Label L_succeded;

  // Check whether the first CAS succeeded; if it did we just need to write
  // to SATB. (cae_c2 placed the success flag in rscratch2 for cae, in
  // _result otherwise.)
  __ cbnz(_cae ? rscratch2 : _result, L_succeded);


  // LRB + CAS Retry
  // First CAS attempt did not succeed. Execute LRB on 'addr' and retry CAS.
  if (!_cae) {
    __ mov(_result, rscratch2);
  }

  // [Compressed] failure witness is in _result. Decode it and check
  // if it is in collection set.
  if (_narrow) {
    __ decode_heap_oop(_result);
  }

  lrb(&masm, this, _result, _addr_reg, &L_final, _narrow);

  __ bind(L_final);

  // Retry the CAS now that the location can no longer hold a stale
  // from-space pointer for 'expected'.
  Assembler::operand_size size = _narrow ? Assembler::word : Assembler::xword;
  __ cmpxchg(_addr_reg, _expected, _new_val, size, _acquire, _release, _weak, _result);

  if (!_cae) {
    __ cset(_result, Assembler::EQ);
  }
  // If the retry did not succeed, skip the SATB enqueue.
  __ br(Assembler::NE, *continuation());




  // SATB
  __ bind(L_succeded);
  Label short_branch;
  Label L_done;

  // Only enqueue the previous value while marking is active.
  Address gcs_addr(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
  __ ldrb(rscratch1, gcs_addr);
  __ tbnz(rscratch1, ShenandoahHeap::MARKING_BITPOS, short_branch);
  __ b(*continuation());
  __ bind(short_branch);

  // We'll use "_addr_reg" register as third scratch register
  assert(_addr_reg != noreg, "should be");
  RegSet saved = RegSet::of(_addr_reg);
  Register rscratch3 = _addr_reg;
  __ push(saved, sp);

  // On a successful CAS the previous value is 'expected'; null is never enqueued.
  if (_narrow) {
    __ decode_heap_oop(rscratch3, _expected, &L_done);
  } else {
    __ mov(rscratch3, _expected);
    __ cbz(rscratch3, L_done);
  }

  satb(&masm, this, rscratch1, rscratch2, rscratch3, &L_done);

  __ bind(L_done);
  __ pop(saved, sp);

  __ b(*continuation());
}
1024
// Slow-path SATB enqueue shared by the C2 barrier stubs: pushes the previous
// value (scratch3) into the thread-local SATB buffer, or calls into the
// runtime when the buffer is full. scratch1/scratch2 are clobbered. On the
// buffer fast path control branches to *L_done; on the runtime path it falls
// through back to the caller's code.
void ShenandoahBarrierStubC2::satb(MacroAssembler* masm, ShenandoahBarrierStubC2* stub, Register scratch1, Register scratch2, Register scratch3, Label* L_done) {
  Address index(rthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
  Address buffer(rthread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
  Label L_runtime;

  // If buffer is full, call into runtime. (The index counts down; zero
  // means full.)
  masm->ldr(scratch1, index);
  masm->cbz(scratch1, L_runtime);

  // The buffer is not full, store value into it.
  masm->sub(scratch1, scratch1, wordSize);
  masm->str(scratch1, index);
  masm->ldr(scratch2, buffer);
  masm->str(scratch3, Address(scratch2, scratch1));
  masm->b(*L_done);

  // Runtime call
  masm->bind(L_runtime);
  {
    SaveLiveRegisters save_registers(masm, stub);
    masm->mov(c_rarg0, scratch3);
    masm->mov(scratch1, CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_barrier_pre));
    masm->blr(scratch1);
  }
}
1050
// Slow-path load-reference-barrier helper shared by the C2 barrier stubs.
// Replaces 'obj' with the result of the runtime LRB call (r0). For strong
// references the call is skipped (branch to *L_done) when the object is not
// in the collection set; weak/phantom references always call the runtime.
// NOTE(review): the strength bits are read from this->_node while 'stub' is
// only used for SaveLiveRegisters; callers pass 'this' as 'stub', so the two
// agree — confirm they can never differ.
void ShenandoahBarrierStubC2::lrb(MacroAssembler* masm, ShenandoahBarrierStubC2* stub, Register obj, Register addr, Label* L_done, bool narrow) {
  // Weak/phantom loads always need to go to runtime, otherwise check for
  // object in cset.
  if ((_node->barrier_data() & ShenandoahBitStrong) != 0) {
    masm->mov(rscratch2, ShenandoahHeap::in_cset_fast_test_addr());
    masm->lsr(rscratch1, obj, ShenandoahHeapRegion::region_size_bytes_shift_jint());
    masm->ldrb(rscratch2, Address(rscratch2, rscratch1));
    masm->cbz(rscratch2, *L_done);
  }

  {
    SaveLiveRegisters save_registers(masm, stub);
    assert(obj != addr, "sanity address and obj can't be the same.");
    assert(c_rarg0 != addr, "need to be separate registers, otherwise we override data.");
    assert(c_rarg1 != obj, "sanity");

    masm->mov(c_rarg0, obj);
    masm->mov(c_rarg1, addr);

    // Select the runtime entry by reference strength and compressed-ness.
    // NOTE(review): there is no final 'else' — if none of the strength bits
    // were set, rscratch1 would be stale at the blr below. This assumes
    // exactly one strength bit is always set; confirm the invariant.
    if (narrow) {
      if ((_node->barrier_data() & ShenandoahBitStrong) != 0) {
        masm->mov(rscratch1, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong_narrow));
      } else if ((_node->barrier_data() & ShenandoahBitWeak) != 0) {
        masm->mov(rscratch1, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow));
      } else if ((_node->barrier_data() & ShenandoahBitPhantom) != 0) {
        masm->mov(rscratch1, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom_narrow));
      }
    } else {
      if ((_node->barrier_data() & ShenandoahBitStrong) != 0) {
        masm->mov(rscratch1, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong));
      } else if ((_node->barrier_data() & ShenandoahBitWeak) != 0) {
        masm->mov(rscratch1, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak));
      } else if ((_node->barrier_data() & ShenandoahBitPhantom) != 0) {
        masm->mov(rscratch1, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom));
      }
    }
    masm->blr(rscratch1);
    masm->mov(obj, r0);
  }
}
1091
1092 #undef __
1093 #define __ masm->
1094 #endif // COMPILER2
1095
1096 void ShenandoahBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
1097 Register start, Register count, Register scratch) {
1098 assert(ShenandoahCardBarrier, "Should have been checked by caller");
1099
1100 Label L_loop, L_done;
1101 const Register end = count;
1102
1103 // Zero count? Nothing to do.
1104 __ cbz(count, L_done);
1105
1106 // end = start + count << LogBytesPerHeapOop
1107 // last element address to make inclusive
1108 __ lea(end, Address(start, count, Address::lsl(LogBytesPerHeapOop)));
1109 __ sub(end, end, BytesPerHeapOop);
1110 __ lsr(start, start, CardTable::card_shift());
1111 __ lsr(end, end, CardTable::card_shift());
1112
1113 // number of bytes to copy
1114 __ sub(count, end, start);
1115
1200 }
1201
1202 #undef __
1203
1204 #define __ sasm->
1205
1206 void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
1207 __ prologue("shenandoah_pre_barrier", false);
1208
1209 // arg0 : previous value of memory
1210
1211 BarrierSet* bs = BarrierSet::barrier_set();
1212
1213 const Register pre_val = r0;
1214 const Register thread = rthread;
1215 const Register tmp = rscratch1;
1216
1217 Address queue_index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
1218 Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
1219
1220 Label L_done;
1221 Label L_runtime;
1222
1223 // Is marking still active?
1224 Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
1225 __ ldrb(tmp, gc_state);
1226 __ tbz(tmp, ShenandoahHeap::MARKING_BITPOS, L_done);
1227
1228 // Can we store original value in the thread's buffer?
1229 __ ldr(tmp, queue_index);
1230 __ cbz(tmp, L_runtime);
1231
1232 __ sub(tmp, tmp, wordSize);
1233 __ str(tmp, queue_index);
1234 __ ldr(rscratch2, buffer);
1235 __ add(tmp, tmp, rscratch2);
1236 __ load_parameter(0, rscratch2);
1237 __ str(rscratch2, Address(tmp, 0));
1238 __ b(L_done);
1239
1240 __ bind(L_runtime);
1241 __ push_call_clobbered_registers();
1242 __ load_parameter(0, pre_val);
1243 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_barrier_pre), pre_val);
1244 __ pop_call_clobbered_registers();
1245 __ bind(L_done);
1246
1247 __ epilogue();
1248 }
1249
1250 void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, DecoratorSet decorators) {
1251 __ prologue("shenandoah_load_reference_barrier", false);
1252 // arg0 : object to be resolved
1253
1254 __ push_call_clobbered_registers();
1255 __ load_parameter(0, r0);
1256 __ load_parameter(1, r1);
1257
1258 bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators);
1259 bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators);
1260 bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators);
1261 bool is_native = ShenandoahBarrierSet::is_native_access(decorators);
1262 if (is_strong) {
1263 if (is_native) {
1264 __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong));
1265 } else {
|