1 /*
2 * Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
3 * Copyright (c) 2018, 2021, Red Hat, Inc. All rights reserved.
4 * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
5 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 *
7 * This code is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License version 2 only, as
9 * published by the Free Software Foundation.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 *
25 */
26
27 #include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp"
28 #include "gc/shenandoah/mode/shenandoahMode.hpp"
29 #include "gc/shenandoah/shenandoahBarrierSet.hpp"
30 #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
31 #include "gc/shenandoah/shenandoahForwarding.hpp"
32 #include "gc/shenandoah/shenandoahHeap.inline.hpp"
33 #include "gc/shenandoah/shenandoahHeapRegion.hpp"
34 #include "gc/shenandoah/shenandoahRuntime.hpp"
35 #include "gc/shenandoah/shenandoahThreadLocalData.hpp"
36 #include "interpreter/interpreter.hpp"
37 #include "runtime/javaThread.hpp"
38 #include "runtime/sharedRuntime.hpp"
39 #include "utilities/macros.hpp"
40 #ifdef COMPILER1
41 #include "c1/c1_LIRAssembler.hpp"
42 #include "c1/c1_MacroAssembler.hpp"
43 #include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp"
44 #endif
45 #ifdef COMPILER2
46 #include "gc/shenandoah/c2/shenandoahBarrierSetC2.hpp"
47 #endif
48
49 #define __ masm->
50
51 void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
52 Register src, Register dst, Register count) {
53
54 bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;
55
56 if (is_reference_type(type)) {
57 if (ShenandoahCardBarrier) {
58 bool checkcast = (decorators & ARRAYCOPY_CHECKCAST) != 0;
59 bool disjoint = (decorators & ARRAYCOPY_DISJOINT) != 0;
60 bool obj_int = (type == T_OBJECT) && UseCompressedOops;
61
62 // We need to save the original element count because the array copy stub
63 // will destroy the value and we need it for the card marking barrier.
64 if (!checkcast) {
65 if (!obj_int) {
66 // Save count for barrier
67 __ movptr(r11, count);
68 } else if (disjoint) {
69 // Save dst in r11 in the disjoint case
70 __ movq(r11, dst);
71 }
72 }
73 }
74
75 if ((ShenandoahSATBBarrier && !dest_uninitialized) || ShenandoahLoadRefBarrier) {
76 Register thread = r15_thread;
77 assert_different_registers(src, dst, count, thread);
78
79 Label L_done;
80 // Short-circuit if count == 0.
81 __ testptr(count, count);
82 __ jcc(Assembler::zero, L_done);
83
84 // Avoid runtime call when not active.
85 Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
86 int flags;
87 if (ShenandoahSATBBarrier && dest_uninitialized) {
88 flags = ShenandoahHeap::HAS_FORWARDED;
89 } else {
90 flags = ShenandoahHeap::HAS_FORWARDED | ShenandoahHeap::MARKING;
91 }
92 __ testb(gc_state, flags);
93 __ jcc(Assembler::zero, L_done);
94
95 __ push_call_clobbered_registers(/* save_fpu = */ false);
96 // If arguments are not in proper places, shuffle them.
97 // Doing this via the stack is the most straight-forward way to avoid
98 // accidentally smashing any register.
99 if (c_rarg0 != src || c_rarg1 != dst || c_rarg2 != count) {
100 __ push(src);
101 __ push(dst);
102 __ push(count);
103 __ pop(c_rarg2);
104 __ pop(c_rarg1);
105 __ pop(c_rarg0);
106 }
107 address target = nullptr;
108 if (UseCompressedOops) {
109 target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop);
110 } else {
111 target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop);
112 }
113 __ call_VM_leaf(target, 3);
114
115 __ pop_call_clobbered_registers(/* restore_fpu = */ false);
116
117 __ bind(L_done);
118 }
119 }
120
121 }
122
123 void ShenandoahBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
124 Register src, Register dst, Register count) {
125
126 if (ShenandoahCardBarrier && is_reference_type(type)) {
127 bool checkcast = (decorators & ARRAYCOPY_CHECKCAST) != 0;
128 bool disjoint = (decorators & ARRAYCOPY_DISJOINT) != 0;
129 bool obj_int = (type == T_OBJECT) && UseCompressedOops;
130 Register tmp = rax;
131
132 if (!checkcast) {
133 if (!obj_int) {
134 // Save count for barrier
135 count = r11;
136 } else if (disjoint) {
137 // Use the saved dst in the disjoint case
138 dst = r11;
139 }
140 } else {
141 tmp = rscratch1;
142 }
143 gen_write_ref_array_post_barrier(masm, decorators, dst, count, tmp);
144 }
145 }
146
147 void ShenandoahBarrierSetAssembler::satb_barrier(MacroAssembler* masm,
148 Register obj,
149 Register pre_val,
150 Register tmp) {
151 assert(ShenandoahSATBBarrier, "Should be checked by caller");
152 const Register thread = r15_thread;
153
154 Label done;
155 Label runtime;
156
157 assert(pre_val != noreg, "check this code");
158
159 if (obj != noreg) {
160 assert_different_registers(obj, pre_val, tmp);
161 assert(pre_val != rax, "check this code");
162 }
163
164 Address index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
165 Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
166
167 Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
168 __ testb(gc_state, ShenandoahHeap::MARKING);
169 __ jcc(Assembler::zero, done);
170
171 // Do we need to load the previous value?
172 if (obj != noreg) {
173 if (UseCompressedOops) {
174 __ movl(pre_val, Address(obj, 0));
175 __ decode_heap_oop(pre_val);
176 } else {
177 __ movq(pre_val, Address(obj, 0));
178 }
179 }
180
181 // Is the previous value null?
182 __ cmpptr(pre_val, NULL_WORD);
183 __ jcc(Assembler::equal, done);
184
185 // Can we store original value in the thread's buffer?
186 // Is index == 0?
187 // (The index field is typed as size_t.)
188
189 __ movptr(tmp, index); // tmp := *index_adr
190 __ cmpptr(tmp, 0); // tmp == 0?
191 __ jcc(Assembler::equal, runtime); // If yes, goto runtime
192
193 __ subptr(tmp, wordSize); // tmp := tmp - wordSize
194 __ movptr(index, tmp); // *index_adr := tmp
195 __ addptr(tmp, buffer); // tmp := tmp + *buffer_adr
196
197 // Record the previous value
198 __ movptr(Address(tmp, 0), pre_val);
199 __ jmp(done);
200
201 __ bind(runtime);
202
203 // Slow-path call.
204 // Some paths can be reached from the c2i adapter with live fp arguments in registers.
205 __ enter();
206 __ push_call_clobbered_registers(/* save_fpu = */ true);
207
208 assert(thread != c_rarg0, "smashed arg");
209 if (c_rarg0 != pre_val) {
210 __ mov(c_rarg0, pre_val);
211 }
212
213 // Calling with super_call_VM_leaf with c_rarg0 bypasses interpreter checks and avoids any moves.
214 __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_barrier_pre), c_rarg0);
215
216 __ pop_call_clobbered_registers(/* restore_fpu = */ true);
217 __ leave();
218
219 __ bind(done);
220 }
221
222 void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst, Address src, DecoratorSet decorators) {
223 assert(ShenandoahLoadRefBarrier, "Should be enabled");
224
225 bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators);
226 bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators);
227 bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators);
228 bool is_native = ShenandoahBarrierSet::is_native_access(decorators);
229 bool is_narrow = UseCompressedOops && !is_native;
230
231 Label heap_stable, not_cset;
232
233 __ block_comment("load_reference_barrier { ");
234
235 // Check if GC is active
236 Register thread = r15_thread;
237
238 Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
239 int flags = ShenandoahHeap::HAS_FORWARDED;
240 if (!is_strong) {
241 flags |= ShenandoahHeap::WEAK_ROOTS;
242 }
243 __ testb(gc_state, flags);
244 __ jcc(Assembler::zero, heap_stable);
245
246 Register tmp1 = noreg, tmp2 = noreg;
247 if (is_strong) {
248 // Test for object in cset
249 // Allocate temporary registers
250 for (int i = 0; i < Register::available_gp_registers(); i++) {
251 Register r = as_Register(i);
252 if (r != rsp && r != rbp && r != rcx && r != dst && r != src.base() && r != src.index() ) {
253 if (tmp1 == noreg) {
254 tmp1 = r;
255 } else {
256 tmp2 = r;
257 break;
258 }
259 }
260 }
261 assert(tmp1 != noreg, "tmp1 allocated");
262 assert(tmp2 != noreg, "tmp2 allocated");
263 assert_different_registers(tmp1, tmp2, src.base(), src.index());
264 assert_different_registers(tmp1, tmp2, dst);
265
266 __ push(tmp1);
267 __ push(tmp2);
268
269 // Optimized cset-test
270 __ movptr(tmp1, dst);
271 if (AOTCodeCache::is_on_for_dump()) {
272 assert_different_registers(tmp1, tmp2, rcx);
273 __ lea(tmp2, ExternalAddress(AOTRuntimeConstants::grain_shift_address()));
274 __ push(rcx);
275 __ movb(rcx, Address(tmp2));
276 __ shrptr(tmp1);
277 __ pop(rcx);
278 __ lea(tmp2, ExternalAddress(AOTRuntimeConstants::cset_base_address()));
279 __ movptr(tmp2, Address(tmp2));
280 } else {
281 __ shrptr(tmp1, ShenandoahHeapRegion::region_size_bytes_shift_jint());
282 __ movptr(tmp2, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr());
283 }
284 __ movbool(tmp1, Address(tmp1, tmp2, Address::times_1));
285 __ testbool(tmp1);
286 __ jcc(Assembler::zero, not_cset);
287 }
288
289 // Slow-path call.
290 // Save registers that can be clobbered by call.
291 // Some paths can be reached from the c2i adapter with live fp arguments in registers.
292 __ enter();
293 if (dst != rax) {
294 __ push(rax);
295 }
296 __ push_call_clobbered_registers_except(rax, /* save_fpu = */ true);
297
298 // Shuffle registers such that dst is in c_rarg0 and addr in c_rarg1.
299 if (dst == c_rarg1) {
300 __ lea(c_rarg0, src);
301 __ xchgptr(c_rarg1, c_rarg0);
302 } else {
303 __ lea(c_rarg1, src);
304 __ movptr(c_rarg0, dst);
305 }
306
307 address target = nullptr;
308 if (is_strong) {
309 if (is_narrow) {
310 target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong_narrow);
311 } else {
312 target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong);
313 }
314 } else if (is_weak) {
315 if (is_narrow) {
316 target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow);
317 } else {
318 target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak);
319 }
320 } else {
321 assert(is_phantom, "only remaining strength");
322 assert(!is_narrow, "phantom access cannot be narrow");
323 target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom);
324 }
325
326 // Calling with super_call_VM_leaf with c_rarg0/1 bypasses interpreter checks and avoids any moves.
327 __ super_call_VM_leaf(target, c_rarg0, c_rarg1);
328 __ pop_call_clobbered_registers_except(rax, /* restore_fpu = */ true);
329 if (dst != rax) {
330 __ movptr(dst, rax);
331 __ pop(rax);
332 }
333 __ leave();
334
335 __ bind(not_cset);
336
337 if (is_strong) {
338 __ pop(tmp2);
339 __ pop(tmp1);
340 }
341
342 __ bind(heap_stable);
343
344 __ block_comment("} load_reference_barrier");
345 }
346
347 //
348 // Arguments:
349 //
350 // Inputs:
351 // src: oop location, might be clobbered
352 // tmp1: scratch register, might not be valid.
353 //
354 // Output:
355 // dst: oop loaded from src location
356 //
357 // Kill:
358 // tmp1 (if it is valid)
359 //
360 void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
361 Register dst, Address src, Register tmp1) {
362 // 1: non-reference load, no additional barrier is needed
363 if (!is_reference_type(type)) {
364 BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1);
365 return;
366 }
367
368 assert((decorators & ON_UNKNOWN_OOP_REF) == 0, "Not expected");
369
370 // 2: load a reference from src location and apply LRB if needed
371 if (ShenandoahBarrierSet::need_load_reference_barrier(decorators, type)) {
372 Register result_dst = dst;
373 bool use_tmp1_for_dst = false;
374
375 // Preserve src location for LRB
376 if (dst == src.base() || dst == src.index()) {
377 // Use tmp1 for dst if possible, as it is not used in BarrierAssembler::load_at()
378 if (tmp1->is_valid() && tmp1 != src.base() && tmp1 != src.index()) {
379 dst = tmp1;
380 use_tmp1_for_dst = true;
381 } else {
382 dst = rdi;
383 __ push(dst);
384 }
385 assert_different_registers(dst, src.base(), src.index());
386 }
387
388 BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1);
389
390 load_reference_barrier(masm, dst, src, decorators);
391
392 // Move loaded oop to final destination
393 if (dst != result_dst) {
394 __ movptr(result_dst, dst);
395
396 if (!use_tmp1_for_dst) {
397 __ pop(dst);
398 }
399
400 dst = result_dst;
401 }
402 } else {
403 BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1);
404 }
405
406 // 3: apply keep-alive barrier if needed
407 if (ShenandoahBarrierSet::need_keep_alive_barrier(decorators, type)) {
408 satb_barrier(masm /* masm */,
409 noreg /* obj */,
410 dst /* pre_val */,
411 tmp1 /* tmp */);
412 }
413 }
414
415 void ShenandoahBarrierSetAssembler::card_barrier(MacroAssembler* masm, Register obj) {
416 assert(ShenandoahCardBarrier, "Should have been checked by caller");
417
418 // Does a store check for the oop in register obj. The content of
419 // register obj is destroyed afterwards.
420 __ shrptr(obj, CardTable::card_shift());
421
422 // We'll use this register as the TLS base address and also later on
423 // to hold the byte_map_base.
424 Register thread = r15_thread;
425 Register tmp = rscratch1;
426
427 Address curr_ct_holder_addr(thread, in_bytes(ShenandoahThreadLocalData::card_table_offset()));
428 __ movptr(tmp, curr_ct_holder_addr);
429 Address card_addr(tmp, obj, Address::times_1);
430
431 int dirty = CardTable::dirty_card_val();
432 if (UseCondCardMark) {
433 Label L_already_dirty;
434 __ cmpb(card_addr, dirty);
435 __ jccb(Assembler::equal, L_already_dirty);
436 __ movb(card_addr, dirty);
437 __ bind(L_already_dirty);
438 } else {
439 __ movb(card_addr, dirty);
440 }
441 }
442
443 void ShenandoahBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
444 Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) {
445
446 // 1: non-reference types require no barriers
447 if (!is_reference_type(type)) {
448 BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2, tmp3);
449 return;
450 }
451
452 // Flatten object address right away for simplicity: likely needed by barriers
453 assert_different_registers(val, tmp1, tmp2, tmp3, r15_thread);
454 if (dst.index() == noreg && dst.disp() == 0) {
455 if (dst.base() != tmp1) {
456 __ movptr(tmp1, dst.base());
457 }
458 } else {
459 __ lea(tmp1, dst);
460 }
461
462 // 2: pre-barrier: SATB needs the previous value
463 if (ShenandoahBarrierSet::need_satb_barrier(decorators, type)) {
464 satb_barrier(masm,
465 tmp1 /* obj */,
466 tmp2 /* pre_val */,
467 tmp3 /* tmp */);
468 }
469
470 // Store!
471 BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg, noreg);
472
473 // 3: post-barrier: card barrier needs store address
474 bool storing_non_null = (val != noreg);
475 if (ShenandoahBarrierSet::need_card_barrier(decorators, type) && storing_non_null) {
476 card_barrier(masm, tmp1);
477 }
478 }
479
480 void ShenandoahBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
481 Register obj, Register tmp, Label& slowpath) {
482 Label done;
483 // Resolve jobject
484 BarrierSetAssembler::try_resolve_jobject_in_native(masm, jni_env, obj, tmp, slowpath);
485
486 // Check for null.
487 __ testptr(obj, obj);
488 __ jcc(Assembler::zero, done);
489
490 Address gc_state(jni_env, ShenandoahThreadLocalData::gc_state_offset() - JavaThread::jni_environment_offset());
491 __ testb(gc_state, ShenandoahHeap::EVACUATION);
492 __ jccb(Assembler::notZero, slowpath);
493 __ bind(done);
494 }
495
496 void ShenandoahBarrierSetAssembler::try_peek_weak_handle_in_nmethod(MacroAssembler* masm, Register weak_handle, Register obj, Label& slowpath) {
497 Label done;
498
499 // Peek weak handle using the standard implementation.
500 BarrierSetAssembler::try_peek_weak_handle_in_nmethod(masm, weak_handle, obj, slowpath);
501
502 // Check if the reference is null, and if it is, take the fast path.
503 __ testptr(obj, obj);
504 __ jcc(Assembler::zero, done);
505
506 Address gc_state(r15_thread, ShenandoahThreadLocalData::gc_state_offset());
507
508 // Check if the heap is under weak-reference/roots processing, in
509 // which case we need to take the slow path.
510 __ testb(gc_state, ShenandoahHeap::WEAK_ROOTS);
511 __ jcc(Assembler::notZero, slowpath);
512 __ bind(done);
513 }
514
515 // Special Shenandoah CAS implementation that handles false negatives
516 // due to concurrent evacuation.
517 void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm,
518 Register res, Address addr, Register oldval, Register newval,
519 bool exchange, Register tmp1, Register tmp2) {
520 assert(ShenandoahCASBarrier, "Should only be used when CAS barrier is enabled");
521 assert(oldval == rax, "must be in rax for implicit use in cmpxchg");
522 assert_different_registers(oldval, tmp1, tmp2);
523 assert_different_registers(newval, tmp1, tmp2);
524
525 Label L_success, L_failure;
526
527 // Remember oldval for retry logic below
528 if (UseCompressedOops) {
529 __ movl(tmp1, oldval);
530 } else {
531 __ movptr(tmp1, oldval);
532 }
533
534 // Step 1. Fast-path.
535 //
536 // Try to CAS with given arguments. If successful, then we are done.
537
538 if (UseCompressedOops) {
539 __ lock();
540 __ cmpxchgl(newval, addr);
541 } else {
542 __ lock();
543 __ cmpxchgptr(newval, addr);
544 }
545 __ jcc(Assembler::equal, L_success);
546
547 // Step 2. CAS had failed. This may be a false negative.
548 //
549 // The trouble comes when we compare the to-space pointer with the from-space
550 // pointer to the same object. To resolve this, it will suffice to resolve
551 // the value from memory -- this will give both to-space pointers.
552 // If they mismatch, then it was a legitimate failure.
553 //
554 // Before reaching to resolve sequence, see if we can avoid the whole shebang
555 // with filters.
556
557 // Filter: when offending in-memory value is null, the failure is definitely legitimate
558 __ testptr(oldval, oldval);
559 __ jcc(Assembler::zero, L_failure);
560
561 // Filter: when heap is stable, the failure is definitely legitimate
562 const Register thread = r15_thread;
563 Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
564 __ testb(gc_state, ShenandoahHeap::HAS_FORWARDED);
565 __ jcc(Assembler::zero, L_failure);
566
567 if (UseCompressedOops) {
568 __ movl(tmp2, oldval);
569 __ decode_heap_oop(tmp2);
570 } else {
571 __ movptr(tmp2, oldval);
572 }
573
574 // Decode offending in-memory value.
575 // Test if-forwarded
576 __ testb(Address(tmp2, oopDesc::mark_offset_in_bytes()), markWord::marked_value);
577 __ jcc(Assembler::noParity, L_failure); // When odd number of bits, then not forwarded
578 __ jcc(Assembler::zero, L_failure); // When it is 00, then also not forwarded
579
580 // Load and mask forwarding pointer
581 __ movptr(tmp2, Address(tmp2, oopDesc::mark_offset_in_bytes()));
582 __ shrptr(tmp2, 2);
583 __ shlptr(tmp2, 2);
584
585 if (UseCompressedOops) {
586 __ decode_heap_oop(tmp1); // decode for comparison
587 }
588
589 // Now we have the forwarded offender in tmp2.
590 // Compare and if they don't match, we have legitimate failure
591 __ cmpptr(tmp1, tmp2);
592 __ jcc(Assembler::notEqual, L_failure);
593
594 // Step 3. Need to fix the memory ptr before continuing.
595 //
596 // At this point, we have from-space oldval in the register, and its to-space
597 // address is in tmp2. Let's try to update it into memory. We don't care if it
598 // succeeds or not. If it does, then the retrying CAS would see it and succeed.
599 // If this fixup fails, this means somebody else beat us to it, and necessarily
600 // with to-space ptr store. We still have to do the retry, because the GC might
601 // have updated the reference for us.
602
603 if (UseCompressedOops) {
604 __ encode_heap_oop(tmp2); // previously decoded at step 2.
605 }
606
607 if (UseCompressedOops) {
608 __ lock();
609 __ cmpxchgl(tmp2, addr);
610 } else {
611 __ lock();
612 __ cmpxchgptr(tmp2, addr);
613 }
614
615 // Step 4. Try to CAS again.
616 //
617 // This is guaranteed not to have false negatives, because oldval is definitely
618 // to-space, and memory pointer is to-space as well. Nothing is able to store
619 // from-space ptr into memory anymore. Make sure oldval is restored, after being
620 // garbled during retries.
621 //
622 if (UseCompressedOops) {
623 __ movl(oldval, tmp2);
624 } else {
625 __ movptr(oldval, tmp2);
626 }
627
628 if (UseCompressedOops) {
629 __ lock();
630 __ cmpxchgl(newval, addr);
631 } else {
632 __ lock();
633 __ cmpxchgptr(newval, addr);
634 }
635 if (!exchange) {
636 __ jccb(Assembler::equal, L_success); // fastpath, peeking into Step 5, no need to jump
637 }
638
639 // Step 5. If we need a boolean result out of CAS, set the flag appropriately.
640 // and promote the result. Note that we handle the flag from both the 1st and 2nd CAS.
641 // Otherwise, failure witness for CAE is in oldval on all paths, and we can return.
642
643 if (exchange) {
644 __ bind(L_failure);
645 __ bind(L_success);
646 } else {
647 assert(res != noreg, "need result register");
648
649 Label exit;
650 __ bind(L_failure);
651 __ xorptr(res, res);
652 __ jmpb(exit);
653
654 __ bind(L_success);
655 __ movptr(res, 1);
656 __ bind(exit);
657 }
658 }
659
// BLOCK_COMMENT(str): emits an assembler block comment in non-PRODUCT builds
// and expands to nothing in PRODUCT builds.
#ifndef PRODUCT
#define BLOCK_COMMENT(str) __ block_comment(str)
#else
#define BLOCK_COMMENT(str) /* nothing */
#endif

// BIND(label): binds the label and follows it with a block comment carrying the
// label's name. Meant to be used as "__ BIND(label);".
#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")

// TIMES_OOP: address scale factor for one heap oop (4 bytes with compressed
// oops, 8 bytes otherwise).
#define TIMES_OOP (UseCompressedOops ? Address::times_4 : Address::times_8)
669
670 void ShenandoahBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
671 Register addr, Register count,
672 Register tmp) {
673 assert(ShenandoahCardBarrier, "Should have been checked by caller");
674
675 Label L_loop, L_done;
676 const Register end = count;
677 assert_different_registers(addr, end);
678
679 // Zero count? Nothing to do.
680 __ testl(count, count);
681 __ jccb(Assembler::zero, L_done);
682
683 const Register thread = r15_thread;
684 Address curr_ct_holder_addr(thread, in_bytes(ShenandoahThreadLocalData::card_table_offset()));
685 __ movptr(tmp, curr_ct_holder_addr);
686
687 __ leaq(end, Address(addr, count, TIMES_OOP, 0)); // end == addr+count*oop_size
688 __ subptr(end, BytesPerHeapOop); // end - 1 to make inclusive
689 __ shrptr(addr, CardTable::card_shift());
690 __ shrptr(end, CardTable::card_shift());
691 __ subptr(end, addr); // end --> cards count
692
693 __ addptr(addr, tmp);
694
695 __ BIND(L_loop);
696 __ movb(Address(addr, count, Address::times_1), 0);
697 __ decrement(count);
698 __ jccb(Assembler::greaterEqual, L_loop);
699
700 __ BIND(L_done);
701 }
702
703 #undef __
704
705 #ifdef COMPILER1
706
707 #define __ ce->masm()->
708
709 void ShenandoahBarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub) {
710 ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
711 // At this point we know that marking is in progress.
712 // If do_load() is true then we have to emit the
713 // load of the previous value; otherwise it has already
714 // been loaded into _pre_val.
715
716 __ bind(*stub->entry());
717 assert(stub->pre_val()->is_register(), "Precondition.");
718
719 Register pre_val_reg = stub->pre_val()->as_register();
720
721 if (stub->do_load()) {
722 ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/);
723 }
724
725 __ cmpptr(pre_val_reg, NULL_WORD);
726 __ jcc(Assembler::equal, *stub->continuation());
727 ce->store_parameter(stub->pre_val()->as_register(), 0);
728 __ call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin()));
729 __ jmp(*stub->continuation());
730
731 }
732
733 void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub) {
734 ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
735 __ bind(*stub->entry());
736
737 DecoratorSet decorators = stub->decorators();
738 bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators);
739 bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators);
740 bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators);
741 bool is_native = ShenandoahBarrierSet::is_native_access(decorators);
742
743 Register obj = stub->obj()->as_register();
744 Register res = stub->result()->as_register();
745 Register addr = stub->addr()->as_pointer_register();
746 Register tmp1 = stub->tmp1()->as_register();
747 Register tmp2 = stub->tmp2()->as_register();
748 assert_different_registers(obj, res, addr, tmp1, tmp2);
749
750 Label slow_path;
751
752 assert(res == rax, "result must arrive in rax");
753
754 if (res != obj) {
755 __ mov(res, obj);
756 }
757
758 if (is_strong) {
759 // Check for object being in the collection set.
760 __ mov(tmp1, res);
761 if (AOTCodeCache::is_on_for_dump()) {
762 __ push(rcx);
763 __ lea(rcx, ExternalAddress(AOTRuntimeConstants::grain_shift_address()));
764 __ movl(rcx, Address(rcx));
765 if (tmp1 != rcx) {
766 __ mov(tmp1, res);
767 __ shrptr(tmp1);
768 __ pop(rcx);
769 } else {
770 assert_different_registers(tmp2, rcx);
771 __ mov(tmp2, res);
772 __ shrptr(tmp2);
773 __ pop(rcx);
774 __ movptr(tmp1, tmp2);
775 }
776 __ lea(tmp2, ExternalAddress(AOTRuntimeConstants::cset_base_address()));
777 __ movptr(tmp2, Address(tmp2));
778 } else {
779 __ shrptr(tmp1, ShenandoahHeapRegion::region_size_bytes_shift_jint());
780 __ movptr(tmp2, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr());
781 }
782 __ movbool(tmp2, Address(tmp2, tmp1, Address::times_1));
783 __ testbool(tmp2);
784 __ jcc(Assembler::zero, *stub->continuation());
785 }
786
787 __ bind(slow_path);
788 ce->store_parameter(res, 0);
789 ce->store_parameter(addr, 1);
790 if (is_strong) {
791 if (is_native) {
792 __ call(RuntimeAddress(bs->load_reference_barrier_strong_native_rt_code_blob()->code_begin()));
793 } else {
794 __ call(RuntimeAddress(bs->load_reference_barrier_strong_rt_code_blob()->code_begin()));
795 }
796 } else if (is_weak) {
797 __ call(RuntimeAddress(bs->load_reference_barrier_weak_rt_code_blob()->code_begin()));
798 } else {
799 assert(is_phantom, "only remaining strength");
800 __ call(RuntimeAddress(bs->load_reference_barrier_phantom_rt_code_blob()->code_begin()));
801 }
802 __ jmp(*stub->continuation());
803 }
804
805 #undef __
806
807 #define __ sasm->
808
809 void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
810 __ prologue("shenandoah_pre_barrier", false);
811 // arg0 : previous value of memory
812
813 __ push(rax);
814 __ push(rdx);
815
816 const Register pre_val = rax;
817 const Register thread = r15_thread;
818 const Register tmp = rdx;
819
820 Address queue_index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
821 Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
822
823 Label done;
824 Label runtime;
825
826 // Is SATB still active?
827 Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
828 __ testb(gc_state, ShenandoahHeap::MARKING);
829 __ jcc(Assembler::zero, done);
830
831 // Can we store original value in the thread's buffer?
832
833 __ movptr(tmp, queue_index);
834 __ testptr(tmp, tmp);
835 __ jcc(Assembler::zero, runtime);
836 __ subptr(tmp, wordSize);
837 __ movptr(queue_index, tmp);
838 __ addptr(tmp, buffer);
839
840 // prev_val (rax)
841 __ load_parameter(0, pre_val);
842 __ movptr(Address(tmp, 0), pre_val);
843 __ jmp(done);
844
845 __ bind(runtime);
846
847 __ save_live_registers_no_oop_map(true);
848
849 // load the pre-value
850 __ load_parameter(0, rcx);
851 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_barrier_pre), rcx);
852
853 __ restore_live_registers(true);
854
855 __ bind(done);
856
857 __ pop(rdx);
858 __ pop(rax);
859
860 __ epilogue();
861 }
862
863 void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, DecoratorSet decorators) {
864 __ prologue("shenandoah_load_reference_barrier", false);
865 // arg0 : object to be resolved
866
867 __ save_live_registers_no_oop_map(true);
868
869 bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators);
870 bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators);
871 bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators);
872 bool is_native = ShenandoahBarrierSet::is_native_access(decorators);
873
874 __ load_parameter(0, c_rarg0);
875 __ load_parameter(1, c_rarg1);
876 if (is_strong) {
877 if (is_native) {
878 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong), c_rarg0, c_rarg1);
879 } else {
880 if (UseCompressedOops) {
881 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong_narrow), c_rarg0, c_rarg1);
882 } else {
883 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong), c_rarg0, c_rarg1);
884 }
885 }
886 } else if (is_weak) {
887 assert(!is_native, "weak must not be called off-heap");
888 if (UseCompressedOops) {
889 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow), c_rarg0, c_rarg1);
890 } else {
891 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak), c_rarg0, c_rarg1);
892 }
893 } else {
894 assert(is_phantom, "only remaining strength");
895 assert(is_native, "phantom must only be called off-heap");
896 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom), c_rarg0, c_rarg1);
897 }
898
899 __ restore_live_registers_except_rax(true);
900
901 __ epilogue();
902 }
903
904 #undef __
905
906 #endif // COMPILER1
907
908 #ifdef COMPILER2
909
910 #undef __
911 #define __ masm->
912
913 void ShenandoahBarrierSetAssembler::load_c2(const MachNode* node, MacroAssembler* masm, Register dst, Address src, bool narrow) {
914 // Do the actual load. This load is the candidate for implicit null check, and MUST come first.
915 if (narrow) {
916 __ movl(dst, src);
917 } else {
918 __ movq(dst, src);
919 }
920
921 ShenandoahBarrierStubC2::load_post(masm, node, dst, src, noreg, noreg, narrow);
922 }
923
924 void ShenandoahBarrierSetAssembler::store_c2(const MachNode* node, MacroAssembler* masm,
925 Address dst, bool dst_narrow,
926 Register src, bool src_narrow,
927 Register tmp) {
928
929 ShenandoahBarrierStubC2::store_pre(masm, node, tmp, dst, noreg, noreg, dst_narrow);
930
931 // Need to encode into tmp, because we cannot clobber src.
932 if (dst_narrow && !src_narrow) {
933 __ movq(tmp, src);
934 if ((node->barrier_data() & ShenandoahBitNotNull) == 0) {
935 __ encode_heap_oop(tmp);
936 } else {
937 __ encode_heap_oop_not_null(tmp);
938 }
939 src = tmp;
940 }
941
942 // Do the actual store
943 if (dst_narrow) {
944 __ movl(dst, src);
945 } else {
946 __ movq(dst, src);
947 }
948
949 ShenandoahBarrierStubC2::store_post(masm, node, dst, tmp, noreg);
950 }
951
952 void ShenandoahBarrierSetAssembler::compare_and_set_c2(const MachNode* node, MacroAssembler* masm,
953 Register res, Address addr,
954 Register oldval, Register newval, Register tmp,
955 bool narrow) {
956
957 assert(oldval == rax, "must be in rax for implicit use in cmpxchg");
958
959 // Oldval and newval can be in the same register, but all other registers should be
960 // distinct for extra safety, as we shuffle register values around.
961 assert_different_registers(oldval, tmp, addr.base(), addr.index());
962 assert_different_registers(newval, tmp, addr.base(), addr.index());
963
964 ShenandoahBarrierStubC2::load_store_pre(masm, node, tmp, addr, noreg, noreg, narrow);
965
966 // CAS!
967 __ lock();
968 if (narrow) {
969 __ cmpxchgl(newval, addr);
970 } else {
971 __ cmpxchgptr(newval, addr);
972 }
973
974 // If we need a boolean result out of CAS, set the flag appropriately and promote the result.
975 if (res != noreg) {
976 __ setcc(Assembler::equal, res);
977 }
978
979 ShenandoahBarrierStubC2::load_store_post(masm, node, addr, tmp, noreg);
980 }
981
982 void ShenandoahBarrierSetAssembler::get_and_set_c2(const MachNode* node, MacroAssembler* masm, Register newval, Address addr, Register tmp, bool narrow) {
983 assert_different_registers(newval, tmp, addr.base(), addr.index());
984
985 ShenandoahBarrierStubC2::load_store_pre(masm, node, tmp, addr, noreg, noreg, narrow);
986
987 if (narrow) {
988 __ xchgl(newval, addr);
989 } else {
990 __ xchgq(newval, addr);
991 }
992
993 ShenandoahBarrierStubC2::load_store_post(masm, node, addr, tmp, noreg);
994 }
995
996 #undef __
997 #define __ masm.
998
999 void ShenandoahBarrierStubC2::cardtable(MacroAssembler& masm, Address addr, Register tmp1, Register tmp2) {
1000 Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
1001
1002 __ lea(tmp1, addr);
1003 __ shrptr(tmp1, CardTable::card_shift());
1004 __ addptr(tmp1, Address(r15_thread, in_bytes(ShenandoahThreadLocalData::card_table_offset())));
1005 Address card_address(tmp1, 0);
1006
1007 assert(CardTable::dirty_card_val() == 0, "Encoding assumption");
1008 Label L_done;
1009 if (UseCondCardMark) {
1010 __ cmpb(card_address, 0);
1011 __ jccb(Assembler::equal, L_done);
1012 }
1013 if (UseCompressedOops && CompressedOops::base() == nullptr) {
1014 __ movb(card_address, r12);
1015 } else {
1016 __ movb(card_address, 0);
1017 }
1018 __ bind(L_done);
1019 }
1020
1021 void ShenandoahBarrierStubC2::enter_if_gc_state(MacroAssembler& masm, const char test_state, Register tmp) {
1022 Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
1023
1024 Address gc_state_fast(r15_thread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(test_state)));
1025 __ cmpb(gc_state_fast, 0);
1026 __ jcc(Assembler::notEqual, *entry());
1027 __ bind(*continuation());
1028 }
1029
1030 void ShenandoahBarrierStubC2::emit_code(MacroAssembler& masm) {
1031 Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
1032 assert(_needs_keep_alive_barrier || _needs_load_ref_barrier, "Why are you here?");
1033
1034 // On x86, there is a significant penalty with unaligned branch target, for example
1035 // when the target instruction straggles the fetch line. It makes (performance) sense
1036 // to spend some code size to align the target better.
1037 __ align(16);
1038 __ bind(*entry());
1039
1040 // If we need to load ourselves, do it here.
1041 if (_do_load) {
1042 if (_narrow) {
1043 __ movl(_obj, _addr);
1044 } else {
1045 __ movq(_obj, _addr);
1046 }
1047 }
1048
1049 // If the object is null, there is no point in applying barriers.
1050 maybe_far_jump_if_zero(masm, _obj);
1051
1052 // We need to make sure that loads done by callers survive across slow-path calls.
1053 // For self-loads, we need to care about the case when both KA and LRB are enabled (rare).
1054 bool needs_both_barriers = _needs_keep_alive_barrier && _needs_load_ref_barrier;
1055 if (!_do_load || needs_both_barriers) {
1056 preserve(_obj);
1057 }
1058
1059 // Go for barriers. Barriers can return straight to continuation, as long
1060 // as another barrier is not needed.
1061 if (needs_both_barriers) {
1062 keepalive(masm, nullptr);
1063 lrb(masm);
1064 } else if (_needs_keep_alive_barrier) {
1065 keepalive(masm, continuation());
1066 } else if (_needs_load_ref_barrier) {
1067 lrb(masm);
1068 } else {
1069 ShouldNotReachHere();
1070 }
1071 }
1072
1073 void ShenandoahBarrierStubC2::keepalive(MacroAssembler& masm, Label* L_done) {
1074 Address gc_state_fast(r15_thread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(ShenandoahHeap::MARKING)));
1075 Address index(r15_thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
1076 Address buffer(r15_thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
1077
1078 Label L_through, L_pop_and_slow;
1079
1080 // If another barrier is enabled as well, do a runtime check for a specific barrier.
1081 if (_needs_load_ref_barrier) {
1082 assert(L_done == nullptr, "L_done is always null when _needs_load_ref_barrier is true");
1083 __ cmpb(gc_state_fast, 0);
1084 __ jcc(Assembler::equal, L_through);
1085 }
1086
1087 // Need temp to work, allocate one now.
1088 bool tmp_live;
1089 Register tmp = select_temp_register(tmp_live);
1090 if (tmp_live) {
1091 __ push(tmp);
1092 }
1093
1094 // Fast-path: put object into buffer.
1095 // If buffer is already full, go slow.
1096 __ movptr(tmp, index);
1097 __ subptr(tmp, wordSize);
1098 __ jccb(Assembler::below, L_pop_and_slow);
1099 __ movptr(index, tmp);
1100 __ addptr(tmp, buffer);
1101
1102 // Store the object in queue.
1103 // If object is narrow, we need to decode it before inserting.
1104 // We can skip the re-encoding if we know that object is not preserved.
1105 if (_narrow) {
1106 __ decode_heap_oop_not_null(_obj);
1107 }
1108 __ movptr(Address(tmp, 0), _obj);
1109 if (_narrow && is_preserved(_obj)) {
1110 __ encode_heap_oop_not_null(_obj);
1111 }
1112
1113 // Fast-path exits here.
1114 if (tmp_live) {
1115 __ pop(tmp);
1116 }
1117
1118 if (L_done != nullptr) {
1119 __ jmp(*L_done);
1120 } else {
1121 __ jmp(L_through);
1122 }
1123
1124 // Slow-path: call runtime to handle.
1125 // Need to pop tmp immediately for stack to remain aligned.
1126 __ bind(L_pop_and_slow);
1127 if (tmp_live) {
1128 __ pop(tmp);
1129 }
1130 {
1131 SaveLiveRegisters slr(&masm, this);
1132
1133 // Shuffle in the arguments. The end result should be:
1134 // c_rarg0 <-- obj
1135 if (c_rarg0 != _obj) {
1136 __ mov(c_rarg0, _obj);
1137 }
1138
1139 // Go to runtime and handle the rest there.
1140 __ call(RuntimeAddress(keepalive_runtime_entry_addr()));
1141 }
1142 if (L_done != nullptr) {
1143 __ jmp(*L_done);
1144 } else {
1145 __ bind(L_through);
1146 }
1147 }
1148
1149 void ShenandoahBarrierStubC2::lrb(MacroAssembler& masm) {
1150 Label L_pop_and_slow, L_slow;
1151
1152 // If another barrier is enabled as well, do a runtime check for a specific barrier.
1153 if (_needs_keep_alive_barrier) {
1154 char state_to_check = ShenandoahHeap::HAS_FORWARDED | (_needs_load_ref_weak_barrier ? ShenandoahHeap::WEAK_ROOTS : 0);
1155 Address gc_state_fast(r15_thread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(state_to_check)));
1156 __ cmpb(gc_state_fast, 0);
1157 __ jcc(Assembler::equal, *continuation());
1158 }
1159
1160 // If weak references are being processed, weak/phantom loads need to go slow,
1161 // regardless of their cset status.
1162 if (_needs_load_ref_weak_barrier) {
1163 Address gc_state_fast(r15_thread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(ShenandoahHeap::WEAK_ROOTS)));
1164 __ cmpb(gc_state_fast, 0);
1165 __ jccb(Assembler::notEqual, L_slow);
1166 }
1167
1168 bool is_aot = AOTCodeCache::is_on_for_dump();
1169
1170 // Need temp to work, allocate one now.
1171 bool tmp_live;
1172 Register tmp = select_temp_register(tmp_live, /* skip_reg1 = */ is_aot ? rcx : noreg);
1173 if (tmp_live) {
1174 __ push(tmp);
1175 }
1176
1177 // Compute the cset bitmap index
1178 if (_narrow) {
1179 __ decode_heap_oop_not_null(tmp, _obj);
1180 } else {
1181 __ movptr(tmp, _obj);
1182 }
1183
1184 Address cset_addr_arg;
1185 intptr_t cset_addr = reinterpret_cast<intptr_t>(ShenandoahHeap::in_cset_fast_test_addr());
1186 if (!is_aot && cset_addr < INT32_MAX) {
1187 // Cset bitmap is at easily encodeable address. Just use it as displacement.
1188 __ shrptr(tmp, ShenandoahHeapRegion::region_size_bytes_shift_jint());
1189 cset_addr_arg = Address(tmp, checked_cast<int>(cset_addr));
1190 } else {
1191 bool tmp2_live;
1192 Register tmp2 = select_temp_register(tmp2_live, /* skip_reg1 = */ tmp, /* skip_reg2 = */ is_aot ? rcx : noreg);
1193 if (tmp2_live) {
1194 __ push(tmp2);
1195 }
1196 if (is_aot) {
1197 // Generating AOT code, pull the cset bitmap and region shift from AOT table.
1198 assert_different_registers(tmp, tmp2, rcx);
1199 __ push(rcx);
1200 __ lea(rcx, ExternalAddress(AOTRuntimeConstants::grain_shift_address()));
1201 __ movl(rcx, Address(rcx));
1202 __ shrptr(tmp);
1203 __ pop(rcx);
1204 __ lea(tmp2, ExternalAddress(AOTRuntimeConstants::cset_base_address()));
1205 __ addptr(tmp, Address(tmp2));
1206 } else {
1207 // Cset bitmap is far away. Add its address fully.
1208 __ shrptr(tmp, ShenandoahHeapRegion::region_size_bytes_shift_jint());
1209 __ movptr(tmp2, cset_addr);
1210 __ addptr(tmp, tmp2);
1211 }
1212 if (tmp2_live) {
1213 __ pop(tmp2);
1214 }
1215 cset_addr_arg = Address(tmp, 0);
1216 }
1217
1218 // Cset-check. Fall-through to slow if in collection set.
1219 __ cmpb(cset_addr_arg, 0);
1220 if (tmp_live) {
1221 __ jccb(Assembler::notEqual, L_pop_and_slow);
1222 __ pop(tmp);
1223 __ jmp(*continuation());
1224 } else {
1225 // Nothing else to do, jump back
1226 __ jcc(Assembler::equal, *continuation());
1227 }
1228
1229 // Slow path
1230 __ bind(L_pop_and_slow);
1231 // Need to pop tmp immediately for stack to remain aligned.
1232 if (tmp_live) {
1233 __ pop(tmp);
1234 }
1235 __ bind(L_slow);
1236
1237 // Obj is the result, need to temporarily stop preserving it.
1238 bool is_obj_preserved = is_preserved(_obj);
1239 if (is_obj_preserved) {
1240 dont_preserve(_obj);
1241 }
1242 {
1243 SaveLiveRegisters slr(&masm, this);
1244
1245 assert_different_registers(rax, c_rarg0, c_rarg1);
1246
1247 // Shuffle in the arguments. The end result should be:
1248 // c_rarg0 <-- obj
1249 // c_rarg1 <-- lea(addr)
1250 if (_obj == c_rarg0) {
1251 __ lea(c_rarg1, _addr);
1252 } else if (_obj == c_rarg1) {
1253 // Set up arguments in reverse, and then flip them
1254 __ lea(c_rarg0, _addr);
1255 __ xchgptr(c_rarg0, c_rarg1);
1256 } else {
1257 assert_different_registers(_obj, c_rarg0, c_rarg1);
1258 __ lea(c_rarg1, _addr);
1259 __ movptr(c_rarg0, _obj);
1260 }
1261
1262 // Go to runtime and handle the rest there.
1263 __ call(RuntimeAddress(lrb_runtime_entry_addr()));
1264
1265 // Save the result where needed.
1266 if (_narrow) {
1267 __ movl(_obj, rax);
1268 } else if (_obj != rax) {
1269 __ movptr(_obj, rax);
1270 }
1271 }
1272 if (is_obj_preserved) {
1273 preserve(_obj);
1274 }
1275
1276 __ jmp(*continuation());
1277 }
1278
1279 int ShenandoahBarrierStubC2::available_gp_registers() {
1280 return Register::available_gp_registers();
1281 }
1282
1283 bool ShenandoahBarrierStubC2::is_special_register(Register r) {
1284 return r == rsp || r == rbp || r == r12_heapbase || r == r15_thread;
1285 }
1286
1287 void ShenandoahBarrierStubC2::post_init() {
1288 // Do nothing.
1289 }
1290
1291 void ShenandoahBarrierStubC2::maybe_far_jump_if_zero(MacroAssembler& masm, Register reg) {
1292 if (_narrow) {
1293 __ testl(reg, reg);
1294 } else {
1295 __ testq(reg, reg);
1296 }
1297 __ jcc(Assembler::zero, *continuation());
1298 }
1299
1300 #endif // COMPILER2