1 /*
2 * Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
3 * Copyright (c) 2018, 2021, Red Hat, Inc. All rights reserved.
4 * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
5 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 *
7 * This code is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License version 2 only, as
9 * published by the Free Software Foundation.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 *
25 */
26
27 #include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp"
28 #include "gc/shenandoah/mode/shenandoahMode.hpp"
29 #include "gc/shenandoah/shenandoahBarrierSet.hpp"
30 #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
31 #include "gc/shenandoah/shenandoahForwarding.hpp"
32 #include "gc/shenandoah/shenandoahHeap.inline.hpp"
33 #include "gc/shenandoah/shenandoahHeapRegion.hpp"
34 #include "gc/shenandoah/shenandoahRuntime.hpp"
35 #include "gc/shenandoah/shenandoahThreadLocalData.hpp"
36 #include "interpreter/interpreter.hpp"
37 #include "nativeInst_x86.hpp"
38 #include "runtime/javaThread.hpp"
39 #include "runtime/sharedRuntime.hpp"
40 #include "utilities/macros.hpp"
41 #ifdef COMPILER1
42 #include "c1/c1_LIRAssembler.hpp"
43 #include "c1/c1_MacroAssembler.hpp"
44 #include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp"
45 #endif
46 #ifdef COMPILER2
47 #include "gc/shenandoah/c2/shenandoahBarrierSetC2.hpp"
48 #endif
49
50 #define __ masm->
51
52 void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
53 Register src, Register dst, Register count) {
54
55 bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;
56
57 if (is_reference_type(type)) {
58 if (ShenandoahCardBarrier) {
59 bool checkcast = (decorators & ARRAYCOPY_CHECKCAST) != 0;
60 bool disjoint = (decorators & ARRAYCOPY_DISJOINT) != 0;
61 bool obj_int = (type == T_OBJECT) && UseCompressedOops;
62
63 // We need to save the original element count because the array copy stub
64 // will destroy the value and we need it for the card marking barrier.
65 if (!checkcast) {
66 if (!obj_int) {
67 // Save count for barrier
68 __ movptr(r11, count);
69 } else if (disjoint) {
70 // Save dst in r11 in the disjoint case
71 __ movq(r11, dst);
72 }
73 }
74 }
75
76 if ((ShenandoahSATBBarrier && !dest_uninitialized) || ShenandoahLoadRefBarrier) {
77 Register thread = r15_thread;
78 assert_different_registers(src, dst, count, thread);
79
80 Label L_done;
81 // Short-circuit if count == 0.
82 __ testptr(count, count);
83 __ jcc(Assembler::zero, L_done);
84
85 // Avoid runtime call when not active.
86 Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
87 int flags;
88 if (ShenandoahSATBBarrier && dest_uninitialized) {
89 flags = ShenandoahHeap::HAS_FORWARDED;
90 } else {
91 flags = ShenandoahHeap::HAS_FORWARDED | ShenandoahHeap::MARKING;
92 }
93 __ testb(gc_state, flags);
94 __ jcc(Assembler::zero, L_done);
95
96 __ push_call_clobbered_registers(/* save_fpu = */ false);
97 // If arguments are not in proper places, shuffle them.
98 // Doing this via the stack is the most straight-forward way to avoid
99 // accidentally smashing any register.
100 if (c_rarg0 != src || c_rarg1 != dst || c_rarg2 != count) {
101 __ push(src);
102 __ push(dst);
103 __ push(count);
104 __ pop(c_rarg2);
105 __ pop(c_rarg1);
106 __ pop(c_rarg0);
107 }
108 address target = nullptr;
109 if (UseCompressedOops) {
110 target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop);
111 } else {
112 target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop);
113 }
114 __ call_VM_leaf(target, 3);
115
116 __ pop_call_clobbered_registers(/* restore_fpu = */ false);
117
118 __ bind(L_done);
119 }
120 }
121
122 }
123
124 void ShenandoahBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
125 Register src, Register dst, Register count) {
126
127 if (ShenandoahCardBarrier && is_reference_type(type)) {
128 bool checkcast = (decorators & ARRAYCOPY_CHECKCAST) != 0;
129 bool disjoint = (decorators & ARRAYCOPY_DISJOINT) != 0;
130 bool obj_int = (type == T_OBJECT) && UseCompressedOops;
131 Register tmp = rax;
132
133 if (!checkcast) {
134 if (!obj_int) {
135 // Save count for barrier
136 count = r11;
137 } else if (disjoint) {
138 // Use the saved dst in the disjoint case
139 dst = r11;
140 }
141 } else {
142 tmp = rscratch1;
143 }
144 gen_write_ref_array_post_barrier(masm, decorators, dst, count, tmp);
145 }
146 }
147
148 void ShenandoahBarrierSetAssembler::satb_barrier(MacroAssembler* masm,
149 Register obj,
150 Register pre_val,
151 Register tmp) {
152 assert(ShenandoahSATBBarrier, "Should be checked by caller");
153 const Register thread = r15_thread;
154
155 Label done;
156 Label runtime;
157
158 assert(pre_val != noreg, "check this code");
159
160 if (obj != noreg) {
161 assert_different_registers(obj, pre_val, tmp);
162 assert(pre_val != rax, "check this code");
163 }
164
165 Address index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
166 Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
167
168 Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
169 __ testb(gc_state, ShenandoahHeap::MARKING);
170 __ jcc(Assembler::zero, done);
171
172 // Do we need to load the previous value?
173 if (obj != noreg) {
174 if (UseCompressedOops) {
175 __ movl(pre_val, Address(obj, 0));
176 __ decode_heap_oop(pre_val);
177 } else {
178 __ movq(pre_val, Address(obj, 0));
179 }
180 }
181
182 // Is the previous value null?
183 __ cmpptr(pre_val, NULL_WORD);
184 __ jcc(Assembler::equal, done);
185
186 // Can we store original value in the thread's buffer?
187 // Is index == 0?
188 // (The index field is typed as size_t.)
189
190 __ movptr(tmp, index); // tmp := *index_adr
191 __ cmpptr(tmp, 0); // tmp == 0?
192 __ jcc(Assembler::equal, runtime); // If yes, goto runtime
193
194 __ subptr(tmp, wordSize); // tmp := tmp - wordSize
195 __ movptr(index, tmp); // *index_adr := tmp
196 __ addptr(tmp, buffer); // tmp := tmp + *buffer_adr
197
198 // Record the previous value
199 __ movptr(Address(tmp, 0), pre_val);
200 __ jmp(done);
201
202 __ bind(runtime);
203
204 // Slow-path call.
205 // Some paths can be reached from the c2i adapter with live fp arguments in registers.
206 __ enter();
207 __ push_call_clobbered_registers(/* save_fpu = */ true);
208
209 assert(thread != c_rarg0, "smashed arg");
210 if (c_rarg0 != pre_val) {
211 __ mov(c_rarg0, pre_val);
212 }
213
214 // Calling with super_call_VM_leaf with c_rarg0 bypasses interpreter checks and avoids any moves.
215 __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_barrier_pre), c_rarg0);
216
217 __ pop_call_clobbered_registers(/* restore_fpu = */ true);
218 __ leave();
219
220 __ bind(done);
221 }
222
// Emits the load-reference barrier (LRB) for an oop that has already been
// loaded into dst from location src.
//
//   dst        - holds the loaded reference on entry and the barrier result on exit
//   src        - address the reference was loaded from; passed to the runtime
//   decorators - select strong / weak / phantom and native access flavours
//
// Fast exits: the whole barrier is skipped when none of the tested gc-state
// bits are set, and for strong accesses also when the collection-set table
// entry for the object is zero. Otherwise the matching
// ShenandoahRuntime::load_reference_barrier_* entry is called and its result
// (returned in rax) is moved into dst.
void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst, Address src, DecoratorSet decorators) {
  assert(ShenandoahLoadRefBarrier, "Should be enabled");

  bool is_strong  = ShenandoahBarrierSet::is_strong_access(decorators);
  bool is_weak    = ShenandoahBarrierSet::is_weak_access(decorators);
  bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators);
  bool is_native  = ShenandoahBarrierSet::is_native_access(decorators);
  // Narrow runtime entries are only used for non-native accesses under compressed oops.
  bool is_narrow  = UseCompressedOops && !is_native;

  Label heap_stable, not_cset;

  __ block_comment("load_reference_barrier { ");

  // Check if GC is active
  Register thread = r15_thread;

  Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
  int flags = ShenandoahHeap::HAS_FORWARDED;
  // Non-strong accesses additionally take the barrier while WEAK_ROOTS is set.
  if (!is_strong) {
    flags |= ShenandoahHeap::WEAK_ROOTS;
  }
  __ testb(gc_state, flags);
  __ jcc(Assembler::zero, heap_stable);

  Register tmp1 = noreg, tmp2 = noreg;
  if (is_strong) {
    // Test for object in cset
    // Allocate temporary registers
    // Pick the first two general purpose registers that are not rsp, rbp, rcx,
    // dst or part of the src address. rcx is excluded because the AOT path
    // below uses it as the shift count.
    for (int i = 0; i < Register::available_gp_registers(); i++) {
      Register r = as_Register(i);
      if (r != rsp && r != rbp && r != rcx && r != dst && r != src.base() && r != src.index() ) {
        if (tmp1 == noreg) {
          tmp1 = r;
        } else {
          tmp2 = r;
          break;
        }
      }
    }
    assert(tmp1 != noreg, "tmp1 allocated");
    assert(tmp2 != noreg, "tmp2 allocated");
    assert_different_registers(tmp1, tmp2, src.base(), src.index());
    assert_different_registers(tmp1, tmp2, dst);

    // The temporaries may hold live values: save them here, restore them
    // after not_cset below.
    __ push(tmp1);
    __ push(tmp2);

    // Optimized cset-test
    __ movptr(tmp1, dst);
    if (AOTCodeCache::is_on_for_dump()) {
      // When dumping AOT code, the region shift and the cset table base are
      // read from AOTRuntimeConstants at run time instead of being embedded
      // as immediates.
      assert_different_registers(tmp1, tmp2, rcx);
      __ lea(tmp2, ExternalAddress(AOTRuntimeConstants::grain_shift_address()));
      // rcx is borrowed for the shift count and restored right after the shift.
      __ push(rcx);
      __ movb(rcx, Address(tmp2));
      __ shrptr(tmp1);
      __ pop(rcx);
      __ lea(tmp2, ExternalAddress(AOTRuntimeConstants::cset_base_address()));
      __ movptr(tmp2, Address(tmp2));
    } else {
      __ shrptr(tmp1, ShenandoahHeapRegion::region_size_bytes_shift_jint());
      __ movptr(tmp2, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr());
    }
    // tmp1 := cset_table[region index]; zero means "not in collection set".
    __ movbool(tmp1, Address(tmp1, tmp2, Address::times_1));
    __ testbool(tmp1);
    __ jcc(Assembler::zero, not_cset);
  }

  // Slow-path call.
  // Save registers that can be clobbered by call.
  // Some paths can be reached from the c2i adapter with live fp arguments in registers.
  __ enter();
  // rax carries the call result, so it is kept out of the generic save/restore
  // and preserved by hand when it is not the destination.
  if (dst != rax) {
    __ push(rax);
  }
  __ push_call_clobbered_registers_except(rax, /* save_fpu = */ true);

  // Shuffle registers such that dst is in c_rarg0 and addr in c_rarg1.
  if (dst == c_rarg1) {
    // dst already occupies c_rarg1: compute the address into c_rarg0 and swap.
    __ lea(c_rarg0, src);
    __ xchgptr(c_rarg1, c_rarg0);
  } else {
    __ lea(c_rarg1, src);
    __ movptr(c_rarg0, dst);
  }

  address target = nullptr;
  if (is_strong) {
    if (is_narrow) {
      target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong_narrow);
    } else {
      target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong);
    }
  } else if (is_weak) {
    if (is_narrow) {
      target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow);
    } else {
      target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak);
    }
  } else {
    assert(is_phantom, "only remaining strength");
    assert(!is_narrow, "phantom access cannot be narrow");
    target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom);
  }

  // Calling with super_call_VM_leaf with c_rarg0/1 bypasses interpreter checks and avoids any moves.
  __ super_call_VM_leaf(target, c_rarg0, c_rarg1);
  __ pop_call_clobbered_registers_except(rax, /* restore_fpu = */ true);
  // Move the result into dst and bring back the caller's rax.
  if (dst != rax) {
    __ movptr(dst, rax);
    __ pop(rax);
  }
  __ leave();

  __ bind(not_cset);

  // Undo the tmp1/tmp2 pushes made before the cset test.
  if (is_strong) {
    __ pop(tmp2);
    __ pop(tmp1);
  }

  __ bind(heap_stable);

  __ block_comment("} load_reference_barrier");
}
347
348 //
349 // Arguments:
350 //
351 // Inputs:
352 // src: oop location, might be clobbered
353 // tmp1: scratch register, might not be valid.
354 //
355 // Output:
356 // dst: oop loaded from src location
357 //
358 // Kill:
359 // tmp1 (if it is valid)
360 //
361 void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
362 Register dst, Address src, Register tmp1) {
363 // 1: non-reference load, no additional barrier is needed
364 if (!is_reference_type(type)) {
365 BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1);
366 return;
367 }
368
369 assert((decorators & ON_UNKNOWN_OOP_REF) == 0, "Not expected");
370
371 // 2: load a reference from src location and apply LRB if needed
372 if (ShenandoahBarrierSet::need_load_reference_barrier(decorators, type)) {
373 Register result_dst = dst;
374 bool use_tmp1_for_dst = false;
375
376 // Preserve src location for LRB
377 if (dst == src.base() || dst == src.index()) {
378 // Use tmp1 for dst if possible, as it is not used in BarrierAssembler::load_at()
379 if (tmp1->is_valid() && tmp1 != src.base() && tmp1 != src.index()) {
380 dst = tmp1;
381 use_tmp1_for_dst = true;
382 } else {
383 dst = rdi;
384 __ push(dst);
385 }
386 assert_different_registers(dst, src.base(), src.index());
387 }
388
389 BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1);
390
391 load_reference_barrier(masm, dst, src, decorators);
392
393 // Move loaded oop to final destination
394 if (dst != result_dst) {
395 __ movptr(result_dst, dst);
396
397 if (!use_tmp1_for_dst) {
398 __ pop(dst);
399 }
400
401 dst = result_dst;
402 }
403 } else {
404 BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1);
405 }
406
407 // 3: apply keep-alive barrier if needed
408 if (ShenandoahBarrierSet::need_keep_alive_barrier(decorators, type)) {
409 satb_barrier(masm /* masm */,
410 noreg /* obj */,
411 dst /* pre_val */,
412 tmp1 /* tmp */);
413 }
414 }
415
416 void ShenandoahBarrierSetAssembler::card_barrier(MacroAssembler* masm, Register obj) {
417 assert(ShenandoahCardBarrier, "Should have been checked by caller");
418
419 // Does a store check for the oop in register obj. The content of
420 // register obj is destroyed afterwards.
421 __ shrptr(obj, CardTable::card_shift());
422
423 // We'll use this register as the TLS base address and also later on
424 // to hold the byte_map_base.
425 Register thread = r15_thread;
426 Register tmp = rscratch1;
427
428 Address curr_ct_holder_addr(thread, in_bytes(ShenandoahThreadLocalData::card_table_offset()));
429 __ movptr(tmp, curr_ct_holder_addr);
430 Address card_addr(tmp, obj, Address::times_1);
431
432 int dirty = CardTable::dirty_card_val();
433 if (UseCondCardMark) {
434 Label L_already_dirty;
435 __ cmpb(card_addr, dirty);
436 __ jccb(Assembler::equal, L_already_dirty);
437 __ movb(card_addr, dirty);
438 __ bind(L_already_dirty);
439 } else {
440 __ movb(card_addr, dirty);
441 }
442 }
443
444 void ShenandoahBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
445 Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) {
446
447 // 1: non-reference types require no barriers
448 if (!is_reference_type(type)) {
449 BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2, tmp3);
450 return;
451 }
452
453 // Flatten object address right away for simplicity: likely needed by barriers
454 assert_different_registers(val, tmp1, tmp2, tmp3, r15_thread);
455 if (dst.index() == noreg && dst.disp() == 0) {
456 if (dst.base() != tmp1) {
457 __ movptr(tmp1, dst.base());
458 }
459 } else {
460 __ lea(tmp1, dst);
461 }
462
463 // 2: pre-barrier: SATB needs the previous value
464 if (ShenandoahBarrierSet::need_satb_barrier(decorators, type)) {
465 satb_barrier(masm,
466 tmp1 /* obj */,
467 tmp2 /* pre_val */,
468 tmp3 /* tmp */);
469 }
470
471 // Store!
472 BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg, noreg);
473
474 // 3: post-barrier: card barrier needs store address
475 bool storing_non_null = (val != noreg);
476 if (ShenandoahBarrierSet::need_card_barrier(decorators, type) && storing_non_null) {
477 card_barrier(masm, tmp1);
478 }
479 }
480
481 void ShenandoahBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
482 Register obj, Register tmp, Label& slowpath) {
483 Label done;
484 // Resolve jobject
485 BarrierSetAssembler::try_resolve_jobject_in_native(masm, jni_env, obj, tmp, slowpath);
486
487 // Check for null.
488 __ testptr(obj, obj);
489 __ jcc(Assembler::zero, done);
490
491 Address gc_state(jni_env, ShenandoahThreadLocalData::gc_state_offset() - JavaThread::jni_environment_offset());
492 __ testb(gc_state, ShenandoahHeap::EVACUATION);
493 __ jccb(Assembler::notZero, slowpath);
494 __ bind(done);
495 }
496
497 void ShenandoahBarrierSetAssembler::try_peek_weak_handle_in_nmethod(MacroAssembler* masm, Register weak_handle, Register obj, Label& slowpath) {
498 Label done;
499
500 // Peek weak handle using the standard implementation.
501 BarrierSetAssembler::try_peek_weak_handle_in_nmethod(masm, weak_handle, obj, slowpath);
502
503 // Check if the reference is null, and if it is, take the fast path.
504 __ testptr(obj, obj);
505 __ jcc(Assembler::zero, done);
506
507 Address gc_state(r15_thread, ShenandoahThreadLocalData::gc_state_offset());
508
509 // Check if the heap is under weak-reference/roots processing, in
510 // which case we need to take the slow path.
511 __ testb(gc_state, ShenandoahHeap::WEAK_ROOTS);
512 __ jcc(Assembler::notZero, slowpath);
513 __ bind(done);
514 }
515
// Special Shenandoah CAS implementation that handles false negatives
// due to concurrent evacuation.
//
//   res      - receives 1 on success / 0 on failure when exchange is false
//   addr     - memory location to update
//   oldval   - expected value; must be rax (implicit cmpxchg operand). On a
//              failed CAS it holds the value witnessed in memory.
//   newval   - value to install
//   exchange - true for compare-and-exchange (result is the witness left in
//              oldval), false for compare-and-set (boolean in res)
//   tmp1     - scratch; keeps a copy of the original expected value
//   tmp2     - scratch; holds the decoded / forwarded in-memory value
void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm,
                                                Register res, Address addr, Register oldval, Register newval,
                                                bool exchange, Register tmp1, Register tmp2) {
  assert(ShenandoahCASBarrier, "Should only be used when CAS barrier is enabled");
  assert(oldval == rax, "must be in rax for implicit use in cmpxchg");
  assert_different_registers(oldval, tmp1, tmp2);
  assert_different_registers(newval, tmp1, tmp2);

  Label L_success, L_failure;

  // Remember oldval for retry logic below
  if (UseCompressedOops) {
    __ movl(tmp1, oldval);
  } else {
    __ movptr(tmp1, oldval);
  }

  // Step 1. Fast-path.
  //
  // Try to CAS with given arguments. If successful, then we are done.

  if (UseCompressedOops) {
    __ lock();
    __ cmpxchgl(newval, addr);
  } else {
    __ lock();
    __ cmpxchgptr(newval, addr);
  }
  __ jcc(Assembler::equal, L_success);

  // Step 2. CAS had failed. This may be a false negative.
  //
  // The trouble comes when we compare the to-space pointer with the from-space
  // pointer to the same object. To resolve this, it will suffice to resolve
  // the value from memory -- this will give both to-space pointers.
  // If they mismatch, then it was a legitimate failure.
  //
  // Before reaching to resolve sequence, see if we can avoid the whole shebang
  // with filters.

  // Filter: when offending in-memory value is null, the failure is definitely legitimate
  // (after the failed cmpxchg, oldval holds that in-memory value)
  __ testptr(oldval, oldval);
  __ jcc(Assembler::zero, L_failure);

  // Filter: when heap is stable, the failure is definitely legitimate
  const Register thread = r15_thread;
  Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
  __ testb(gc_state, ShenandoahHeap::HAS_FORWARDED);
  __ jcc(Assembler::zero, L_failure);

  if (UseCompressedOops) {
    __ movl(tmp2, oldval);
    __ decode_heap_oop(tmp2);
  } else {
    __ movptr(tmp2, oldval);
  }

  // Decode offending in-memory value.
  // Test if-forwarded
  // The two jumps below read the flags of this single testb: the parity flag
  // rejects a masked result with an odd number of set bits, the zero flag
  // rejects 00. NOTE(review): this presumes markWord::marked_value is the
  // two-bit pattern 11 -- confirm against markWord.
  __ testb(Address(tmp2, oopDesc::mark_offset_in_bytes()), markWord::marked_value);
  __ jcc(Assembler::noParity, L_failure);  // When odd number of bits, then not forwarded
  __ jcc(Assembler::zero, L_failure);      // When it is 00, then also not forwarded

  // Load and mask forwarding pointer
  // (shifting right and then left by 2 clears the two lowest bits)
  __ movptr(tmp2, Address(tmp2, oopDesc::mark_offset_in_bytes()));
  __ shrptr(tmp2, 2);
  __ shlptr(tmp2, 2);

  if (UseCompressedOops) {
    __ decode_heap_oop(tmp1); // decode for comparison
  }

  // Now we have the forwarded offender in tmp2.
  // Compare and if they don't match, we have legitimate failure
  __ cmpptr(tmp1, tmp2);
  __ jcc(Assembler::notEqual, L_failure);

  // Step 3. Need to fix the memory ptr before continuing.
  //
  // At this point, we have from-space oldval in the register, and its to-space
  // address is in tmp2. Let's try to update it into memory. We don't care if it
  // succeeds or not. If it does, then the retrying CAS would see it and succeed.
  // If this fixup fails, this means somebody else beat us to it, and necessarily
  // with to-space ptr store. We still have to do the retry, because the GC might
  // have updated the reference for us.

  if (UseCompressedOops) {
    __ encode_heap_oop(tmp2); // previously decoded at step 2.
  }

  // Fixup CAS: expected value is the from-space oldval still in rax,
  // new value is the to-space pointer in tmp2. The outcome is ignored.
  if (UseCompressedOops) {
    __ lock();
    __ cmpxchgl(tmp2, addr);
  } else {
    __ lock();
    __ cmpxchgptr(tmp2, addr);
  }

  // Step 4. Try to CAS again.
  //
  // This is guaranteed not to have false negatives, because oldval is definitely
  // to-space, and memory pointer is to-space as well. Nothing is able to store
  // from-space ptr into memory anymore. Make sure oldval is restored, after being
  // garbled during retries.
  //
  if (UseCompressedOops) {
    __ movl(oldval, tmp2);
  } else {
    __ movptr(oldval, tmp2);
  }

  if (UseCompressedOops) {
    __ lock();
    __ cmpxchgl(newval, addr);
  } else {
    __ lock();
    __ cmpxchgptr(newval, addr);
  }
  if (!exchange) {
    __ jccb(Assembler::equal, L_success); // fastpath, peeking into Step 5, no need to jump
  }

  // Step 5. If we need a boolean result out of CAS, set the flag appropriately.
  // and promote the result. Note that we handle the flag from both the 1st and 2nd CAS.
  // Otherwise, failure witness for CAE is in oldval on all paths, and we can return.

  if (exchange) {
    // Compare-and-exchange: both outcomes simply fall out here.
    __ bind(L_failure);
    __ bind(L_success);
  } else {
    assert(res != noreg, "need result register");

    // Compare-and-set: a failed second CAS falls through into L_failure.
    Label exit;
    __ bind(L_failure);
    __ xorptr(res, res);
    __ jmpb(exit);

    __ bind(L_success);
    __ movptr(res, 1);
    __ bind(exit);
  }
}
660
// BLOCK_COMMENT(str) expands to nothing in PRODUCT builds; in all other
// builds it forwards str to block_comment() through the current `__` prefix.
#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#else
#define BLOCK_COMMENT(str) __ block_comment(str)
#endif

// BIND(label) binds the label and then emits a BLOCK_COMMENT carrying the
// label's name followed by a colon. Intended to be used as `__ BIND(label);`.
#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")

// Address scale factor for one heap oop: times_4 with compressed oops, times_8 otherwise.
#define TIMES_OOP (UseCompressedOops ? Address::times_4 : Address::times_8)
670
671 void ShenandoahBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
672 Register addr, Register count,
673 Register tmp) {
674 assert(ShenandoahCardBarrier, "Should have been checked by caller");
675
676 Label L_loop, L_done;
677 const Register end = count;
678 assert_different_registers(addr, end);
679
680 // Zero count? Nothing to do.
681 __ testl(count, count);
682 __ jccb(Assembler::zero, L_done);
683
684 const Register thread = r15_thread;
685 Address curr_ct_holder_addr(thread, in_bytes(ShenandoahThreadLocalData::card_table_offset()));
686 __ movptr(tmp, curr_ct_holder_addr);
687
688 __ leaq(end, Address(addr, count, TIMES_OOP, 0)); // end == addr+count*oop_size
689 __ subptr(end, BytesPerHeapOop); // end - 1 to make inclusive
690 __ shrptr(addr, CardTable::card_shift());
691 __ shrptr(end, CardTable::card_shift());
692 __ subptr(end, addr); // end --> cards count
693
694 __ addptr(addr, tmp);
695
696 __ BIND(L_loop);
697 __ movb(Address(addr, count, Address::times_1), 0);
698 __ decrement(count);
699 __ jccb(Assembler::greaterEqual, L_loop);
700
701 __ BIND(L_done);
702 }
703
704 #undef __
705
706 #ifdef COMPILER1
707
708 #define __ ce->masm()->
709
710 void ShenandoahBarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub) {
711 ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
712 // At this point we know that marking is in progress.
713 // If do_load() is true then we have to emit the
714 // load of the previous value; otherwise it has already
715 // been loaded into _pre_val.
716
717 __ bind(*stub->entry());
718 assert(stub->pre_val()->is_register(), "Precondition.");
719
720 Register pre_val_reg = stub->pre_val()->as_register();
721
722 if (stub->do_load()) {
723 ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/);
724 }
725
726 __ cmpptr(pre_val_reg, NULL_WORD);
727 __ jcc(Assembler::equal, *stub->continuation());
728 ce->store_parameter(stub->pre_val()->as_register(), 0);
729 __ call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin()));
730 __ jmp(*stub->continuation());
731
732 }
733
// Emits the out-of-line C1 code for the load-reference barrier stub.
//
// The loaded object is moved into the result register (which must be rax).
// For strong accesses the collection-set table is consulted first and the
// stub returns to its continuation when the entry is zero. Otherwise the
// object and its load address are passed as parameters 0 and 1 to the C1
// runtime blob matching the access strength.
void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub) {
  ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
  __ bind(*stub->entry());

  DecoratorSet decorators = stub->decorators();
  bool is_strong  = ShenandoahBarrierSet::is_strong_access(decorators);
  bool is_weak    = ShenandoahBarrierSet::is_weak_access(decorators);
  bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators);
  bool is_native  = ShenandoahBarrierSet::is_native_access(decorators);

  Register obj = stub->obj()->as_register();
  Register res = stub->result()->as_register();
  Register addr = stub->addr()->as_pointer_register();
  Register tmp1 = stub->tmp1()->as_register();
  Register tmp2 = stub->tmp2()->as_register();
  assert_different_registers(obj, res, addr, tmp1, tmp2);

  // NOTE(review): slow_path is bound below but no jump in this function targets it.
  Label slow_path;

  assert(res == rax, "result must arrive in rax");

  if (res != obj) {
    __ mov(res, obj);
  }

  if (is_strong) {
    // Check for object being in the collection set.
    __ mov(tmp1, res);
    if (AOTCodeCache::is_on_for_dump()) {
      // When dumping AOT code, the region shift and the cset table base are
      // read from AOTRuntimeConstants at run time. The variable shift takes
      // its count from rcx, which is saved and restored around it.
      __ push(rcx);
      __ lea(rcx, ExternalAddress(AOTRuntimeConstants::grain_shift_address()));
      __ movl(rcx, Address(rcx));
      if (tmp1 != rcx) {
        // NOTE(review): tmp1 already holds res from the mov above; this
        // reload looks redundant on this branch.
        __ mov(tmp1, res);
        __ shrptr(tmp1);
        __ pop(rcx);
      } else {
        // tmp1 is rcx itself: shift in tmp2 and copy the result into tmp1
        // after rcx has been popped.
        assert_different_registers(tmp2, rcx);
        __ mov(tmp2, res);
        __ shrptr(tmp2);
        __ pop(rcx);
        __ movptr(tmp1, tmp2);
      }
      __ lea(tmp2, ExternalAddress(AOTRuntimeConstants::cset_base_address()));
      __ movptr(tmp2, Address(tmp2));
    } else {
      __ shrptr(tmp1, ShenandoahHeapRegion::region_size_bytes_shift_jint());
      __ movptr(tmp2, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr());
    }
    // tmp2 := cset_table[region index]; zero means the object is not in the
    // collection set and the stub is done.
    __ movbool(tmp2, Address(tmp2, tmp1, Address::times_1));
    __ testbool(tmp2);
    __ jcc(Assembler::zero, *stub->continuation());
  }

  __ bind(slow_path);
  // Parameter 0: the object, parameter 1: the address it was loaded from.
  ce->store_parameter(res, 0);
  ce->store_parameter(addr, 1);
  if (is_strong) {
    if (is_native) {
      __ call(RuntimeAddress(bs->load_reference_barrier_strong_native_rt_code_blob()->code_begin()));
    } else {
      __ call(RuntimeAddress(bs->load_reference_barrier_strong_rt_code_blob()->code_begin()));
    }
  } else if (is_weak) {
    __ call(RuntimeAddress(bs->load_reference_barrier_weak_rt_code_blob()->code_begin()));
  } else {
    assert(is_phantom, "only remaining strength");
    __ call(RuntimeAddress(bs->load_reference_barrier_phantom_rt_code_blob()->code_begin()));
  }
  __ jmp(*stub->continuation());
}
805
806 #undef __
807
808 #define __ sasm->
809
810 void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
811 __ prologue("shenandoah_pre_barrier", false);
812 // arg0 : previous value of memory
813
814 __ push(rax);
815 __ push(rdx);
816
817 const Register pre_val = rax;
818 const Register thread = r15_thread;
819 const Register tmp = rdx;
820
821 Address queue_index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
822 Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
823
824 Label done;
825 Label runtime;
826
827 // Is SATB still active?
828 Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
829 __ testb(gc_state, ShenandoahHeap::MARKING);
830 __ jcc(Assembler::zero, done);
831
832 // Can we store original value in the thread's buffer?
833
834 __ movptr(tmp, queue_index);
835 __ testptr(tmp, tmp);
836 __ jcc(Assembler::zero, runtime);
837 __ subptr(tmp, wordSize);
838 __ movptr(queue_index, tmp);
839 __ addptr(tmp, buffer);
840
841 // prev_val (rax)
842 __ load_parameter(0, pre_val);
843 __ movptr(Address(tmp, 0), pre_val);
844 __ jmp(done);
845
846 __ bind(runtime);
847
848 __ save_live_registers_no_oop_map(true);
849
850 // load the pre-value
851 __ load_parameter(0, rcx);
852 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_barrier_pre), rcx);
853
854 __ restore_live_registers(true);
855
856 __ bind(done);
857
858 __ pop(rdx);
859 __ pop(rax);
860
861 __ epilogue();
862 }
863
864 void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, DecoratorSet decorators) {
865 __ prologue("shenandoah_load_reference_barrier", false);
866 // arg0 : object to be resolved
867
868 __ save_live_registers_no_oop_map(true);
869
870 bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators);
871 bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators);
872 bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators);
873 bool is_native = ShenandoahBarrierSet::is_native_access(decorators);
874
875 __ load_parameter(0, c_rarg0);
876 __ load_parameter(1, c_rarg1);
877 if (is_strong) {
878 if (is_native) {
879 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong), c_rarg0, c_rarg1);
880 } else {
881 if (UseCompressedOops) {
882 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong_narrow), c_rarg0, c_rarg1);
883 } else {
884 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong), c_rarg0, c_rarg1);
885 }
886 }
887 } else if (is_weak) {
888 assert(!is_native, "weak must not be called off-heap");
889 if (UseCompressedOops) {
890 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow), c_rarg0, c_rarg1);
891 } else {
892 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak), c_rarg0, c_rarg1);
893 }
894 } else {
895 assert(is_phantom, "only remaining strength");
896 assert(is_native, "phantom must only be called off-heap");
897 __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom), c_rarg0, c_rarg1);
898 }
899
900 __ restore_live_registers_except_rax(true);
901
902 __ epilogue();
903 }
904
905 #undef __
906
907 #endif // COMPILER1
908
909 #ifdef COMPILER2
910
911 #undef __
912 #define __ masm->
913
914 void ShenandoahBarrierSetAssembler::load_c2(const MachNode* node, MacroAssembler* masm, Register dst, Address src, bool narrow) {
915 // Do the actual load. This load is the candidate for implicit null check, and MUST come first.
916 if (narrow) {
917 __ movl(dst, src);
918 } else {
919 __ movq(dst, src);
920 }
921
922 ShenandoahBarrierStubC2::load_post(masm, node, dst, src, noreg, noreg, narrow);
923 }
924
925 void ShenandoahBarrierSetAssembler::store_c2(const MachNode* node, MacroAssembler* masm,
926 Address dst, bool dst_narrow,
927 Register src, bool src_narrow,
928 Register tmp) {
929
930 ShenandoahBarrierStubC2::store_pre(masm, node, tmp, dst, noreg, noreg, dst_narrow);
931
932 // Need to encode into tmp, because we cannot clobber src.
933 if (dst_narrow && !src_narrow) {
934 __ movq(tmp, src);
935 if ((node->barrier_data() & ShenandoahBitNotNull) == 0) {
936 __ encode_heap_oop(tmp);
937 } else {
938 __ encode_heap_oop_not_null(tmp);
939 }
940 src = tmp;
941 }
942
943 // Do the actual store
944 if (dst_narrow) {
945 __ movl(dst, src);
946 } else {
947 __ movq(dst, src);
948 }
949
950 ShenandoahBarrierStubC2::store_post(masm, node, dst, tmp, noreg);
951 }
952
953 void ShenandoahBarrierSetAssembler::compare_and_set_c2(const MachNode* node, MacroAssembler* masm,
954 Register res, Address addr,
955 Register oldval, Register newval, Register tmp,
956 bool narrow) {
957
958 assert(oldval == rax, "must be in rax for implicit use in cmpxchg");
959
960 // Oldval and newval can be in the same register, but all other registers should be
961 // distinct for extra safety, as we shuffle register values around.
962 assert_different_registers(oldval, tmp, addr.base(), addr.index());
963 assert_different_registers(newval, tmp, addr.base(), addr.index());
964
965 ShenandoahBarrierStubC2::load_store_pre(masm, node, tmp, addr, noreg, noreg, narrow);
966
967 // CAS!
968 __ lock();
969 if (narrow) {
970 __ cmpxchgl(newval, addr);
971 } else {
972 __ cmpxchgptr(newval, addr);
973 }
974
975 // If we need a boolean result out of CAS, set the flag appropriately and promote the result.
976 if (res != noreg) {
977 __ setcc(Assembler::equal, res);
978 }
979
980 ShenandoahBarrierStubC2::load_store_post(masm, node, addr, tmp, noreg);
981 }
982
983 void ShenandoahBarrierSetAssembler::get_and_set_c2(const MachNode* node, MacroAssembler* masm, Register newval, Address addr, Register tmp, bool narrow) {
984 assert_different_registers(newval, tmp, addr.base(), addr.index());
985
986 ShenandoahBarrierStubC2::load_store_pre(masm, node, tmp, addr, noreg, noreg, narrow);
987
988 if (narrow) {
989 __ xchgl(newval, addr);
990 } else {
991 __ xchgq(newval, addr);
992 }
993
994 ShenandoahBarrierStubC2::load_store_post(masm, node, addr, tmp, noreg);
995 }
996
997 #undef __
998 #define __ masm.
999
1000 void ShenandoahBarrierStubC2::cardtable(MacroAssembler& masm, Address addr, Register tmp1, Register tmp2) {
1001 Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
1002
1003 __ lea(tmp1, addr);
1004 __ shrptr(tmp1, CardTable::card_shift());
1005 __ addptr(tmp1, Address(r15_thread, in_bytes(ShenandoahThreadLocalData::card_table_offset())));
1006 Address card_address(tmp1, 0);
1007
1008 assert(CardTable::dirty_card_val() == 0, "Encoding assumption");
1009 Label L_done;
1010 if (UseCondCardMark) {
1011 __ cmpb(card_address, 0);
1012 __ jccb(Assembler::equal, L_done);
1013 }
1014 if (UseCompressedOops && CompressedOops::base() == nullptr) {
1015 __ movb(card_address, r12);
1016 } else {
1017 __ movb(card_address, 0);
1018 }
1019 __ bind(L_done);
1020 }
1021
1022 void ShenandoahBarrierStubC2::enter_if_gc_state(MacroAssembler& masm, const char test_state, Register tmp) {
1023 Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
1024
1025 // Emit the unconditional branch in the first version of the method.
1026 // Let the rest of runtime figure out how to manage it.
1027 __ relocate(barrier_Relocation::spec(), ShenandoahThreadLocalData::gc_state_to_fast_array_index(test_state));
1028 __ jmp(*entry(), /* maybe_short = */ false);
1029
1030 #ifdef ASSERT
1031 Address gc_state_fast(r15_thread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(test_state)));
1032 __ cmpb(gc_state_fast, 0);
1033 __ jccb(Assembler::zero, *continuation());
1034 __ hlt(); // Correctness bug: barrier is NOP-ed, but heap is NOT IDLE
1035 #endif
1036 // TODO: When barriers are consistently turned off at the end of the cycle, assert that barrier is NOP-ed.
1037
1038 __ bind(*continuation());
1039 }
1040
1041 address ShenandoahBarrierSetAssembler::parse_stub_address(address pc) {
1042 NativeInstruction* ni = nativeInstruction_at(pc);
1043 assert(ni->is_jump(), "Initial code version: GC barrier fastpath must be a jump");
1044 NativeJump* jmp = nativeJump_at(pc);
1045 return jmp->jump_destination();
1046 }
1047
1048 void insert_5_byte_nop(address pc) {
1049 *(pc + 0) = 0x0F;
1050 *(pc + 1) = 0x1F;
1051 *(pc + 2) = 0x44;
1052 *(pc + 3) = 0x00;
1053 *(pc + 4) = 0x00;
1054 ICache::invalidate_range(pc, 5);
1055 }
1056
1057 bool is_5_byte_nop(address pc) {
1058 if (*(pc + 0) != 0x0F) return false;
1059 if (*(pc + 1) != 0x1F) return false;
1060 if (*(pc + 2) != 0x44) return false;
1061 if (*(pc + 3) != 0x00) return false;
1062 if (*(pc + 4) != 0x00) return false;
1063 return true;
1064 }
1065
1066 void check_at(bool cond, address pc, const char* msg) {
1067 assert(cond, "%s: at PC " PTR_FORMAT ": %02x%02x%02x%02x%02x",
1068 msg, p2i(pc), *(pc + 0), *(pc + 1), *(pc + 2), *(pc + 3), *(pc + 4));
1069 }
1070
1071 bool ShenandoahBarrierSetAssembler::is_active(address pc) {
1072 NativeInstruction* ni = nativeInstruction_at(pc);
1073 return ni->is_jump();
1074 }
1075
1076 void ShenandoahBarrierSetAssembler::patch_branch_to_nop(address pc) {
1077 NativeInstruction* ni = nativeInstruction_at(pc);
1078 if (ni->is_jump()) {
1079 insert_5_byte_nop(pc);
1080 } else {
1081 check_at(is_5_byte_nop(pc), pc, "Should already be nop");
1082 }
1083 }
1084
1085 void ShenandoahBarrierSetAssembler::patch_nop_to_branch(address pc, address stub_addr) {
1086 NativeInstruction* ni = nativeInstruction_at(pc);
1087 if (is_5_byte_nop(pc)) {
1088 NativeJump::insert(pc, stub_addr);
1089 } else {
1090 check_at(ni->is_jump(), pc, "Should already be jump");
1091 check_at(nativeJump_at(pc)->jump_destination() == stub_addr, pc, "Jump should be to the same address");
1092 }
1093 }
1094
1095 void ShenandoahBarrierStubC2::emit_code(MacroAssembler& masm) {
1096 Assembler::InlineSkippedInstructionsCounter skip_counter(&masm);
1097 assert(_needs_keep_alive_barrier || _needs_load_ref_barrier, "Why are you here?");
1098
1099 // On x86, there is a significant penalty with unaligned branch target, for example
1100 // when the target instruction straggles the fetch line. It makes (performance) sense
1101 // to spend some code size to align the target better.
1102 __ align(16);
1103 __ bind(*entry());
1104
1105 // If we need to load ourselves, do it here.
1106 if (_do_load) {
1107 if (_narrow) {
1108 __ movl(_obj, _addr);
1109 } else {
1110 __ movq(_obj, _addr);
1111 }
1112 }
1113
1114 // If the object is null, there is no point in applying barriers.
1115 maybe_far_jump_if_zero(masm, _obj, continuation());
1116
1117 // We need to make sure that loads done by callers survive across slow-path calls.
1118 // For self-loads, we need to care about the case when both KA and LRB are enabled (rare).
1119 bool needs_both_barriers = _needs_keep_alive_barrier && _needs_load_ref_barrier;
1120 if (!_do_load || needs_both_barriers) {
1121 preserve(_obj);
1122 }
1123
1124 // Go for barriers. Barriers can return straight to continuation, as long
1125 // as another barrier is not needed.
1126 if (needs_both_barriers) {
1127 keepalive(masm, nullptr);
1128 lrb(masm);
1129 } else if (_needs_keep_alive_barrier) {
1130 keepalive(masm, continuation());
1131 } else if (_needs_load_ref_barrier) {
1132 lrb(masm);
1133 } else {
1134 ShouldNotReachHere();
1135 }
1136 }
1137
1138 void ShenandoahBarrierStubC2::keepalive(MacroAssembler& masm, Label* L_done) {
1139 Address gc_state_fast(r15_thread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(ShenandoahHeap::MARKING)));
1140 Address index(r15_thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
1141 Address buffer(r15_thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
1142
1143 Label L_through, L_pop_and_slow;
1144
1145 // Hotpatched GC checks are racy: we can turn off GC state before we patch the barriers.
1146 // Therefore, alas we need a separate check here. TODO: Figure this out.
1147 __ cmpb(gc_state_fast, 0);
1148 if (L_done != nullptr) {
1149 __ jcc(Assembler::equal, *L_done);
1150 } else {
1151 __ jcc(Assembler::equal, L_through);
1152 }
1153
1154 // Need temp to work, allocate one now.
1155 bool tmp_live;
1156 Register tmp = select_temp_register(tmp_live);
1157 if (tmp_live) {
1158 __ push(tmp);
1159 }
1160
1161 // Fast-path: put object into buffer.
1162 // If buffer is already full, go slow.
1163 __ movptr(tmp, index);
1164 __ subptr(tmp, wordSize);
1165 __ jccb(Assembler::below, L_pop_and_slow);
1166 __ movptr(index, tmp);
1167 __ addptr(tmp, buffer);
1168
1169 // Store the object in queue.
1170 // If object is narrow, we need to decode it before inserting.
1171 // We can skip the re-encoding if we know that object is not preserved.
1172 if (_narrow) {
1173 __ decode_heap_oop_not_null(_obj);
1174 }
1175 __ movptr(Address(tmp, 0), _obj);
1176 if (_narrow && is_preserved(_obj)) {
1177 __ encode_heap_oop_not_null(_obj);
1178 }
1179
1180 // Fast-path exits here.
1181 if (tmp_live) {
1182 __ pop(tmp);
1183 }
1184
1185 if (L_done != nullptr) {
1186 __ jmp(*L_done);
1187 } else {
1188 __ jmp(L_through);
1189 }
1190
1191 // Slow-path: call runtime to handle.
1192 // Need to pop tmp immediately for stack to remain aligned.
1193 __ bind(L_pop_and_slow);
1194 if (tmp_live) {
1195 __ pop(tmp);
1196 }
1197 {
1198 SaveLiveRegisters slr(&masm, this);
1199
1200 // Shuffle in the arguments. The end result should be:
1201 // c_rarg0 <-- obj
1202 if (c_rarg0 != _obj) {
1203 __ mov(c_rarg0, _obj);
1204 }
1205
1206 // Go to runtime and handle the rest there.
1207 // Use rax as scratch, as it will be saved if live.
1208 __ call(RuntimeAddress(keepalive_runtime_entry_addr()), rax);
1209 }
1210 if (L_done != nullptr) {
1211 __ jmp(*L_done);
1212 } else {
1213 __ bind(L_through);
1214 }
1215 }
1216
1217 void ShenandoahBarrierStubC2::lrb(MacroAssembler& masm) {
1218 Label L_pop_and_slow, L_slow;
1219
1220 // Hotpatched GC checks are racy: we can turn off GC state before we patch the barriers.
1221 // Therefore, alas we need a separate check here. TODO: Figure this out.
1222 char state_to_check = ShenandoahHeap::HAS_FORWARDED | (_needs_load_ref_weak_barrier ? ShenandoahHeap::WEAK_ROOTS : 0);
1223 Address gc_state_fast(r15_thread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(state_to_check)));
1224 __ cmpb(gc_state_fast, 0);
1225 __ jcc(Assembler::equal, *continuation());
1226
1227 // If weak references are being processed, weak/phantom loads need to go slow,
1228 // regardless of their cset status.
1229 if (_needs_load_ref_weak_barrier) {
1230 Address gc_state_fast(r15_thread, in_bytes(ShenandoahThreadLocalData::gc_state_fast_array_offset(ShenandoahHeap::WEAK_ROOTS)));
1231 __ cmpb(gc_state_fast, 0);
1232 __ jccb(Assembler::notEqual, L_slow);
1233 }
1234
1235 bool is_aot = AOTCodeCache::is_on_for_dump();
1236
1237 // Need temp to work, allocate one now.
1238 bool tmp_live;
1239 Register tmp = select_temp_register(tmp_live, /* skip_reg1 = */ is_aot ? rcx : noreg);
1240 if (tmp_live) {
1241 __ push(tmp);
1242 }
1243
1244 // Compute the cset bitmap index
1245 if (_narrow) {
1246 __ decode_heap_oop_not_null(tmp, _obj);
1247 } else {
1248 __ movptr(tmp, _obj);
1249 }
1250
1251 Address cset_addr_arg;
1252 intptr_t cset_addr = reinterpret_cast<intptr_t>(ShenandoahHeap::in_cset_fast_test_addr());
1253 if (!is_aot && cset_addr < INT32_MAX) {
1254 // Cset bitmap is at easily encodeable address. Just use it as displacement.
1255 __ shrptr(tmp, ShenandoahHeapRegion::region_size_bytes_shift_jint());
1256 cset_addr_arg = Address(tmp, checked_cast<int>(cset_addr));
1257 } else {
1258 bool tmp2_live;
1259 Register tmp2 = select_temp_register(tmp2_live, /* skip_reg1 = */ tmp, /* skip_reg2 = */ is_aot ? rcx : noreg);
1260 if (tmp2_live) {
1261 __ push(tmp2);
1262 }
1263 if (is_aot) {
1264 // Generating AOT code, pull the cset bitmap and region shift from AOT table.
1265 assert_different_registers(tmp, tmp2, rcx);
1266 __ push(rcx);
1267 __ lea(rcx, ExternalAddress(AOTRuntimeConstants::grain_shift_address()));
1268 __ movl(rcx, Address(rcx));
1269 __ shrptr(tmp);
1270 __ pop(rcx);
1271 __ lea(tmp2, ExternalAddress(AOTRuntimeConstants::cset_base_address()));
1272 __ addptr(tmp, Address(tmp2));
1273 } else {
1274 // Cset bitmap is far away. Add its address fully.
1275 __ shrptr(tmp, ShenandoahHeapRegion::region_size_bytes_shift_jint());
1276 __ movptr(tmp2, cset_addr);
1277 __ addptr(tmp, tmp2);
1278 }
1279 if (tmp2_live) {
1280 __ pop(tmp2);
1281 }
1282 cset_addr_arg = Address(tmp, 0);
1283 }
1284
1285 // Cset-check. Fall-through to slow if in collection set.
1286 __ cmpb(cset_addr_arg, 0);
1287 if (tmp_live) {
1288 __ jccb(Assembler::notEqual, L_pop_and_slow);
1289 __ pop(tmp);
1290 __ jmp(*continuation());
1291 } else {
1292 // Nothing else to do, jump back
1293 __ jcc(Assembler::equal, *continuation());
1294 }
1295
1296 // Slow path
1297 __ bind(L_pop_and_slow);
1298 // Need to pop tmp immediately for stack to remain aligned.
1299 if (tmp_live) {
1300 __ pop(tmp);
1301 }
1302 __ bind(L_slow);
1303
1304 // Obj is the result, need to temporarily stop preserving it.
1305 bool is_obj_preserved = is_preserved(_obj);
1306 if (is_obj_preserved) {
1307 dont_preserve(_obj);
1308 }
1309 {
1310 SaveLiveRegisters slr(&masm, this);
1311
1312 assert_different_registers(rax, c_rarg0, c_rarg1);
1313
1314 // Shuffle in the arguments. The end result should be:
1315 // c_rarg0 <-- obj
1316 // c_rarg1 <-- lea(addr)
1317 if (_obj == c_rarg0) {
1318 __ lea(c_rarg1, _addr);
1319 } else if (_obj == c_rarg1) {
1320 // Set up arguments in reverse, and then flip them
1321 __ lea(c_rarg0, _addr);
1322 __ xchgptr(c_rarg0, c_rarg1);
1323 } else {
1324 assert_different_registers(_obj, c_rarg0, c_rarg1);
1325 __ lea(c_rarg1, _addr);
1326 __ movptr(c_rarg0, _obj);
1327 }
1328
1329 // Go to runtime and handle the rest there.
1330 // Use rax as scratch, as it will be clobbered by result anyway.
1331 __ call(RuntimeAddress(lrb_runtime_entry_addr()), rax);
1332
1333 // Save the result where needed.
1334 if (_narrow) {
1335 __ movl(_obj, rax);
1336 } else if (_obj != rax) {
1337 __ movptr(_obj, rax);
1338 }
1339 }
1340 if (is_obj_preserved) {
1341 preserve(_obj);
1342 }
1343
1344 __ jmp(*continuation());
1345 }
1346
1347 int ShenandoahBarrierStubC2::available_gp_registers() {
1348 return Register::available_gp_registers();
1349 }
1350
1351 bool ShenandoahBarrierStubC2::is_special_register(Register r) {
1352 return r == rsp || r == rbp || r == r12_heapbase || r == r15_thread;
1353 }
1354
1355 void ShenandoahBarrierStubC2::post_init() {
1356 // Do nothing.
1357 }
1358
1359 void ShenandoahBarrierStubC2::maybe_far_jump_if_zero(MacroAssembler& masm, Register reg, Label* L_target) {
1360 if (_narrow) {
1361 __ testl(reg, reg);
1362 } else {
1363 __ testq(reg, reg);
1364 }
1365 __ jcc(Assembler::zero, *L_target);
1366 }
1367
1368 #endif // COMPILER2