1 /*
  2  * Copyright (c) 2018, 2021, Red Hat, Inc. All rights reserved.
  3  * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
  4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  5  *
  6  * This code is free software; you can redistribute it and/or modify it
  7  * under the terms of the GNU General Public License version 2 only, as
  8  * published by the Free Software Foundation.
  9  *
 10  * This code is distributed in the hope that it will be useful, but WITHOUT
 11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 13  * version 2 for more details (a copy is included in the LICENSE file that
 14  * accompanied this code).
 15  *
 16  * You should have received a copy of the GNU General Public License version
 17  * 2 along with this work; if not, write to the Free Software Foundation,
 18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 19  *
 20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 21  * or visit www.oracle.com if you need additional information or have any
 22  * questions.
 23  *
 24  */
 25 
 26 #include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp"
 27 #include "gc/shenandoah/mode/shenandoahMode.hpp"
 28 #include "gc/shenandoah/shenandoahBarrierSet.hpp"
 29 #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
 30 #include "gc/shenandoah/shenandoahForwarding.hpp"
 31 #include "gc/shenandoah/shenandoahHeap.inline.hpp"
 32 #include "gc/shenandoah/shenandoahHeapRegion.hpp"
 33 #include "gc/shenandoah/shenandoahRuntime.hpp"
 34 #include "gc/shenandoah/shenandoahThreadLocalData.hpp"
 35 #include "interpreter/interpreter.hpp"
 36 #include "runtime/javaThread.hpp"
 37 #include "runtime/sharedRuntime.hpp"
 38 #include "utilities/macros.hpp"
 39 #ifdef COMPILER1
 40 #include "c1/c1_LIRAssembler.hpp"
 41 #include "c1/c1_MacroAssembler.hpp"
 42 #include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp"
 43 #endif
 44 
 45 #define __ masm->
 46 
 47 static void save_machine_state(MacroAssembler* masm, bool handle_gpr, bool handle_fp) {
 48   if (handle_gpr) {
 49     __ push_IU_state();
 50   }
 51 
 52   if (handle_fp) {
 53     // Some paths can be reached from the c2i adapter with live fp arguments in registers.
 54     assert(Argument::n_float_register_parameters_j == 8, "8 fp registers to save at java call");
 55 
 56     const int xmm_size = wordSize * 2;
 57     __ subptr(rsp, xmm_size * 8);
 58     __ movdbl(Address(rsp, xmm_size * 0), xmm0);
 59     __ movdbl(Address(rsp, xmm_size * 1), xmm1);
 60     __ movdbl(Address(rsp, xmm_size * 2), xmm2);
 61     __ movdbl(Address(rsp, xmm_size * 3), xmm3);
 62     __ movdbl(Address(rsp, xmm_size * 4), xmm4);
 63     __ movdbl(Address(rsp, xmm_size * 5), xmm5);
 64     __ movdbl(Address(rsp, xmm_size * 6), xmm6);
 65     __ movdbl(Address(rsp, xmm_size * 7), xmm7);
 66   }
 67 }
 68 
 69 static void restore_machine_state(MacroAssembler* masm, bool handle_gpr, bool handle_fp) {
 70   if (handle_fp) {
 71     const int xmm_size = wordSize * 2;
 72     __ movdbl(xmm0, Address(rsp, xmm_size * 0));
 73     __ movdbl(xmm1, Address(rsp, xmm_size * 1));
 74     __ movdbl(xmm2, Address(rsp, xmm_size * 2));
 75     __ movdbl(xmm3, Address(rsp, xmm_size * 3));
 76     __ movdbl(xmm4, Address(rsp, xmm_size * 4));
 77     __ movdbl(xmm5, Address(rsp, xmm_size * 5));
 78     __ movdbl(xmm6, Address(rsp, xmm_size * 6));
 79     __ movdbl(xmm7, Address(rsp, xmm_size * 7));
 80     __ addptr(rsp, xmm_size * 8);
 81   }
 82 
 83   if (handle_gpr) {
 84     __ pop_IU_state();
 85   }
 86 }
 87 
// Emitted before an arraycopy stub runs. For oop arrays this does two things:
//  1. stashes in r11 whatever value the card-marking epilogue will need but
//     the copy stub is going to clobber (element count, or dst for disjoint
//     compressed-oop copies), and
//  2. calls the Shenandoah arraycopy barrier runtime when the current GC
//     state requires pre-processing of the copied range (SATB marking and/or
//     healing of forwarded oops).
void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                                       Register src, Register dst, Register count) {

  bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;

  if (is_reference_type(type)) {
    if (ShenandoahCardBarrier) {
      bool checkcast = (decorators & ARRAYCOPY_CHECKCAST) != 0;
      bool disjoint = (decorators & ARRAYCOPY_DISJOINT) != 0;
      bool obj_int = (type == T_OBJECT) && UseCompressedOops;

      // We need to save the original element count because the array copy stub
      // will destroy the value and we need it for the card marking barrier.
      // (The epilogue reads r11 back under exactly these conditions.)
      if (!checkcast) {
        if (!obj_int) {
          // Save count for barrier
          __ movptr(r11, count);
        } else if (disjoint) {
          // Save dst in r11 in the disjoint case
          __ movq(r11, dst);
        }
      }
    }

    if ((ShenandoahSATBBarrier && !dest_uninitialized) || ShenandoahLoadRefBarrier) {
      Register thread = r15_thread;
      assert_different_registers(src, dst, count, thread);

      Label L_done;
      // Short-circuit if count == 0.
      __ testptr(count, count);
      __ jcc(Assembler::zero, L_done);

      // Avoid runtime call when not active: for an uninitialized destination
      // only forwarded-oop healing matters; otherwise marking also requires
      // the SATB treatment of the source range.
      Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
      int flags;
      if (ShenandoahSATBBarrier && dest_uninitialized) {
        flags = ShenandoahHeap::HAS_FORWARDED;
      } else {
        flags = ShenandoahHeap::HAS_FORWARDED | ShenandoahHeap::MARKING;
      }
      __ testb(gc_state, flags);
      __ jcc(Assembler::zero, L_done);

      save_machine_state(masm, /* handle_gpr = */ true, /* handle_fp = */ false);

      // The runtime call passes the registers through unchanged; the stub
      // calling convention pins them (see asserts).
      assert(src == rdi, "expected");
      assert(dst == rsi, "expected");
      assert(count == rdx, "expected");
      if (UseCompressedOops) {
        __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop),
                        src, dst, count);
      } else {
        __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop),
                        src, dst, count);
      }

      restore_machine_state(masm, /* handle_gpr = */ true, /* handle_fp = */ false);

      __ bind(L_done);
    }
  }

}
152 
153 void ShenandoahBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
154                                                        Register src, Register dst, Register count) {
155 
156   if (ShenandoahCardBarrier && is_reference_type(type)) {
157     bool checkcast = (decorators & ARRAYCOPY_CHECKCAST) != 0;
158     bool disjoint = (decorators & ARRAYCOPY_DISJOINT) != 0;
159     bool obj_int = (type == T_OBJECT) && UseCompressedOops;
160     Register tmp = rax;
161 
162     if (!checkcast) {
163       if (!obj_int) {
164         // Save count for barrier
165         count = r11;
166       } else if (disjoint) {
167         // Use the saved dst in the disjoint case
168         dst = r11;
169       }
170     } else {
171       tmp = rscratch1;
172     }
173     gen_write_ref_array_post_barrier(masm, decorators, dst, count, tmp);
174   }
175 }
176 
// Emits the SATB (snapshot-at-the-beginning) pre-write barrier: while
// concurrent marking is active, the value about to be overwritten is recorded
// into the thread-local SATB mark queue (fast path), or handed to
// ShenandoahRuntime::write_barrier_pre when the queue buffer is full.
//
//   obj         - address of the field being overwritten; if noreg, the
//                 previous value has already been loaded into pre_val
//   pre_val     - holds (or receives) the previous value
//   tmp         - scratch register
//   tosca_live  - rax carries a live value and must be preserved across the
//                 runtime call
//   expand_call - expand the leaf call inline, bypassing the interpreter's
//                 _last_sp check (see comment below)
void ShenandoahBarrierSetAssembler::satb_barrier(MacroAssembler* masm,
                                                 Register obj,
                                                 Register pre_val,
                                                 Register tmp,
                                                 bool tosca_live,
                                                 bool expand_call) {
  assert(ShenandoahSATBBarrier, "Should be checked by caller");

  // If expand_call is true then we expand the call_VM_leaf macro
  // directly to skip generating the check by
  // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.

  const Register thread = r15_thread;

  Label done;
  Label runtime;

  assert(pre_val != noreg, "check this code");

  if (obj != noreg) {
    assert_different_registers(obj, pre_val, tmp);
    assert(pre_val != rax, "check this code");
  }

  // Thread-local SATB queue: current index and buffer base address.
  Address index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
  Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));

  // The barrier is only needed while marking is in progress.
  Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
  __ testb(gc_state, ShenandoahHeap::MARKING);
  __ jcc(Assembler::zero, done);

  // Do we need to load the previous value?
  if (obj != noreg) {
    __ load_heap_oop(pre_val, Address(obj, 0), noreg, AS_RAW);
  }

  // Is the previous value null? Nulls need not be recorded.
  __ cmpptr(pre_val, NULL_WORD);
  __ jcc(Assembler::equal, done);

  // Can we store original value in the thread's buffer?
  // Is index == 0?
  // (The index field is typed as size_t.)

  __ movptr(tmp, index);                   // tmp := *index_adr
  __ cmpptr(tmp, 0);                       // tmp == 0?
  __ jcc(Assembler::equal, runtime);       // If yes, goto runtime

  // The buffer fills downward: decrement the index by one word and store
  // pre_val at buffer + new index.
  __ subptr(tmp, wordSize);                // tmp := tmp - wordSize
  __ movptr(index, tmp);                   // *index_adr := tmp
  __ addptr(tmp, buffer);                  // tmp := tmp + *buffer_adr

  // Record the previous value
  __ movptr(Address(tmp, 0), pre_val);
  __ jmp(done);

  // Slow path: the queue buffer is full; call into the runtime.
  __ bind(runtime);
  // save the live input values
  if(tosca_live) __ push(rax);

  if (obj != noreg && obj != rax)
    __ push(obj);

  if (pre_val != rax)
    __ push(pre_val);

  // Calling the runtime using the regular call_VM_leaf mechanism generates
  // code (generated by InterpreterMacroAssember::call_VM_leaf_base)
  // that checks that the *(ebp+frame::interpreter_frame_last_sp) == nullptr.
  //
  // If we are generating the pre-barrier without a frame (e.g. in the
  // intrinsified Reference.get() routine) then ebp might be pointing to
  // the caller frame and so this check will most likely fail at runtime.
  //
  // Expanding the call directly bypasses the generation of the check.
  // So when we do not have a full interpreter frame on the stack
  // expand_call should be passed true.

  // We move pre_val into c_rarg0 early, in order to avoid smashing it, should
  // pre_val be c_rarg1 (where the call prologue would copy thread argument).
  // Note: this should not accidentally smash thread, because thread is always r15.
  assert(thread != c_rarg0, "smashed arg");
  if (c_rarg0 != pre_val) {
    __ mov(c_rarg0, pre_val);
  }

  if (expand_call) {
    assert(pre_val != c_rarg1, "smashed arg");
    if (c_rarg1 != thread) {
      __ mov(c_rarg1, thread);
    }
    // Already moved pre_val into c_rarg0 above
    __ MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_barrier_pre), 1);
  } else {
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_barrier_pre), c_rarg0);
  }

  // restore the live input values, in reverse order of the pushes above
  if (pre_val != rax)
    __ pop(pre_val);

  if (obj != noreg && obj != rax)
    __ pop(obj);

  if(tosca_live) __ pop(rax);

  __ bind(done);
}
285 
// Emits the load-reference barrier (LRB): after an oop has been loaded into
// dst, ensure dst refers to to-space. When the loaded oop may be stale, a
// runtime call heals it (and receives src so the memory location can be
// updated as well); the healed oop is returned in rax and moved to dst.
//
//   dst        - register holding the just-loaded oop; updated in place
//   src        - the address the oop was loaded from
//   decorators - access strength (strong/weak/phantom) and native-ness;
//                narrow (compressed) encoding applies to non-native accesses
void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst, Address src, DecoratorSet decorators) {
  assert(ShenandoahLoadRefBarrier, "Should be enabled");

  bool is_strong  = ShenandoahBarrierSet::is_strong_access(decorators);
  bool is_weak    = ShenandoahBarrierSet::is_weak_access(decorators);
  bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators);
  bool is_native  = ShenandoahBarrierSet::is_native_access(decorators);
  bool is_narrow  = UseCompressedOops && !is_native;

  Label heap_stable, not_cset;

  __ block_comment("load_reference_barrier { ");

  // Check if GC is active; nothing to do when the heap is stable.
  // Non-strong accesses also need processing while weak roots are in flux.
  Register thread = r15_thread;

  Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
  int flags = ShenandoahHeap::HAS_FORWARDED;
  if (!is_strong) {
    flags |= ShenandoahHeap::WEAK_ROOTS;
  }
  __ testb(gc_state, flags);
  __ jcc(Assembler::zero, heap_stable);

  Register tmp1 = noreg, tmp2 = noreg;
  if (is_strong) {
    // Test for object in cset
    // Allocate two temporary registers from the first eight GPRs
    // (rax..rdi), skipping rsp/rbp and anything aliasing dst or src.
    for (int i = 0; i < 8; i++) {
      Register r = as_Register(i);
      if (r != rsp && r != rbp && r != dst && r != src.base() && r != src.index()) {
        if (tmp1 == noreg) {
          tmp1 = r;
        } else {
          tmp2 = r;
          break;
        }
      }
    }
    assert(tmp1 != noreg, "tmp1 allocated");
    assert(tmp2 != noreg, "tmp2 allocated");
    assert_different_registers(tmp1, tmp2, src.base(), src.index());
    assert_different_registers(tmp1, tmp2, dst);

    __ push(tmp1);
    __ push(tmp2);

    // Optimized cset-test: index the in-cset byte map by region number
    // (dst >> region_size_shift); a non-zero byte means "in collection set".
    __ movptr(tmp1, dst);
    __ shrptr(tmp1, ShenandoahHeapRegion::region_size_bytes_shift_jint());
    __ movptr(tmp2, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr());
    __ movbool(tmp1, Address(tmp1, tmp2, Address::times_1));
    __ testbool(tmp1);
    __ jcc(Assembler::zero, not_cset);
  }

  save_machine_state(masm, /* handle_gpr = */ false, /* handle_fp = */ true);

  // The rest is saved with the optimized path

  // Explicitly save the caller-saved GPRs not covered above:
  // rcx, rdx, rdi, rsi (4) + rax if it is not the destination (1)
  // + r8-r11 (4) + the APX extended registers r16-r31 (16) when enabled.
  uint num_saved_regs = 4 + (dst != rax ? 1 : 0) + 4 + (UseAPX ? 16 : 0);
  __ subptr(rsp, num_saved_regs * wordSize);
  uint slot = num_saved_regs;
  if (dst != rax) {
    __ movptr(Address(rsp, (--slot) * wordSize), rax);
  }
  __ movptr(Address(rsp, (--slot) * wordSize), rcx);
  __ movptr(Address(rsp, (--slot) * wordSize), rdx);
  __ movptr(Address(rsp, (--slot) * wordSize), rdi);
  __ movptr(Address(rsp, (--slot) * wordSize), rsi);
  __ movptr(Address(rsp, (--slot) * wordSize), r8);
  __ movptr(Address(rsp, (--slot) * wordSize), r9);
  __ movptr(Address(rsp, (--slot) * wordSize), r10);
  __ movptr(Address(rsp, (--slot) * wordSize), r11);
  // Save APX extended registers r16–r31 if enabled
  if (UseAPX) {
    __ movptr(Address(rsp, (--slot) * wordSize), r16);
    __ movptr(Address(rsp, (--slot) * wordSize), r17);
    __ movptr(Address(rsp, (--slot) * wordSize), r18);
    __ movptr(Address(rsp, (--slot) * wordSize), r19);
    __ movptr(Address(rsp, (--slot) * wordSize), r20);
    __ movptr(Address(rsp, (--slot) * wordSize), r21);
    __ movptr(Address(rsp, (--slot) * wordSize), r22);
    __ movptr(Address(rsp, (--slot) * wordSize), r23);
    __ movptr(Address(rsp, (--slot) * wordSize), r24);
    __ movptr(Address(rsp, (--slot) * wordSize), r25);
    __ movptr(Address(rsp, (--slot) * wordSize), r26);
    __ movptr(Address(rsp, (--slot) * wordSize), r27);
    __ movptr(Address(rsp, (--slot) * wordSize), r28);
    __ movptr(Address(rsp, (--slot) * wordSize), r29);
    __ movptr(Address(rsp, (--slot) * wordSize), r30);
    __ movptr(Address(rsp, (--slot) * wordSize), r31);
  }
  // r12-r15 are callee saved in all calling conventions
  assert(slot == 0, "must use all slots");

  // Shuffle registers such that dst is in c_rarg0 and addr in c_rarg1.
  // (xchg handles the case where dst already sits in c_rarg1.)
  Register arg0 = c_rarg0, arg1 = c_rarg1;
  if (dst == arg1) {
    __ lea(arg0, src);
    __ xchgptr(arg1, arg0);
  } else {
    __ lea(arg1, src);
    __ movptr(arg0, dst);
  }

  // Call the runtime LRB matching the access strength and oop encoding.
  if (is_strong) {
    if (is_narrow) {
      __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong_narrow), arg0, arg1);
    } else {
      __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong), arg0, arg1);
    }
  } else if (is_weak) {
    if (is_narrow) {
      __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow), arg0, arg1);
    } else {
      __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak), arg0, arg1);
    }
  } else {
    assert(is_phantom, "only remaining strength");
    assert(!is_narrow, "phantom access cannot be narrow");
    __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom), arg0, arg1);
  }

  // Restore in reverse order of the saves; slot counts back up to
  // num_saved_regs (asserted below).
  // Restore APX extended registers r31–r16 if previously saved
  if (UseAPX) {
    __ movptr(r31, Address(rsp, (slot++) * wordSize));
    __ movptr(r30, Address(rsp, (slot++) * wordSize));
    __ movptr(r29, Address(rsp, (slot++) * wordSize));
    __ movptr(r28, Address(rsp, (slot++) * wordSize));
    __ movptr(r27, Address(rsp, (slot++) * wordSize));
    __ movptr(r26, Address(rsp, (slot++) * wordSize));
    __ movptr(r25, Address(rsp, (slot++) * wordSize));
    __ movptr(r24, Address(rsp, (slot++) * wordSize));
    __ movptr(r23, Address(rsp, (slot++) * wordSize));
    __ movptr(r22, Address(rsp, (slot++) * wordSize));
    __ movptr(r21, Address(rsp, (slot++) * wordSize));
    __ movptr(r20, Address(rsp, (slot++) * wordSize));
    __ movptr(r19, Address(rsp, (slot++) * wordSize));
    __ movptr(r18, Address(rsp, (slot++) * wordSize));
    __ movptr(r17, Address(rsp, (slot++) * wordSize));
    __ movptr(r16, Address(rsp, (slot++) * wordSize));
  }
  __ movptr(r11, Address(rsp, (slot++) * wordSize));
  __ movptr(r10, Address(rsp, (slot++) * wordSize));
  __ movptr(r9,  Address(rsp, (slot++) * wordSize));
  __ movptr(r8,  Address(rsp, (slot++) * wordSize));
  __ movptr(rsi, Address(rsp, (slot++) * wordSize));
  __ movptr(rdi, Address(rsp, (slot++) * wordSize));
  __ movptr(rdx, Address(rsp, (slot++) * wordSize));
  __ movptr(rcx, Address(rsp, (slot++) * wordSize));

  // The healed oop comes back in rax; move it to dst before restoring rax.
  if (dst != rax) {
    __ movptr(dst, rax);
    __ movptr(rax, Address(rsp, (slot++) * wordSize));
  }

  assert(slot == num_saved_regs, "must use all slots");
  __ addptr(rsp, num_saved_regs * wordSize);

  restore_machine_state(masm, /* handle_gpr = */ false, /* handle_fp = */ true);

  __ bind(not_cset);

  if  (is_strong) {
    __ pop(tmp2);
    __ pop(tmp1);
  }

  __ bind(heap_stable);

  __ block_comment("} load_reference_barrier");
}
459 
460 //
461 // Arguments:
462 //
463 // Inputs:
464 //   src:        oop location, might be clobbered
465 //   tmp1:       scratch register, might not be valid.
466 //
467 // Output:
468 //   dst:        oop loaded from src location
469 //
470 // Kill:
471 //   tmp1 (if it is valid)
472 //
void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
             Register dst, Address src, Register tmp1) {
  // 1: non-reference load, no additional barrier is needed
  if (!is_reference_type(type)) {
    BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1);
    return;
  }

  assert((decorators & ON_UNKNOWN_OOP_REF) == 0, "Not expected");

  // 2: load a reference from src location and apply LRB if needed
  if (ShenandoahBarrierSet::need_load_reference_barrier(decorators, type)) {
    Register result_dst = dst;
    bool use_tmp1_for_dst = false;

    // Preserve src location for LRB: the barrier needs the original address
    // after the load, so the load must not clobber src's address registers.
    if (dst == src.base() || dst == src.index()) {
      // Use tmp1 for dst if possible, as it is not used in BarrierAssembler::load_at()
      if (tmp1->is_valid() && tmp1 != src.base() && tmp1 != src.index()) {
        dst = tmp1;
        use_tmp1_for_dst = true;
      } else {
        // No usable temp register: spill rdi and borrow it as the destination.
        dst = rdi;
        __ push(dst);
      }
      assert_different_registers(dst, src.base(), src.index());
    }

    BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1);

    load_reference_barrier(masm, dst, src, decorators);

    // Move loaded oop to final destination
    if (dst != result_dst) {
      __ movptr(result_dst, dst);

      if (!use_tmp1_for_dst) {
        // Reload the spilled rdi.
        __ pop(dst);
      }

      dst = result_dst;
    }
  } else {
    BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1);
  }

  // 3: apply keep-alive barrier if needed: the loaded referent must be
  // logged via the SATB barrier so the collector keeps it alive.
  if (ShenandoahBarrierSet::need_keep_alive_barrier(decorators, type)) {
    // Full machine state (GPRs + fp args) is saved; expand_call == true
    // suggests this can be reached without a full interpreter frame
    // (e.g. intrinsified Reference.get()) — see satb_barrier.
    save_machine_state(masm, /* handle_gpr = */ true, /* handle_fp = */ true);

    assert_different_registers(dst, tmp1, r15_thread);
    // Generate the SATB pre-barrier code to log the value of
    // the referent field in an SATB buffer.
    satb_barrier(masm /* masm */,
                 noreg /* obj */,
                 dst /* pre_val */,
                 tmp1 /* tmp */,
                 true /* tosca_live */,
                 true /* expand_call */);

    restore_machine_state(masm, /* handle_gpr = */ true, /* handle_fp = */ true);
  }
}
536 
537 void ShenandoahBarrierSetAssembler::card_barrier(MacroAssembler* masm, Register obj) {
538   assert(ShenandoahCardBarrier, "Should have been checked by caller");
539 
540   // Does a store check for the oop in register obj. The content of
541   // register obj is destroyed afterwards.
542   __ shrptr(obj, CardTable::card_shift());
543 
544   // We'll use this register as the TLS base address and also later on
545   // to hold the byte_map_base.
546   Register thread = r15_thread;
547   Register tmp = rscratch1;
548 
549   Address curr_ct_holder_addr(thread, in_bytes(ShenandoahThreadLocalData::card_table_offset()));
550   __ movptr(tmp, curr_ct_holder_addr);
551   Address card_addr(tmp, obj, Address::times_1);
552 
553   int dirty = CardTable::dirty_card_val();
554   if (UseCondCardMark) {
555     Label L_already_dirty;
556     __ cmpb(card_addr, dirty);
557     __ jccb(Assembler::equal, L_already_dirty);
558     __ movb(card_addr, dirty);
559     __ bind(L_already_dirty);
560   } else {
561     __ movb(card_addr, dirty);
562   }
563 }
564 
565 void ShenandoahBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
566               Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) {
567 
568   // 1: non-reference types require no barriers
569   if (!is_reference_type(type)) {
570     BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2, tmp3);
571     return;
572   }
573 
574   // Flatten object address right away for simplicity: likely needed by barriers
575   assert_different_registers(val, tmp1, tmp2, tmp3, r15_thread);
576   if (dst.index() == noreg && dst.disp() == 0) {
577     if (dst.base() != tmp1) {
578       __ movptr(tmp1, dst.base());
579     }
580   } else {
581     __ lea(tmp1, dst);
582   }
583 
584   bool storing_non_null = (val != noreg);
585 
586   // 2: pre-barrier: SATB needs the previous value
587   if (ShenandoahBarrierSet::need_satb_barrier(decorators, type)) {
588     satb_barrier(masm,
589                  tmp1 /* obj */,
590                  tmp2 /* pre_val */,
591                  tmp3 /* tmp */,
592                  storing_non_null /* tosca_live */,
593                  false /* expand_call */);
594   }
595 
596   // Store!
597   BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg, noreg);
598 
599   // 3: post-barrier: card barrier needs store address
600   if (ShenandoahBarrierSet::need_card_barrier(decorators, type) && storing_non_null) {
601     card_barrier(masm, tmp1);
602   }
603 }
604 
605 void ShenandoahBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
606                                                                   Register obj, Register tmp, Label& slowpath) {
607   Label done;
608   // Resolve jobject
609   BarrierSetAssembler::try_resolve_jobject_in_native(masm, jni_env, obj, tmp, slowpath);
610 
611   // Check for null.
612   __ testptr(obj, obj);
613   __ jcc(Assembler::zero, done);
614 
615   Address gc_state(jni_env, ShenandoahThreadLocalData::gc_state_offset() - JavaThread::jni_environment_offset());
616   __ testb(gc_state, ShenandoahHeap::EVACUATION);
617   __ jccb(Assembler::notZero, slowpath);
618   __ bind(done);
619 }
620 
621 // Special Shenandoah CAS implementation that handles false negatives
622 // due to concurrent evacuation.
// Arguments:
//   res      - boolean result register; only used when exchange == false
//   addr     - memory location to CAS
//   oldval   - expected value; must be rax (implicit cmpxchg operand). For
//              CAE (exchange == true), carries the failure witness on exit.
//   newval   - value to install
//   exchange - true: compare-and-exchange (value result in oldval);
//              false: boolean CAS (res := 1 on success, 0 on failure)
//   tmp1/2   - scratch registers
void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm,
                                                Register res, Address addr, Register oldval, Register newval,
                                                bool exchange, Register tmp1, Register tmp2) {
  assert(ShenandoahCASBarrier, "Should only be used when CAS barrier is enabled");
  assert(oldval == rax, "must be in rax for implicit use in cmpxchg");
  assert_different_registers(oldval, tmp1, tmp2);
  assert_different_registers(newval, tmp1, tmp2);

  Label L_success, L_failure;

  // Remember oldval for retry logic below (cmpxchg clobbers rax with the
  // in-memory value on failure).
  if (UseCompressedOops) {
    __ movl(tmp1, oldval);
  } else {
    __ movptr(tmp1, oldval);
  }

  // Step 1. Fast-path.
  //
  // Try to CAS with given arguments. If successful, then we are done.

  if (UseCompressedOops) {
    __ lock();
    __ cmpxchgl(newval, addr);
  } else {
    __ lock();
    __ cmpxchgptr(newval, addr);
  }
  __ jcc(Assembler::equal, L_success);

  // Step 2. CAS had failed. This may be a false negative.
  //
  // The trouble comes when we compare the to-space pointer with the from-space
  // pointer to the same object. To resolve this, it will suffice to resolve
  // the value from memory -- this will give both to-space pointers.
  // If they mismatch, then it was a legitimate failure.
  //
  // Before reaching to resolve sequence, see if we can avoid the whole shebang
  // with filters.

  // Filter: when offending in-memory value is null, the failure is definitely legitimate
  __ testptr(oldval, oldval);
  __ jcc(Assembler::zero, L_failure);

  // Filter: when heap is stable, the failure is definitely legitimate
  const Register thread = r15_thread;
  Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
  __ testb(gc_state, ShenandoahHeap::HAS_FORWARDED);
  __ jcc(Assembler::zero, L_failure);

  // Copy the witnessed in-memory value into tmp2 (decoded when narrow) so
  // its mark word can be inspected.
  if (UseCompressedOops) {
    __ movl(tmp2, oldval);
    __ decode_heap_oop(tmp2);
  } else {
    __ movptr(tmp2, oldval);
  }

  // Decode offending in-memory value.
  // Test if-forwarded: forwarded objects have both low mark bits set (11);
  // the test below falls through only in that case.
  __ testb(Address(tmp2, oopDesc::mark_offset_in_bytes()), markWord::marked_value);
  __ jcc(Assembler::noParity, L_failure);  // When odd number of bits, then not forwarded
  __ jcc(Assembler::zero, L_failure);      // When it is 00, then also not forwarded

  // Load and mask forwarding pointer: clearing the two low (tag) bits
  // of the mark word yields the forwardee address.
  __ movptr(tmp2, Address(tmp2, oopDesc::mark_offset_in_bytes()));
  __ shrptr(tmp2, 2);
  __ shlptr(tmp2, 2);

  if (UseCompressedOops) {
    __ decode_heap_oop(tmp1); // decode for comparison
  }

  // Now we have the forwarded offender in tmp2.
  // Compare and if they don't match, we have legitimate failure
  __ cmpptr(tmp1, tmp2);
  __ jcc(Assembler::notEqual, L_failure);

  // Step 3. Need to fix the memory ptr before continuing.
  //
  // At this point, we have from-space oldval in the register, and its to-space
  // address is in tmp2. Let's try to update it into memory. We don't care if it
  // succeeds or not. If it does, then the retrying CAS would see it and succeed.
  // If this fixup fails, this means somebody else beat us to it, and necessarily
  // with to-space ptr store. We still have to do the retry, because the GC might
  // have updated the reference for us.

  if (UseCompressedOops) {
    __ encode_heap_oop(tmp2); // previously decoded at step 2.
  }

  if (UseCompressedOops) {
    __ lock();
    __ cmpxchgl(tmp2, addr);
  } else {
    __ lock();
    __ cmpxchgptr(tmp2, addr);
  }

  // Step 4. Try to CAS again.
  //
  // This is guaranteed not to have false negatives, because oldval is definitely
  // to-space, and memory pointer is to-space as well. Nothing is able to store
  // from-space ptr into memory anymore. Make sure oldval is restored, after being
  // garbled during retries.
  //
  if (UseCompressedOops) {
    __ movl(oldval, tmp2);
  } else {
    __ movptr(oldval, tmp2);
  }

  if (UseCompressedOops) {
    __ lock();
    __ cmpxchgl(newval, addr);
  } else {
    __ lock();
    __ cmpxchgptr(newval, addr);
  }
  if (!exchange) {
    __ jccb(Assembler::equal, L_success); // fastpath, peeking into Step 5, no need to jump
  }

  // Step 5. If we need a boolean result out of CAS, set the flag appropriately.
  // and promote the result. Note that we handle the flag from both the 1st and 2nd CAS.
  // Otherwise, failure witness for CAE is in oldval on all paths, and we can return.

  if (exchange) {
    __ bind(L_failure);
    __ bind(L_success);
  } else {
    assert(res != noreg, "need result register");

    Label exit;
    __ bind(L_failure);
    __ xorptr(res, res);
    __ jmpb(exit);

    __ bind(L_success);
    __ movptr(res, 1);
    __ bind(exit);
  }
}
765 
766 #ifdef PRODUCT
767 #define BLOCK_COMMENT(str) /* nothing */
768 #else
769 #define BLOCK_COMMENT(str) __ block_comment(str)
770 #endif
771 
772 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
773 
774 #define TIMES_OOP (UseCompressedOops ? Address::times_4 : Address::times_8)
775 
776 void ShenandoahBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
777                                                                      Register addr, Register count,
778                                                                      Register tmp) {
779   assert(ShenandoahCardBarrier, "Should have been checked by caller");
780 
781   Label L_loop, L_done;
782   const Register end = count;
783   assert_different_registers(addr, end);
784 
785   // Zero count? Nothing to do.
786   __ testl(count, count);
787   __ jccb(Assembler::zero, L_done);
788 
789   const Register thread = r15_thread;
790   Address curr_ct_holder_addr(thread, in_bytes(ShenandoahThreadLocalData::card_table_offset()));
791   __ movptr(tmp, curr_ct_holder_addr);
792 
793   __ leaq(end, Address(addr, count, TIMES_OOP, 0));  // end == addr+count*oop_size
794   __ subptr(end, BytesPerHeapOop); // end - 1 to make inclusive
795   __ shrptr(addr, CardTable::card_shift());
796   __ shrptr(end, CardTable::card_shift());
797   __ subptr(end, addr); // end --> cards count
798 
799   __ addptr(addr, tmp);
800 
801   __ BIND(L_loop);
802   __ movb(Address(addr, count, Address::times_1), 0);
803   __ decrement(count);
804   __ jccb(Assembler::greaterEqual, L_loop);
805 
806   __ BIND(L_done);
807 }
808 
809 #undef __
810 
811 #ifdef COMPILER1
812 
813 #define __ ce->masm()->
814 
// C1 slow-path stub for the SATB pre-barrier: reached only while marking is
// active. Loads the previous value if the stub is responsible for it, skips
// nulls, and calls the shared pre-barrier runtime code blob.
void ShenandoahBarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub) {
  ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
  // At this point we know that marking is in progress.
  // If do_load() is true then we have to emit the
  // load of the previous value; otherwise it has already
  // been loaded into _pre_val.

  __ bind(*stub->entry());
  assert(stub->pre_val()->is_register(), "Precondition.");

  Register pre_val_reg = stub->pre_val()->as_register();

  if (stub->do_load()) {
    ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/);
  }

  // A null previous value needs no recording.
  __ cmpptr(pre_val_reg, NULL_WORD);
  __ jcc(Assembler::equal, *stub->continuation());
  // Pass pre_val to the runtime blob via the stack, call it, and resume.
  ce->store_parameter(stub->pre_val()->as_register(), 0);
  __ call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin()));
  __ jmp(*stub->continuation());

}
838 
void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub) {
  // C1 slow-path code for the load-reference barrier (LRB): given a just-loaded
  // oop in stub->obj(), leave the (possibly forwarded) canonical oop in
  // stub->result(). For strong accesses a fast in-collection-set test can skip
  // the runtime call entirely; otherwise the appropriate strength-specific
  // runtime blob is invoked.
  ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
  __ bind(*stub->entry());

  // Exactly one of strong/weak/phantom is set; native additionally selects
  // the off-heap variant for strong accesses.
  DecoratorSet decorators = stub->decorators();
  bool is_strong  = ShenandoahBarrierSet::is_strong_access(decorators);
  bool is_weak    = ShenandoahBarrierSet::is_weak_access(decorators);
  bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators);
  bool is_native  = ShenandoahBarrierSet::is_native_access(decorators);

  Register obj = stub->obj()->as_register();
  Register res = stub->result()->as_register();
  Register addr = stub->addr()->as_pointer_register();
  Register tmp1 = stub->tmp1()->as_register();
  Register tmp2 = stub->tmp2()->as_register();
  assert_different_registers(obj, res, addr, tmp1, tmp2);

  Label slow_path;

  // The runtime stubs leave their result in rax (C1 calling convention here).
  assert(res == rax, "result must arrive in rax");

  if (res != obj) {
    __ mov(res, obj);
  }

  if (is_strong) {
    // Check for object being in the collection set.
    // Index the in_cset_fast_test byte map by the object's region number;
    // objects outside the cset need no forwarding — return them unchanged.
    __ mov(tmp1, res);
    __ shrptr(tmp1, ShenandoahHeapRegion::region_size_bytes_shift_jint());
    __ movptr(tmp2, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr());
    __ movbool(tmp2, Address(tmp2, tmp1, Address::times_1));
    __ testbool(tmp2);
    __ jcc(Assembler::zero, *stub->continuation());
  }

  // Slow path: call the runtime blob matching the access strength.
  // Parameters: 0 = the loaded oop, 1 = the address it was loaded from.
  __ bind(slow_path);
  ce->store_parameter(res, 0);
  ce->store_parameter(addr, 1);
  if (is_strong) {
    if (is_native) {
      __ call(RuntimeAddress(bs->load_reference_barrier_strong_native_rt_code_blob()->code_begin()));
    } else {
      __ call(RuntimeAddress(bs->load_reference_barrier_strong_rt_code_blob()->code_begin()));
    }
  } else if (is_weak) {
    __ call(RuntimeAddress(bs->load_reference_barrier_weak_rt_code_blob()->code_begin()));
  } else {
    assert(is_phantom, "only remaining strength");
    __ call(RuntimeAddress(bs->load_reference_barrier_phantom_rt_code_blob()->code_begin()));
  }
  __ jmp(*stub->continuation());
}
891 
892 #undef __
893 
894 #define __ sasm->
895 
void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
  // Runtime blob behind gen_pre_barrier_stub: records a non-null previous
  // value for SATB marking. Fast path pushes it into the thread's SATB queue
  // buffer; when the buffer is full (index == 0) it falls back to the VM
  // (ShenandoahRuntime::write_barrier_pre). Preserves all registers.
  __ prologue("shenandoah_pre_barrier", false);
  // arg0 : previous value of memory

  // rax/rdx are used as scratch below; preserve them for the caller.
  __ push(rax);
  __ push(rdx);

  const Register pre_val = rax;
  const Register thread = r15_thread;
  const Register tmp = rdx;

  // SATB queue: 'queue_index' is the byte offset of the next free slot
  // (counting down), 'buffer' is the base address of the buffer.
  Address queue_index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
  Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));

  Label done;
  Label runtime;

  // Is SATB still active?
  Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
  __ testb(gc_state, ShenandoahHeap::MARKING);
  __ jcc(Assembler::zero, done);

  // Can we store original value in the thread's buffer?

  // index == 0 means the buffer is full; take the runtime slow path.
  __ movptr(tmp, queue_index);
  __ testptr(tmp, tmp);
  __ jcc(Assembler::zero, runtime);
  // Claim a slot: decrement index by one word, publish it, compute the
  // absolute slot address in tmp.
  __ subptr(tmp, wordSize);
  __ movptr(queue_index, tmp);
  __ addptr(tmp, buffer);

  // prev_val (rax)
  __ load_parameter(0, pre_val);
  __ movptr(Address(tmp, 0), pre_val);
  __ jmp(done);

  __ bind(runtime);

  // VM call may clobber anything: save the full register state.
  __ save_live_registers_no_oop_map(true);

  // load the pre-value
  __ load_parameter(0, rcx);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_barrier_pre), rcx);

  __ restore_live_registers(true);

  __ bind(done);

  __ pop(rdx);
  __ pop(rax);

  __ epilogue();
}
949 
void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, DecoratorSet decorators) {
  // Runtime blob behind gen_load_reference_barrier_stub: forwards the call to
  // the strength-appropriate ShenandoahRuntime LRB entry point. A separate
  // blob is generated per decorator combination, so 'decorators' is a
  // generation-time constant here, not a runtime value.
  __ prologue("shenandoah_load_reference_barrier", false);
  // arg0 : object to be resolved

  __ save_live_registers_no_oop_map(true);

  bool is_strong  = ShenandoahBarrierSet::is_strong_access(decorators);
  bool is_weak    = ShenandoahBarrierSet::is_weak_access(decorators);
  bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators);
  bool is_native  = ShenandoahBarrierSet::is_native_access(decorators);

  // c_rarg0 = loaded oop, c_rarg1 = load address (C calling convention).
  __ load_parameter(0, c_rarg0);
  __ load_parameter(1, c_rarg1);
  if (is_strong) {
    if (is_native) {
      // Off-heap slots always hold uncompressed oops.
      __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong), c_rarg0, c_rarg1);
    } else {
      // In-heap slots: pick the variant matching the heap's oop encoding.
      if (UseCompressedOops) {
        __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong_narrow), c_rarg0, c_rarg1);
      } else {
        __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong), c_rarg0, c_rarg1);
      }
    }
  } else if (is_weak) {
    assert(!is_native, "weak must not be called off-heap");
    if (UseCompressedOops) {
      __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow), c_rarg0, c_rarg1);
    } else {
      __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak), c_rarg0, c_rarg1);
    }
  } else {
    assert(is_phantom, "only remaining strength");
    assert(is_native, "phantom must only be called off-heap");
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom), c_rarg0, c_rarg1);
  }

  // rax carries the (possibly forwarded) result back to the compiled caller.
  __ restore_live_registers_except_rax(true);

  __ epilogue();
}
990 
991 #undef __
992 
993 #endif // COMPILER1