/*
 * Copyright (c) 2018, 2023, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/macroAssembler.inline.hpp"
#if INCLUDE_CDS
#include "code/SCCache.hpp"
#endif
#include "gc/g1/g1BarrierSet.hpp"
#include "gc/g1/g1BarrierSetAssembler.hpp"
#include "gc/g1/g1BarrierSetRuntime.hpp"
#include "gc/g1/g1CardTable.hpp"
#include "gc/g1/g1HeapRegion.hpp"
#include "gc/g1/g1ThreadLocalData.hpp"
#include "gc/shared/collectedHeap.hpp"
#include "interpreter/interp_masm.hpp"
#include "runtime/javaThread.hpp"
#include "runtime/sharedRuntime.hpp"
#ifdef COMPILER1
#include "c1/c1_LIRAssembler.hpp"
#include "c1/c1_MacroAssembler.hpp"
#include "gc/g1/c1/g1BarrierSetC1.hpp"
#endif // COMPILER1
#ifdef COMPILER2
#include "gc/g1/c2/g1BarrierSetC2.hpp"
#endif // COMPILER2

#define __ masm->

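// Pre-barrier for bulk reference-array copies. If SATB marking is active,
// the previous contents of the destination must be logged before they are
// overwritten, so the whole range [addr, addr + count) is handed to the
// runtime. Destinations known to be uninitialized (IS_DEST_UNINITIALIZED)
// hold no previous values and skip the barrier entirely.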
void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators,
                                                            Register addr, Register count, RegSet saved_regs) {
  bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;
  if (!dest_uninitialized) {
    Label done;
    Address in_progress(rthread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()));

    // Is marking active?
    if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
      __ ldrw(rscratch1, in_progress);
    } else {
      assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
      __ ldrb(rscratch1, in_progress);
    }
    __ cbzw(rscratch1, done);

    __ push(saved_regs, sp);
    if (count == c_rarg0) {
      if (addr == c_rarg1) {
        // addr/count are exactly backwards; swap them via rscratch1.
        __ mov(rscratch1, c_rarg0);
        __ mov(c_rarg0, c_rarg1);
        __ mov(c_rarg1, rscratch1);
      } else {
        __ mov(c_rarg1, count);
        __ mov(c_rarg0, addr);
      }
    } else {
      __ mov(c_rarg0, addr);
      __ mov(c_rarg1, count);
    }
    if (UseCompressedOops) {
      __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_narrow_oop_entry), 2);
    } else {
      __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_oop_entry), 2);
    }
    __ pop(saved_regs, sp);

    __ bind(done);
  }
}

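// Post-barrier for bulk reference-array copies: unconditionally calls into
// the runtime to dirty the cards spanned by [start, start + count).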
void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
                                                             Register start, Register count, Register scratch, RegSet saved_regs) {
  __ push(saved_regs, sp);
  assert_different_registers(start, count, scratch);
  assert_different_registers(c_rarg0, count);
  __ mov(c_rarg0, start);
  __ mov(c_rarg1, count);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_post_entry), 2);
  __ pop(saved_regs, sp);
}

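// Try to enqueue a value in a G1 thread-local queue (SATB mark queue or
// dirty card queue). The index counts down in bytes from the buffer size
// towards zero, so index == 0 means the buffer is full and the enqueue
// must be handled by the runtime instead.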
static void generate_queue_test_and_insertion(MacroAssembler* masm, ByteSize index_offset, ByteSize buffer_offset, Label& runtime,
                                              const Register thread, const Register value, const Register temp1, const Register temp2) {
  // Can we store a value in the given thread's buffer?
  // (The index field is typed as size_t.)
  __ ldr(temp1, Address(thread, in_bytes(index_offset)));   // temp1 := *(index address)
  __ cbz(temp1, runtime);                                   // jump to runtime if index == 0 (full buffer)
  // The buffer is not full, store value into it.
  __ sub(temp1, temp1, wordSize);                           // temp1 := next index
  __ str(temp1, Address(thread, in_bytes(index_offset)));   // *(index address) := next index
  __ ldr(temp2, Address(thread, in_bytes(buffer_offset)));  // temp2 := buffer address
  __ str(value, Address(temp2, temp1));                     // *(buffer address + next index) := value
}

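// Fast-path check of the SATB pre-barrier: load the thread-local
// "marking active" flag into tmp1. The caller branches on it, either to
// skip the barrier (interpreter) or to enter the slow-path stub (C2).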
static void generate_pre_barrier_fast_path(MacroAssembler* masm,
                                           const Register thread,
                                           const Register tmp1) {
  Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()));
  // Is marking active?
  if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
    __ ldrw(tmp1, in_progress);
  } else {
    assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
    __ ldrb(tmp1, in_progress);
  }
}

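// Out-of-line part of the SATB pre-barrier: load the previous value if
// requested (obj != noreg), skip null previous values, and otherwise try
// to log the value in the thread's SATB queue, falling back to `runtime`
// when the queue is full.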
static void generate_pre_barrier_slow_path(MacroAssembler* masm,
                                           const Register obj,
                                           const Register pre_val,
                                           const Register thread,
                                           const Register tmp1,
                                           const Register tmp2,
                                           Label& done,
                                           Label& runtime) {
  // Do we need to load the previous value?
  if (obj != noreg) {
    __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW);
  }
  // Is the previous value null?
  __ cbz(pre_val, done);
  generate_queue_test_and_insertion(masm,
                                    G1ThreadLocalData::satb_mark_queue_index_offset(),
                                    G1ThreadLocalData::satb_mark_queue_buffer_offset(),
                                    runtime,
                                    thread, pre_val, tmp1, tmp2);
  __ b(done);
}

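// Full SATB pre-barrier as emitted by the interpreter and runtime stubs:
// fast-path flag test, inline queue insertion, and a call to
// G1BarrierSetRuntime::write_ref_field_pre_entry when the queue is full.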
void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
                                                 Register obj,
                                                 Register pre_val,
                                                 Register thread,
                                                 Register tmp1,
                                                 Register tmp2,
                                                 bool tosca_live,
                                                 bool expand_call) {
  // If expand_call is true then we expand the call_VM_leaf macro
  // directly to skip generating the check by
  // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.

  assert(thread == rthread, "must be");

  Label done;
  Label runtime;

  assert_different_registers(obj, pre_val, tmp1, tmp2);
  assert(pre_val != noreg && tmp1 != noreg && tmp2 != noreg, "expecting a register");

  generate_pre_barrier_fast_path(masm, thread, tmp1);
  // If marking is not active (*(mark queue active address) == 0), jump to done
  __ cbzw(tmp1, done);
  generate_pre_barrier_slow_path(masm, obj, pre_val, thread, tmp1, tmp2, done, runtime);

  __ bind(runtime);

  __ push_call_clobbered_registers();

  // Calling the runtime using the regular call_VM_leaf mechanism generates
  // code (generated by InterpreterMacroAssembler::call_VM_leaf_base)
  // that checks that *(rfp + frame::interpreter_frame_last_sp) == nullptr.
  //
  // If we are generating the pre-barrier without a frame (e.g. in the
  // intrinsified Reference.get() routine) then rfp might be pointing to
  // the caller frame and so this check will most likely fail at runtime.
  //
  // Expanding the call directly bypasses the generation of the check.
  // So when we do not have a full interpreter frame on the stack
  // expand_call should be passed true.

  if (expand_call) {
    assert(pre_val != c_rarg1, "smashed arg");
    __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread);
  } else {
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread);
  }

  __ pop_call_clobbered_registers();

  __ bind(done);
}

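// Fast path of the post-barrier. XOR-ing the store address with the new
// value and shifting right by the region-size log2 yields zero iff both
// lie in the same heap region, in which case no remembered-set update is
// needed. Otherwise the card for the store address is loaded and compared
// against the young-card value; the caller branches on the result flags.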
static void generate_post_barrier_fast_path(MacroAssembler* masm,
                                            const Register store_addr,
                                            const Register new_val,
                                            const Register tmp1,
                                            const Register tmp2,
                                            Label& done,
                                            bool new_val_may_be_null) {
  // Does the store cross heap regions?
#if INCLUDE_CDS
  // AOT code needs to load the barrier grain shift from the AOT
  // runtime constants area in the code cache; otherwise the shift can
  // be compiled in as an immediate operand.
  if (SCCache::is_on_for_write()) {
    address grain_shift_address = (address)AOTRuntimeConstants::grain_shift_address();
    __ eor(tmp1, store_addr, new_val);                     // tmp1 := store address ^ new value
    __ lea(tmp2, ExternalAddress(grain_shift_address));
    __ ldrb(tmp2, tmp2);                                   // tmp2 := grain shift
    __ lsrv(tmp1, tmp1, tmp2);                             // tmp1 := (store address ^ new value) >> grain shift
    __ cbz(tmp1, done);
  } else
#endif
  {
    __ eor(tmp1, store_addr, new_val);                     // tmp1 := store address ^ new value
    __ lsr(tmp1, tmp1, G1HeapRegion::LogOfHRGrainBytes);   // tmp1 := (store address ^ new value) >> LogOfHRGrainBytes
    __ cbz(tmp1, done);
  }

  // Crosses regions, storing null?
  if (new_val_may_be_null) {
    __ cbz(new_val, done);
  }
  // Storing region crossing non-null, is card young?

#if INCLUDE_CDS
  // AOT code needs to load the barrier card shift from the AOT
  // runtime constants area in the code cache; otherwise the shift can
  // be compiled in as an immediate operand.
  if (SCCache::is_on_for_write()) {
    address card_shift_address = (address)AOTRuntimeConstants::card_shift_address();
    __ lea(tmp2, ExternalAddress(card_shift_address));
    __ ldrb(tmp2, tmp2);                                   // tmp2 := card shift
    __ lsrv(tmp1, store_addr, tmp2);                       // tmp1 := card index, relative to card table base
  } else
#endif
  {
    __ lsr(tmp1, store_addr, CardTable::card_shift());     // tmp1 := card index, relative to card table base
  }

  __ load_byte_map_base(tmp2);                             // tmp2 := card table base address
  __ add(tmp1, tmp1, tmp2);                                // tmp1 := card address
  __ ldrb(tmp2, Address(tmp1));                            // tmp2 := card
  __ cmpw(tmp2, (int)G1CardTable::g1_young_card_val());    // card == young_card_val? (result in flags)
}

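// Slow path of the post-barrier. tmp1 holds the card address on entry.
// The StoreLoad fence orders the reference store before the card re-load;
// if the card was dirtied concurrently (dirty_card_val() == 0) nothing
// more is needed, otherwise the card is dirtied and its address enqueued
// in the thread's dirty card queue, with `runtime` handling a full queue.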
static void generate_post_barrier_slow_path(MacroAssembler* masm,
                                            const Register thread,
                                            const Register tmp1,
                                            const Register tmp2,
                                            Label& done,
                                            Label& runtime) {
  __ membar(Assembler::StoreLoad);  // StoreLoad membar
  __ ldrb(tmp2, Address(tmp1));     // tmp2 := card
  __ cbzw(tmp2, done);
  // Storing a region crossing, non-null oop, card is clean.
  // Dirty card and log.
  STATIC_ASSERT(CardTable::dirty_card_val() == 0);
  __ strb(zr, Address(tmp1));       // *(card address) := dirty_card_val
  generate_queue_test_and_insertion(masm,
                                    G1ThreadLocalData::dirty_card_queue_index_offset(),
                                    G1ThreadLocalData::dirty_card_queue_buffer_offset(),
                                    runtime,
                                    thread, tmp1, tmp2, rscratch1);
  __ b(done);
}

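// Full card-marking post-barrier as emitted by the interpreter: fast-path
// region-cross and young-card checks followed by the inline slow path,
// with G1BarrierSetRuntime::write_ref_field_post_entry as the final
// fallback.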
void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
                                                  Register store_addr,
                                                  Register new_val,
                                                  Register thread,
                                                  Register tmp1,
                                                  Register tmp2) {
  assert(thread == rthread, "must be");
  assert_different_registers(store_addr, new_val, thread, tmp1, tmp2,
                             rscratch1);
  assert(store_addr != noreg && new_val != noreg && tmp1 != noreg
         && tmp2 != noreg, "expecting a register");

  Label done;
  Label runtime;

  generate_post_barrier_fast_path(masm, store_addr, new_val, tmp1, tmp2, done, true /* new_val_may_be_null */);
  // If card is young, jump to done
  __ br(Assembler::EQ, done);
  generate_post_barrier_slow_path(masm, thread, tmp1, tmp2, done, runtime);

  __ bind(runtime);
  // save the live input values
  RegSet saved = RegSet::of(store_addr);
  __ push(saved, sp);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), tmp1, thread);
  __ pop(saved, sp);

  __ bind(done);
}

#if defined(COMPILER2)

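// Shared C2 helper: spill the live registers recorded in the stub, place
// the argument and current thread in c_rarg0/c_rarg1, and call the given
// G1 runtime entry.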
static void generate_c2_barrier_runtime_call(MacroAssembler* masm, G1BarrierStubC2* stub, const Register arg, const address runtime_path) {
  SaveLiveRegisters save_registers(masm, stub);
  if (c_rarg0 != arg) {
    __ mov(c_rarg0, arg);
  }
  __ mov(c_rarg1, rthread);
  __ lea(rscratch1, RuntimeAddress(runtime_path));
  __ blr(rscratch1);
}

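// C2 variant of the pre-barrier: only the fast-path flag test is emitted
// inline; everything else lives in the out-of-line stub generated by
// generate_c2_pre_barrier_stub below.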
void G1BarrierSetAssembler::g1_write_barrier_pre_c2(MacroAssembler* masm,
                                                    Register obj,
                                                    Register pre_val,
                                                    Register thread,
                                                    Register tmp1,
                                                    Register tmp2,
                                                    G1PreBarrierStubC2* stub) {
  assert(thread == rthread, "must be");
  assert_different_registers(obj, pre_val, tmp1, tmp2);
  assert(pre_val != noreg && tmp1 != noreg && tmp2 != noreg, "expecting a register");

  stub->initialize_registers(obj, pre_val, thread, tmp1, tmp2);

  generate_pre_barrier_fast_path(masm, thread, tmp1);
  // If marking is active (*(mark queue active address) != 0), jump to stub (slow path)
  __ cbnzw(tmp1, *stub->entry());

  __ bind(*stub->continuation());
}

void G1BarrierSetAssembler::generate_c2_pre_barrier_stub(MacroAssembler* masm,
                                                         G1PreBarrierStubC2* stub) const {
  Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
  Label runtime;
  Register obj = stub->obj();
  Register pre_val = stub->pre_val();
  Register thread = stub->thread();
  Register tmp1 = stub->tmp1();
  Register tmp2 = stub->tmp2();

  __ bind(*stub->entry());
  generate_pre_barrier_slow_path(masm, obj, pre_val, thread, tmp1, tmp2, *stub->continuation(), runtime);

  __ bind(runtime);
  generate_c2_barrier_runtime_call(masm, stub, pre_val, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry));
  __ b(*stub->continuation());
}

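// C2 variant of the post-barrier: the region-cross and young-card checks
// are emitted inline; a not-young card branches to the out-of-line stub
// generated by generate_c2_post_barrier_stub below.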
void G1BarrierSetAssembler::g1_write_barrier_post_c2(MacroAssembler* masm,
                                                     Register store_addr,
                                                     Register new_val,
                                                     Register thread,
                                                     Register tmp1,
                                                     Register tmp2,
                                                     G1PostBarrierStubC2* stub) {
  assert(thread == rthread, "must be");
  assert_different_registers(store_addr, new_val, thread, tmp1, tmp2,
                             rscratch1);
  assert(store_addr != noreg && new_val != noreg && tmp1 != noreg
         && tmp2 != noreg, "expecting a register");

  stub->initialize_registers(thread, tmp1, tmp2);

  bool new_val_may_be_null = (stub->barrier_data() & G1C2BarrierPostNotNull) == 0;
  generate_post_barrier_fast_path(masm, store_addr, new_val, tmp1, tmp2, *stub->continuation(), new_val_may_be_null);
  // If card is not young, jump to stub (slow path)
  __ br(Assembler::NE, *stub->entry());

  __ bind(*stub->continuation());
}

void G1BarrierSetAssembler::generate_c2_post_barrier_stub(MacroAssembler* masm,
                                                          G1PostBarrierStubC2* stub) const {
  Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
  Label runtime;
  Register thread = stub->thread();
  Register tmp1 = stub->tmp1(); // tmp1 holds the card address.
  Register tmp2 = stub->tmp2();
  assert(stub->tmp3() == noreg, "not needed on this platform");

  __ bind(*stub->entry());
  generate_post_barrier_slow_path(masm, thread, tmp1, tmp2, *stub->continuation(), runtime);

  __ bind(runtime);
  generate_c2_barrier_runtime_call(masm, stub, tmp1, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry));
  __ b(*stub->continuation());
}

#endif // COMPILER2

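// Loads through Reference.get() (ON_WEAK/ON_PHANTOM_OOP_REF decorators)
// must pre-barrier the loaded referent so that concurrent marking sees it.
// A frame is set up first because the barrier may call into the runtime.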
void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                    Register dst, Address src, Register tmp1, Register tmp2) {
  bool on_oop = is_reference_type(type);
  bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0;
  bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0;
  bool on_reference = on_weak || on_phantom;
  ModRefBarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp2);
  if (on_oop && on_reference) {
    // LR is live. It must be saved around calls.
    __ enter(/*strip_ret_addr*/true); // barrier may call runtime
    // Generate the G1 pre-barrier code to log the value of
    // the referent field in an SATB buffer.
    g1_write_barrier_pre(masm /* masm */,
                         noreg /* obj */,
                         dst /* pre_val */,
                         rthread /* thread */,
                         tmp1 /* tmp1 */,
                         tmp2 /* tmp2 */,
                         true /* tosca_live */,
                         true /* expand_call */);
    __ leave();
  }
}

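// Reference store: pre-barrier on the old value, the store itself, then
// the post-barrier on the (uncompressed) new value. A null store needs no
// post-barrier because it cannot create a region-crossing reference.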
void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                         Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) {
  // flatten object address if needed
  if (dst.index() == noreg && dst.offset() == 0) {
    if (dst.base() != tmp3) {
      __ mov(tmp3, dst.base());
    }
  } else {
    __ lea(tmp3, dst);
  }

  g1_write_barrier_pre(masm,
                       tmp3 /* obj */,
                       tmp2 /* pre_val */,
                       rthread /* thread */,
                       tmp1 /* tmp1 */,
                       rscratch2 /* tmp2 */,
                       val != noreg /* tosca_live */,
                       false /* expand_call */);

  if (val == noreg) {
    BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), noreg, noreg, noreg, noreg);
  } else {
    // G1 barrier needs uncompressed oop for region cross check.
    Register new_val = val;
    if (UseCompressedOops) {
      new_val = rscratch2;
      __ mov(new_val, val);
    }
    BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), val, noreg, noreg, noreg);
    g1_write_barrier_post(masm,
                          tmp3 /* store_addr */,
                          new_val /* new_val */,
                          rthread /* thread */,
                          tmp1 /* tmp1 */,
                          tmp2 /* tmp2 */);
  }
}

#ifdef COMPILER1

#undef __
#define __ ce->masm()->

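// C1 out-of-line stub for the pre-barrier: load the previous value if the
// stub was created with do_load(), skip null previous values, and hand the
// value to the shared runtime blob generated by
// generate_c1_pre_barrier_runtime_stub.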
void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub) {
  G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
  // At this point we know that marking is in progress.
  // If do_load() is true then we have to emit the
  // load of the previous value; otherwise it has already
  // been loaded into _pre_val.

  __ bind(*stub->entry());

  assert(stub->pre_val()->is_register(), "Precondition.");

  Register pre_val_reg = stub->pre_val()->as_register();

  if (stub->do_load()) {
    ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/);
  }
  __ cbz(pre_val_reg, *stub->continuation());
  ce->store_parameter(stub->pre_val()->as_register(), 0);
  __ far_call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin()));
  __ b(*stub->continuation());
}

void G1BarrierSetAssembler::gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub) {
  G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
  __ bind(*stub->entry());
  assert(stub->addr()->is_register(), "Precondition.");
  assert(stub->new_val()->is_register(), "Precondition.");
  Register new_val_reg = stub->new_val()->as_register();
  __ cbz(new_val_reg, *stub->continuation());
  ce->store_parameter(stub->addr()->as_pointer_register(), 0);
  __ far_call(RuntimeAddress(bs->post_barrier_c1_runtime_code_blob()->code_begin()));
  __ b(*stub->continuation());
}

#undef __

#define __ sasm->

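// Runtime blob shared by all C1 pre-barrier stubs. It re-checks the
// marking flag (marking may have become inactive since the stub was
// entered), performs the SATB queue insertion, and falls back to
// write_ref_field_pre_entry when the queue is full.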
void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
  __ prologue("g1_pre_barrier", false);

  // arg0 : previous value of memory

  BarrierSet* bs = BarrierSet::barrier_set();

  const Register pre_val = r0;
  const Register thread = rthread;
  const Register tmp = rscratch1;

  Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()));
  Address queue_index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset()));
  Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset()));

  Label done;
  Label runtime;

  // Is marking still active?
  if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
    __ ldrw(tmp, in_progress);
  } else {
    assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
    __ ldrb(tmp, in_progress);
  }
  __ cbzw(tmp, done);

  // Can we store the original value in the thread's buffer?
  __ ldr(tmp, queue_index);
  __ cbz(tmp, runtime);

  __ sub(tmp, tmp, wordSize);
  __ str(tmp, queue_index);
  __ ldr(rscratch2, buffer);
  __ add(tmp, tmp, rscratch2);
  __ load_parameter(0, rscratch2);
  __ str(rscratch2, Address(tmp, 0));
  __ b(done);

  __ bind(runtime);
  __ push_call_clobbered_registers();
  __ load_parameter(0, pre_val);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread);
  __ pop_call_clobbered_registers();
  __ bind(done);

  __ epilogue();
}

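// Runtime blob shared by all C1 post-barrier stubs: compute the card
// address from the store address parameter, perform the young-card and
// already-dirty checks, then dirty the card and enqueue it, calling
// write_ref_field_post_entry when the queue is full.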
void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* sasm) {
  __ prologue("g1_post_barrier", false);

  // arg0: store_address
  Address store_addr(rfp, 2 * BytesPerWord);

  BarrierSet* bs = BarrierSet::barrier_set();
  CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet>(bs);
  CardTable* ct = ctbs->card_table();

  Label done;
  Label runtime;

  // At this point we know new_value is non-null and that it crosses regions.
  // Must check to see if the card is already dirty.

  const Register thread = rthread;

  Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()));
  Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset()));

  const Register card_offset = rscratch2;
  // LR is free here, so we can use it to hold the byte_map_base.
  const Register byte_map_base = lr;

  assert_different_registers(card_offset, byte_map_base, rscratch1);

  __ load_parameter(0, card_offset);
  __ lsr(card_offset, card_offset, CardTable::card_shift());
  __ load_byte_map_base(byte_map_base);
  __ ldrb(rscratch1, Address(byte_map_base, card_offset));
  __ cmpw(rscratch1, (int)G1CardTable::g1_young_card_val());
  __ br(Assembler::EQ, done);

  assert((int)CardTable::dirty_card_val() == 0, "must be 0");

  __ membar(Assembler::StoreLoad);
  __ ldrb(rscratch1, Address(byte_map_base, card_offset));
  __ cbzw(rscratch1, done);

  // Storing region crossing non-null, card is clean.
  // Dirty card and log.
  __ strb(zr, Address(byte_map_base, card_offset));

  // Convert card offset into an address in card_addr
  Register card_addr = card_offset;
  __ add(card_addr, byte_map_base, card_addr);

  __ ldr(rscratch1, queue_index);
  __ cbz(rscratch1, runtime);
  __ sub(rscratch1, rscratch1, wordSize);
  __ str(rscratch1, queue_index);

  // Reuse LR to hold buffer_addr
  const Register buffer_addr = lr;

  __ ldr(buffer_addr, buffer);
  __ str(card_addr, Address(buffer_addr, rscratch1));
  __ b(done);

  __ bind(runtime);
  __ push_call_clobbered_registers();
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread);
  __ pop_call_clobbered_registers();
  __ bind(done);
  __ epilogue();
}

#undef __

#endif // COMPILER1