1 /*
  2  * Copyright (c) 2018, 2026, Oracle and/or its affiliates. All rights reserved.
  3  * Copyright (c) 2018, 2026 SAP SE. All rights reserved.
  4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  5  *
  6  * This code is free software; you can redistribute it and/or modify it
  7  * under the terms of the GNU General Public License version 2 only, as
  8  * published by the Free Software Foundation.
  9  *
 10  * This code is distributed in the hope that it will be useful, but WITHOUT
 11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 13  * version 2 for more details (a copy is included in the LICENSE file that
 14  * accompanied this code).
 15  *
 16  * You should have received a copy of the GNU General Public License version
 17  * 2 along with this work; if not, write to the Free Software Foundation,
 18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 19  *
 20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 21  * or visit www.oracle.com if you need additional information or have any
 22  * questions.
 23  *
 24  */
 25 
 26 #include "asm/macroAssembler.inline.hpp"
 27 #include "gc/g1/g1BarrierSet.hpp"
 28 #include "gc/g1/g1BarrierSetAssembler.hpp"
 29 #include "gc/g1/g1BarrierSetRuntime.hpp"
 30 #include "gc/g1/g1CardTable.hpp"
 31 #include "gc/g1/g1HeapRegion.hpp"
 32 #include "gc/g1/g1SATBMarkQueueSet.hpp"
 33 #include "gc/g1/g1ThreadLocalData.hpp"
 34 #include "interpreter/interp_masm.hpp"
 35 #include "runtime/jniHandles.hpp"
 36 #include "runtime/sharedRuntime.hpp"
 37 #include "utilities/macros.hpp"
 38 #ifdef COMPILER1
 39 #include "c1/c1_LIRAssembler.hpp"
 40 #include "c1/c1_MacroAssembler.hpp"
 41 #include "gc/g1/c1/g1BarrierSetC1.hpp"
 42 #endif // COMPILER1
 43 #ifdef COMPILER2
 44 #include "gc/g1/c2/g1BarrierSetC2.hpp"
 45 #endif // COMPILER2
 46 
 47 #define __ masm->
 48 
 49 static void generate_marking_inactive_test(MacroAssembler* masm) {
 50   int active_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset());
 51   assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
 52   __ lbz(R0, active_offset, R16_thread);  // tmp1 := *(mark queue active address)
 53   __ cmpwi(CR0, R0, 0);
 54 }
 55 
 56 void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators,
 57                                                             Register from, Register to, Register count,
 58                                                             Register preserve1, Register preserve2) {
 59   bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;
 60   // With G1, don't generate the call if we statically know that the target in uninitialized
 61   if (!dest_uninitialized) {
 62     int spill_slots = 3;
 63     if (preserve1 != noreg) { spill_slots++; }
 64     if (preserve2 != noreg) { spill_slots++; }
 65     const int frame_size = align_up(frame::native_abi_reg_args_size + spill_slots * BytesPerWord, frame::alignment_in_bytes);
 66     Label filtered;
 67 
 68     // Is marking active?
 69     generate_marking_inactive_test(masm);
 70     __ beq(CR0, filtered);
 71 
 72     __ save_LR(R0);
 73     __ push_frame(frame_size, R0);
 74     int slot_nr = 0;
 75     __ std(from,  frame_size - (++slot_nr) * wordSize, R1_SP);
 76     __ std(to,    frame_size - (++slot_nr) * wordSize, R1_SP);
 77     __ std(count, frame_size - (++slot_nr) * wordSize, R1_SP);
 78     if (preserve1 != noreg) { __ std(preserve1, frame_size - (++slot_nr) * wordSize, R1_SP); }
 79     if (preserve2 != noreg) { __ std(preserve2, frame_size - (++slot_nr) * wordSize, R1_SP); }
 80 
 81     if (UseCompressedOops) {
 82       __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_narrow_oop_entry), to, count);
 83     } else {
 84       __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_oop_entry), to, count);
 85     }
 86 
 87     slot_nr = 0;
 88     __ ld(from,  frame_size - (++slot_nr) * wordSize, R1_SP);
 89     __ ld(to,    frame_size - (++slot_nr) * wordSize, R1_SP);
 90     __ ld(count, frame_size - (++slot_nr) * wordSize, R1_SP);
 91     if (preserve1 != noreg) { __ ld(preserve1, frame_size - (++slot_nr) * wordSize, R1_SP); }
 92     if (preserve2 != noreg) { __ ld(preserve2, frame_size - (++slot_nr) * wordSize, R1_SP); }
 93     __ addi(R1_SP, R1_SP, frame_size); // pop_frame()
 94     __ restore_LR(R0);
 95 
 96     __ bind(filtered);
 97   }
 98 }
 99 
100 void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
101                                                              Register addr, Register count, Register preserve) {
102   int spill_slots = (preserve != noreg) ? 1 : 0;
103   const int frame_size = align_up(frame::native_abi_reg_args_size + spill_slots * BytesPerWord, frame::alignment_in_bytes);
104 
105   __ save_LR(R0);
106   __ push_frame(frame_size, R0);
107   if (preserve != noreg) { __ std(preserve, frame_size - 1 * wordSize, R1_SP); }
108   __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_post_entry), addr, count);
109   if (preserve != noreg) { __ ld(preserve, frame_size - 1 * wordSize, R1_SP); }
110   __ addi(R1_SP, R1_SP, frame_size); // pop_frame();
111   __ restore_LR(R0);
112 }
113 
114 static void generate_queue_insertion(MacroAssembler* masm, ByteSize index_offset, ByteSize buffer_offset, Label& runtime,
115                                      const Register value, const Register temp) {
116   assert_different_registers(value, temp);
117   // Can we store a value in the given thread's buffer?
118   // (The index field is typed as size_t.)
119   __ ld(temp, in_bytes(index_offset), R16_thread);  // temp := *(index address)
120   __ cmpdi(CR0, temp, 0);                          // jump to runtime if index == 0 (full buffer)
121   __ beq(CR0, runtime);
122   // The buffer is not full, store value into it.
123   __ ld(R0, in_bytes(buffer_offset), R16_thread);   // R0 := buffer address
124   __ addi(temp, temp, -wordSize);                   // temp := next index
125   __ std(temp, in_bytes(index_offset), R16_thread); // *(index address) := next index
126   __ stdx(value, temp, R0);                         // *(buffer address + next index) := value
127 }
128 
// Emits the G1 SATB pre-barrier.
//
// The generated code does nothing unless the thread's marking-active flag is
// set. Otherwise it obtains the previous value (loading it from
// obj + ind_or_offs unless it was preloaded), filters null, and records the
// value in the thread's SATB mark queue, calling
// G1BarrierSetRuntime::write_ref_field_pre_entry when the inline insertion
// branches to the runtime path.
//
//   obj               - base register of the field, or noreg if pre_val already
//                       holds the previous value ("preloaded")
//   ind_or_offs       - index register or constant offset of the field (only
//                       used when the value is loaded here)
//   pre_val           - receives / holds the previous value
//   tmp1, tmp2        - scratch registers, must differ from pre_val and each other
//   preservation_level - selects whether a frame is pushed (and LR saved) and
//                       whether volatile GP / FP registers are saved around the
//                       runtime call
//
// R0 and CR0 are clobbered (by the marking test and the queue insertion).
void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, DecoratorSet decorators,
                                                 Register obj, RegisterOrConstant ind_or_offs, Register pre_val,
                                                 Register tmp1, Register tmp2,
                                                 MacroAssembler::PreservationLevel preservation_level) {
  assert_different_registers(pre_val, tmp1, tmp2);

  bool not_null  = (decorators & IS_NOT_NULL) != 0,
       preloaded = obj == noreg;
  // Non-volatile register used to keep pre_val alive across the runtime call, if needed.
  Register nv_save = noreg;

  // Determine necessary runtime invocation preservation measures
  const bool needs_frame = preservation_level >= MacroAssembler::PRESERVATION_FRAME_LR;
  const bool preserve_gp_registers = preservation_level >= MacroAssembler::PRESERVATION_FRAME_LR_GP_REGS;
  const bool preserve_fp_registers = preservation_level >= MacroAssembler::PRESERVATION_FRAME_LR_GP_FP_REGS;
  // Size of the volatile register save area; stays 0 unless GP registers are preserved.
  int nbytes_save = 0;

  if (pre_val->is_volatile() && preloaded && !preserve_gp_registers) {
    // We are not loading the previous value so make
    // sure that we don't trash the value in pre_val
    // with the code below.
    // Pick whichever temp is non-volatile; at least one of them has to be.
    nv_save = !tmp1->is_volatile() ? tmp1 : tmp2;
    assert(!nv_save->is_volatile(), "need one nv temp register if pre_val lives in volatile register");
  }

  Label runtime, filtered;

  // Nothing to do if marking is not active.
  generate_marking_inactive_test(masm);
  __ beq(CR0, filtered);

  // Do we need to load the previous value?
  if (!preloaded) {
    // Load the previous value...
    // (32-bit load for a narrow oop, 64-bit load otherwise.)
    if (UseCompressedOops) {
      __ lwz(pre_val, ind_or_offs, obj);
    } else {
      __ ld(pre_val, ind_or_offs, obj);
    }
    // Previous value has been loaded into pre_val.
  }
  assert(pre_val != noreg, "must have a real register");

  // Is the previous value null?
  if (preloaded && not_null) {
    // The caller guarantees a non-null value; only verify that in debug builds.
#ifdef ASSERT
    __ cmpdi(CR0, pre_val, 0);
    __ asm_assert_ne("null oop not allowed (G1 pre)"); // Checked by caller.
#endif
  } else {
    // A null previous value does not need to be recorded.
    __ cmpdi(CR0, pre_val, 0);
    __ beq(CR0, filtered);
  }

  // A value loaded above as a narrow oop must be decoded before it is enqueued.
  if (!preloaded && UseCompressedOops) {
    __ decode_heap_oop_not_null(pre_val);
  }

  // OK, it's not filtered, so the previous value has to be recorded.
  // First try to store it into the thread's SATB buffer inline; the
  // insertion code branches to 'runtime' when that is not possible.

  // Can we store original value in the thread's buffer?
  // Is index == 0?
  // (The index field is typed as size_t.)
  // Note: tmp1 is passed as the scratch register of the insertion code.
  generate_queue_insertion(masm, G1ThreadLocalData::satb_mark_queue_index_offset(), G1ThreadLocalData::satb_mark_queue_buffer_offset(),
                           runtime, pre_val, tmp1);
  __ b(filtered);

  __ bind(runtime);

  // May need to preserve LR. Also needed if current frame is not compatible with C calling convention.
  if (needs_frame) {
    if (preserve_gp_registers) {
      nbytes_save = (MacroAssembler::num_volatile_gp_regs
                     + (preserve_fp_registers ? MacroAssembler::num_volatile_fp_regs : 0)
                    ) * BytesPerWord;
      // Registers are saved below the current SP, before the new frame is pushed.
      __ save_volatile_gprs(R1_SP, -nbytes_save, preserve_fp_registers);
    }

    // tmp1 and tmp2 are passed as scratch registers here; this happens before
    // nv_save (which may be tmp1 or tmp2) is written below.
    __ save_LR(tmp1);
    __ push_frame_reg_args(nbytes_save, tmp2);
  }

  if (nv_save != noreg) {
    __ mr(nv_save, pre_val); // Save pre_val across C call if it was preloaded.
  }
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, R16_thread);
  if (nv_save != noreg) {
    __ mr(pre_val, nv_save); // restore
  }

  // Undo the frame and register saves in reverse order.
  if (needs_frame) {
    __ pop_frame();
    __ restore_LR(tmp1);

    if (preserve_gp_registers) {
      __ restore_volatile_gprs(R1_SP, -nbytes_save, preserve_fp_registers);
    }
  }

  __ bind(filtered);
}
231 
232 static void generate_post_barrier(MacroAssembler* masm,
233                                   const Register store_addr,
234                                   const Register new_val,
235                                   const Register thread,
236                                   const Register tmp1,
237                                   const Register tmp2,
238                                   Label& done,
239                                   bool new_val_may_be_null) {
240   assert_different_registers(store_addr, new_val, tmp1, R0);
241   assert_different_registers(store_addr, tmp1, tmp2, R0);
242 
243   __ xorr(R0, store_addr, new_val);                          // R0 := store address ^ new value
244   __ srdi_(R0, R0, G1HeapRegion::LogOfHRGrainBytes);         // R0 := ((store address ^ new value) >> LogOfHRGrainBytes)
245   __ beq(CR0, done);
246 
247   // Crosses regions, storing null?
248   if (!new_val_may_be_null) {
249 #ifdef ASSERT
250     __ cmpdi(CR0, new_val, 0);
251     __ asm_assert_ne("null oop not allowed (G1 post)");      // Checked by caller.
252 #endif
253   } else {
254     __ cmpdi(CR0, new_val, 0);
255     __ beq(CR0, done);
256   }
257 
258   __ ld(tmp1, G1ThreadLocalData::card_table_base_offset(), thread);
259   __ srdi(tmp2, store_addr, CardTable::card_shift());        // tmp2 := card address relative to card table base
260   if (UseCondCardMark) {
261     __ lbzx(R0, tmp1, tmp2);
262     __ cmpwi(CR0, R0, (int)G1CardTable::clean_card_val());
263     __ bne(CR0, done);
264   }
265 
266   __ li(R0, G1CardTable::dirty_card_val());
267   __ stbx(R0, tmp1, tmp2);
268 }
269 
270 void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, DecoratorSet decorators,
271                                                   Register store_addr, Register new_val,
272                                                   Register tmp1, Register tmp2) {
273   bool not_null = (decorators & IS_NOT_NULL) != 0;
274 
275   Label done;
276   generate_post_barrier(masm, store_addr, new_val, R16_thread, tmp1, tmp2, done, !not_null);
277   __ bind(done);
278 }
279 
280 void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
281                                        Register base, RegisterOrConstant ind_or_offs, Register val,
282                                        Register tmp1, Register tmp2, Register tmp3,
283                                        MacroAssembler::PreservationLevel preservation_level) {
284   bool in_heap = (decorators & IN_HEAP) != 0;
285   bool as_normal = (decorators & AS_NORMAL) != 0;
286   bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;
287   bool needs_pre_barrier = as_normal && !dest_uninitialized;
288   bool needs_post_barrier = (val != noreg && in_heap);
289   bool is_array = (decorators & IS_ARRAY) != 0;
290   bool on_anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0;
291   bool precise = is_array || on_anonymous;
292 
293   // Load and record the previous value.
294   if (needs_pre_barrier) {
295     g1_write_barrier_pre(masm, decorators,
296                          base, ind_or_offs,
297                          tmp1, tmp2, tmp3,
298                          preservation_level);
299   }
300 
301   BarrierSetAssembler::store_at(masm, decorators,
302                                 type, base, ind_or_offs, val,
303                                 tmp1, tmp2, tmp3,
304                                 preservation_level);
305 
306   // No need for post barrier if storing null
307   if (needs_post_barrier) {
308     if (precise) {
309       if (ind_or_offs.is_constant()) {
310         __ add_const_optimized(base, base, ind_or_offs.as_constant(), tmp1);
311       } else {
312         __ add(base, ind_or_offs.as_register(), base);
313       }
314     }
315     g1_write_barrier_post(masm, decorators,
316                           base, val,
317                           tmp1, tmp2);
318   }
319 }
320 
321 void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
322                                     Register base, RegisterOrConstant ind_or_offs, Register dst,
323                                     Register tmp1, Register tmp2,
324                                     MacroAssembler::PreservationLevel preservation_level, Label *L_handle_null) {
325   bool on_oop = is_reference_type(type);
326   bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0;
327   bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0;
328   bool on_reference = on_weak || on_phantom;
329   Label done;
330   if (on_oop && on_reference && L_handle_null == nullptr) { L_handle_null = &done; }
331   // Load the value of the referent field.
332   CardTableBarrierSetAssembler::load_at(masm, decorators, type,
333                                         base, ind_or_offs, dst,
334                                         tmp1, tmp2,
335                                         preservation_level, L_handle_null);
336   if (on_oop && on_reference) {
337     // Generate the G1 pre-barrier code to log the value of
338     // the referent field in an SATB buffer. Note with
339     // these parameters the pre-barrier does not generate
340     // the load of the previous value
341     // We only reach here if value is not null.
342     g1_write_barrier_pre(masm, decorators | IS_NOT_NULL,
343                          noreg /* obj */, (intptr_t)0, dst /* pre_val */,
344                          tmp1, tmp2,
345                          preservation_level);
346   }
347   __ bind(done);
348 }
349 
350 void G1BarrierSetAssembler::resolve_jobject(MacroAssembler* masm, Register value,
351                                             Register tmp1, Register tmp2,
352                                             MacroAssembler::PreservationLevel preservation_level) {
353   Label done, not_weak;
354   __ cmpdi(CR0, value, 0);
355   __ beq(CR0, done);         // Use null as-is.
356 
357   __ clrrdi(tmp1, value, JNIHandles::tag_size);
358   __ andi_(tmp2, value, JNIHandles::TypeTag::weak_global);
359   __ ld(value, 0, tmp1);      // Resolve (untagged) jobject.
360 
361   __ beq(CR0, not_weak);     // Test for jweak tag.
362   __ verify_oop(value, FILE_AND_LINE);
363   g1_write_barrier_pre(masm, IN_NATIVE | ON_PHANTOM_OOP_REF,
364                        noreg, noreg, value,
365                        tmp1, tmp2,
366                        preservation_level);
367   __ bind(not_weak);
368   __ verify_oop(value, FILE_AND_LINE);
369   __ bind(done);
370 }
371 
372 #ifdef COMPILER2
373 
374 static void generate_c2_barrier_runtime_call(MacroAssembler* masm, G1BarrierStubC2* stub, const Register arg, const address runtime_path) {
375   SaveLiveRegisters save_registers(masm, stub);
376   __ call_VM_leaf(runtime_path, arg, R16_thread);
377 }
378 
379 void G1BarrierSetAssembler::g1_write_barrier_pre_c2(MacroAssembler* masm,
380                                                     Register obj,
381                                                     Register pre_val,
382                                                     Register tmp1,
383                                                     Register tmp2,
384                                                     G1PreBarrierStubC2* stub) {
385   assert_different_registers(obj, tmp1, tmp2, R0);
386   assert_different_registers(pre_val, tmp1, R0);
387   assert(!UseCompressedOops || tmp2 != noreg, "tmp2 needed with CompressedOops");
388 
389   stub->initialize_registers(obj, pre_val, R16_thread, tmp1, tmp2);
390 
391   generate_marking_inactive_test(masm);
392   __ bc_far_optimized(Assembler::bcondCRbiIs0, __ bi0(CR0, Assembler::equal), *stub->entry());
393 
394   __ bind(*stub->continuation());
395 }
396 
397 void G1BarrierSetAssembler::generate_c2_pre_barrier_stub(MacroAssembler* masm,
398                                                          G1PreBarrierStubC2* stub) const {
399   Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
400   Label runtime;
401   Register obj = stub->obj();
402   Register pre_val = stub->pre_val();
403   Register tmp1 = stub->tmp1();
404 
405   __ bind(*stub->entry());
406 
407   if (obj != noreg) {
408     // Note: C2 currently doesn't use implicit null checks with barriers.
409     // Otherwise, obj could be null and the following instruction would raise a SIGSEGV.
410     if (UseCompressedOops) {
411       __ lwz(pre_val, 0, obj);
412     } else {
413       __ ld(pre_val, 0, obj);
414     }
415   }
416   __ cmpdi(CR0, pre_val, 0);
417   __ bc_far_optimized(Assembler::bcondCRbiIs1, __ bi0(CR0, Assembler::equal), *stub->continuation());
418 
419   Register pre_val_decoded = pre_val;
420   if (UseCompressedOops) {
421     pre_val_decoded = __ decode_heap_oop_not_null(stub->tmp2(), pre_val);
422   }
423 
424   generate_queue_insertion(masm,
425                            G1ThreadLocalData::satb_mark_queue_index_offset(),
426                            G1ThreadLocalData::satb_mark_queue_buffer_offset(),
427                            runtime, pre_val_decoded, tmp1);
428   __ b(*stub->continuation());
429 
430   __ bind(runtime);
431   generate_c2_barrier_runtime_call(masm, stub, pre_val_decoded, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry));
432   __ b(*stub->continuation());
433 }
434 
435 void G1BarrierSetAssembler::g1_write_barrier_post_c2(MacroAssembler* masm,
436                                                      Register store_addr,
437                                                      Register new_val,
438                                                      Register tmp1,
439                                                      Register tmp2,
440                                                      bool new_val_may_be_null,
441                                                      bool decode_new_val) {
442   assert_different_registers(store_addr, new_val, tmp1, R0);
443   assert_different_registers(store_addr, tmp1, tmp2, R0);
444 
445   Label done;
446 
447   Register new_val_decoded = new_val;
448 
449   if (decode_new_val) {
450     assert(UseCompressedOops, "or should not be here");
451     if (new_val_may_be_null && CompressedOops::base() != nullptr) {
452       // We prefer doing the null check after the region crossing check.
453       // Only compressed oop modes with base != null require a null check here.
454       __ cmpwi(CR0, new_val, 0);
455       __ beq(CR0, done);
456       new_val_may_be_null = false;
457     }
458     new_val_decoded = __ decode_heap_oop_not_null(tmp2, new_val);
459   }
460 
461   generate_post_barrier(masm, store_addr, new_val_decoded, R16_thread, tmp1, tmp2, done, new_val_may_be_null);
462   __ bind(done);
463 }
464 
465 #endif // COMPILER2
466 
467 #ifdef COMPILER1
468 
469 #undef __
470 #define __ ce->masm()->
471 
472 void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub) {
473   G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
474   // At this point we know that marking is in progress.
475   // If do_load() is true then we have to emit the
476   // load of the previous value; otherwise it has already
477   // been loaded into _pre_val.
478 
479   __ bind(*stub->entry());
480 
481   assert(stub->pre_val()->is_register(), "Precondition.");
482   Register pre_val_reg = stub->pre_val()->as_register();
483 
484   if (stub->do_load()) {
485     ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/);
486   }
487 
488   __ cmpdi(CR0, pre_val_reg, 0);
489   __ bc_far_optimized(Assembler::bcondCRbiIs1, __ bi0(CR0, Assembler::equal), *stub->continuation());
490 
491   address c_code = bs->pre_barrier_c1_runtime_code_blob()->code_begin();
492   //__ load_const_optimized(R0, c_code);
493   __ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(c_code));
494   __ std(pre_val_reg, -8, R1_SP); // Pass pre_val on stack.
495   __ mtctr(R0);
496   __ bctrl();
497   __ b(*stub->continuation());
498 }
499 
500 #undef __
501 
502 void G1BarrierSetAssembler::g1_write_barrier_post_c1(MacroAssembler* masm,
503                                                      Register store_addr,
504                                                      Register new_val,
505                                                      Register thread,
506                                                      Register tmp1,
507                                                      Register tmp2) {
508   Label done;
509   generate_post_barrier(masm, store_addr, new_val, thread, tmp1, tmp2, done, true /* new_val_may_be_null */);
510   masm->bind(done);
511 }
512 
513 #define __ sasm->
514 
515 void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
516   BarrierSet* bs = BarrierSet::barrier_set();
517 
518   __ set_info("g1_pre_barrier_slow_id", false);
519 
520   // Using stack slots: pre_val (pre-pushed), spill tmp, spill tmp2.
521   const int stack_slots = 3;
522   Register pre_val = R0; // previous value of memory
523   Register tmp  = R14;
524   Register tmp2 = R15;
525 
526   Label refill, restart, marking_not_active;
527   int satb_q_active_byte_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset());
528   int satb_q_index_byte_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset());
529   int satb_q_buf_byte_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset());
530 
531   // Spill
532   __ std(tmp, -16, R1_SP);
533   __ std(tmp2, -24, R1_SP);
534 
535   // Is marking still active?
536   generate_marking_inactive_test(sasm);
537   __ beq(CR0, marking_not_active);
538 
539   __ bind(restart);
540   // Load the index into the SATB buffer. SATBMarkQueue::_index is a
541   // size_t so ld_ptr is appropriate.
542   __ ld(tmp, satb_q_index_byte_offset, R16_thread);
543 
544   // index == 0?
545   __ cmpdi(CR0, tmp, 0);
546   __ beq(CR0, refill);
547 
548   __ ld(tmp2, satb_q_buf_byte_offset, R16_thread);
549   __ ld(pre_val, -8, R1_SP); // Load from stack.
550   __ addi(tmp, tmp, -oopSize);
551 
552   __ std(tmp, satb_q_index_byte_offset, R16_thread);
553   __ stdx(pre_val, tmp2, tmp); // [_buf + index] := <address_of_card>
554 
555   __ bind(marking_not_active);
556   // Restore temp registers and return-from-leaf.
557   __ ld(tmp2, -24, R1_SP);
558   __ ld(tmp, -16, R1_SP);
559   __ blr();
560 
561   __ bind(refill);
562   const int nbytes_save = (MacroAssembler::num_volatile_regs + stack_slots) * BytesPerWord;
563   __ save_volatile_gprs(R1_SP, -nbytes_save); // except R0
564   __ mflr(R0);
565   __ std(R0, _abi0(lr), R1_SP);
566   __ push_frame_reg_args(nbytes_save, R0); // dummy frame for C call
567   __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1SATBMarkQueueSet::handle_zero_index_for_thread), R16_thread);
568   __ pop_frame();
569   __ ld(R0, _abi0(lr), R1_SP);
570   __ mtlr(R0);
571   __ restore_volatile_gprs(R1_SP, -nbytes_save); // except R0
572   __ b(restart);
573 }
574 
575 #undef __
576 
577 #endif // COMPILER1