/*
 * Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "asm/macroAssembler.inline.hpp"
#include "gc/g1/g1BarrierSet.hpp"
#include "gc/g1/g1BarrierSetAssembler.hpp"
#include "gc/g1/g1BarrierSetRuntime.hpp"
#include "gc/g1/g1CardTable.hpp"
#include "gc/g1/g1HeapRegion.hpp"
#include "gc/g1/g1ThreadLocalData.hpp"
#include "gc/shared/collectedHeap.hpp"
#include "interpreter/interp_masm.hpp"
#include "runtime/javaThread.hpp"
#include "runtime/sharedRuntime.hpp"
#ifdef COMPILER1
#include "c1/c1_LIRAssembler.hpp"
#include "c1/c1_MacroAssembler.hpp"
#include "gc/g1/c1/g1BarrierSetC1.hpp"
#endif // COMPILER1
#ifdef COMPILER2
#include "gc/g1/c2/g1BarrierSetC2.hpp"
#endif // COMPILER2

#define __ masm->

void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators,
                                                            Register addr, Register count, RegSet saved_regs) {
  bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;
  if (!dest_uninitialized) {
    Label done;
    Address in_progress(rthread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()));

    // Is marking active?
    if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
      __ ldrw(rscratch1, in_progress);
    } else {
      assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
      __ ldrb(rscratch1, in_progress);
    }
    __ cbzw(rscratch1, done);

    __ push(saved_regs, sp);
    if (count == c_rarg0) {
      if (addr == c_rarg1) {
        // exactly backwards!!
        __ mov(rscratch1, c_rarg0);
        __ mov(c_rarg0, c_rarg1);
        __ mov(c_rarg1, rscratch1);
      } else {
        __ mov(c_rarg1, count);
        __ mov(c_rarg0, addr);
      }
    } else {
      __ mov(c_rarg0, addr);
      __ mov(c_rarg1, count);
    }
    if (UseCompressedOops) {
      __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_narrow_oop_entry), 2);
    } else {
      __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_oop_entry), 2);
    }
    __ pop(saved_regs, sp);

    __ bind(done);
  }
}

void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm,
                                                             DecoratorSet decorators,
                                                             Register start,
                                                             Register count,
                                                             Register scratch,
                                                             RegSet saved_regs) {

  Label done;
  Label loop;
  Label next;

  __ cbz(count, done);

  // Calculate the number of card marks to set. Since the object might start and
  // end within a card, we need to calculate this via the card table indexes of
  // the actual start and last addresses covered by the object.
  // Temporarily use the count register for the last element address.
  __ lea(count, Address(start, count, Address::lsl(LogBytesPerHeapOop))); // end = start + count << LogBytesPerHeapOop
  __ sub(count, count, BytesPerHeapOop);                                  // Use last element address for end.

  __ lsr(start, start, CardTable::card_shift());
  __ lsr(count, count, CardTable::card_shift());
  __ sub(count, count, start); // Number of bytes to mark - 1.

  // Add card table base offset to start.
  __ ldr(scratch, Address(rthread, in_bytes(G1ThreadLocalData::card_table_base_offset())));
  __ add(start, start, scratch);

  __ bind(loop);
  if (UseCondCardMark) {
    __ ldrb(scratch, Address(start, count));
    // Instead of loading clean_card_val and comparing, we exploit the fact that
    // the LSB of non-clean cards is always 0, and the LSB of clean cards 1.
    __ tbz(scratch, 0, next);
  }
  static_assert(G1CardTable::dirty_card_val() == 0, "must be to use zr");
  __ strb(zr, Address(start, count));
  __ bind(next);
  __ subs(count, count, 1);
  __ br(Assembler::GE, loop);

  __ bind(done);
}
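// Attempt to enqueue a value into the thread-local queue described by the
// given index and buffer offsets. The index field is a byte offset that
// counts down from the buffer capacity towards zero; an index of zero means
// the buffer is full and the runtime must be called to process it.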
static void generate_queue_test_and_insertion(MacroAssembler* masm, ByteSize index_offset, ByteSize buffer_offset, Label& runtime,
                                              const Register thread, const Register value, const Register temp1, const Register temp2) {
  // Can we store a value in the given thread's buffer?
  // (The index field is typed as size_t.)
  __ ldr(temp1, Address(thread, in_bytes(index_offset)));   // temp1 := *(index address)
  __ cbz(temp1, runtime);                                   // jump to runtime if index == 0 (full buffer)
  // The buffer is not full, store value into it.
  __ sub(temp1, temp1, wordSize);                           // temp1 := next index
  __ str(temp1, Address(thread, in_bytes(index_offset)));   // *(index address) := next index
  __ ldr(temp2, Address(thread, in_bytes(buffer_offset)));  // temp2 := buffer address
  __ str(value, Address(temp2, temp1));                     // *(buffer address + next index) := value
}

static void generate_pre_barrier_fast_path(MacroAssembler* masm,
                                           const Register thread,
                                           const Register tmp1) {
  Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()));
  // Is marking active?
  if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
    __ ldrw(tmp1, in_progress);
  } else {
    assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
    __ ldrb(tmp1, in_progress);
  }
}

static void generate_pre_barrier_slow_path(MacroAssembler* masm,
                                           const Register obj,
                                           const Register pre_val,
                                           const Register thread,
                                           const Register tmp1,
                                           const Register tmp2,
                                           Label& done,
                                           Label& runtime) {
  // Do we need to load the previous value?
  if (obj != noreg) {
    __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW);
  }
  // Is the previous value null?
  __ cbz(pre_val, done);
  generate_queue_test_and_insertion(masm,
                                    G1ThreadLocalData::satb_mark_queue_index_offset(),
                                    G1ThreadLocalData::satb_mark_queue_buffer_offset(),
                                    runtime,
                                    thread, pre_val, tmp1, tmp2);
  __ b(done);
}
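// G1 SATB (snapshot-at-the-beginning) pre-barrier. While concurrent marking
// is active, the value about to be overwritten is recorded in the thread's
// SATB queue, so that every object reachable when marking started is still
// visited by the marker even if the last reference to it is overwritten.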
void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
                                                 Register obj,
                                                 Register pre_val,
                                                 Register thread,
                                                 Register tmp1,
                                                 Register tmp2,
                                                 bool tosca_live,
                                                 bool expand_call) {
  // If expand_call is true then we expand the call_VM_leaf macro
  // directly to skip generating the check by
  // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.

  assert(thread == rthread, "must be");

  Label done;
  Label runtime;

  assert_different_registers(obj, pre_val, tmp1, tmp2);
  assert(pre_val != noreg && tmp1 != noreg && tmp2 != noreg, "expecting a register");

  generate_pre_barrier_fast_path(masm, thread, tmp1);
  // If marking is not active (*(mark queue active address) == 0), jump to done
  __ cbzw(tmp1, done);
  generate_pre_barrier_slow_path(masm, obj, pre_val, thread, tmp1, tmp2, done, runtime);

  __ bind(runtime);

  __ push_call_clobbered_registers();

  // Calling the runtime using the regular call_VM_leaf mechanism generates
  // code (generated by InterpreterMacroAssembler::call_VM_leaf_base)
  // that checks that the *(rfp+frame::interpreter_frame_last_sp) == nullptr.
  //
  // If we are generating the pre-barrier without a frame (e.g. in the
  // intrinsified Reference.get() routine) then rfp might be pointing to
  // the caller frame and so this check will most likely fail at runtime.
  //
  // Expanding the call directly bypasses the generation of the check.
  // So when we do not have a full interpreter frame on the stack
  // expand_call should be passed true.

  if (expand_call) {
    assert(pre_val != c_rarg1, "smashed arg");
    __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread);
  } else {
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread);
  }

  __ pop_call_clobbered_registers();

  __ bind(done);
}
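// G1 post-barrier fast path. A card is dirtied only for stores that create a
// pointer between two different heap regions. Regions are power-of-two sized
// and aligned, so two addresses lie in the same region exactly when they
// agree in all bits above LogOfHRGrainBytes, i.e. when
// (store address ^ new value) >> LogOfHRGrainBytes == 0.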
static void generate_post_barrier_fast_path(MacroAssembler* masm,
                                            const Register store_addr,
                                            const Register new_val,
                                            const Register thread,
                                            const Register tmp1,
                                            const Register tmp2,
                                            Label& done,
                                            bool new_val_may_be_null) {
  assert(thread == rthread, "must be");
  assert_different_registers(store_addr, new_val, thread, tmp1, tmp2, noreg, rscratch1);

  // Does store cross heap regions?
  __ eor(tmp1, store_addr, new_val);                   // tmp1 := store address ^ new value
  __ lsr(tmp1, tmp1, G1HeapRegion::LogOfHRGrainBytes); // tmp1 := ((store address ^ new value) >> LogOfHRGrainBytes)
  __ cbz(tmp1, done);
  // Crosses regions, storing null?
  if (new_val_may_be_null) {
    __ cbz(new_val, done);
  }
  // Storing region crossing non-null.
  __ lsr(tmp1, store_addr, CardTable::card_shift());   // tmp1 := card address relative to card table base

  Address card_table_addr(thread, in_bytes(G1ThreadLocalData::card_table_base_offset()));
  __ ldr(tmp2, card_table_addr);                       // tmp2 := card table base address
  if (UseCondCardMark) {
    __ ldrb(rscratch1, Address(tmp1, tmp2));           // rscratch1 := card
    // Instead of loading clean_card_val and comparing, we exploit the fact that
    // the LSB of non-clean cards is always 0, and the LSB of clean cards 1.
    __ tbz(rscratch1, 0, done);
  }
  static_assert(G1CardTable::dirty_card_val() == 0, "must be to use zr");
  __ strb(zr, Address(tmp1, tmp2));                    // *(card address) := dirty_card_val
}

void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
                                                  Register store_addr,
                                                  Register new_val,
                                                  Register thread,
                                                  Register tmp1,
                                                  Register tmp2) {
  Label done;
  generate_post_barrier_fast_path(masm, store_addr, new_val, thread, tmp1, tmp2, done, false /* new_val_may_be_null */);
  __ bind(done);
}

#if defined(COMPILER2)

static void generate_c2_barrier_runtime_call(MacroAssembler* masm, G1BarrierStubC2* stub, const Register arg, const address runtime_path) {
  SaveLiveRegisters save_registers(masm, stub);
  if (c_rarg0 != arg) {
    __ mov(c_rarg0, arg);
  }
  __ mov(c_rarg1, rthread);
  __ mov(rscratch1, runtime_path);
  __ blr(rscratch1);
}

void G1BarrierSetAssembler::g1_write_barrier_pre_c2(MacroAssembler* masm,
                                                    Register obj,
                                                    Register pre_val,
                                                    Register thread,
                                                    Register tmp1,
                                                    Register tmp2,
                                                    G1PreBarrierStubC2* stub) {
  assert(thread == rthread, "must be");
  assert_different_registers(obj, pre_val, tmp1, tmp2);
  assert(pre_val != noreg && tmp1 != noreg && tmp2 != noreg, "expecting a register");

  stub->initialize_registers(obj, pre_val, thread, tmp1, tmp2);

  generate_pre_barrier_fast_path(masm, thread, tmp1);
  // If marking is active (*(mark queue active address) != 0), jump to stub (slow path)
  __ cbnzw(tmp1, *stub->entry());

  __ bind(*stub->continuation());
}

void G1BarrierSetAssembler::generate_c2_pre_barrier_stub(MacroAssembler* masm,
                                                         G1PreBarrierStubC2* stub) const {
  Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
  Label runtime;
  Register obj = stub->obj();
  Register pre_val = stub->pre_val();
  Register thread = stub->thread();
  Register tmp1 = stub->tmp1();
  Register tmp2 = stub->tmp2();

  __ bind(*stub->entry());
  generate_pre_barrier_slow_path(masm, obj, pre_val, thread, tmp1, tmp2, *stub->continuation(), runtime);

  __ bind(runtime);
  generate_c2_barrier_runtime_call(masm, stub, pre_val, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry));
  __ b(*stub->continuation());
}

void G1BarrierSetAssembler::g1_write_barrier_post_c2(MacroAssembler* masm,
                                                     Register store_addr,
                                                     Register new_val,
                                                     Register thread,
                                                     Register tmp1,
                                                     Register tmp2,
                                                     bool new_val_may_be_null) {
  Label done;
  generate_post_barrier_fast_path(masm, store_addr, new_val, thread, tmp1, tmp2, done, new_val_may_be_null);
  __ bind(done);
}

#endif // COMPILER2
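// Loads of Reference referents (ON_WEAK_OOP_REF / ON_PHANTOM_OOP_REF) must
// run the SATB pre-barrier on the loaded value: the returned referent may be
// the only remaining path to the object, so concurrent marking has to be
// informed that it is live.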
void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                    Register dst, Address src, Register tmp1, Register tmp2) {
  bool on_oop = is_reference_type(type);
  bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0;
  bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0;
  bool on_reference = on_weak || on_phantom;
  ModRefBarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp2);
  if (on_oop && on_reference) {
    // LR is live. It must be saved around calls.
    __ enter(/*strip_ret_addr*/true); // barrier may call runtime
    // Generate the G1 pre-barrier code to log the value of
    // the referent field in an SATB buffer.
    g1_write_barrier_pre(masm /* masm */,
                         noreg /* obj */,
                         dst /* pre_val */,
                         rthread /* thread */,
                         tmp1 /* tmp1 */,
                         tmp2 /* tmp2 */,
                         true /* tosca_live */,
                         true /* expand_call */);
    __ leave();
  }
}
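// A G1 oop store is bracketed by both barriers: pre-barrier (log the value
// being overwritten), the store itself, then post-barrier (dirty the card if
// the store created a cross-region pointer). The region cross check compares
// full addresses, so with compressed oops the uncompressed new value is kept
// in a scratch register across the compressing store.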
void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                         Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) {
  // flatten object address if needed
  if (dst.index() == noreg && dst.offset() == 0) {
    if (dst.base() != tmp3) {
      __ mov(tmp3, dst.base());
    }
  } else {
    __ lea(tmp3, dst);
  }

  g1_write_barrier_pre(masm,
                       tmp3 /* obj */,
                       tmp2 /* pre_val */,
                       rthread /* thread */,
                       tmp1 /* tmp1 */,
                       rscratch2 /* tmp2 */,
                       val != noreg /* tosca_live */,
                       false /* expand_call */);

  if (val == noreg) {
    BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), noreg, noreg, noreg, noreg);
  } else {
    // G1 barrier needs uncompressed oop for region cross check.
    Register new_val = val;
    if (UseCompressedOops) {
      new_val = rscratch2;
      __ mov(new_val, val);
    }
    BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), val, noreg, noreg, noreg);
    g1_write_barrier_post(masm,
                          tmp3 /* store_adr */,
                          new_val /* new_val */,
                          rthread /* thread */,
                          tmp1 /* tmp1 */,
                          tmp2 /* tmp2 */);
  }

}

#ifdef COMPILER1

#undef __
#define __ ce->masm()->

void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub) {
  G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
  // At this point we know that marking is in progress.
  // If do_load() is true then we have to emit the
  // load of the previous value; otherwise it has already
  // been loaded into _pre_val.

  __ bind(*stub->entry());

  assert(stub->pre_val()->is_register(), "Precondition.");

  Register pre_val_reg = stub->pre_val()->as_register();

  if (stub->do_load()) {
    ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/);
  }
  __ cbz(pre_val_reg, *stub->continuation());
  ce->store_parameter(stub->pre_val()->as_register(), 0);
  __ far_call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin()));
  __ b(*stub->continuation());
}

#undef __

void G1BarrierSetAssembler::g1_write_barrier_post_c1(MacroAssembler* masm,
                                                     Register store_addr,
                                                     Register new_val,
                                                     Register thread,
                                                     Register tmp1,
                                                     Register tmp2) {
  Label done;
  generate_post_barrier_fast_path(masm, store_addr, new_val, thread, tmp1, tmp2, done, true /* new_val_may_be_null */);
  masm->bind(done);
}

#define __ sasm->

void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
  __ prologue("g1_pre_barrier", false);

  // arg0 : previous value of memory

  BarrierSet* bs = BarrierSet::barrier_set();

  const Register pre_val = r0;
  const Register thread = rthread;
  const Register tmp = rscratch1;

  Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()));
  Address queue_index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset()));
  Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset()));

  Label done;
  Label runtime;

  // Is marking still active?
  if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
    __ ldrw(tmp, in_progress);
  } else {
    assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
    __ ldrb(tmp, in_progress);
  }
  __ cbzw(tmp, done);

  // Can we store original value in the thread's buffer?
  __ ldr(tmp, queue_index);
  __ cbz(tmp, runtime);

  __ sub(tmp, tmp, wordSize);
  __ str(tmp, queue_index);
  __ ldr(rscratch2, buffer);
  __ add(tmp, tmp, rscratch2);
  __ load_parameter(0, rscratch2);
  __ str(rscratch2, Address(tmp, 0));
  __ b(done);

  __ bind(runtime);
  __ push_call_clobbered_registers();
  __ load_parameter(0, pre_val);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread);
  __ pop_call_clobbered_registers();
  __ bind(done);

  __ epilogue();
}

#undef __

#endif // COMPILER1