/*
 * Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "asm/macroAssembler.inline.hpp"
#include "code/aotCodeCache.hpp"
#include "gc/g1/g1BarrierSet.hpp"
#include "gc/g1/g1BarrierSetAssembler.hpp"
#include "gc/g1/g1BarrierSetRuntime.hpp"
#include "gc/g1/g1CardTable.hpp"
#include "gc/g1/g1HeapRegion.hpp"
#include "gc/g1/g1ThreadLocalData.hpp"
#include "gc/shared/collectedHeap.hpp"
#include "interpreter/interp_masm.hpp"
#include "runtime/javaThread.hpp"
#include "runtime/sharedRuntime.hpp"
#ifdef COMPILER1
#include "c1/c1_LIRAssembler.hpp"
#include "c1/c1_MacroAssembler.hpp"
#include "gc/g1/c1/g1BarrierSetC1.hpp"
#endif // COMPILER1
#ifdef COMPILER2
#include "gc/g1/c2/g1BarrierSetC2.hpp"
#endif // COMPILER2

#define __ masm->

void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators,
                                                            Register addr, Register count, RegSet saved_regs) {
  bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;
  if (!dest_uninitialized) {
    Label done;
    Address in_progress(rthread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()));

    // Is marking active?
    if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
      __ ldrw(rscratch1, in_progress);
    } else {
      assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
      __ ldrb(rscratch1, in_progress);
    }
    __ cbzw(rscratch1, done);

    __ push(saved_regs, sp);
    if (count == c_rarg0) {
      if (addr == c_rarg1) {
        // exactly backwards!!
        __ mov(rscratch1, c_rarg0);
        __ mov(c_rarg0, c_rarg1);
        __ mov(c_rarg1, rscratch1);
      } else {
        __ mov(c_rarg1, count);
        __ mov(c_rarg0, addr);
      }
    } else {
      __ mov(c_rarg0, addr);
      __ mov(c_rarg1, count);
    }
    if (UseCompressedOops) {
      __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_narrow_oop_entry), 2);
    } else {
      __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_oop_entry), 2);
    }
    __ pop(saved_regs, sp);

    __ bind(done);
  }
}

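// Worked example of the card range computation below (the numbers assume
// the default 512-byte cards, i.e. CardTable::card_shift() == 9, and
// uncompressed 8-byte heap oops): an array slice of 128 oops starting at
// 0x10000 occupies [0x10000, 0x10400). Its last element lives at 0x103f8,
// so the cards to mark run from 0x10000 >> 9 == 0x80 up to
// 0x103f8 >> 9 == 0x81, i.e. two cards. Shifting the exclusive end address
// instead (0x10400 >> 9 == 0x82) would wrongly include a third card, which
// is why the code backs up to the last element address first.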
void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm,
                                                             DecoratorSet decorators,
                                                             Register start,
                                                             Register count,
                                                             Register scratch,
                                                             RegSet saved_regs) {

  Label done;
  Label loop;
  Label next;

  __ cbz(count, done);

  // Calculate the number of card marks to set. Since the object might start
  // and end within a card, we need to calculate this via the card table
  // indexes of the actual start and last addresses covered by the object.
  // Temporarily use the count register for the last element address.
  __ lea(count, Address(start, count, Address::lsl(LogBytesPerHeapOop))); // end = start + count << LogBytesPerHeapOop
  __ sub(count, count, BytesPerHeapOop); // Use last element address for end.

  __ lsr(start, start, CardTable::card_shift());
  __ lsr(count, count, CardTable::card_shift());
  __ sub(count, count, start); // Number of cards to mark minus 1 (one card-table byte per card).

  // Add card table base offset to start.
  __ ldr(scratch, Address(rthread, in_bytes(G1ThreadLocalData::card_table_base_offset())));
  __ add(start, start, scratch);

  __ bind(loop);
  if (UseCondCardMark) {
    __ ldrb(scratch, Address(start, count));
    // Instead of loading clean_card_val and comparing, we exploit the fact that
    // the LSB of non-clean cards is always 0, and the LSB of clean cards 1.
    __ tbz(scratch, 0, next);
  }
  static_assert(G1CardTable::dirty_card_val() == 0, "must be to use zr");
  __ strb(zr, Address(start, count));
  __ bind(next);
  __ subs(count, count, 1);
  __ br(Assembler::GE, loop);

  __ bind(done);
}

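// The helper below emits the test-and-insert protocol for the SATB mark
// queue. As a sketch, the emitted sequence is equivalent to the following
// C-like pseudocode, where index counts down in bytes from the buffer
// capacity towards zero:
//
//   if (index == 0) goto runtime;        // buffer full, hand off to runtime
//   index -= sizeof(void*);
//   *(void**)(buffer + index) = value;   // byte-offset into the buffer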
static void generate_queue_test_and_insertion(MacroAssembler* masm, ByteSize index_offset, ByteSize buffer_offset, Label& runtime,
                                              const Register thread, const Register value, const Register temp1, const Register temp2) {
  // Can we store a value in the given thread's buffer?
  // (The index field is typed as size_t.)
  __ ldr(temp1, Address(thread, in_bytes(index_offset)));   // temp1 := *(index address)
  __ cbz(temp1, runtime);                                   // jump to runtime if index == 0 (full buffer)
  // The buffer is not full, store value into it.
  __ sub(temp1, temp1, wordSize);                           // temp1 := next index
  __ str(temp1, Address(thread, in_bytes(index_offset)));   // *(index address) := next index
  __ ldr(temp2, Address(thread, in_bytes(buffer_offset)));  // temp2 := buffer address
  __ str(value, Address(temp2, temp1));                     // *(buffer address + next index) := value
}

static void generate_pre_barrier_fast_path(MacroAssembler* masm,
                                           const Register thread,
                                           const Register tmp1) {
  Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()));
  // Is marking active?
  if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
    __ ldrw(tmp1, in_progress);
  } else {
    assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
    __ ldrb(tmp1, in_progress);
  }
}

static void generate_pre_barrier_slow_path(MacroAssembler* masm,
                                           const Register obj,
                                           const Register pre_val,
                                           const Register thread,
                                           const Register tmp1,
                                           const Register tmp2,
                                           Label& done,
                                           Label& runtime) {
  // Do we need to load the previous value?
  if (obj != noreg) {
    __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW);
  }
  // Is the previous value null?
  __ cbz(pre_val, done);
  generate_queue_test_and_insertion(masm,
                                    G1ThreadLocalData::satb_mark_queue_index_offset(),
                                    G1ThreadLocalData::satb_mark_queue_buffer_offset(),
                                    runtime,
                                    thread, pre_val, tmp1, tmp2);
  __ b(done);
}

void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
                                                 Register obj,
                                                 Register pre_val,
                                                 Register thread,
                                                 Register tmp1,
                                                 Register tmp2,
                                                 bool tosca_live,
                                                 bool expand_call) {
  // If expand_call is true then we expand the call_VM_leaf macro
  // directly to skip generating the check by
  // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.

  assert(thread == rthread, "must be");

  Label done;
  Label runtime;

  assert_different_registers(obj, pre_val, tmp1, tmp2);
  assert(pre_val != noreg && tmp1 != noreg && tmp2 != noreg, "expecting a register");

  generate_pre_barrier_fast_path(masm, thread, tmp1);
  // If marking is not active (*(mark queue active address) == 0), jump to done
  __ cbzw(tmp1, done);
  generate_pre_barrier_slow_path(masm, obj, pre_val, thread, tmp1, tmp2, done, runtime);

  __ bind(runtime);

  __ push_call_clobbered_registers();

  // Calling the runtime using the regular call_VM_leaf mechanism generates
  // code (generated by InterpreterMacroAssembler::call_VM_leaf_base)
  // that checks that *(rfp + frame::interpreter_frame_last_sp) == nullptr.
  //
  // If we are generating the pre-barrier without a frame (e.g. in the
  // intrinsified Reference.get() routine) then rfp might be pointing to
  // the caller frame and so this check will most likely fail at runtime.
  //
  // Expanding the call directly bypasses the generation of the check.
  // So when we do not have a full interpreter frame on the stack
  // expand_call should be passed true.

  if (expand_call) {
    assert(pre_val != c_rarg1, "smashed arg");
    __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread);
  } else {
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread);
  }

  __ pop_call_clobbered_registers();

  __ bind(done);
}

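// The cross-region test below exploits xor: if store_addr and new_val lie
// in the same heap region, they agree in every bit above the region size,
// so (store_addr ^ new_val) >> LogOfHRGrainBytes is zero. For example, with
// 1 MB regions (LogOfHRGrainBytes == 20), 0x681234f678 ^ 0x6812345000 ==
// 0xa678, and 0xa678 >> 20 == 0, so the barrier is skipped.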
static void generate_post_barrier_fast_path(MacroAssembler* masm,
                                            const Register store_addr,
                                            const Register new_val,
                                            const Register thread,
                                            const Register tmp1,
                                            const Register tmp2,
                                            Label& done,
                                            bool new_val_may_be_null) {
  assert(thread == rthread, "must be");
  assert_different_registers(store_addr, new_val, thread, tmp1, tmp2, noreg, rscratch1);

  // Does the store cross heap regions?
#if INCLUDE_CDS
  // AOT code needs to load the barrier grain shift from the AOT runtime
  // constants area in the code cache; otherwise we can compile it as an
  // immediate operand.
  if (AOTCodeCache::is_on_for_dump()) {
    address grain_shift_address = (address)AOTRuntimeConstants::grain_shift_address();
    __ eor(tmp1, store_addr, new_val);
    __ lea(tmp2, ExternalAddress(grain_shift_address));
    __ ldrb(tmp2, tmp2);
    __ lsrv(tmp1, tmp1, tmp2);
    __ cbz(tmp1, done);
  } else
#endif
  {
    __ eor(tmp1, store_addr, new_val);                   // tmp1 := store address ^ new value
    __ lsr(tmp1, tmp1, G1HeapRegion::LogOfHRGrainBytes); // tmp1 := ((store address ^ new value) >> LogOfHRGrainBytes)
    __ cbz(tmp1, done);
  }

  // Crosses regions, storing null?
  if (new_val_may_be_null) {
    __ cbz(new_val, done);
  }
  // Storing region crossing non-null.
  __ lsr(tmp1, store_addr, CardTable::card_shift());     // tmp1 := card address relative to card table base

  Address card_table_addr(thread, in_bytes(G1ThreadLocalData::card_table_base_offset()));
  __ ldr(tmp2, card_table_addr);                         // tmp2 := card table base address
  if (UseCondCardMark) {
    __ ldrb(rscratch1, Address(tmp1, tmp2));             // rscratch1 := card
    // Instead of loading clean_card_val and comparing, we exploit the fact that
    // the LSB of non-clean cards is always 0, and the LSB of clean cards 1.
    __ tbz(rscratch1, 0, done);
  }
  static_assert(G1CardTable::dirty_card_val() == 0, "must be to use zr");
  __ strb(zr, Address(tmp1, tmp2));                      // *(card address) := dirty_card_val
}

void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
                                                  Register store_addr,
                                                  Register new_val,
                                                  Register thread,
                                                  Register tmp1,
                                                  Register tmp2) {
  Label done;
  generate_post_barrier_fast_path(masm, store_addr, new_val, thread, tmp1, tmp2, done, false /* new_val_may_be_null */);
  __ bind(done);
}

#if defined(COMPILER2)

static void generate_c2_barrier_runtime_call(MacroAssembler* masm, G1BarrierStubC2* stub, const Register arg, const address runtime_path) {
  SaveLiveRegisters save_registers(masm, stub);
  if (c_rarg0 != arg) {
    __ mov(c_rarg0, arg);
  }
  __ mov(c_rarg1, rthread);
  __ lea(rscratch1, RuntimeAddress(runtime_path));
  __ blr(rscratch1);
}

void G1BarrierSetAssembler::g1_write_barrier_pre_c2(MacroAssembler* masm,
                                                    Register obj,
                                                    Register pre_val,
                                                    Register thread,
                                                    Register tmp1,
                                                    Register tmp2,
                                                    G1PreBarrierStubC2* stub) {
  assert(thread == rthread, "must be");
  assert_different_registers(obj, pre_val, tmp1, tmp2);
  assert(pre_val != noreg && tmp1 != noreg && tmp2 != noreg, "expecting a register");

  stub->initialize_registers(obj, pre_val, thread, tmp1, tmp2);

  generate_pre_barrier_fast_path(masm, thread, tmp1);
  // If marking is active (*(mark queue active address) != 0), jump to stub (slow path)
  __ cbnzw(tmp1, *stub->entry());

  __ bind(*stub->continuation());
}

void G1BarrierSetAssembler::generate_c2_pre_barrier_stub(MacroAssembler* masm,
                                                         G1PreBarrierStubC2* stub) const {
  Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
  Label runtime;
  Register obj = stub->obj();
  Register pre_val = stub->pre_val();
  Register thread = stub->thread();
  Register tmp1 = stub->tmp1();
  Register tmp2 = stub->tmp2();

  __ bind(*stub->entry());
  generate_pre_barrier_slow_path(masm, obj, pre_val, thread, tmp1, tmp2, *stub->continuation(), runtime);

  __ bind(runtime);
  generate_c2_barrier_runtime_call(masm, stub, pre_val, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry));
  __ b(*stub->continuation());
}

void G1BarrierSetAssembler::g1_write_barrier_post_c2(MacroAssembler* masm,
                                                     Register store_addr,
                                                     Register new_val,
                                                     Register thread,
                                                     Register tmp1,
                                                     Register tmp2,
                                                     bool new_val_may_be_null) {
  Label done;
  generate_post_barrier_fast_path(masm, store_addr, new_val, thread, tmp1, tmp2, done, new_val_may_be_null);
  __ bind(done);
}

#endif // COMPILER2

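// For loads of Reference.referent (decorated ON_WEAK_OOP_REF or
// ON_PHANTOM_OOP_REF), the value just loaded is handed to the SATB
// pre-barrier below: once a referent escapes to a mutator, concurrent
// marking must treat it as live, otherwise it could be reclaimed while the
// Java code still holds it.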
void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                    Register dst, Address src, Register tmp1, Register tmp2) {
  bool on_oop = is_reference_type(type);
  bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0;
  bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0;
  bool on_reference = on_weak || on_phantom;
  ModRefBarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp2);
  if (on_oop && on_reference) {
    // LR is live. It must be saved around calls.
    __ enter(/*strip_ret_addr*/true); // barrier may call runtime
    // Generate the G1 pre-barrier code to log the value of
    // the referent field in an SATB buffer.
    g1_write_barrier_pre(masm /* masm */,
                         noreg /* obj */,
                         dst /* pre_val */,
                         rthread /* thread */,
                         tmp1 /* tmp1 */,
                         tmp2 /* tmp2 */,
                         true /* tosca_live */,
                         true /* expand_call */);
    __ leave();
  }
}

void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                         Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) {
  // flatten object address if needed
  if (dst.index() == noreg && dst.offset() == 0) {
    if (dst.base() != tmp3) {
      __ mov(tmp3, dst.base());
    }
  } else {
    __ lea(tmp3, dst);
  }

  g1_write_barrier_pre(masm,
                       tmp3 /* obj */,
                       tmp2 /* pre_val */,
                       rthread /* thread */,
                       tmp1 /* tmp1 */,
                       rscratch2 /* tmp2 */,
                       val != noreg /* tosca_live */,
                       false /* expand_call */);

  if (val == noreg) {
    BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), noreg, noreg, noreg, noreg);
  } else {
    // G1 barrier needs uncompressed oop for region cross check.
    Register new_val = val;
    if (UseCompressedOops) {
      new_val = rscratch2;
      __ mov(new_val, val);
    }
    BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), val, noreg, noreg, noreg);
    g1_write_barrier_post(masm,
                          tmp3 /* store_addr */,
                          new_val /* new_val */,
                          rthread /* thread */,
                          tmp1 /* tmp1 */,
                          tmp2 /* tmp2 */);
  }
}

#ifdef COMPILER1

#undef __
#define __ ce->masm()->

void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub) {
  G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
  // At this point we know that marking is in progress.
  // If do_load() is true then we have to emit the
  // load of the previous value; otherwise it has already
  // been loaded into _pre_val.

  __ bind(*stub->entry());

  assert(stub->pre_val()->is_register(), "Precondition.");

  Register pre_val_reg = stub->pre_val()->as_register();

  if (stub->do_load()) {
    ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/);
  }
  __ cbz(pre_val_reg, *stub->continuation());
  ce->store_parameter(stub->pre_val()->as_register(), 0);
  __ far_call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin()));
  __ b(*stub->continuation());
}

#undef __

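// Unlike the C2 variant, which receives new_val_may_be_null from the IR,
// the C1 post barrier has no nullness information here, so it
// conservatively allows a null new value and keeps the null check in the
// fast path.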
void G1BarrierSetAssembler::g1_write_barrier_post_c1(MacroAssembler* masm,
                                                     Register store_addr,
                                                     Register new_val,
                                                     Register thread,
                                                     Register tmp1,
                                                     Register tmp2) {
  Label done;
  generate_post_barrier_fast_path(masm, store_addr, new_val, thread, tmp1, tmp2, done, true /* new_val_may_be_null */);
  masm->bind(done);
}

#define __ sasm->

void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
  __ prologue("g1_pre_barrier", false);

  // arg0 : previous value of memory

  BarrierSet* bs = BarrierSet::barrier_set();

  const Register pre_val = r0;
  const Register thread = rthread;
  const Register tmp = rscratch1;

  Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()));
  Address queue_index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset()));
  Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset()));

  Label done;
  Label runtime;

  // Is marking still active?
  if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
    __ ldrw(tmp, in_progress);
  } else {
    assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
    __ ldrb(tmp, in_progress);
  }
  __ cbzw(tmp, done);

  // Can we store the original value in the thread's buffer?
  __ ldr(tmp, queue_index);
  __ cbz(tmp, runtime);

  __ sub(tmp, tmp, wordSize);
  __ str(tmp, queue_index);
  __ ldr(rscratch2, buffer);
  __ add(tmp, tmp, rscratch2);
  __ load_parameter(0, rscratch2);
  __ str(rscratch2, Address(tmp, 0));
  __ b(done);

  __ bind(runtime);
  __ push_call_clobbered_registers();
  __ load_parameter(0, pre_val);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread);
  __ pop_call_clobbered_registers();
  __ bind(done);

  __ epilogue();
}

#undef __

#endif // COMPILER1