1 /* 2 * Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #include "asm/macroAssembler.inline.hpp" 26 #include "code/aotCodeCache.hpp" 27 #include "gc/g1/g1BarrierSet.hpp" 28 #include "gc/g1/g1BarrierSetAssembler.hpp" 29 #include "gc/g1/g1BarrierSetRuntime.hpp" 30 #include "gc/g1/g1CardTable.hpp" 31 #include "gc/g1/g1HeapRegion.hpp" 32 #include "gc/g1/g1ThreadLocalData.hpp" 33 #include "gc/shared/collectedHeap.hpp" 34 #include "interpreter/interp_masm.hpp" 35 #include "runtime/javaThread.hpp" 36 #include "runtime/sharedRuntime.hpp" 37 #ifdef COMPILER1 38 #include "c1/c1_LIRAssembler.hpp" 39 #include "c1/c1_MacroAssembler.hpp" 40 #include "gc/g1/c1/g1BarrierSetC1.hpp" 41 #endif // COMPILER1 42 #ifdef COMPILER2 43 #include "gc/g1/c2/g1BarrierSetC2.hpp" 44 #endif // COMPILER2 45 46 #define __ masm-> 47 48 void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, 49 Register addr, Register count, RegSet saved_regs) { 50 bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; 51 if (!dest_uninitialized) { 52 Label done; 53 Address in_progress(rthread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); 54 55 // Is marking active? 56 if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { 57 __ ldrw(rscratch1, in_progress); 58 } else { 59 assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); 60 __ ldrb(rscratch1, in_progress); 61 } 62 __ cbzw(rscratch1, done); 63 64 __ push(saved_regs, sp); 65 if (count == c_rarg0) { 66 if (addr == c_rarg1) { 67 // exactly backwards!! 68 __ mov(rscratch1, c_rarg0); 69 __ mov(c_rarg0, c_rarg1); 70 __ mov(c_rarg1, rscratch1); 71 } else { 72 __ mov(c_rarg1, count); 73 __ mov(c_rarg0, addr); 74 } 75 } else { 76 __ mov(c_rarg0, addr); 77 __ mov(c_rarg1, count); 78 } 79 if (UseCompressedOops) { 80 __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_narrow_oop_entry), 2); 81 } else { 82 __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_oop_entry), 2); 83 } 84 __ pop(saved_regs, sp); 85 86 __ bind(done); 87 } 88 } 89 90 void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, 91 DecoratorSet decorators, 92 Register start, 93 Register count, 94 Register scratch) { 95 96 Label done; 97 Label loop; 98 Label next; 99 100 __ cbz(count, done); 101 102 // Calculate the number of card marks to set. Since the object might start and 103 // end within a card, we need to calculate this via the card table indexes of 104 // the actual start and last addresses covered by the object. 105 // Temporarily use the count register for the last element address. 106 __ lea(count, Address(start, count, Address::lsl(LogBytesPerHeapOop))); // end = start + count << LogBytesPerHeapOop 107 __ sub(count, count, BytesPerHeapOop); // Use last element address for end. 108 109 __ lsr(start, start, CardTable::card_shift()); 110 __ lsr(count, count, CardTable::card_shift()); 111 __ sub(count, count, start); // Number of bytes to mark - 1. 112 113 // Add card table base offset to start. 114 __ ldr(scratch, Address(rthread, in_bytes(G1ThreadLocalData::card_table_base_offset()))); 115 __ add(start, start, scratch); 116 117 __ bind(loop); 118 if (UseCondCardMark) { 119 __ ldrb(scratch, Address(start, count)); 120 // Instead of loading clean_card_val and comparing, we exploit the fact that 121 // the LSB of non-clean cards is always 0, and the LSB of clean cards 1. 122 __ tbz(scratch, 0, next); 123 } 124 static_assert(G1CardTable::dirty_card_val() == 0, "must be to use zr"); 125 __ strb(zr, Address(start, count)); 126 __ bind(next); 127 __ subs(count, count, 1); 128 __ br(Assembler::GE, loop); 129 130 __ bind(done); 131 } 132 133 static void generate_queue_test_and_insertion(MacroAssembler* masm, ByteSize index_offset, ByteSize buffer_offset, Label& runtime, 134 const Register thread, const Register value, const Register temp1, const Register temp2) { 135 // Can we store a value in the given thread's buffer? 136 // (The index field is typed as size_t.) 137 __ ldr(temp1, Address(thread, in_bytes(index_offset))); // temp1 := *(index address) 138 __ cbz(temp1, runtime); // jump to runtime if index == 0 (full buffer) 139 // The buffer is not full, store value into it. 140 __ sub(temp1, temp1, wordSize); // temp1 := next index 141 __ str(temp1, Address(thread, in_bytes(index_offset))); // *(index address) := next index 142 __ ldr(temp2, Address(thread, in_bytes(buffer_offset))); // temp2 := buffer address 143 __ str(value, Address(temp2, temp1)); // *(buffer address + next index) := value 144 } 145 146 static void generate_pre_barrier_fast_path(MacroAssembler* masm, 147 const Register thread, 148 const Register tmp1) { 149 Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); 150 // Is marking active? 151 if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { 152 __ ldrw(tmp1, in_progress); 153 } else { 154 assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); 155 __ ldrb(tmp1, in_progress); 156 } 157 } 158 159 static void generate_pre_barrier_slow_path(MacroAssembler* masm, 160 const Register obj, 161 const Register pre_val, 162 const Register thread, 163 const Register tmp1, 164 const Register tmp2, 165 Label& done, 166 Label& runtime) { 167 // Do we need to load the previous value? 168 if (obj != noreg) { 169 __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW); 170 } 171 // Is the previous value null? 172 __ cbz(pre_val, done); 173 generate_queue_test_and_insertion(masm, 174 G1ThreadLocalData::satb_mark_queue_index_offset(), 175 G1ThreadLocalData::satb_mark_queue_buffer_offset(), 176 runtime, 177 thread, pre_val, tmp1, tmp2); 178 __ b(done); 179 } 180 181 void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, 182 Register obj, 183 Register pre_val, 184 Register thread, 185 Register tmp1, 186 Register tmp2, 187 bool tosca_live, 188 bool expand_call) { 189 // If expand_call is true then we expand the call_VM_leaf macro 190 // directly to skip generating the check by 191 // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. 192 193 assert(thread == rthread, "must be"); 194 195 Label done; 196 Label runtime; 197 198 assert_different_registers(obj, pre_val, tmp1, tmp2); 199 assert(pre_val != noreg && tmp1 != noreg && tmp2 != noreg, "expecting a register"); 200 201 generate_pre_barrier_fast_path(masm, thread, tmp1); 202 // If marking is not active (*(mark queue active address) == 0), jump to done 203 __ cbzw(tmp1, done); 204 generate_pre_barrier_slow_path(masm, obj, pre_val, thread, tmp1, tmp2, done, runtime); 205 206 __ bind(runtime); 207 208 __ push_call_clobbered_registers(); 209 210 // Calling the runtime using the regular call_VM_leaf mechanism generates 211 // code (generated by InterpreterMacroAssember::call_VM_leaf_base) 212 // that checks that the *(rfp+frame::interpreter_frame_last_sp) == nullptr. 213 // 214 // If we care generating the pre-barrier without a frame (e.g. in the 215 // intrinsified Reference.get() routine) then rfp might be pointing to 216 // the caller frame and so this check will most likely fail at runtime. 217 // 218 // Expanding the call directly bypasses the generation of the check. 219 // So when we do not have have a full interpreter frame on the stack 220 // expand_call should be passed true. 221 222 if (expand_call) { 223 assert(pre_val != c_rarg1, "smashed arg"); 224 __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); 225 } else { 226 __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); 227 } 228 229 __ pop_call_clobbered_registers(); 230 231 __ bind(done); 232 233 } 234 235 static void generate_post_barrier(MacroAssembler* masm, 236 const Register store_addr, 237 const Register new_val, 238 const Register thread, 239 const Register tmp1, 240 const Register tmp2, 241 Label& done, 242 bool new_val_may_be_null) { 243 assert(thread == rthread, "must be"); 244 assert_different_registers(store_addr, new_val, thread, tmp1, tmp2, noreg, rscratch1); 245 246 // Does store cross heap regions? 247 #if INCLUDE_CDS 248 // AOT code needs to load the barrier grain shift from the aot 249 // runtime constants area in the code cache otherwise we can compile 250 // it as an immediate operand 251 if (AOTCodeCache::is_on_for_dump()) { 252 address grain_shift_address = (address)AOTRuntimeConstants::grain_shift_address(); 253 __ eor(tmp1, store_addr, new_val); 254 __ lea(tmp2, ExternalAddress(grain_shift_address)); 255 __ ldrb(tmp2, tmp2); 256 __ lsrv(tmp1, tmp1, tmp2); 257 __ cbz(tmp1, done); 258 } else 259 #endif 260 { 261 __ eor(tmp1, store_addr, new_val); // tmp1 := store address ^ new value 262 __ lsr(tmp1, tmp1, G1HeapRegion::LogOfHRGrainBytes); // tmp1 := ((store address ^ new value) >> LogOfHRGrainBytes) 263 __ cbz(tmp1, done); 264 } 265 266 // Crosses regions, storing null? 267 if (new_val_may_be_null) { 268 __ cbz(new_val, done); 269 } 270 // Storing region crossing non-null. 271 __ lsr(tmp1, store_addr, CardTable::card_shift()); // tmp1 := card address relative to card table base 272 273 Address card_table_addr(thread, in_bytes(G1ThreadLocalData::card_table_base_offset())); 274 __ ldr(tmp2, card_table_addr); // tmp2 := card table base address 275 if (UseCondCardMark) { 276 __ ldrb(rscratch1, Address(tmp1, tmp2)); // rscratch1 := card 277 // Instead of loading clean_card_val and comparing, we exploit the fact that 278 // the LSB of non-clean cards is always 0, and the LSB of clean cards 1. 279 __ tbz(rscratch1, 0, done); 280 } 281 static_assert(G1CardTable::dirty_card_val() == 0, "must be to use zr"); 282 __ strb(zr, Address(tmp1, tmp2)); // *(card address) := dirty_card_val 283 } 284 285 void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, 286 Register store_addr, 287 Register new_val, 288 Register thread, 289 Register tmp1, 290 Register tmp2) { 291 Label done; 292 generate_post_barrier(masm, store_addr, new_val, thread, tmp1, tmp2, done, false /* new_val_may_be_null */); 293 __ bind(done); 294 } 295 296 #if defined(COMPILER2) 297 298 static void generate_c2_barrier_runtime_call(MacroAssembler* masm, G1BarrierStubC2* stub, const Register arg, const address runtime_path) { 299 SaveLiveRegisters save_registers(masm, stub); 300 if (c_rarg0 != arg) { 301 __ mov(c_rarg0, arg); 302 } 303 __ mov(c_rarg1, rthread); 304 __ mov(rscratch1, runtime_path); 305 __ blr(rscratch1); 306 } 307 308 void G1BarrierSetAssembler::g1_write_barrier_pre_c2(MacroAssembler* masm, 309 Register obj, 310 Register pre_val, 311 Register thread, 312 Register tmp1, 313 Register tmp2, 314 G1PreBarrierStubC2* stub) { 315 assert(thread == rthread, "must be"); 316 assert_different_registers(obj, pre_val, tmp1, tmp2); 317 assert(pre_val != noreg && tmp1 != noreg && tmp2 != noreg, "expecting a register"); 318 319 stub->initialize_registers(obj, pre_val, thread, tmp1, tmp2); 320 321 generate_pre_barrier_fast_path(masm, thread, tmp1); 322 // If marking is active (*(mark queue active address) != 0), jump to stub (slow path) 323 __ cbnzw(tmp1, *stub->entry()); 324 325 __ bind(*stub->continuation()); 326 } 327 328 void G1BarrierSetAssembler::generate_c2_pre_barrier_stub(MacroAssembler* masm, 329 G1PreBarrierStubC2* stub) const { 330 Assembler::InlineSkippedInstructionsCounter skip_counter(masm); 331 Label runtime; 332 Register obj = stub->obj(); 333 Register pre_val = stub->pre_val(); 334 Register thread = stub->thread(); 335 Register tmp1 = stub->tmp1(); 336 Register tmp2 = stub->tmp2(); 337 338 __ bind(*stub->entry()); 339 generate_pre_barrier_slow_path(masm, obj, pre_val, thread, tmp1, tmp2, *stub->continuation(), runtime); 340 341 __ bind(runtime); 342 generate_c2_barrier_runtime_call(masm, stub, pre_val, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry)); 343 __ b(*stub->continuation()); 344 } 345 346 void G1BarrierSetAssembler::g1_write_barrier_post_c2(MacroAssembler* masm, 347 Register store_addr, 348 Register new_val, 349 Register thread, 350 Register tmp1, 351 Register tmp2, 352 bool new_val_may_be_null) { 353 Label done; 354 generate_post_barrier(masm, store_addr, new_val, thread, tmp1, tmp2, done, new_val_may_be_null); 355 __ bind(done); 356 } 357 358 #endif // COMPILER2 359 360 void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, 361 Register dst, Address src, Register tmp1, Register tmp2) { 362 bool on_oop = is_reference_type(type); 363 bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0; 364 bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0; 365 bool on_reference = on_weak || on_phantom; 366 CardTableBarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp2); 367 if (on_oop && on_reference) { 368 // LR is live. It must be saved around calls. 369 __ enter(/*strip_ret_addr*/true); // barrier may call runtime 370 // Generate the G1 pre-barrier code to log the value of 371 // the referent field in an SATB buffer. 372 g1_write_barrier_pre(masm /* masm */, 373 noreg /* obj */, 374 dst /* pre_val */, 375 rthread /* thread */, 376 tmp1 /* tmp1 */, 377 tmp2 /* tmp2 */, 378 true /* tosca_live */, 379 true /* expand_call */); 380 __ leave(); 381 } 382 } 383 384 void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, 385 Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) { 386 // flatten object address if needed 387 if (dst.index() == noreg && dst.offset() == 0) { 388 if (dst.base() != tmp3) { 389 __ mov(tmp3, dst.base()); 390 } 391 } else { 392 __ lea(tmp3, dst); 393 } 394 395 g1_write_barrier_pre(masm, 396 tmp3 /* obj */, 397 tmp2 /* pre_val */, 398 rthread /* thread */, 399 tmp1 /* tmp1 */, 400 rscratch2 /* tmp2 */, 401 val != noreg /* tosca_live */, 402 false /* expand_call */); 403 404 if (val == noreg) { 405 BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), noreg, noreg, noreg, noreg); 406 } else { 407 // G1 barrier needs uncompressed oop for region cross check. 408 Register new_val = val; 409 if (UseCompressedOops) { 410 new_val = rscratch2; 411 __ mov(new_val, val); 412 } 413 BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), val, noreg, noreg, noreg); 414 g1_write_barrier_post(masm, 415 tmp3 /* store_adr */, 416 new_val /* new_val */, 417 rthread /* thread */, 418 tmp1 /* tmp1 */, 419 tmp2 /* tmp2 */); 420 } 421 422 } 423 424 #ifdef COMPILER1 425 426 #undef __ 427 #define __ ce->masm()-> 428 429 void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub) { 430 G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); 431 // At this point we know that marking is in progress. 432 // If do_load() is true then we have to emit the 433 // load of the previous value; otherwise it has already 434 // been loaded into _pre_val. 435 436 __ bind(*stub->entry()); 437 438 assert(stub->pre_val()->is_register(), "Precondition."); 439 440 Register pre_val_reg = stub->pre_val()->as_register(); 441 442 if (stub->do_load()) { 443 ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/); 444 } 445 __ cbz(pre_val_reg, *stub->continuation()); 446 ce->store_parameter(stub->pre_val()->as_register(), 0); 447 __ far_call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin())); 448 __ b(*stub->continuation()); 449 } 450 451 #undef __ 452 453 void G1BarrierSetAssembler::g1_write_barrier_post_c1(MacroAssembler* masm, 454 Register store_addr, 455 Register new_val, 456 Register thread, 457 Register tmp1, 458 Register tmp2) { 459 Label done; 460 generate_post_barrier(masm, store_addr, new_val, thread, tmp1, tmp2, done, true /* new_val_may_be_null */); 461 masm->bind(done); 462 } 463 464 #define __ sasm-> 465 466 void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) { 467 __ prologue("g1_pre_barrier", false); 468 469 // arg0 : previous value of memory 470 471 BarrierSet* bs = BarrierSet::barrier_set(); 472 473 const Register pre_val = r0; 474 const Register thread = rthread; 475 const Register tmp = rscratch1; 476 477 Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); 478 Address queue_index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset())); 479 Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset())); 480 481 Label done; 482 Label runtime; 483 484 // Is marking still active? 485 if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { 486 __ ldrw(tmp, in_progress); 487 } else { 488 assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); 489 __ ldrb(tmp, in_progress); 490 } 491 __ cbzw(tmp, done); 492 493 // Can we store original value in the thread's buffer? 494 __ ldr(tmp, queue_index); 495 __ cbz(tmp, runtime); 496 497 __ sub(tmp, tmp, wordSize); 498 __ str(tmp, queue_index); 499 __ ldr(rscratch2, buffer); 500 __ add(tmp, tmp, rscratch2); 501 __ load_parameter(0, rscratch2); 502 __ str(rscratch2, Address(tmp, 0)); 503 __ b(done); 504 505 __ bind(runtime); 506 __ push_call_clobbered_registers(); 507 __ load_parameter(0, pre_val); 508 __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); 509 __ pop_call_clobbered_registers(); 510 __ bind(done); 511 512 __ epilogue(); 513 } 514 515 #undef __ 516 517 #endif // COMPILER1 --- EOF ---