/*
 * Copyright (c) 2015, 2025, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

#include "asm/macroAssembler.hpp"
#include "classfile/javaClasses.hpp"
#include "gc/z/c2/zBarrierSetC2.hpp"
#include "gc/z/zBarrierSet.hpp"
#include "gc/z/zBarrierSetAssembler.hpp"
#include "gc/z/zBarrierSetRuntime.hpp"
#include "opto/arraycopynode.hpp"
#include "opto/block.hpp"
#include "opto/compile.hpp"
#include "opto/graphKit.hpp"
#include "opto/machnode.hpp"
#include "opto/macro.hpp"
#include "opto/memnode.hpp"
#include "opto/node.hpp"
#include "opto/output.hpp"
#include "opto/regalloc.hpp"
#include "opto/runtime.hpp"
#include "opto/type.hpp"
#include "utilities/debug.hpp"
#include "utilities/growableArray.hpp"
#include "utilities/macros.hpp"

template<typename K, typename V, size_t TableSize>
class ZArenaHashtable : public ResourceObj {
  class ZArenaHashtableEntry : public ResourceObj {
  public:
    ZArenaHashtableEntry* _next;
    K                     _key;
    V                     _value;
  };

  static const size_t TableMask = TableSize - 1;

  Arena* _arena;
  ZArenaHashtableEntry* _table[TableSize];

public:
  class Iterator {
    ZArenaHashtable*      _table;
    ZArenaHashtableEntry* _current_entry;
    size_t                _current_index;

  public:
    Iterator(ZArenaHashtable* table)
      : _table(table),
        _current_entry(table->_table[0]),
        _current_index(0) {
      if (_current_entry == nullptr) {
        next();
      }
    }

    bool has_next() { return _current_entry != nullptr; }
    K key()         { return _current_entry->_key; }
    V value()       { return _current_entry->_value; }

    void next() {
      if (_current_entry != nullptr) {
        _current_entry = _current_entry->_next;
      }
      while (_current_entry == nullptr && ++_current_index < TableSize) {
        _current_entry = _table->_table[_current_index];
      }
    }
  };

  ZArenaHashtable(Arena* arena)
    : _arena(arena),
      _table() {
    Copy::zero_to_bytes(&_table, sizeof(_table));
  }

  void add(K key, V value) {
    ZArenaHashtableEntry* entry = new (_arena) ZArenaHashtableEntry();
    entry->_key = key;
    entry->_value = value;
    entry->_next = _table[key & TableMask];
    _table[key & TableMask] = entry;
  }

  V* get(K key) const {
    for (ZArenaHashtableEntry* e = _table[key & TableMask]; e != nullptr; e = e->_next) {
      if (e->_key == key) {
        return &(e->_value);
      }
    }
    return nullptr;
  }

  Iterator iterator() {
    return Iterator(this);
  }
};

typedef ZArenaHashtable<intptr_t, bool, 4> ZOffsetTable;
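
// Illustrative usage sketch for the arena hashtable above (not part of the
// surrounding logic; assumes some live Arena* named 'arena'). TableSize is
// expected to be a power of two so that 'key & TableMask' selects a bucket.
//
//   ZOffsetTable table(arena);
//   table.add(16, true);                  // chained into bucket (16 & TableMask)
//   bool* const value = table.get(16);    // non-null here; *value == true
//   for (ZOffsetTable::Iterator it = table.iterator(); it.has_next(); it.next()) {
//     // visit it.key() / it.value()
//   }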

class ZBarrierSetC2State : public BarrierSetC2State {
private:
  GrowableArray<ZBarrierStubC2*>* _stubs;
  int                             _trampoline_stubs_count;
  int                             _stubs_start_offset;

public:
  ZBarrierSetC2State(Arena* arena)
    : BarrierSetC2State(arena),
      _stubs(new (arena) GrowableArray<ZBarrierStubC2*>(arena, 8, 0, nullptr)),
      _trampoline_stubs_count(0),
      _stubs_start_offset(0) {}

  GrowableArray<ZBarrierStubC2*>* stubs() {
    return _stubs;
  }

  bool needs_liveness_data(const MachNode* mach) const {
    // Don't need liveness data for nodes without barriers
    return mach->barrier_data() != ZBarrierElided;
  }

  bool needs_livein_data() const {
    return true;
  }

  void inc_trampoline_stubs_count() {
    assert(_trampoline_stubs_count != INT_MAX, "Overflow");
    ++_trampoline_stubs_count;
  }

  int trampoline_stubs_count() {
    return _trampoline_stubs_count;
  }

  void set_stubs_start_offset(int offset) {
    _stubs_start_offset = offset;
  }

  int stubs_start_offset() {
    return _stubs_start_offset;
  }
};

static ZBarrierSetC2State* barrier_set_state() {
  return reinterpret_cast<ZBarrierSetC2State*>(Compile::current()->barrier_set_state());
}
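
// Note: stub registration and trampoline counting below are skipped during the
// scratch-buffer emission pass that C2 uses to measure instruction sizes;
// otherwise each stub would be recorded twice.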

void ZBarrierStubC2::register_stub(ZBarrierStubC2* stub) {
  if (!Compile::current()->output()->in_scratch_emit_size()) {
    barrier_set_state()->stubs()->append(stub);
  }
}

void ZBarrierStubC2::inc_trampoline_stubs_count() {
  if (!Compile::current()->output()->in_scratch_emit_size()) {
    barrier_set_state()->inc_trampoline_stubs_count();
  }
}

int ZBarrierStubC2::trampoline_stubs_count() {
  return barrier_set_state()->trampoline_stubs_count();
}

int ZBarrierStubC2::stubs_start_offset() {
  return barrier_set_state()->stubs_start_offset();
}

ZBarrierStubC2::ZBarrierStubC2(const MachNode* node) : BarrierStubC2(node) {}

ZLoadBarrierStubC2* ZLoadBarrierStubC2::create(const MachNode* node, Address ref_addr, Register ref) {
  AARCH64_ONLY(fatal("Should use ZLoadBarrierStubC2Aarch64::create"));
  ZLoadBarrierStubC2* const stub = new (Compile::current()->comp_arena()) ZLoadBarrierStubC2(node, ref_addr, ref);
  register_stub(stub);

  return stub;
}

ZLoadBarrierStubC2::ZLoadBarrierStubC2(const MachNode* node, Address ref_addr, Register ref)
  : ZBarrierStubC2(node),
    _ref_addr(ref_addr),
    _ref(ref) {
  assert_different_registers(ref, ref_addr.base());
  assert_different_registers(ref, ref_addr.index());
  // The runtime call updates the value of ref, so we should not spill and
  // reload its outdated value.
  dont_preserve(ref);
}

Address ZLoadBarrierStubC2::ref_addr() const {
  return _ref_addr;
}

Register ZLoadBarrierStubC2::ref() const {
  return _ref;
}

address ZLoadBarrierStubC2::slow_path() const {
  const uint8_t barrier_data = _node->barrier_data();
  DecoratorSet decorators = DECORATORS_NONE;
  if (barrier_data & ZBarrierStrong) {
    decorators |= ON_STRONG_OOP_REF;
  }
  if (barrier_data & ZBarrierWeak) {
    decorators |= ON_WEAK_OOP_REF;
  }
  if (barrier_data & ZBarrierPhantom) {
    decorators |= ON_PHANTOM_OOP_REF;
  }
  if (barrier_data & ZBarrierNoKeepalive) {
    decorators |= AS_NO_KEEPALIVE;
  }
  return ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators);
}

void ZLoadBarrierStubC2::emit_code(MacroAssembler& masm) {
  ZBarrierSet::assembler()->generate_c2_load_barrier_stub(&masm, static_cast<ZLoadBarrierStubC2*>(this));
}

ZStoreBarrierStubC2* ZStoreBarrierStubC2::create(const MachNode* node, Address ref_addr, Register new_zaddress, Register new_zpointer, bool is_native, bool is_atomic, bool is_nokeepalive) {
  AARCH64_ONLY(fatal("Should use ZStoreBarrierStubC2Aarch64::create"));
  ZStoreBarrierStubC2* const stub = new (Compile::current()->comp_arena()) ZStoreBarrierStubC2(node, ref_addr, new_zaddress, new_zpointer, is_native, is_atomic, is_nokeepalive);
  register_stub(stub);

  return stub;
}

ZStoreBarrierStubC2::ZStoreBarrierStubC2(const MachNode* node, Address ref_addr, Register new_zaddress, Register new_zpointer,
                                         bool is_native, bool is_atomic, bool is_nokeepalive)
  : ZBarrierStubC2(node),
    _ref_addr(ref_addr),
    _new_zaddress(new_zaddress),
    _new_zpointer(new_zpointer),
    _is_native(is_native),
    _is_atomic(is_atomic),
    _is_nokeepalive(is_nokeepalive) {}

Address ZStoreBarrierStubC2::ref_addr() const {
  return _ref_addr;
}

Register ZStoreBarrierStubC2::new_zaddress() const {
  return _new_zaddress;
}

Register ZStoreBarrierStubC2::new_zpointer() const {
  return _new_zpointer;
}

bool ZStoreBarrierStubC2::is_native() const {
  return _is_native;
}

bool ZStoreBarrierStubC2::is_atomic() const {
  return _is_atomic;
}

bool ZStoreBarrierStubC2::is_nokeepalive() const {
  return _is_nokeepalive;
}

void ZStoreBarrierStubC2::emit_code(MacroAssembler& masm) {
  ZBarrierSet::assembler()->generate_c2_store_barrier_stub(&masm, static_cast<ZStoreBarrierStubC2*>(this));
}

uint ZBarrierSetC2::estimated_barrier_size(const Node* node) const {
  uint8_t barrier_data = MemNode::barrier_data(node);
  assert(barrier_data != 0, "should be a barrier node");
  uint uncolor_or_color_size = node->is_Load() ? 1 : 2;
  if ((barrier_data & ZBarrierElided) != 0) {
    return uncolor_or_color_size;
  }
  // A compare and branch corresponds to approximately four fast-path Ideal
  // nodes (Cmp, Bool, If, If projection). The slow path (If projection and
  // runtime call) is excluded since the corresponding code is laid out
  // separately and does not directly affect performance.
  return uncolor_or_color_size + 4;
}

void* ZBarrierSetC2::create_barrier_state(Arena* comp_arena) const {
  return new (comp_arena) ZBarrierSetC2State(comp_arena);
}
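
// Late barrier analysis operates on the final machine-node CFG: liveness is
// computed at the barrier stub sites, and barriers that are dominated by a
// compatible access or allocation are elided (see analyze_dominating_barriers()
// below).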

void ZBarrierSetC2::late_barrier_analysis() const {
  compute_liveness_at_stubs();
  analyze_dominating_barriers();
}

void ZBarrierSetC2::emit_stubs(CodeBuffer& cb) const {
  MacroAssembler masm(&cb);
  GrowableArray<ZBarrierStubC2*>* const stubs = barrier_set_state()->stubs();
  barrier_set_state()->set_stubs_start_offset(masm.offset());

  for (int i = 0; i < stubs->length(); i++) {
    // Make sure there is enough space in the code buffer
    if (cb.insts()->maybe_expand_to_ensure_remaining(PhaseOutput::max_inst_gcstub_size()) && cb.blob() == nullptr) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }

    stubs->at(i)->emit_code(masm);
  }

  masm.flush();
}

int ZBarrierSetC2::estimate_stub_size() const {
  Compile* const C = Compile::current();
  BufferBlob* const blob = C->output()->scratch_buffer_blob();
  GrowableArray<ZBarrierStubC2*>* const stubs = barrier_set_state()->stubs();
  int size = 0;

  for (int i = 0; i < stubs->length(); i++) {
    CodeBuffer cb(blob->content_begin(), checked_cast<CodeBuffer::csize_t>((address)C->output()->scratch_locs_memory() - blob->content_begin()));
    MacroAssembler masm(&cb);
    stubs->at(i)->emit_code(masm);
    size += cb.insts_size();
  }

  return size;
}
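
// Parse-time accesses are tagged with a barrier kind derived from their
// decorators (strong/weak/phantom, native, no-keepalive, or elided for tightly
// coupled allocations). The matched memory nodes carry these bits so that later
// phases can emit, or skip, the matching barrier stub.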

static void set_barrier_data(C2Access& access) {
  if (!ZBarrierSet::barrier_needed(access.decorators(), access.type())) {
    return;
  }

  if (access.decorators() & C2_TIGHTLY_COUPLED_ALLOC) {
    access.set_barrier_data(ZBarrierElided);
    return;
  }

  uint8_t barrier_data = 0;

  if (access.decorators() & ON_PHANTOM_OOP_REF) {
    barrier_data |= ZBarrierPhantom;
  } else if (access.decorators() & ON_WEAK_OOP_REF) {
    barrier_data |= ZBarrierWeak;
  } else {
    barrier_data |= ZBarrierStrong;
  }

  if (access.decorators() & IN_NATIVE) {
    barrier_data |= ZBarrierNative;
  }

  if (access.decorators() & AS_NO_KEEPALIVE) {
    barrier_data |= ZBarrierNoKeepalive;
  }

  access.set_barrier_data(barrier_data);
}

Node* ZBarrierSetC2::store_at_resolved(C2Access& access, C2AccessValue& val) const {
  set_barrier_data(access);
  return BarrierSetC2::store_at_resolved(access, val);
}

Node* ZBarrierSetC2::load_at_resolved(C2Access& access, const Type* val_type) const {
  set_barrier_data(access);
  return BarrierSetC2::load_at_resolved(access, val_type);
}

Node* ZBarrierSetC2::atomic_cmpxchg_val_at_resolved(C2AtomicParseAccess& access, Node* expected_val,
                                                    Node* new_val, const Type* val_type) const {
  set_barrier_data(access);
  return BarrierSetC2::atomic_cmpxchg_val_at_resolved(access, expected_val, new_val, val_type);
}

Node* ZBarrierSetC2::atomic_cmpxchg_bool_at_resolved(C2AtomicParseAccess& access, Node* expected_val,
                                                     Node* new_val, const Type* value_type) const {
  set_barrier_data(access);
  return BarrierSetC2::atomic_cmpxchg_bool_at_resolved(access, expected_val, new_val, value_type);
}

Node* ZBarrierSetC2::atomic_xchg_at_resolved(C2AtomicParseAccess& access, Node* new_val, const Type* val_type) const {
  set_barrier_data(access);
  return BarrierSetC2::atomic_xchg_at_resolved(access, new_val, val_type);
}

bool ZBarrierSetC2::array_copy_requires_gc_barriers(bool tightly_coupled_alloc, BasicType type,
                                                    bool is_clone, bool is_clone_instance,
                                                    ArrayCopyPhase phase) const {
  if (phase == ArrayCopyPhase::Parsing) {
    return false;
  }
  if (phase == ArrayCopyPhase::Optimization) {
    return is_clone_instance;
  }
  // else ArrayCopyPhase::Expansion
  return type == T_OBJECT || type == T_ARRAY;
}

#define XTOP LP64_ONLY(COMMA phase->top())

void ZBarrierSetC2::clone_at_expansion(PhaseMacroExpand* phase, ArrayCopyNode* ac) const {
  Node* const src = ac->in(ArrayCopyNode::Src);
  const TypeAryPtr* const ary_ptr = src->get_ptr_type()->isa_aryptr();

  if (ac->is_clone_array() && ary_ptr != nullptr) {
    BasicType bt = ary_ptr->elem()->array_element_basic_type();
    if (is_reference_type(bt)) {
      // Clone object array
      bt = T_OBJECT;
    } else {
      // Clone primitive array
      bt = T_LONG;
    }

    Node* const ctrl = ac->in(TypeFunc::Control);
    Node* const mem = ac->in(TypeFunc::Memory);
    Node* const src = ac->in(ArrayCopyNode::Src);
    Node* src_offset = ac->in(ArrayCopyNode::SrcPos);
    Node* const dest = ac->in(ArrayCopyNode::Dest);
    Node* dest_offset = ac->in(ArrayCopyNode::DestPos);
    Node* length = ac->in(ArrayCopyNode::Length);

    if (bt == T_OBJECT) {
      // BarrierSetC2::clone sets the offsets via BarrierSetC2::arraycopy_payload_base_offset
      // which 8-byte aligns them to allow for word size copies. Make sure the offsets point
      // to the first element in the array when cloning object arrays. Otherwise, load
      // barriers are applied to parts of the header. Also adjust the length accordingly.
      assert(src_offset == dest_offset, "should be equal");
      const jlong offset = src_offset->get_long();
      if (offset != arrayOopDesc::base_offset_in_bytes(T_OBJECT)) {
        assert(!UseCompressedClassPointers || UseCompactObjectHeaders, "should only happen without compressed class pointers");
        assert((arrayOopDesc::base_offset_in_bytes(T_OBJECT) - offset) == BytesPerLong, "unexpected offset");
        length = phase->transform_later(new SubLNode(length, phase->longcon(1))); // Size is in longs
        src_offset = phase->longcon(arrayOopDesc::base_offset_in_bytes(T_OBJECT));
        dest_offset = src_offset;
      }
    }
    Node* const payload_src = phase->basic_plus_adr(src, src_offset);
    Node* const payload_dst = phase->basic_plus_adr(dest, dest_offset);

    const char* copyfunc_name = "arraycopy";
    const address copyfunc_addr = phase->basictype2arraycopy(bt, nullptr, nullptr, true, copyfunc_name, true);

    const TypePtr* const raw_adr_type = TypeRawPtr::BOTTOM;
    const TypeFunc* const call_type = OptoRuntime::fast_arraycopy_Type();

    Node* const call = phase->make_leaf_call(ctrl, mem, call_type, copyfunc_addr, copyfunc_name, raw_adr_type, payload_src, payload_dst, length XTOP);
    phase->transform_later(call);

    phase->igvn().replace_node(ac, call);
    return;
  }

  // Clone instance or array where 'src' is only known to be an object (ary_ptr
  // is null). This can happen in bytecode generated dynamically to implement
  // reflective array clones.
  clone_in_runtime(phase, ac, ZBarrierSetRuntime::clone_addr(), "ZBarrierSetRuntime::clone");
}

#undef XTOP

void ZBarrierSetC2::elide_dominated_barrier(MachNode* mach) const {
  mach->set_barrier_data(ZBarrierElided);
}
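
// Collect accesses with barriers together with their candidate dominators and
// let elide_dominated_barriers() drop the barriers made redundant by a
// dominator: loads can be elided after a dominating load, store, atomic access,
// or allocation; stores after a dominating store, atomic access, or allocation;
// atomic accesses only after a dominating store or atomic access (see the
// comment on allocations in Step 1 below).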

void ZBarrierSetC2::analyze_dominating_barriers() const {
  ResourceMark rm;
  Compile* const C = Compile::current();
  PhaseCFG* const cfg = C->cfg();

  Node_List loads;
  Node_List load_dominators;

  Node_List stores;
  Node_List store_dominators;

  Node_List atomics;
  Node_List atomic_dominators;

  // Step 1 - Find accesses and allocations, and track them in lists
  for (uint i = 0; i < cfg->number_of_blocks(); ++i) {
    const Block* const block = cfg->get_block(i);
    for (uint j = 0; j < block->number_of_nodes(); ++j) {
      Node* const node = block->get_node(j);
      if (node->is_Phi()) {
        if (is_allocation(node)) {
          load_dominators.push(node);
          store_dominators.push(node);
          // An allocation can't be considered to "dominate" an atomic operation.
          // For example a CAS requires the memory location to be store-good.
          // When you have a dominating store or atomic instruction, that is
          // indeed ensured to be the case. However, as for allocations, the
          // initialized memory location could be raw null, which isn't store-good.
        }
        continue;
      } else if (!node->is_Mach()) {
        continue;
      }

      MachNode* const mach = node->as_Mach();
      switch (mach->ideal_Opcode()) {
      case Op_LoadP:
        if ((mach->barrier_data() & ZBarrierStrong) != 0 &&
            (mach->barrier_data() & ZBarrierNoKeepalive) == 0) {
          loads.push(mach);
          load_dominators.push(mach);
        }
        break;
      case Op_StoreP:
        if (mach->barrier_data() != 0) {
          stores.push(mach);
          load_dominators.push(mach);
          store_dominators.push(mach);
          atomic_dominators.push(mach);
        }
        break;
      case Op_CompareAndExchangeP:
      case Op_CompareAndSwapP:
      case Op_GetAndSetP:
        if (mach->barrier_data() != 0) {
          atomics.push(mach);
          load_dominators.push(mach);
          store_dominators.push(mach);
          atomic_dominators.push(mach);
        }
        break;

      default:
        break;
      }
    }
  }

  // Step 2 - Find dominating accesses or allocations for each access
  elide_dominated_barriers(loads, load_dominators);
  elide_dominated_barriers(stores, store_dominators);
  elide_dominated_barriers(atomics, atomic_dominators);
}

void ZBarrierSetC2::eliminate_gc_barrier(PhaseMacroExpand* macro, Node* node) const {
  eliminate_gc_barrier_data(node);
}

void ZBarrierSetC2::eliminate_gc_barrier_data(Node* node) const {
  if (node->is_LoadStore()) {
    LoadStoreNode* loadstore = node->as_LoadStore();
    loadstore->set_barrier_data(ZBarrierElided);
  } else if (node->is_Mem()) {
    MemNode* mem = node->as_Mem();
    mem->set_barrier_data(ZBarrierElided);
  }
}

#ifndef PRODUCT
void ZBarrierSetC2::dump_barrier_data(const MachNode* mach, outputStream* st) const {
  if ((mach->barrier_data() & ZBarrierStrong) != 0) {
    st->print("strong ");
  }
  if ((mach->barrier_data() & ZBarrierWeak) != 0) {
    st->print("weak ");
  }
  if ((mach->barrier_data() & ZBarrierPhantom) != 0) {
    st->print("phantom ");
  }
  if ((mach->barrier_data() & ZBarrierNoKeepalive) != 0) {
    st->print("nokeepalive ");
  }
  if ((mach->barrier_data() & ZBarrierNative) != 0) {
    st->print("native ");
  }
  if ((mach->barrier_data() & ZBarrierElided) != 0) {
    st->print("elided ");
  }
}
#endif // !PRODUCT