/*
 * Copyright (c) 2015, 2023, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

#include "precompiled.hpp"
#include "classfile/javaClasses.hpp"
#include "gc/x/c2/xBarrierSetC2.hpp"
#include "gc/x/xBarrierSet.hpp"
#include "gc/x/xBarrierSetAssembler.hpp"
#include "gc/x/xBarrierSetRuntime.hpp"
#include "opto/arraycopynode.hpp"
#include "opto/addnode.hpp"
#include "opto/block.hpp"
#include "opto/compile.hpp"
#include "opto/graphKit.hpp"
#include "opto/machnode.hpp"
#include "opto/macro.hpp"
#include "opto/memnode.hpp"
#include "opto/node.hpp"
#include "opto/output.hpp"
#include "opto/regalloc.hpp"
#include "opto/rootnode.hpp"
#include "opto/runtime.hpp"
#include "opto/type.hpp"
#include "utilities/growableArray.hpp"
#include "utilities/macros.hpp"

class XBarrierSetC2State : public ArenaObj {
private:
  GrowableArray<XLoadBarrierStubC2*>* _stubs;
  Node_Array                          _live;

public:
  XBarrierSetC2State(Arena* arena) :
    _stubs(new (arena) GrowableArray<XLoadBarrierStubC2*>(arena, 8, 0, nullptr)),
    _live(arena) {}

  GrowableArray<XLoadBarrierStubC2*>* stubs() {
    return _stubs;
  }

  RegMask* live(const Node* node) {
    if (!node->is_Mach()) {
      // Don't need liveness for non-MachNodes
      return nullptr;
    }

    const MachNode* const mach = node->as_Mach();
    if (mach->barrier_data() == XLoadBarrierElided) {
      // Don't need liveness data for nodes without barriers
      return nullptr;
    }

    RegMask* live = (RegMask*)_live[node->_idx];
    if (live == nullptr) {
      live = new (Compile::current()->comp_arena()->AmallocWords(sizeof(RegMask))) RegMask();
      _live.map(node->_idx, (Node*)live);
    }

    return live;
  }
};

static XBarrierSetC2State* barrier_set_state() {
  return reinterpret_cast<XBarrierSetC2State*>(Compile::current()->barrier_set_state());
}

XLoadBarrierStubC2* XLoadBarrierStubC2::create(const MachNode* node, Address ref_addr, Register ref, Register tmp, uint8_t barrier_data) {
  XLoadBarrierStubC2* const stub = new (Compile::current()->comp_arena()) XLoadBarrierStubC2(node, ref_addr, ref, tmp, barrier_data);
  if (!Compile::current()->output()->in_scratch_emit_size()) {
    barrier_set_state()->stubs()->append(stub);
  }

  return stub;
}

XLoadBarrierStubC2::XLoadBarrierStubC2(const MachNode* node, Address ref_addr, Register ref, Register tmp, uint8_t barrier_data) :
    _node(node),
    _ref_addr(ref_addr),
    _ref(ref),
    _tmp(tmp),
    _barrier_data(barrier_data),
    _entry(),
    _continuation() {
  assert_different_registers(ref, ref_addr.base());
  assert_different_registers(ref, ref_addr.index());
}

Address XLoadBarrierStubC2::ref_addr() const {
  return _ref_addr;
}

Register XLoadBarrierStubC2::ref() const {
  return _ref;
}

Register XLoadBarrierStubC2::tmp() const {
  return _tmp;
}

address XLoadBarrierStubC2::slow_path() const {
  DecoratorSet decorators = DECORATORS_NONE;
  if (_barrier_data & XLoadBarrierStrong) {
    decorators |= ON_STRONG_OOP_REF;
  }
  if (_barrier_data & XLoadBarrierWeak) {
    decorators |= ON_WEAK_OOP_REF;
  }
  if (_barrier_data & XLoadBarrierPhantom) {
    decorators |= ON_PHANTOM_OOP_REF;
  }
  if (_barrier_data & XLoadBarrierNoKeepalive) {
    decorators |= AS_NO_KEEPALIVE;
  }
  return XBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators);
}

RegMask& XLoadBarrierStubC2::live() const {
  RegMask* mask = barrier_set_state()->live(_node);
  assert(mask != nullptr, "must be mach-node with barrier");
  return *mask;
}

Label* XLoadBarrierStubC2::entry() {
  // The _entry will never be bound when in_scratch_emit_size() is true.
  // However, we still need to return a label that is not bound now, but
  // will eventually be bound. Any label will do, as it will only act as
  // a placeholder, so we return the _continuation label.
  return Compile::current()->output()->in_scratch_emit_size() ? &_continuation : &_entry;
}

Label* XLoadBarrierStubC2::continuation() {
  return &_continuation;
}

void* XBarrierSetC2::create_barrier_state(Arena* comp_arena) const {
  return new (comp_arena) XBarrierSetC2State(comp_arena);
}

void XBarrierSetC2::late_barrier_analysis() const {
  analyze_dominating_barriers();
  compute_liveness_at_stubs();
}

void XBarrierSetC2::emit_stubs(CodeBuffer& cb) const {
  MacroAssembler masm(&cb);
  GrowableArray<XLoadBarrierStubC2*>* const stubs = barrier_set_state()->stubs();

  for (int i = 0; i < stubs->length(); i++) {
    // Make sure there is enough space in the code buffer
    if (cb.insts()->maybe_expand_to_ensure_remaining(PhaseOutput::MAX_inst_size) && cb.blob() == nullptr) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }

    XBarrierSet::assembler()->generate_c2_load_barrier_stub(&masm, stubs->at(i));
  }

  masm.flush();
}

int XBarrierSetC2::estimate_stub_size() const {
  Compile* const C = Compile::current();
  BufferBlob* const blob = C->output()->scratch_buffer_blob();
  GrowableArray<XLoadBarrierStubC2*>* const stubs = barrier_set_state()->stubs();
  int size = 0;

  for (int i = 0; i < stubs->length(); i++) {
    CodeBuffer cb(blob->content_begin(), (address)C->output()->scratch_locs_memory() - blob->content_begin());
    MacroAssembler masm(&cb);
    XBarrierSet::assembler()->generate_c2_load_barrier_stub(&masm, stubs->at(i));
    size += cb.insts_size();
  }

  return size;
}

static void set_barrier_data(C2Access& access) {
  if (XBarrierSet::barrier_needed(access.decorators(), access.type())) {
    uint8_t barrier_data = 0;

    if (access.decorators() & ON_PHANTOM_OOP_REF) {
      barrier_data |= XLoadBarrierPhantom;
    } else if (access.decorators() & ON_WEAK_OOP_REF) {
      barrier_data |= XLoadBarrierWeak;
    } else {
      barrier_data |= XLoadBarrierStrong;
    }
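
    // Note (added comment): AS_NO_KEEPALIVE does not select a barrier strength of
    // its own; it is OR'ed on top of the strength bit chosen above.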
    if (access.decorators() & AS_NO_KEEPALIVE) {
      barrier_data |= XLoadBarrierNoKeepalive;
    }

    access.set_barrier_data(barrier_data);
  }
}

Node* XBarrierSetC2::load_at_resolved(C2Access& access, const Type* val_type) const {
  set_barrier_data(access);
  return BarrierSetC2::load_at_resolved(access, val_type);
}

Node* XBarrierSetC2::atomic_cmpxchg_val_at_resolved(C2AtomicParseAccess& access, Node* expected_val,
                                                    Node* new_val, const Type* val_type) const {
  set_barrier_data(access);
  return BarrierSetC2::atomic_cmpxchg_val_at_resolved(access, expected_val, new_val, val_type);
}

Node* XBarrierSetC2::atomic_cmpxchg_bool_at_resolved(C2AtomicParseAccess& access, Node* expected_val,
                                                     Node* new_val, const Type* value_type) const {
  set_barrier_data(access);
  return BarrierSetC2::atomic_cmpxchg_bool_at_resolved(access, expected_val, new_val, value_type);
}

Node* XBarrierSetC2::atomic_xchg_at_resolved(C2AtomicParseAccess& access, Node* new_val, const Type* val_type) const {
  set_barrier_data(access);
  return BarrierSetC2::atomic_xchg_at_resolved(access, new_val, val_type);
}

bool XBarrierSetC2::array_copy_requires_gc_barriers(bool tightly_coupled_alloc, BasicType type,
                                                    bool is_clone, bool is_clone_instance,
                                                    ArrayCopyPhase phase) const {
  if (phase == ArrayCopyPhase::Parsing) {
    return false;
  }
  if (phase == ArrayCopyPhase::Optimization) {
    return is_clone_instance;
  }
  // else ArrayCopyPhase::Expansion
  return type == T_OBJECT || type == T_ARRAY;
}

// This TypeFunc assumes a 64bit system
static const TypeFunc* clone_type() {
  // Create input type (domain)
  const Type** domain_fields = TypeTuple::fields(4);
  domain_fields[TypeFunc::Parms + 0] = TypeInstPtr::NOTNULL;  // src
  domain_fields[TypeFunc::Parms + 1] = TypeInstPtr::NOTNULL;  // dst
  domain_fields[TypeFunc::Parms + 2] = TypeLong::LONG;        // size lower
  domain_fields[TypeFunc::Parms + 3] = Type::HALF;            // size upper
  const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms + 4, domain_fields);

  // Create result type (range)
  const Type** range_fields = TypeTuple::fields(0);
  const TypeTuple* range = TypeTuple::make(TypeFunc::Parms + 0, range_fields);

  return TypeFunc::make(domain, range);
}

#define XTOP LP64_ONLY(COMMA phase->top())

void XBarrierSetC2::clone_at_expansion(PhaseMacroExpand* phase, ArrayCopyNode* ac) const {
  Node* const src = ac->in(ArrayCopyNode::Src);
  const TypeAryPtr* ary_ptr = src->get_ptr_type()->isa_aryptr();

  if (ac->is_clone_array() && ary_ptr != nullptr) {
    BasicType bt = ary_ptr->elem()->array_element_basic_type();
    if (is_reference_type(bt)) {
      // Clone object array
      bt = T_OBJECT;
    } else {
      // Clone primitive array
      bt = T_LONG;
    }

    Node* ctrl = ac->in(TypeFunc::Control);
    Node* mem = ac->in(TypeFunc::Memory);
    Node* src = ac->in(ArrayCopyNode::Src);
    Node* src_offset = ac->in(ArrayCopyNode::SrcPos);
    Node* dest = ac->in(ArrayCopyNode::Dest);
    Node* dest_offset = ac->in(ArrayCopyNode::DestPos);
    Node* length = ac->in(ArrayCopyNode::Length);

    if (bt == T_OBJECT) {
      // BarrierSetC2::clone sets the offsets via BarrierSetC2::arraycopy_payload_base_offset
      // which 8-byte aligns them to allow for word size copies. Make sure the offsets point
      // to the first element in the array when cloning object arrays. Otherwise, load
      // barriers are applied to parts of the header. Also adjust the length accordingly.
      assert(src_offset == dest_offset, "should be equal");
      jlong offset = src_offset->get_long();
      if (offset != arrayOopDesc::base_offset_in_bytes(T_OBJECT)) {
        assert(!UseCompressedClassPointers || UseCompactObjectHeaders, "should only happen without compressed class pointers");
        assert((arrayOopDesc::base_offset_in_bytes(T_OBJECT) - offset) == BytesPerLong, "unexpected offset");
        length = phase->transform_later(new SubLNode(length, phase->longcon(1))); // Size is in longs
        src_offset = phase->longcon(arrayOopDesc::base_offset_in_bytes(T_OBJECT));
        dest_offset = src_offset;
      }
    }
    Node* payload_src = phase->basic_plus_adr(src, src_offset);
    Node* payload_dst = phase->basic_plus_adr(dest, dest_offset);

    const char* copyfunc_name = "arraycopy";
    address copyfunc_addr = phase->basictype2arraycopy(bt, nullptr, nullptr, true, copyfunc_name, true);

    const TypePtr* raw_adr_type = TypeRawPtr::BOTTOM;
    const TypeFunc* call_type = OptoRuntime::fast_arraycopy_Type();

    Node* call = phase->make_leaf_call(ctrl, mem, call_type, copyfunc_addr, copyfunc_name, raw_adr_type, payload_src, payload_dst, length XTOP);
    phase->transform_later(call);

    phase->igvn().replace_node(ac, call);
    return;
  }

  // Clone instance
  Node* const ctrl = ac->in(TypeFunc::Control);
  Node* const mem = ac->in(TypeFunc::Memory);
  Node* const dst = ac->in(ArrayCopyNode::Dest);
  Node* const size = ac->in(ArrayCopyNode::Length);

  assert(size->bottom_type()->is_long(), "Should be long");

  // The native clone we are calling here expects the instance size in words
  // Add header/offset size to payload size to get instance size.
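  // Note (added comment): the size input holds the payload size in longs, so the
  // header offset is converted from bytes to longs (>> LogBytesPerLong) before
  // being added; on a 64-bit system a long and a word are the same size.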
  Node* const base_offset = phase->longcon(arraycopy_payload_base_offset(ac->is_clone_array()) >> LogBytesPerLong);
  Node* const full_size = phase->transform_later(new AddLNode(size, base_offset));

  Node* const call = phase->make_leaf_call(ctrl,
                                           mem,
                                           clone_type(),
                                           XBarrierSetRuntime::clone_addr(),
                                           "XBarrierSetRuntime::clone",
                                           TypeRawPtr::BOTTOM,
                                           src,
                                           dst,
                                           full_size,
                                           phase->top());
  phase->transform_later(call);
  phase->igvn().replace_node(ac, call);
}

#undef XTOP

// == Dominating barrier elision ==

static bool block_has_safepoint(const Block* block, uint from, uint to) {
  for (uint i = from; i < to; i++) {
    if (block->get_node(i)->is_MachSafePoint()) {
      // Safepoint found
      return true;
    }
  }

  // Safepoint not found
  return false;
}

static bool block_has_safepoint(const Block* block) {
  return block_has_safepoint(block, 0, block->number_of_nodes());
}

static uint block_index(const Block* block, const Node* node) {
  for (uint j = 0; j < block->number_of_nodes(); ++j) {
    if (block->get_node(j) == node) {
      return j;
    }
  }
  ShouldNotReachHere();
  return 0;
}

void XBarrierSetC2::analyze_dominating_barriers() const {
  ResourceMark rm;
  Compile* const C = Compile::current();
  PhaseCFG* const cfg = C->cfg();
  Block_List worklist;
  Node_List mem_ops;
  Node_List barrier_loads;

  // Step 1 - Find accesses, and track them in lists
  for (uint i = 0; i < cfg->number_of_blocks(); ++i) {
    const Block* const block = cfg->get_block(i);
    for (uint j = 0; j < block->number_of_nodes(); ++j) {
      const Node* const node = block->get_node(j);
      if (!node->is_Mach()) {
        continue;
      }

      MachNode* const mach = node->as_Mach();
      switch (mach->ideal_Opcode()) {
      case Op_LoadP:
        if ((mach->barrier_data() & XLoadBarrierStrong) != 0) {
          barrier_loads.push(mach);
        }
        if ((mach->barrier_data() & (XLoadBarrierStrong | XLoadBarrierNoKeepalive)) ==
            XLoadBarrierStrong) {
          mem_ops.push(mach);
        }
        break;
      case Op_CompareAndExchangeP:
      case Op_CompareAndSwapP:
      case Op_GetAndSetP:
        if ((mach->barrier_data() & XLoadBarrierStrong) != 0) {
          barrier_loads.push(mach);
        }
      case Op_StoreP:
        mem_ops.push(mach);
        break;

      default:
        break;
      }
    }
  }

  // Step 2 - Find dominating accesses for each load
  for (uint i = 0; i < barrier_loads.size(); i++) {
    MachNode* const load = barrier_loads.at(i)->as_Mach();
    const TypePtr* load_adr_type = nullptr;
    intptr_t load_offset = 0;
    const Node* const load_obj = load->get_base_and_disp(load_offset, load_adr_type);
    Block* const load_block = cfg->get_block_for_node(load);
    const uint load_index = block_index(load_block, load);

    for (uint j = 0; j < mem_ops.size(); j++) {
      MachNode* mem = mem_ops.at(j)->as_Mach();
      const TypePtr* mem_adr_type = nullptr;
      intptr_t mem_offset = 0;
      const Node* mem_obj = mem->get_base_and_disp(mem_offset, mem_adr_type);
      Block* mem_block = cfg->get_block_for_node(mem);
      uint mem_index = block_index(mem_block, mem);

      if (load_obj == NodeSentinel || mem_obj == NodeSentinel ||
          load_obj == nullptr || mem_obj == nullptr ||
          load_offset < 0 || mem_offset < 0) {
        continue;
      }

      if (mem_obj != load_obj || mem_offset != load_offset) {
        // Not the same addresses, not a candidate
        continue;
      }
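
      // Note (added comment): the load and the earlier access refer to the same
      // address. The load barrier can only be elided if no safepoint can be
      // reached between the two accesses, since the GC may change phase at a
      // safepoint and invalidate what the earlier barrier established.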
      if (load_block == mem_block) {
        // Earlier accesses in the same block
        if (mem_index < load_index && !block_has_safepoint(mem_block, mem_index + 1, load_index)) {
          load->set_barrier_data(XLoadBarrierElided);
        }
      } else if (mem_block->dominates(load_block)) {
        // Dominating block? Look around for safepoints
        ResourceMark rm;
        Block_List stack;
        VectorSet visited;
        stack.push(load_block);
        bool safepoint_found = block_has_safepoint(load_block);
        while (!safepoint_found && stack.size() > 0) {
          Block* block = stack.pop();
          if (visited.test_set(block->_pre_order)) {
            continue;
          }
          if (block_has_safepoint(block)) {
            safepoint_found = true;
            break;
          }
          if (block == mem_block) {
            continue;
          }

          // Push predecessor blocks
          for (uint p = 1; p < block->num_preds(); ++p) {
            Block* pred = cfg->get_block_for_node(block->pred(p));
            stack.push(pred);
          }
        }

        if (!safepoint_found) {
          load->set_barrier_data(XLoadBarrierElided);
        }
      }
    }
  }
}

// == Reduced spilling optimization ==

void XBarrierSetC2::compute_liveness_at_stubs() const {
  ResourceMark rm;
  Compile* const C = Compile::current();
  Arena* const A = Thread::current()->resource_area();
  PhaseCFG* const cfg = C->cfg();
  PhaseRegAlloc* const regalloc = C->regalloc();
  RegMask* const live = NEW_ARENA_ARRAY(A, RegMask, cfg->number_of_blocks() * sizeof(RegMask));
  XBarrierSetAssembler* const bs = XBarrierSet::assembler();
  Block_List worklist;

  for (uint i = 0; i < cfg->number_of_blocks(); ++i) {
    new ((void*)(live + i)) RegMask();
    worklist.push(cfg->get_block(i));
  }

  while (worklist.size() > 0) {
    const Block* const block = worklist.pop();
    RegMask& old_live = live[block->_pre_order];
    RegMask new_live;

    // Initialize to union of successors
    for (uint i = 0; i < block->_num_succs; i++) {
      const uint succ_id = block->_succs[i]->_pre_order;
      new_live.OR(live[succ_id]);
    }

    // Walk block backwards, computing liveness
    for (int i = block->number_of_nodes() - 1; i >= 0; --i) {
      const Node* const node = block->get_node(i);

      // Remove def bits
      const OptoReg::Name first = bs->refine_register(node, regalloc->get_reg_first(node));
      const OptoReg::Name second = bs->refine_register(node, regalloc->get_reg_second(node));
      if (first != OptoReg::Bad) {
        new_live.Remove(first);
      }
      if (second != OptoReg::Bad) {
        new_live.Remove(second);
      }

      // Add use bits
      for (uint j = 1; j < node->req(); ++j) {
        const Node* const use = node->in(j);
        const OptoReg::Name first = bs->refine_register(use, regalloc->get_reg_first(use));
        const OptoReg::Name second = bs->refine_register(use, regalloc->get_reg_second(use));
        if (first != OptoReg::Bad) {
          new_live.Insert(first);
        }
        if (second != OptoReg::Bad) {
          new_live.Insert(second);
        }
      }

      // If this node tracks liveness, update it
      RegMask* const regs = barrier_set_state()->live(node);
      if (regs != nullptr) {
        regs->OR(new_live);
      }
    }

    // Now at block top, see if we have any changes
    new_live.SUBTRACT(old_live);
    if (new_live.is_NotEmpty()) {
      // Liveness has refined, update and propagate to prior blocks
      old_live.OR(new_live);
      for (uint i = 1; i < block->num_preds(); ++i) {
        Block* const pred = cfg->get_block_for_node(block->pred(i));
        worklist.push(pred);
      }
    }
  }
}

#ifndef PRODUCT
void XBarrierSetC2::dump_barrier_data(const MachNode* mach, outputStream* st) const {
  if ((mach->barrier_data() & XLoadBarrierStrong) != 0) {
    st->print("strong ");
  }
  if ((mach->barrier_data() & XLoadBarrierWeak) != 0) {
    st->print("weak ");
  }
  if ((mach->barrier_data() & XLoadBarrierPhantom) != 0) {
    st->print("phantom ");
  }
  if ((mach->barrier_data() & XLoadBarrierNoKeepalive) != 0) {
    st->print("nokeepalive ");
  }
}
#endif // !PRODUCT