/*
 * Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "code/vmreg.inline.hpp"
#include "gc/shared/barrierSet.hpp"
#include "gc/shared/tlab_globals.hpp"
#include "gc/shared/c2/barrierSetC2.hpp"
#include "opto/arraycopynode.hpp"
#include "opto/block.hpp"
#include "opto/convertnode.hpp"
#include "opto/graphKit.hpp"
#include "opto/idealKit.hpp"
#include "opto/macro.hpp"
#include "opto/narrowptrnode.hpp"
#include "opto/output.hpp"
#include "opto/regalloc.hpp"
#include "opto/runtime.hpp"
#include "utilities/macros.hpp"
#include CPU_HEADER(gc/shared/barrierSetAssembler)

// By default this is a no-op.
void BarrierSetC2::resolve_address(C2Access& access) const { }

void* C2ParseAccess::barrier_set_state() const {
  return _kit->barrier_set_state();
}

PhaseGVN& C2ParseAccess::gvn() const { return _kit->gvn(); }

bool C2Access::needs_cpu_membar() const {
  bool mismatched = (_decorators & C2_MISMATCHED) != 0;
  bool is_unordered = (_decorators & MO_UNORDERED) != 0;

  bool anonymous = (_decorators & C2_UNSAFE_ACCESS) != 0;
  bool in_heap = (_decorators & IN_HEAP) != 0;
  bool in_native = (_decorators & IN_NATIVE) != 0;
  bool is_mixed = !in_heap && !in_native;

  bool is_write = (_decorators & C2_WRITE_ACCESS) != 0;
  bool is_read = (_decorators & C2_READ_ACCESS) != 0;
  bool is_atomic = is_read && is_write;

  if (is_atomic) {
    // Atomics always need to be wrapped in CPU membars
    return true;
  }

  if (anonymous) {
    // We will need memory barriers unless we can determine a unique
    // alias category for this reference. (Note: If for some reason
    // the barriers get omitted and the unsafe reference begins to "pollute"
    // the alias analysis of the rest of the graph, either Compile::can_alias
    // or Compile::must_alias will throw a diagnostic assert.)
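    // Be conservative unless the alias category is clearly known: require a
    // barrier for mixed (neither in-heap nor in-native) accesses, for ordered
    // accesses, and for mismatched accesses that are not to arrays.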
    if (is_mixed || !is_unordered || (mismatched && !_addr.type()->isa_aryptr())) {
      return true;
    }
  } else {
    assert(!is_mixed, "not unsafe");
  }

  return false;
}

static BarrierSetC2State* barrier_set_state() {
  return reinterpret_cast<BarrierSetC2State*>(Compile::current()->barrier_set_state());
}

RegMask& BarrierStubC2::live() const {
  return *barrier_set_state()->live(_node);
}

BarrierStubC2::BarrierStubC2(const MachNode* node)
  : _node(node),
    _entry(),
    _continuation(),
    _preserve(live()) {}

Label* BarrierStubC2::entry() {
  // The _entry will never be bound when in_scratch_emit_size() is true.
  // However, we still need to return a label that is not bound now, but
  // will eventually be bound. Any eventually bound label will do, as it
  // will only act as a placeholder, so we return the _continuation label.
  return Compile::current()->output()->in_scratch_emit_size() ? &_continuation : &_entry;
}

Label* BarrierStubC2::continuation() {
  return &_continuation;
}

uint8_t BarrierStubC2::barrier_data() const {
  return _node->barrier_data();
}

void BarrierStubC2::preserve(Register r) {
  const VMReg vm_reg = r->as_VMReg();
  assert(vm_reg->is_Register(), "r must be a general-purpose register");
  _preserve.Insert(OptoReg::as_OptoReg(vm_reg));
}

void BarrierStubC2::dont_preserve(Register r) {
  VMReg vm_reg = r->as_VMReg();
  assert(vm_reg->is_Register(), "r must be a general-purpose register");
  // Subtract the given register and all its sub-registers (e.g. {R11, R11_H}
  // for r11 in aarch64).
  do {
    _preserve.Remove(OptoReg::as_OptoReg(vm_reg));
    vm_reg = vm_reg->next();
  } while (vm_reg->is_Register() && !vm_reg->is_concrete());
}

const RegMask& BarrierStubC2::preserve_set() const {
  return _preserve;
}

Node* BarrierSetC2::store_at_resolved(C2Access& access, C2AccessValue& val) const {
  DecoratorSet decorators = access.decorators();

  bool mismatched = (decorators & C2_MISMATCHED) != 0;
  bool unaligned = (decorators & C2_UNALIGNED) != 0;
  bool unsafe = (decorators & C2_UNSAFE_ACCESS) != 0;
  bool requires_atomic_access = (decorators & MO_UNORDERED) == 0;

  MemNode::MemOrd mo = access.mem_node_mo();

  Node* store;
  BasicType bt = access.type();
  if (access.is_parse_access()) {
    C2ParseAccess& parse_access = static_cast<C2ParseAccess&>(access);

    GraphKit* kit = parse_access.kit();
    if (bt == T_DOUBLE) {
      Node* new_val = kit->dprecision_rounding(val.node());
      val.set_node(new_val);
    }

    store = kit->store_to_memory(kit->control(), access.addr().node(), val.node(), bt,
                                 mo, requires_atomic_access, unaligned, mismatched,
                                 unsafe, access.barrier_data());
  } else {
    assert(access.is_opt_access(), "either parse or opt access");
    C2OptAccess& opt_access = static_cast<C2OptAccess&>(access);
    Node* ctl = opt_access.ctl();
    MergeMemNode* mm = opt_access.mem();
    PhaseGVN& gvn = opt_access.gvn();
    const TypePtr* adr_type = access.addr().type();
    int alias = gvn.C->get_alias_index(adr_type);
    Node* mem = mm->memory_at(alias);

    StoreNode* st = StoreNode::make(gvn, ctl, mem, access.addr().node(), adr_type, val.node(), bt, mo, requires_atomic_access);
    if (unaligned) {
      st->set_unaligned_access();
    }
    if (mismatched) {
      st->set_mismatched_access();
    }
    st->set_barrier_data(access.barrier_data());
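    // Transform the store; if GVN did not fold it away, install it as the
    // memory state for this alias slice of the MergeMem.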
    store = gvn.transform(st);
    if (store == st) {
      mm->set_memory_at(alias, st);
    }
  }
  access.set_raw_access(store);

  return store;
}

Node* BarrierSetC2::load_at_resolved(C2Access& access, const Type* val_type) const {
  DecoratorSet decorators = access.decorators();

  Node* adr = access.addr().node();
  const TypePtr* adr_type = access.addr().type();

  bool mismatched = (decorators & C2_MISMATCHED) != 0;
  bool requires_atomic_access = (decorators & MO_UNORDERED) == 0;
  bool unaligned = (decorators & C2_UNALIGNED) != 0;
  bool control_dependent = (decorators & C2_CONTROL_DEPENDENT_LOAD) != 0;
  bool unknown_control = (decorators & C2_UNKNOWN_CONTROL_LOAD) != 0;
  bool unsafe = (decorators & C2_UNSAFE_ACCESS) != 0;
  bool immutable = (decorators & C2_IMMUTABLE_MEMORY) != 0;

  MemNode::MemOrd mo = access.mem_node_mo();
  LoadNode::ControlDependency dep = unknown_control ? LoadNode::UnknownControl : LoadNode::DependsOnlyOnTest;

  Node* load;
  if (access.is_parse_access()) {
    C2ParseAccess& parse_access = static_cast<C2ParseAccess&>(access);
    GraphKit* kit = parse_access.kit();
    Node* control = control_dependent ? kit->control() : nullptr;

    if (immutable) {
      Compile* C = Compile::current();
      Node* mem = kit->immutable_memory();
      load = LoadNode::make(kit->gvn(), control, mem, adr,
                            adr_type, val_type, access.type(), mo, dep, requires_atomic_access,
                            unaligned, mismatched, unsafe, access.barrier_data());
      load = kit->gvn().transform(load);
    } else {
      load = kit->make_load(control, adr, val_type, access.type(), mo,
                            dep, requires_atomic_access, unaligned, mismatched, unsafe,
                            access.barrier_data());
    }
  } else {
    assert(access.is_opt_access(), "either parse or opt access");
    C2OptAccess& opt_access = static_cast<C2OptAccess&>(access);
    Node* control = control_dependent ? opt_access.ctl() : nullptr;
    MergeMemNode* mm = opt_access.mem();
    PhaseGVN& gvn = opt_access.gvn();
    Node* mem = mm->memory_at(gvn.C->get_alias_index(adr_type));
    load = LoadNode::make(gvn, control, mem, adr, adr_type, val_type, access.type(), mo, dep,
                          requires_atomic_access, unaligned, mismatched, unsafe, access.barrier_data());
    load = gvn.transform(load);
  }
  access.set_raw_access(load);

  return load;
}

class C2AccessFence: public StackObj {
  C2Access& _access;
  Node* _leading_membar;

public:
  C2AccessFence(C2Access& access) :
    _access(access), _leading_membar(nullptr) {
    GraphKit* kit = nullptr;
    if (access.is_parse_access()) {
      C2ParseAccess& parse_access = static_cast<C2ParseAccess&>(access);
      kit = parse_access.kit();
    }
    DecoratorSet decorators = access.decorators();

    bool is_write = (decorators & C2_WRITE_ACCESS) != 0;
    bool is_read = (decorators & C2_READ_ACCESS) != 0;
    bool is_atomic = is_read && is_write;

    bool is_volatile = (decorators & MO_SEQ_CST) != 0;
    bool is_release = (decorators & MO_RELEASE) != 0;

    if (is_atomic) {
      assert(kit != nullptr, "unsupported at optimization time");
      // Memory-model-wise, a LoadStore acts like a little synchronized
      // block, so it needs barriers on each side. These don't translate
      // into actual barriers on most machines, but we still need the rest
      // of the compiler to respect ordering.
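      // Release semantics take a leading MemBarRelease; a volatile access takes
      // a full leading MemBarVolatile only when IRIW support is required on
      // CPUs that are not multiple-copy-atomic.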
      if (is_release) {
        _leading_membar = kit->insert_mem_bar(Op_MemBarRelease);
      } else if (is_volatile) {
        if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
          _leading_membar = kit->insert_mem_bar(Op_MemBarVolatile);
        } else {
          _leading_membar = kit->insert_mem_bar(Op_MemBarRelease);
        }
      }
    } else if (is_write) {
      // If reference is volatile, prevent following memory ops from
      // floating down past the volatile write. Also prevents commoning
      // another volatile read.
      if (is_volatile || is_release) {
        assert(kit != nullptr, "unsupported at optimization time");
        _leading_membar = kit->insert_mem_bar(Op_MemBarRelease);
      }
    } else {
      // Memory barrier to prevent normal and 'unsafe' accesses from
      // bypassing each other. Happens after null checks, so the
      // exception paths do not take memory state from the memory barrier,
      // so there's no problem making a strong assert about mixing users
      // of safe & unsafe memory.
      if (is_volatile && support_IRIW_for_not_multiple_copy_atomic_cpu) {
        assert(kit != nullptr, "unsupported at optimization time");
        _leading_membar = kit->insert_mem_bar(Op_MemBarVolatile);
      }
    }

    if (access.needs_cpu_membar()) {
      assert(kit != nullptr, "unsupported at optimization time");
      kit->insert_mem_bar(Op_MemBarCPUOrder);
    }

    if (is_atomic) {
      // 4984716: MemBars must be inserted before this
      // memory node in order to avoid a false
      // dependency which will confuse the scheduler.
      access.set_memory();
    }
  }

  ~C2AccessFence() {
    GraphKit* kit = nullptr;
    if (_access.is_parse_access()) {
      C2ParseAccess& parse_access = static_cast<C2ParseAccess&>(_access);
      kit = parse_access.kit();
    }
    DecoratorSet decorators = _access.decorators();

    bool is_write = (decorators & C2_WRITE_ACCESS) != 0;
    bool is_read = (decorators & C2_READ_ACCESS) != 0;
    bool is_atomic = is_read && is_write;

    bool is_volatile = (decorators & MO_SEQ_CST) != 0;
    bool is_acquire = (decorators & MO_ACQUIRE) != 0;

    // If reference is volatile, prevent following volatile ops from
    // floating up before the volatile access.
    if (_access.needs_cpu_membar()) {
      kit->insert_mem_bar(Op_MemBarCPUOrder);
    }

    if (is_atomic) {
      assert(kit != nullptr, "unsupported at optimization time");
      if (is_acquire || is_volatile) {
        Node* n = _access.raw_access();
        Node* mb = kit->insert_mem_bar(Op_MemBarAcquire, n);
        if (_leading_membar != nullptr) {
          MemBarNode::set_load_store_pair(_leading_membar->as_MemBar(), mb->as_MemBar());
        }
      }
    } else if (is_write) {
      // If not multiple copy atomic, we do the MemBarVolatile before the load.
      if (is_volatile && !support_IRIW_for_not_multiple_copy_atomic_cpu) {
        assert(kit != nullptr, "unsupported at optimization time");
        Node* n = _access.raw_access();
        Node* mb = kit->insert_mem_bar(Op_MemBarVolatile, n); // Use fat membar
        if (_leading_membar != nullptr) {
          MemBarNode::set_store_pair(_leading_membar->as_MemBar(), mb->as_MemBar());
        }
      }
    } else {
      if (is_volatile || is_acquire) {
        assert(kit != nullptr, "unsupported at optimization time");
        Node* n = _access.raw_access();
        assert(_leading_membar == nullptr || support_IRIW_for_not_multiple_copy_atomic_cpu, "no leading membar expected");
        Node* mb = kit->insert_mem_bar(Op_MemBarAcquire, n);
        mb->as_MemBar()->set_trailing_load();
      }
    }
  }
};

Node* BarrierSetC2::store_at(C2Access& access, C2AccessValue& val) const {
  C2AccessFence fence(access);
  resolve_address(access);
  return store_at_resolved(access, val);
}

Node* BarrierSetC2::load_at(C2Access& access, const Type* val_type) const {
  C2AccessFence fence(access);
  resolve_address(access);
  return load_at_resolved(access, val_type);
}

MemNode::MemOrd C2Access::mem_node_mo() const {
  bool is_write = (_decorators & C2_WRITE_ACCESS) != 0;
  bool is_read = (_decorators & C2_READ_ACCESS) != 0;
  if ((_decorators & MO_SEQ_CST) != 0) {
    if (is_write && is_read) {
      // For atomic operations
      return MemNode::seqcst;
    } else if (is_write) {
      return MemNode::release;
    } else {
      assert(is_read, "what else?");
      return MemNode::acquire;
    }
  } else if ((_decorators & MO_RELEASE) != 0) {
    return MemNode::release;
  } else if ((_decorators & MO_ACQUIRE) != 0) {
    return MemNode::acquire;
  } else if (is_write) {
    // Volatile fields need releasing stores.
    // Non-volatile fields also need releasing stores if they hold an
    // object reference, because the object reference might point to
    // a freshly created object.
    // Conservatively release stores of object references.
    return StoreNode::release_if_reference(_type);
  } else {
    return MemNode::unordered;
  }
}

void C2Access::fixup_decorators() {
  bool default_mo = (_decorators & MO_DECORATOR_MASK) == 0;
  bool is_unordered = (_decorators & MO_UNORDERED) != 0 || default_mo;
  bool anonymous = (_decorators & C2_UNSAFE_ACCESS) != 0;

  bool is_read = (_decorators & C2_READ_ACCESS) != 0;
  bool is_write = (_decorators & C2_WRITE_ACCESS) != 0;

  if (AlwaysAtomicAccesses && is_unordered) {
    _decorators &= ~MO_DECORATOR_MASK; // clear the MO bits
    _decorators |= MO_RELAXED; // Force the MO_RELAXED decorator with AlwaysAtomicAccesses
  }

  _decorators = AccessInternal::decorator_fixup(_decorators, _type);

  if (is_read && !is_write && anonymous) {
    // To be valid, unsafe loads may depend on conditions other than
    // the one that guards them: pin the Load node
    _decorators |= C2_CONTROL_DEPENDENT_LOAD;
    _decorators |= C2_UNKNOWN_CONTROL_LOAD;
    const TypePtr* adr_type = _addr.type();
    Node* adr = _addr.node();
    if (!needs_cpu_membar() && adr_type->isa_instptr()) {
      assert(adr_type->meet(TypePtr::NULL_PTR) != adr_type->remove_speculative(), "should be not null");
      intptr_t offset = Type::OffsetBot;
      AddPNode::Ideal_base_and_offset(adr, &gvn(), offset);
      if (offset >= 0) {
        int s = Klass::layout_helper_size_in_bytes(adr_type->isa_instptr()->instance_klass()->layout_helper());
        if (offset < s) {
          // Guaranteed to be a valid access, no need to pin it
          _decorators ^= C2_CONTROL_DEPENDENT_LOAD;
          _decorators ^= C2_UNKNOWN_CONTROL_LOAD;
        }
      }
    }
  }
}

//--------------------------- atomic operations---------------------------------

void BarrierSetC2::pin_atomic_op(C2AtomicParseAccess& access) const {
  // SCMemProjNodes represent the memory state of a LoadStore. Their
  // main role is to prevent LoadStore nodes from being optimized away
  // when their results aren't used.
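  // Attach an SCMemProj to the LoadStore and make it the current memory state
  // for this access's alias index, so the LoadStore stays reachable from the
  // memory graph even if its result value is unused.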
  assert(access.is_parse_access(), "entry not supported at optimization time");
  C2ParseAccess& parse_access = static_cast<C2ParseAccess&>(access);
  GraphKit* kit = parse_access.kit();
  Node* load_store = access.raw_access();
  assert(load_store != nullptr, "must pin atomic op");
  Node* proj = kit->gvn().transform(new SCMemProjNode(load_store));
  kit->set_memory(proj, access.alias_idx());
}

void C2AtomicParseAccess::set_memory() {
  Node *mem = _kit->memory(_alias_idx);
  _memory = mem;
}

Node* BarrierSetC2::atomic_cmpxchg_val_at_resolved(C2AtomicParseAccess& access, Node* expected_val,
                                                   Node* new_val, const Type* value_type) const {
  GraphKit* kit = access.kit();
  MemNode::MemOrd mo = access.mem_node_mo();
  Node* mem = access.memory();

  Node* adr = access.addr().node();
  const TypePtr* adr_type = access.addr().type();

  Node* load_store = nullptr;

  if (access.is_oop()) {
#ifdef _LP64
    if (adr->bottom_type()->is_ptr_to_narrowoop()) {
      Node *newval_enc = kit->gvn().transform(new EncodePNode(new_val, new_val->bottom_type()->make_narrowoop()));
      Node *oldval_enc = kit->gvn().transform(new EncodePNode(expected_val, expected_val->bottom_type()->make_narrowoop()));
      load_store = new CompareAndExchangeNNode(kit->control(), mem, adr, newval_enc, oldval_enc, adr_type, value_type->make_narrowoop(), mo);
    } else
#endif
    {
      load_store = new CompareAndExchangePNode(kit->control(), mem, adr, new_val, expected_val, adr_type, value_type->is_oopptr(), mo);
    }
  } else {
    switch (access.type()) {
      case T_BYTE: {
        load_store = new CompareAndExchangeBNode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo);
        break;
      }
      case T_SHORT: {
        load_store = new CompareAndExchangeSNode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo);
        break;
      }
      case T_INT: {
        load_store = new CompareAndExchangeINode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo);
        break;
      }
      case T_LONG: {
        load_store = new CompareAndExchangeLNode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo);
        break;
      }
      default:
        ShouldNotReachHere();
    }
  }

  load_store->as_LoadStore()->set_barrier_data(access.barrier_data());
  load_store = kit->gvn().transform(load_store);

  access.set_raw_access(load_store);
  pin_atomic_op(access);

#ifdef _LP64
  if (access.is_oop() && adr->bottom_type()->is_ptr_to_narrowoop()) {
    return kit->gvn().transform(new DecodeNNode(load_store, load_store->get_ptr_type()));
  }
#endif

  return load_store;
}

Node* BarrierSetC2::atomic_cmpxchg_bool_at_resolved(C2AtomicParseAccess& access, Node* expected_val,
                                                    Node* new_val, const Type* value_type) const {
  GraphKit* kit = access.kit();
  DecoratorSet decorators = access.decorators();
  MemNode::MemOrd mo = access.mem_node_mo();
  Node* mem = access.memory();
  bool is_weak_cas = (decorators & C2_WEAK_CMPXCHG) != 0;
  Node* load_store = nullptr;
  Node* adr = access.addr().node();

  if (access.is_oop()) {
#ifdef _LP64
    if (adr->bottom_type()->is_ptr_to_narrowoop()) {
      Node *newval_enc = kit->gvn().transform(new EncodePNode(new_val, new_val->bottom_type()->make_narrowoop()));
      Node *oldval_enc = kit->gvn().transform(new EncodePNode(expected_val, expected_val->bottom_type()->make_narrowoop()));
      if (is_weak_cas) {
        load_store = new WeakCompareAndSwapNNode(kit->control(), mem, adr, newval_enc, oldval_enc, mo);
      } else {
        load_store = new CompareAndSwapNNode(kit->control(), mem, adr, newval_enc, oldval_enc, mo);
      }
    } else
#endif
    {
      if (is_weak_cas) {
        load_store = new WeakCompareAndSwapPNode(kit->control(), mem, adr, new_val, expected_val, mo);
      } else {
        load_store = new CompareAndSwapPNode(kit->control(), mem, adr, new_val, expected_val, mo);
      }
    }
  } else {
    switch(access.type()) {
      case T_BYTE: {
        if (is_weak_cas) {
          load_store = new WeakCompareAndSwapBNode(kit->control(), mem, adr, new_val, expected_val, mo);
        } else {
          load_store = new CompareAndSwapBNode(kit->control(), mem, adr, new_val, expected_val, mo);
        }
        break;
      }
      case T_SHORT: {
        if (is_weak_cas) {
          load_store = new WeakCompareAndSwapSNode(kit->control(), mem, adr, new_val, expected_val, mo);
        } else {
          load_store = new CompareAndSwapSNode(kit->control(), mem, adr, new_val, expected_val, mo);
        }
        break;
      }
      case T_INT: {
        if (is_weak_cas) {
          load_store = new WeakCompareAndSwapINode(kit->control(), mem, adr, new_val, expected_val, mo);
        } else {
          load_store = new CompareAndSwapINode(kit->control(), mem, adr, new_val, expected_val, mo);
        }
        break;
      }
      case T_LONG: {
        if (is_weak_cas) {
          load_store = new WeakCompareAndSwapLNode(kit->control(), mem, adr, new_val, expected_val, mo);
        } else {
          load_store = new CompareAndSwapLNode(kit->control(), mem, adr, new_val, expected_val, mo);
        }
        break;
      }
      default:
        ShouldNotReachHere();
    }
  }

  load_store->as_LoadStore()->set_barrier_data(access.barrier_data());
  load_store = kit->gvn().transform(load_store);

  access.set_raw_access(load_store);
  pin_atomic_op(access);

  return load_store;
}

Node* BarrierSetC2::atomic_xchg_at_resolved(C2AtomicParseAccess& access, Node* new_val, const Type* value_type) const {
  GraphKit* kit = access.kit();
  Node* mem = access.memory();
  Node* adr = access.addr().node();
  const TypePtr* adr_type = access.addr().type();
  Node* load_store = nullptr;

  if (access.is_oop()) {
#ifdef _LP64
    if (adr->bottom_type()->is_ptr_to_narrowoop()) {
      Node *newval_enc = kit->gvn().transform(new EncodePNode(new_val, new_val->bottom_type()->make_narrowoop()));
      load_store = kit->gvn().transform(new GetAndSetNNode(kit->control(), mem, adr, newval_enc, adr_type, value_type->make_narrowoop()));
    } else
#endif
    {
      load_store = new GetAndSetPNode(kit->control(), mem, adr, new_val, adr_type, value_type->is_oopptr());
    }
  } else {
    switch (access.type()) {
      case T_BYTE:
        load_store = new GetAndSetBNode(kit->control(), mem, adr, new_val, adr_type);
        break;
      case T_SHORT:
        load_store = new GetAndSetSNode(kit->control(), mem, adr, new_val, adr_type);
        break;
      case T_INT:
        load_store = new GetAndSetINode(kit->control(), mem, adr, new_val, adr_type);
        break;
      case T_LONG:
        load_store = new GetAndSetLNode(kit->control(), mem, adr, new_val, adr_type);
        break;
      default:
        ShouldNotReachHere();
    }
  }

  load_store->as_LoadStore()->set_barrier_data(access.barrier_data());
  load_store = kit->gvn().transform(load_store);

  access.set_raw_access(load_store);
  pin_atomic_op(access);

#ifdef _LP64
  if (access.is_oop() && adr->bottom_type()->is_ptr_to_narrowoop()) {
    return kit->gvn().transform(new DecodeNNode(load_store, load_store->get_ptr_type()));
  }
#endif

  return load_store;
}

Node* BarrierSetC2::atomic_add_at_resolved(C2AtomicParseAccess& access, Node* new_val, const Type* value_type) const {
  Node* load_store = nullptr;
  GraphKit* kit = access.kit();
  Node* adr = access.addr().node();
  const TypePtr* adr_type = access.addr().type();
  Node* mem = access.memory();

  switch(access.type()) {
    case T_BYTE:
      load_store = new GetAndAddBNode(kit->control(), mem, adr, new_val, adr_type);
      break;
    case T_SHORT:
      load_store = new GetAndAddSNode(kit->control(), mem, adr, new_val, adr_type);
      break;
    case T_INT:
      load_store = new GetAndAddINode(kit->control(), mem, adr, new_val, adr_type);
      break;
    case T_LONG:
      load_store = new GetAndAddLNode(kit->control(), mem, adr, new_val, adr_type);
      break;
    default:
      ShouldNotReachHere();
  }

  load_store->as_LoadStore()->set_barrier_data(access.barrier_data());
  load_store = kit->gvn().transform(load_store);

  access.set_raw_access(load_store);
  pin_atomic_op(access);

  return load_store;
}

Node* BarrierSetC2::atomic_cmpxchg_val_at(C2AtomicParseAccess& access, Node* expected_val,
                                          Node* new_val, const Type* value_type) const {
  C2AccessFence fence(access);
  resolve_address(access);
  return atomic_cmpxchg_val_at_resolved(access, expected_val, new_val, value_type);
}

Node* BarrierSetC2::atomic_cmpxchg_bool_at(C2AtomicParseAccess& access, Node* expected_val,
                                           Node* new_val, const Type* value_type) const {
  C2AccessFence fence(access);
  resolve_address(access);
  return atomic_cmpxchg_bool_at_resolved(access, expected_val, new_val, value_type);
}

Node* BarrierSetC2::atomic_xchg_at(C2AtomicParseAccess& access, Node* new_val, const Type* value_type) const {
  C2AccessFence fence(access);
  resolve_address(access);
  return atomic_xchg_at_resolved(access, new_val, value_type);
}

Node* BarrierSetC2::atomic_add_at(C2AtomicParseAccess& access, Node* new_val, const Type* value_type) const {
  C2AccessFence fence(access);
  resolve_address(access);
  return atomic_add_at_resolved(access, new_val, value_type);
}

int BarrierSetC2::arraycopy_payload_base_offset(bool is_array) {
  // Exclude the header but include the array length to copy by 8-byte words.
  // Can't use base_offset_in_bytes(bt) since basic type is unknown.
  int base_off = is_array ? arrayOopDesc::length_offset_in_bytes() :
                            instanceOopDesc::base_offset_in_bytes();
  // base_off:
  // 8  - 32-bit VM or 64-bit VM, compact headers
  // 12 - 64-bit VM, compressed klass
  // 16 - 64-bit VM, normal klass
  if (base_off % BytesPerLong != 0) {
    assert(UseCompressedClassPointers, "");
    assert(!UseCompactObjectHeaders, "");
    if (is_array) {
      // Exclude the length to copy by 8-byte words.
      base_off += sizeof(int);
    } else {
      // Include the klass to copy by 8-byte words.
      base_off = instanceOopDesc::klass_offset_in_bytes();
    }
    assert(base_off % BytesPerLong == 0, "expect 8-byte alignment");
  }
  return base_off;
}

void BarrierSetC2::clone(GraphKit* kit, Node* src_base, Node* dst_base, Node* size, bool is_array) const {
  int base_off = arraycopy_payload_base_offset(is_array);
  Node* payload_size = size;
  Node* offset = kit->MakeConX(base_off);
  payload_size = kit->gvn().transform(new SubXNode(payload_size, offset));
  if (is_array) {
    // Ensure the array payload size is rounded up to the next BytesPerLong
    // multiple when converting to double-words. This is necessary because array
    // size does not include object alignment padding, so it might not be a
    // multiple of BytesPerLong for sub-long element types.
    payload_size = kit->gvn().transform(new AddXNode(payload_size, kit->MakeConX(BytesPerLong - 1)));
  }
  payload_size = kit->gvn().transform(new URShiftXNode(payload_size, kit->intcon(LogBytesPerLong)));
  ArrayCopyNode* ac = ArrayCopyNode::make(kit, false, src_base, offset, dst_base, offset, payload_size, true, false);
  if (is_array) {
    ac->set_clone_array();
  } else {
    ac->set_clone_inst();
  }
  Node* n = kit->gvn().transform(ac);
  if (n == ac) {
    const TypePtr* raw_adr_type = TypeRawPtr::BOTTOM;
    ac->set_adr_type(TypeRawPtr::BOTTOM);
    kit->set_predefined_output_for_runtime_call(ac, ac->in(TypeFunc::Memory), raw_adr_type);
  } else {
    kit->set_all_memory(n);
  }
}

Node* BarrierSetC2::obj_allocate(PhaseMacroExpand* macro, Node* mem, Node* toobig_false, Node* size_in_bytes,
                                 Node*& i_o, Node*& needgc_ctrl,
                                 Node*& fast_oop_ctrl, Node*& fast_oop_rawmem,
                                 intx prefetch_lines) const {
  assert(UseTLAB, "Only for TLAB enabled allocations");

  Node* thread = macro->transform_later(new ThreadLocalNode());
  Node* tlab_top_adr = macro->basic_plus_adr(macro->top()/*not oop*/, thread, in_bytes(JavaThread::tlab_top_offset()));
  Node* tlab_end_adr = macro->basic_plus_adr(macro->top()/*not oop*/, thread, in_bytes(JavaThread::tlab_end_offset()));

  // Load the TLAB end.
  //
  // Note: We set the control input on "tlab_end" and "old_tlab_top" to work around
  // a bug where these values were being moved across a safepoint. These are not
  // oops, so they cannot be included in the oop map, but they can be changed by
  // a GC. The proper way to fix this would be to set the raw memory state when
  // generating a SafepointNode. However, this will require extensive changes to
  // the loop optimization in order to prevent a degradation of the optimization.
  // See comment in memnode.hpp, around line 227 in class LoadPNode.
  Node* tlab_end = macro->make_load(toobig_false, mem, tlab_end_adr, 0, TypeRawPtr::BOTTOM, T_ADDRESS);

  // Load the TLAB top.
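  // Use a raw, unordered load with an explicit control input (see the note
  // above) so that the TLAB top is not scheduled across a safepoint.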
  Node* old_tlab_top = new LoadPNode(toobig_false, mem, tlab_top_adr, TypeRawPtr::BOTTOM, TypeRawPtr::BOTTOM, MemNode::unordered);
  macro->transform_later(old_tlab_top);

  // Add the allocation size to the old TLAB top to get the new TLAB top.
  Node* new_tlab_top = new AddPNode(macro->top(), old_tlab_top, size_in_bytes);
  macro->transform_later(new_tlab_top);

  // Check against the TLAB end.
  Node* tlab_full = new CmpPNode(new_tlab_top, tlab_end);
  macro->transform_later(tlab_full);

  Node* needgc_bol = new BoolNode(tlab_full, BoolTest::ge);
  macro->transform_later(needgc_bol);
  IfNode* needgc_iff = new IfNode(toobig_false, needgc_bol, PROB_UNLIKELY_MAG(4), COUNT_UNKNOWN);
  macro->transform_later(needgc_iff);

  // Plug the failing-heap-space-need-gc test into the slow-path region.
  Node* needgc_true = new IfTrueNode(needgc_iff);
  macro->transform_later(needgc_true);
  needgc_ctrl = needgc_true;

  // No need for a GC.
  Node* needgc_false = new IfFalseNode(needgc_iff);
  macro->transform_later(needgc_false);

  // Fast path:
  i_o = macro->prefetch_allocation(i_o, needgc_false, mem,
                                   old_tlab_top, new_tlab_top, prefetch_lines);

  // Store the modified TLAB top back down.
  Node* store_tlab_top = new StorePNode(needgc_false, mem, tlab_top_adr,
                                        TypeRawPtr::BOTTOM, new_tlab_top, MemNode::unordered);
  macro->transform_later(store_tlab_top);

  fast_oop_ctrl = needgc_false;
  fast_oop_rawmem = store_tlab_top;
  return old_tlab_top;
}

static const TypeFunc* clone_type() {
  // Create input type (domain)
  int argcnt = NOT_LP64(3) LP64_ONLY(4);
  const Type** const domain_fields = TypeTuple::fields(argcnt);
  int argp = TypeFunc::Parms;
  domain_fields[argp++] = TypeInstPtr::NOTNULL;  // src
  domain_fields[argp++] = TypeInstPtr::NOTNULL;  // dst
  domain_fields[argp++] = TypeX_X;               // size lower
  LP64_ONLY(domain_fields[argp++] = Type::HALF); // size upper
  assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
  const TypeTuple* const domain = TypeTuple::make(TypeFunc::Parms + argcnt, domain_fields);

  // Create result type (range)
  const Type** const range_fields = TypeTuple::fields(0);
  const TypeTuple* const range = TypeTuple::make(TypeFunc::Parms + 0, range_fields);

  return TypeFunc::make(domain, range);
}

#define XTOP LP64_ONLY(COMMA phase->top())

void BarrierSetC2::clone_in_runtime(PhaseMacroExpand* phase, ArrayCopyNode* ac,
                                    address clone_addr, const char* clone_name) const {
  Node* const ctrl = ac->in(TypeFunc::Control);
  Node* const mem  = ac->in(TypeFunc::Memory);
  Node* const src  = ac->in(ArrayCopyNode::Src);
  Node* const dst  = ac->in(ArrayCopyNode::Dest);
  Node* const size = ac->in(ArrayCopyNode::Length);

  assert(size->bottom_type()->base() == Type_X,
         "Should be of object size type (int for 32 bits, long for 64 bits)");

  // The native clone we are calling here expects the object size in words.
  // Add the header/offset size to the payload size to get the object size.
  Node* const base_offset = phase->MakeConX(arraycopy_payload_base_offset(ac->is_clone_array()) >> LogBytesPerLong);
  Node* const full_size = phase->transform_later(new AddXNode(size, base_offset));
  // HeapAccess<>::clone expects the size in heap words.
  // On 64-bit platforms this is a no-op.
  // On 32-bit platforms we need to multiply full_size by HeapWordsPerLong (2).
  Node* const full_size_in_heap_words = phase->transform_later(new LShiftXNode(full_size, phase->intcon(LogHeapWordsPerLong)));

  Node* const call = phase->make_leaf_call(ctrl,
                                           mem,
                                           clone_type(),
                                           clone_addr,
                                           clone_name,
                                           TypeRawPtr::BOTTOM,
                                           src, dst, full_size_in_heap_words XTOP);
  phase->transform_later(call);
  phase->igvn().replace_node(ac, call);
}

void BarrierSetC2::clone_at_expansion(PhaseMacroExpand* phase, ArrayCopyNode* ac) const {
  Node* ctrl = ac->in(TypeFunc::Control);
  Node* mem = ac->in(TypeFunc::Memory);
  Node* src = ac->in(ArrayCopyNode::Src);
  Node* src_offset = ac->in(ArrayCopyNode::SrcPos);
  Node* dest = ac->in(ArrayCopyNode::Dest);
  Node* dest_offset = ac->in(ArrayCopyNode::DestPos);
  Node* length = ac->in(ArrayCopyNode::Length);

  Node* payload_src = phase->basic_plus_adr(src, src_offset);
  Node* payload_dst = phase->basic_plus_adr(dest, dest_offset);

  const char* copyfunc_name = "arraycopy";
  address copyfunc_addr = phase->basictype2arraycopy(T_LONG, nullptr, nullptr, true, copyfunc_name, true);

  const TypePtr* raw_adr_type = TypeRawPtr::BOTTOM;
  const TypeFunc* call_type = OptoRuntime::fast_arraycopy_Type();

  Node* call = phase->make_leaf_call(ctrl, mem, call_type, copyfunc_addr, copyfunc_name, raw_adr_type, payload_src, payload_dst, length XTOP);
  phase->transform_later(call);

  phase->igvn().replace_node(ac, call);
}

#undef XTOP

void BarrierSetC2::compute_liveness_at_stubs() const {
  ResourceMark rm;
  Compile* const C = Compile::current();
  Arena* const A = Thread::current()->resource_area();
  PhaseCFG* const cfg = C->cfg();
  PhaseRegAlloc* const regalloc = C->regalloc();
  RegMask* const live = NEW_ARENA_ARRAY(A, RegMask, cfg->number_of_blocks() * sizeof(RegMask));
  BarrierSetAssembler* const bs = BarrierSet::barrier_set()->barrier_set_assembler();
  BarrierSetC2State* bs_state = barrier_set_state();
  Block_List worklist;

  for (uint i = 0; i < cfg->number_of_blocks(); ++i) {
    new ((void*)(live + i)) RegMask();
    worklist.push(cfg->get_block(i));
  }

  while (worklist.size() > 0) {
    const Block* const block = worklist.pop();
    RegMask& old_live = live[block->_pre_order];
    RegMask new_live;

    // Initialize to the union of the successors' live sets.
    for (uint i = 0; i < block->_num_succs; i++) {
      const uint succ_id = block->_succs[i]->_pre_order;
      new_live.OR(live[succ_id]);
    }

    // Walk the block backwards, computing liveness.
    for (int i = block->number_of_nodes() - 1; i >= 0; --i) {
      const Node* const node = block->get_node(i);

      // If this node tracks out-liveness, update it.
      if (!bs_state->needs_livein_data()) {
        RegMask* const regs = bs_state->live(node);
        if (regs != nullptr) {
          regs->OR(new_live);
        }
      }

      // Remove def bits.
      const OptoReg::Name first = bs->refine_register(node, regalloc->get_reg_first(node));
      const OptoReg::Name second = bs->refine_register(node, regalloc->get_reg_second(node));
      if (first != OptoReg::Bad) {
        new_live.Remove(first);
      }
      if (second != OptoReg::Bad) {
        new_live.Remove(second);
      }

      // Add use bits.
      for (uint j = 1; j < node->req(); ++j) {
        const Node* const use = node->in(j);
        const OptoReg::Name first = bs->refine_register(use, regalloc->get_reg_first(use));
        const OptoReg::Name second = bs->refine_register(use, regalloc->get_reg_second(use));
        if (first != OptoReg::Bad) {
          new_live.Insert(first);
        }
        if (second != OptoReg::Bad) {
          new_live.Insert(second);
        }
      }

      // If this node tracks in-liveness, update it.
      if (bs_state->needs_livein_data()) {
        RegMask* const regs = bs_state->live(node);
        if (regs != nullptr) {
          regs->OR(new_live);
        }
      }
    }

    // Now at the block top, see if we have any changes.
    new_live.SUBTRACT(old_live);
    if (new_live.is_NotEmpty()) {
      // Liveness has been refined: update and propagate to predecessor blocks.
      old_live.OR(new_live);
      for (uint i = 1; i < block->num_preds(); ++i) {
        Block* const pred = cfg->get_block_for_node(block->pred(i));
        worklist.push(pred);
      }
    }
  }
}