/*
 * Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "code/vmreg.inline.hpp"
#include "gc/shared/barrierSet.hpp"
#include "gc/shared/tlab_globals.hpp"
#include "gc/shared/c2/barrierSetC2.hpp"
#include "opto/arraycopynode.hpp"
#include "opto/block.hpp"
#include "opto/convertnode.hpp"
#include "opto/graphKit.hpp"
#include "opto/idealKit.hpp"
#include "opto/macro.hpp"
#include "opto/narrowptrnode.hpp"
#include "opto/output.hpp"
#include "opto/regalloc.hpp"
#include "opto/runtime.hpp"
#include "utilities/macros.hpp"
#include CPU_HEADER(gc/shared/barrierSetAssembler)

// By default this is a no-op.
void BarrierSetC2::resolve_address(C2Access& access) const { }

void* C2ParseAccess::barrier_set_state() const {
  return _kit->barrier_set_state();
}

PhaseGVN& C2ParseAccess::gvn() const { return _kit->gvn(); }

bool C2Access::needs_cpu_membar() const {
  bool mismatched = (_decorators & C2_MISMATCHED) != 0;
  bool is_unordered = (_decorators & MO_UNORDERED) != 0;

  bool anonymous = (_decorators & C2_UNSAFE_ACCESS) != 0;
  bool in_heap = (_decorators & IN_HEAP) != 0;
  bool in_native = (_decorators & IN_NATIVE) != 0;
  bool is_mixed = !in_heap && !in_native;

  bool is_write = (_decorators & C2_WRITE_ACCESS) != 0;
  bool is_read = (_decorators & C2_READ_ACCESS) != 0;
  bool is_atomic = is_read && is_write;

  if (is_atomic) {
    // Atomics always need to be wrapped in CPU membars
    return true;
  }

  if (anonymous) {
    // We will need memory barriers unless we can determine a unique
    // alias category for this reference. (Note: If for some reason
    // the barriers get omitted and the unsafe reference begins to "pollute"
    // the alias analysis of the rest of the graph, either Compile::can_alias
    // or Compile::must_alias will throw a diagnostic assert.)
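    // Accesses of unknown location (neither IN_HEAP nor IN_NATIVE), ordered
    // accesses, and mismatched accesses that are not known to be on-heap
    // array elements all fall into that category.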
    if (is_mixed || !is_unordered || (mismatched && !_addr.type()->isa_aryptr())) {
      return true;
    }
  } else {
    assert(!is_mixed, "not unsafe");
  }

  return false;
}

static BarrierSetC2State* barrier_set_state() {
  return reinterpret_cast<BarrierSetC2State*>(Compile::current()->barrier_set_state());
}

RegMask& BarrierStubC2::live() const {
  return *barrier_set_state()->live(_node);
}

BarrierStubC2::BarrierStubC2(const MachNode* node)
  : _node(node),
    _entry(),
    _continuation(),
    _preserve(live()) {}

Label* BarrierStubC2::entry() {
  // The _entry will never be bound when in_scratch_emit_size() is true.
  // However, we still need to return a label that is not bound now, but
  // will eventually be bound. Any eventually bound label will do, as it
  // will only act as a placeholder, so we return the _continuation label.
  return Compile::current()->output()->in_scratch_emit_size() ? &_continuation : &_entry;
}

Label* BarrierStubC2::continuation() {
  return &_continuation;
}

uint8_t BarrierStubC2::barrier_data() const {
  return _node->barrier_data();
}

void BarrierStubC2::preserve(Register r) {
  const VMReg vm_reg = r->as_VMReg();
  assert(vm_reg->is_Register(), "r must be a general-purpose register");
  _preserve.Insert(OptoReg::as_OptoReg(vm_reg));
}

void BarrierStubC2::dont_preserve(Register r) {
  VMReg vm_reg = r->as_VMReg();
  assert(vm_reg->is_Register(), "r must be a general-purpose register");
  // Subtract the given register and all its sub-registers (e.g. {R11, R11_H}
  // for r11 in aarch64).
  do {
    _preserve.Remove(OptoReg::as_OptoReg(vm_reg));
    vm_reg = vm_reg->next();
  } while (vm_reg->is_Register() && !vm_reg->is_concrete());
}

const RegMask& BarrierStubC2::preserve_set() const {
  return _preserve;
}

Node* BarrierSetC2::store_at_resolved(C2Access& access, C2AccessValue& val) const {
  DecoratorSet decorators = access.decorators();

  bool mismatched = (decorators & C2_MISMATCHED) != 0;
  bool unaligned = (decorators & C2_UNALIGNED) != 0;
  bool unsafe = (decorators & C2_UNSAFE_ACCESS) != 0;
  bool requires_atomic_access = (decorators & MO_UNORDERED) == 0;

  MemNode::MemOrd mo = access.mem_node_mo();

  Node* store;
  BasicType bt = access.type();
  if (access.is_parse_access()) {
    C2ParseAccess& parse_access = static_cast<C2ParseAccess&>(access);

    GraphKit* kit = parse_access.kit();
    if (bt == T_DOUBLE) {
      Node* new_val = kit->dprecision_rounding(val.node());
      val.set_node(new_val);
    }

    store = kit->store_to_memory(kit->control(), access.addr().node(), val.node(), bt,
                                 mo, requires_atomic_access, unaligned, mismatched,
                                 unsafe, access.barrier_data());
  } else {
    assert(access.is_opt_access(), "either parse or opt access");
    C2OptAccess& opt_access = static_cast<C2OptAccess&>(access);
    Node* ctl = opt_access.ctl();
    MergeMemNode* mm = opt_access.mem();
    PhaseGVN& gvn = opt_access.gvn();
    const TypePtr* adr_type = access.addr().type();
    int alias = gvn.C->get_alias_index(adr_type);
    Node* mem = mm->memory_at(alias);

    StoreNode* st = StoreNode::make(gvn, ctl, mem, access.addr().node(), adr_type, val.node(), bt, mo, requires_atomic_access);
    if (unaligned) {
      st->set_unaligned_access();
    }
    if (mismatched) {
      st->set_mismatched_access();
    }
    st->set_barrier_data(access.barrier_data());
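    // Transform the store; it is only wired into its memory slice below if
    // GVN did not fold it into an existing node.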
    store = gvn.transform(st);
    if (store == st) {
      mm->set_memory_at(alias, st);
    }
  }
  access.set_raw_access(store);

  return store;
}

Node* BarrierSetC2::load_at_resolved(C2Access& access, const Type* val_type) const {
  DecoratorSet decorators = access.decorators();

  Node* adr = access.addr().node();
  const TypePtr* adr_type = access.addr().type();

  bool mismatched = (decorators & C2_MISMATCHED) != 0;
  bool requires_atomic_access = (decorators & MO_UNORDERED) == 0;
  bool unaligned = (decorators & C2_UNALIGNED) != 0;
  bool control_dependent = (decorators & C2_CONTROL_DEPENDENT_LOAD) != 0;
  bool unknown_control = (decorators & C2_UNKNOWN_CONTROL_LOAD) != 0;
  bool unsafe = (decorators & C2_UNSAFE_ACCESS) != 0;
  bool immutable = (decorators & C2_IMMUTABLE_MEMORY) != 0;

  MemNode::MemOrd mo = access.mem_node_mo();
  LoadNode::ControlDependency dep = unknown_control ? LoadNode::UnknownControl : LoadNode::DependsOnlyOnTest;

  Node* load;
  if (access.is_parse_access()) {
    C2ParseAccess& parse_access = static_cast<C2ParseAccess&>(access);
    GraphKit* kit = parse_access.kit();
    Node* control = control_dependent ? kit->control() : nullptr;

    if (immutable) {
      Compile* C = Compile::current();
      Node* mem = kit->immutable_memory();
      load = LoadNode::make(kit->gvn(), control, mem, adr,
                            adr_type, val_type, access.type(), mo, dep, requires_atomic_access,
                            unaligned, mismatched, unsafe, access.barrier_data());
      load = kit->gvn().transform(load);
    } else {
      load = kit->make_load(control, adr, val_type, access.type(), mo,
                            dep, requires_atomic_access, unaligned, mismatched, unsafe,
                            access.barrier_data());
    }
  } else {
    assert(access.is_opt_access(), "either parse or opt access");
    C2OptAccess& opt_access = static_cast<C2OptAccess&>(access);
    Node* control = control_dependent ? opt_access.ctl() : nullptr;
    MergeMemNode* mm = opt_access.mem();
    PhaseGVN& gvn = opt_access.gvn();
    Node* mem = mm->memory_at(gvn.C->get_alias_index(adr_type));
    load = LoadNode::make(gvn, control, mem, adr, adr_type, val_type, access.type(), mo, dep,
                          requires_atomic_access, unaligned, mismatched, unsafe, access.barrier_data());
    load = gvn.transform(load);
  }
  access.set_raw_access(load);

  return load;
}

class C2AccessFence: public StackObj {
  C2Access& _access;
  Node* _leading_membar;

public:
  C2AccessFence(C2Access& access) :
    _access(access), _leading_membar(nullptr) {
    GraphKit* kit = nullptr;
    if (access.is_parse_access()) {
      C2ParseAccess& parse_access = static_cast<C2ParseAccess&>(access);
      kit = parse_access.kit();
    }
    DecoratorSet decorators = access.decorators();

    bool is_write = (decorators & C2_WRITE_ACCESS) != 0;
    bool is_read = (decorators & C2_READ_ACCESS) != 0;
    bool is_atomic = is_read && is_write;

    bool is_volatile = (decorators & MO_SEQ_CST) != 0;
    bool is_release = (decorators & MO_RELEASE) != 0;

    if (is_atomic) {
      assert(kit != nullptr, "unsupported at optimization time");
      // Memory-model-wise, a LoadStore acts like a little synchronized
      // block, so needs barriers on each side. These don't translate
      // into actual barriers on most machines, but we still need the rest
      // of the compiler to respect ordering.
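      // A leading MemBarRelease keeps earlier accesses from floating below
      // the LoadStore; for SEQ_CST on CPUs that need IRIW support, a full
      // MemBarVolatile is used instead.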
      if (is_release) {
        _leading_membar = kit->insert_mem_bar(Op_MemBarRelease);
      } else if (is_volatile) {
        if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
          _leading_membar = kit->insert_mem_bar(Op_MemBarVolatile);
        } else {
          _leading_membar = kit->insert_mem_bar(Op_MemBarRelease);
        }
      }
    } else if (is_write) {
      // If the reference is volatile, prevent following memory ops from
      // floating down past the volatile write. Also prevents commoning
      // another volatile read.
      if (is_volatile || is_release) {
        assert(kit != nullptr, "unsupported at optimization time");
        _leading_membar = kit->insert_mem_bar(Op_MemBarRelease);
      }
    } else {
      // Memory barrier to prevent normal and 'unsafe' accesses from
      // bypassing each other. Happens after null checks, so the
      // exception paths do not take memory state from the memory barrier,
      // so there are no problems making a strong assert about mixing users
      // of safe & unsafe memory.
      if (is_volatile && support_IRIW_for_not_multiple_copy_atomic_cpu) {
        assert(kit != nullptr, "unsupported at optimization time");
        _leading_membar = kit->insert_mem_bar(Op_MemBarVolatile);
      }
    }

    if (access.needs_cpu_membar()) {
      assert(kit != nullptr, "unsupported at optimization time");
      kit->insert_mem_bar(Op_MemBarCPUOrder);
    }

    if (is_atomic) {
      // 4984716: MemBars must be inserted before this
      // memory node in order to avoid a false
      // dependency which will confuse the scheduler.
      access.set_memory();
    }
  }

  ~C2AccessFence() {
    GraphKit* kit = nullptr;
    if (_access.is_parse_access()) {
      C2ParseAccess& parse_access = static_cast<C2ParseAccess&>(_access);
      kit = parse_access.kit();
    }
    DecoratorSet decorators = _access.decorators();

    bool is_write = (decorators & C2_WRITE_ACCESS) != 0;
    bool is_read = (decorators & C2_READ_ACCESS) != 0;
    bool is_atomic = is_read && is_write;

    bool is_volatile = (decorators & MO_SEQ_CST) != 0;
    bool is_acquire = (decorators & MO_ACQUIRE) != 0;

    // If the reference is volatile, prevent following volatile ops from
    // floating up before the volatile access.
    if (_access.needs_cpu_membar()) {
      kit->insert_mem_bar(Op_MemBarCPUOrder);
    }

    if (is_atomic) {
      assert(kit != nullptr, "unsupported at optimization time");
      if (is_acquire || is_volatile) {
        Node* n = _access.raw_access();
        Node* mb = kit->insert_mem_bar(Op_MemBarAcquire, n);
        if (_leading_membar != nullptr) {
          MemBarNode::set_load_store_pair(_leading_membar->as_MemBar(), mb->as_MemBar());
        }
      }
    } else if (is_write) {
      // If not multiple copy atomic, we do the MemBarVolatile before the load.
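      // Here that means the trailing barrier for a volatile store is a full
      // MemBarVolatile, paired with the leading MemBarRelease emitted in the
      // constructor.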
      if (is_volatile && !support_IRIW_for_not_multiple_copy_atomic_cpu) {
        assert(kit != nullptr, "unsupported at optimization time");
        Node* n = _access.raw_access();
        Node* mb = kit->insert_mem_bar(Op_MemBarVolatile, n); // Use fat membar
        if (_leading_membar != nullptr) {
          MemBarNode::set_store_pair(_leading_membar->as_MemBar(), mb->as_MemBar());
        }
      }
    } else {
      if (is_volatile || is_acquire) {
        assert(kit != nullptr, "unsupported at optimization time");
        Node* n = _access.raw_access();
        assert(_leading_membar == nullptr || support_IRIW_for_not_multiple_copy_atomic_cpu, "no leading membar expected");
        Node* mb = kit->insert_mem_bar(Op_MemBarAcquire, n);
        mb->as_MemBar()->set_trailing_load();
      }
    }
  }
};

Node* BarrierSetC2::store_at(C2Access& access, C2AccessValue& val) const {
  C2AccessFence fence(access);
  resolve_address(access);
  return store_at_resolved(access, val);
}

Node* BarrierSetC2::load_at(C2Access& access, const Type* val_type) const {
  C2AccessFence fence(access);
  resolve_address(access);
  return load_at_resolved(access, val_type);
}

MemNode::MemOrd C2Access::mem_node_mo() const {
  bool is_write = (_decorators & C2_WRITE_ACCESS) != 0;
  bool is_read = (_decorators & C2_READ_ACCESS) != 0;
  if ((_decorators & MO_SEQ_CST) != 0) {
    if (is_write && is_read) {
      // For atomic operations
      return MemNode::seqcst;
    } else if (is_write) {
      return MemNode::release;
    } else {
      assert(is_read, "what else?");
      return MemNode::acquire;
    }
  } else if ((_decorators & MO_RELEASE) != 0) {
    return MemNode::release;
  } else if ((_decorators & MO_ACQUIRE) != 0) {
    return MemNode::acquire;
  } else if (is_write) {
    // Volatile fields need releasing stores.
    // Non-volatile fields also need releasing stores if they hold an
    // object reference, because the object reference might point to
    // a freshly created object.
    // Conservatively release stores of object references.
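    // (release_if_reference returns MemNode::release for reference-like
    // types and MemNode::unordered otherwise.)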
    return StoreNode::release_if_reference(_type);
  } else {
    return MemNode::unordered;
  }
}

void C2Access::fixup_decorators() {
  bool default_mo = (_decorators & MO_DECORATOR_MASK) == 0;
  bool is_unordered = (_decorators & MO_UNORDERED) != 0 || default_mo;
  bool anonymous = (_decorators & C2_UNSAFE_ACCESS) != 0;

  bool is_read = (_decorators & C2_READ_ACCESS) != 0;
  bool is_write = (_decorators & C2_WRITE_ACCESS) != 0;

  if (AlwaysAtomicAccesses && is_unordered) {
    _decorators &= ~MO_DECORATOR_MASK; // clear the MO bits
    _decorators |= MO_RELAXED; // Force the MO_RELAXED decorator with AlwaysAtomicAccesses
  }

  _decorators = AccessInternal::decorator_fixup(_decorators, _type);

  if (is_read && !is_write && anonymous) {
    // To be valid, unsafe loads may depend on conditions other than
    // the one that guards them: pin the Load node
    _decorators |= C2_CONTROL_DEPENDENT_LOAD;
    _decorators |= C2_UNKNOWN_CONTROL_LOAD;
    const TypePtr* adr_type = _addr.type();
    Node* adr = _addr.node();
    if (!needs_cpu_membar() && adr_type->isa_instptr()) {
      assert(adr_type->meet(TypePtr::NULL_PTR) != adr_type->remove_speculative(), "should be not null");
      intptr_t offset = Type::OffsetBot;
      AddPNode::Ideal_base_and_offset(adr, &gvn(), offset);
      if (offset >= 0) {
        int s = Klass::layout_helper_size_in_bytes(adr_type->isa_instptr()->instance_klass()->layout_helper());
        if (offset < s) {
          // Guaranteed to be a valid access, no need to pin it
          _decorators ^= C2_CONTROL_DEPENDENT_LOAD;
          _decorators ^= C2_UNKNOWN_CONTROL_LOAD;
        }
      }
    }
  }
}

//--------------------------- atomic operations---------------------------------

void BarrierSetC2::pin_atomic_op(C2AtomicParseAccess& access) const {
  // SCMemProjNodes represent the memory state of a LoadStore. Their
  // main role is to prevent LoadStore nodes from being optimized away
  // when their results aren't used.
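  // Hooking the SCMemProj into the access's memory slice also orders the
  // LoadStore with later memory operations on that slice.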
  assert(access.is_parse_access(), "entry not supported at optimization time");
  C2ParseAccess& parse_access = static_cast<C2ParseAccess&>(access);
  GraphKit* kit = parse_access.kit();
  Node* load_store = access.raw_access();
  assert(load_store != nullptr, "must pin atomic op");
  Node* proj = kit->gvn().transform(new SCMemProjNode(load_store));
  kit->set_memory(proj, access.alias_idx());
}

void C2AtomicParseAccess::set_memory() {
  Node *mem = _kit->memory(_alias_idx);
  _memory = mem;
}

Node* BarrierSetC2::atomic_cmpxchg_val_at_resolved(C2AtomicParseAccess& access, Node* expected_val,
                                                   Node* new_val, const Type* value_type) const {
  GraphKit* kit = access.kit();
  MemNode::MemOrd mo = access.mem_node_mo();
  Node* mem = access.memory();

  Node* adr = access.addr().node();
  const TypePtr* adr_type = access.addr().type();

  Node* load_store = nullptr;

  if (access.is_oop()) {
#ifdef _LP64
    if (adr->bottom_type()->is_ptr_to_narrowoop()) {
      Node *newval_enc = kit->gvn().transform(new EncodePNode(new_val, new_val->bottom_type()->make_narrowoop()));
      Node *oldval_enc = kit->gvn().transform(new EncodePNode(expected_val, expected_val->bottom_type()->make_narrowoop()));
      load_store = new CompareAndExchangeNNode(kit->control(), mem, adr, newval_enc, oldval_enc, adr_type, value_type->make_narrowoop(), mo);
    } else
#endif
    {
      load_store = new CompareAndExchangePNode(kit->control(), mem, adr, new_val, expected_val, adr_type, value_type->is_oopptr(), mo);
    }
  } else {
    switch (access.type()) {
      case T_BYTE: {
        load_store = new CompareAndExchangeBNode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo);
        break;
      }
      case T_SHORT: {
        load_store = new CompareAndExchangeSNode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo);
        break;
      }
      case T_INT: {
        load_store = new CompareAndExchangeINode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo);
        break;
      }
      case T_LONG: {
        load_store = new CompareAndExchangeLNode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo);
        break;
      }
      default:
        ShouldNotReachHere();
    }
  }

  load_store->as_LoadStore()->set_barrier_data(access.barrier_data());
  load_store = kit->gvn().transform(load_store);

  access.set_raw_access(load_store);
  pin_atomic_op(access);

#ifdef _LP64
  if (access.is_oop() && adr->bottom_type()->is_ptr_to_narrowoop()) {
    return kit->gvn().transform(new DecodeNNode(load_store, load_store->get_ptr_type()));
  }
#endif

  return load_store;
}

Node* BarrierSetC2::atomic_cmpxchg_bool_at_resolved(C2AtomicParseAccess& access, Node* expected_val,
                                                    Node* new_val, const Type* value_type) const {
  GraphKit* kit = access.kit();
  DecoratorSet decorators = access.decorators();
  MemNode::MemOrd mo = access.mem_node_mo();
  Node* mem = access.memory();
  bool is_weak_cas = (decorators & C2_WEAK_CMPXCHG) != 0;
  Node* load_store = nullptr;
  Node* adr = access.addr().node();

  if (access.is_oop()) {
#ifdef _LP64
    if (adr->bottom_type()->is_ptr_to_narrowoop()) {
      Node *newval_enc = kit->gvn().transform(new EncodePNode(new_val, new_val->bottom_type()->make_narrowoop()));
      Node *oldval_enc = kit->gvn().transform(new EncodePNode(expected_val, expected_val->bottom_type()->make_narrowoop()));
      if (is_weak_cas) {
        load_store = new WeakCompareAndSwapNNode(kit->control(), mem, adr, newval_enc, oldval_enc, mo);
      } else {
        load_store = new CompareAndSwapNNode(kit->control(), mem, adr, newval_enc, oldval_enc, mo);
      }
    } else
#endif
    {
      if (is_weak_cas) {
        load_store = new WeakCompareAndSwapPNode(kit->control(), mem, adr, new_val, expected_val, mo);
      } else {
        load_store = new CompareAndSwapPNode(kit->control(), mem, adr, new_val, expected_val, mo);
      }
    }
  } else {
    switch(access.type()) {
      case T_BYTE: {
        if (is_weak_cas) {
          load_store = new WeakCompareAndSwapBNode(kit->control(), mem, adr, new_val, expected_val, mo);
        } else {
          load_store = new CompareAndSwapBNode(kit->control(), mem, adr, new_val, expected_val, mo);
        }
        break;
      }
      case T_SHORT: {
        if (is_weak_cas) {
          load_store = new WeakCompareAndSwapSNode(kit->control(), mem, adr, new_val, expected_val, mo);
        } else {
          load_store = new CompareAndSwapSNode(kit->control(), mem, adr, new_val, expected_val, mo);
        }
        break;
      }
      case T_INT: {
        if (is_weak_cas) {
          load_store = new WeakCompareAndSwapINode(kit->control(), mem, adr, new_val, expected_val, mo);
        } else {
          load_store = new CompareAndSwapINode(kit->control(), mem, adr, new_val, expected_val, mo);
        }
        break;
      }
      case T_LONG: {
        if (is_weak_cas) {
          load_store = new WeakCompareAndSwapLNode(kit->control(), mem, adr, new_val, expected_val, mo);
        } else {
          load_store = new CompareAndSwapLNode(kit->control(), mem, adr, new_val, expected_val, mo);
        }
        break;
      }
      default:
        ShouldNotReachHere();
    }
  }

  load_store->as_LoadStore()->set_barrier_data(access.barrier_data());
  load_store = kit->gvn().transform(load_store);

  access.set_raw_access(load_store);
  pin_atomic_op(access);

  return load_store;
}

Node* BarrierSetC2::atomic_xchg_at_resolved(C2AtomicParseAccess& access, Node* new_val, const Type* value_type) const {
  GraphKit* kit = access.kit();
  Node* mem = access.memory();
  Node* adr = access.addr().node();
  const TypePtr* adr_type = access.addr().type();
  Node* load_store = nullptr;

  if (access.is_oop()) {
#ifdef _LP64
    if (adr->bottom_type()->is_ptr_to_narrowoop()) {
      Node *newval_enc = kit->gvn().transform(new EncodePNode(new_val, new_val->bottom_type()->make_narrowoop()));
      load_store = kit->gvn().transform(new GetAndSetNNode(kit->control(), mem, adr, newval_enc, adr_type, value_type->make_narrowoop()));
    } else
#endif
    {
      load_store = new GetAndSetPNode(kit->control(), mem, adr, new_val, adr_type, value_type->is_oopptr());
    }
  } else {
    switch (access.type()) {
      case T_BYTE:
        load_store = new GetAndSetBNode(kit->control(), mem, adr, new_val, adr_type);
        break;
      case T_SHORT:
        load_store = new GetAndSetSNode(kit->control(), mem, adr, new_val, adr_type);
        break;
      case T_INT:
        load_store = new GetAndSetINode(kit->control(), mem, adr, new_val, adr_type);
        break;
      case T_LONG:
        load_store = new GetAndSetLNode(kit->control(), mem, adr, new_val, adr_type);
        break;
      default:
        ShouldNotReachHere();
    }
  }

  load_store->as_LoadStore()->set_barrier_data(access.barrier_data());
  load_store = kit->gvn().transform(load_store);

  access.set_raw_access(load_store);
  pin_atomic_op(access);

#ifdef _LP64
  if (access.is_oop() && adr->bottom_type()->is_ptr_to_narrowoop()) {
    return kit->gvn().transform(new DecodeNNode(load_store, load_store->get_ptr_type()));
  }
#endif

  return load_store;
}

Node* BarrierSetC2::atomic_add_at_resolved(C2AtomicParseAccess& access, Node* new_val, const Type* value_type) const {
  Node* load_store = nullptr;
  GraphKit* kit = access.kit();
  Node* adr = access.addr().node();
  const TypePtr* adr_type = access.addr().type();
  Node* mem = access.memory();

  switch(access.type()) {
    case T_BYTE:
      load_store = new GetAndAddBNode(kit->control(), mem, adr, new_val, adr_type);
      break;
    case T_SHORT:
      load_store = new GetAndAddSNode(kit->control(), mem, adr, new_val, adr_type);
      break;
    case T_INT:
      load_store = new GetAndAddINode(kit->control(), mem, adr, new_val, adr_type);
      break;
    case T_LONG:
      load_store = new GetAndAddLNode(kit->control(), mem, adr, new_val, adr_type);
      break;
    default:
      ShouldNotReachHere();
  }

  load_store->as_LoadStore()->set_barrier_data(access.barrier_data());
  load_store = kit->gvn().transform(load_store);

  access.set_raw_access(load_store);
  pin_atomic_op(access);

  return load_store;
}

Node* BarrierSetC2::atomic_cmpxchg_val_at(C2AtomicParseAccess& access, Node* expected_val,
                                          Node* new_val, const Type* value_type) const {
  C2AccessFence fence(access);
  resolve_address(access);
  return atomic_cmpxchg_val_at_resolved(access, expected_val, new_val, value_type);
}

Node* BarrierSetC2::atomic_cmpxchg_bool_at(C2AtomicParseAccess& access, Node* expected_val,
                                           Node* new_val, const Type* value_type) const {
  C2AccessFence fence(access);
  resolve_address(access);
  return atomic_cmpxchg_bool_at_resolved(access, expected_val, new_val, value_type);
}

Node* BarrierSetC2::atomic_xchg_at(C2AtomicParseAccess& access, Node* new_val, const Type* value_type) const {
  C2AccessFence fence(access);
  resolve_address(access);
  return atomic_xchg_at_resolved(access, new_val, value_type);
}

Node* BarrierSetC2::atomic_add_at(C2AtomicParseAccess& access, Node* new_val, const Type* value_type) const {
  C2AccessFence fence(access);
  resolve_address(access);
  return atomic_add_at_resolved(access, new_val, value_type);
}

int BarrierSetC2::arraycopy_payload_base_offset(bool is_array) {
  // Exclude the header but include the array length to copy by 8-byte words.
  // Can't use base_offset_in_bytes(bt) since basic type is unknown.
  int base_off = is_array ? arrayOopDesc::length_offset_in_bytes() :
                            instanceOopDesc::base_offset_in_bytes();
  // base_off:
  // 8  - 32-bit VM or 64-bit VM, compact headers
  // 12 - 64-bit VM, compressed klass
  // 16 - 64-bit VM, normal klass
  if (base_off % BytesPerLong != 0) {
    assert(UseCompressedClassPointers, "");
    assert(!UseCompactObjectHeaders, "");
    if (is_array) {
      // Exclude the length to copy by 8-byte words.
      base_off += sizeof(int);
    } else {
      // Include the klass to copy by 8-byte words.
      base_off = instanceOopDesc::klass_offset_in_bytes();
    }
    assert(base_off % BytesPerLong == 0, "expect 8-byte alignment");
  }
  return base_off;
}

void BarrierSetC2::clone(GraphKit* kit, Node* src_base, Node* dst_base, Node* size, bool is_array) const {
  int base_off = arraycopy_payload_base_offset(is_array);
  Node* payload_size = size;
  Node* offset = kit->MakeConX(base_off);
  payload_size = kit->gvn().transform(new SubXNode(payload_size, offset));
  if (is_array) {
    // Ensure the array payload size is rounded up to the next BytesPerLong
    // multiple when converting to double-words. This is necessary because array
    // size does not include object alignment padding, so it might not be a
    // multiple of BytesPerLong for sub-long element types.
    payload_size = kit->gvn().transform(new AddXNode(payload_size, kit->MakeConX(BytesPerLong - 1)));
  }
  payload_size = kit->gvn().transform(new URShiftXNode(payload_size, kit->intcon(LogBytesPerLong)));
  ArrayCopyNode* ac = ArrayCopyNode::make(kit, false, src_base, offset, dst_base, offset, payload_size, true, false);
  if (is_array) {
    ac->set_clone_array();
  } else {
    ac->set_clone_inst();
  }
  Node* n = kit->gvn().transform(ac);
  if (n == ac) {
    const TypePtr* raw_adr_type = TypeRawPtr::BOTTOM;
    ac->set_adr_type(TypeRawPtr::BOTTOM);
    kit->set_predefined_output_for_runtime_call(ac, ac->in(TypeFunc::Memory), raw_adr_type);
  } else {
    kit->set_all_memory(n);
  }
}

Node* BarrierSetC2::obj_allocate(PhaseMacroExpand* macro, Node* mem, Node* toobig_false, Node* size_in_bytes,
                                 Node*& i_o, Node*& needgc_ctrl,
                                 Node*& fast_oop_ctrl, Node*& fast_oop_rawmem,
                                 intx prefetch_lines) const {
  assert(UseTLAB, "Only for TLAB enabled allocations");

  Node* thread = macro->transform_later(new ThreadLocalNode());
  Node* tlab_top_adr = macro->basic_plus_adr(macro->top()/*not oop*/, thread, in_bytes(JavaThread::tlab_top_offset()));
  Node* tlab_end_adr = macro->basic_plus_adr(macro->top()/*not oop*/, thread, in_bytes(JavaThread::tlab_end_offset()));

  // Load the TLAB end.
  //
  // Note: We set the control input on "tlab_end" and "old_tlab_top" to work around
  //       a bug where these values were being moved across
  //       a safepoint. These are not oops, so they cannot be included in the oop
  //       map, but they can be changed by a GC. The proper way to fix this would
  //       be to set the raw memory state when generating a SafepointNode. However,
  //       this will require extensive changes to the loop optimization in order to
  //       prevent a degradation of the optimization.
  //       See comment in memnode.hpp, around line 227 in class LoadPNode.
  Node* tlab_end = macro->make_load(toobig_false, mem, tlab_end_adr, 0, TypeRawPtr::BOTTOM, T_ADDRESS);

  // Load the TLAB top.
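  // As with tlab_end above, this load is control-pinned on toobig_false so
  // that it cannot be moved across a safepoint.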
  Node* old_tlab_top = new LoadPNode(toobig_false, mem, tlab_top_adr, TypeRawPtr::BOTTOM, TypeRawPtr::BOTTOM, MemNode::unordered);
  macro->transform_later(old_tlab_top);

  // Add the allocation size to the old TLAB top to get the new TLAB top.
  Node* new_tlab_top = new AddPNode(macro->top(), old_tlab_top, size_in_bytes);
  macro->transform_later(new_tlab_top);

  // Check against the TLAB end.
  Node* tlab_full = new CmpPNode(new_tlab_top, tlab_end);
  macro->transform_later(tlab_full);

  Node* needgc_bol = new BoolNode(tlab_full, BoolTest::ge);
  macro->transform_later(needgc_bol);
  IfNode* needgc_iff = new IfNode(toobig_false, needgc_bol, PROB_UNLIKELY_MAG(4), COUNT_UNKNOWN);
  macro->transform_later(needgc_iff);

  // Plug the failing-heap-space-need-gc test into the slow-path region.
  Node* needgc_true = new IfTrueNode(needgc_iff);
  macro->transform_later(needgc_true);
  needgc_ctrl = needgc_true;

  // No need for a GC.
  Node* needgc_false = new IfFalseNode(needgc_iff);
  macro->transform_later(needgc_false);

  // Fast path:
  i_o = macro->prefetch_allocation(i_o, needgc_false, mem,
                                   old_tlab_top, new_tlab_top, prefetch_lines);

  // Store the modified TLAB top back down.
  Node* store_tlab_top = new StorePNode(needgc_false, mem, tlab_top_adr,
                                        TypeRawPtr::BOTTOM, new_tlab_top, MemNode::unordered);
  macro->transform_later(store_tlab_top);

  fast_oop_ctrl = needgc_false;
  fast_oop_rawmem = store_tlab_top;
  return old_tlab_top;
}

static const TypeFunc* clone_type() {
  // Create input type (domain)
  int argcnt = NOT_LP64(3) LP64_ONLY(4);
  const Type** const domain_fields = TypeTuple::fields(argcnt);
  int argp = TypeFunc::Parms;
  domain_fields[argp++] = TypeInstPtr::NOTNULL;  // src
  domain_fields[argp++] = TypeInstPtr::NOTNULL;  // dst
  domain_fields[argp++] = TypeX_X;               // size lower
  LP64_ONLY(domain_fields[argp++] = Type::HALF); // size upper
  assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
  const TypeTuple* const domain = TypeTuple::make(TypeFunc::Parms + argcnt, domain_fields);

  // Create result type (range)
  const Type** const range_fields = TypeTuple::fields(0);
  const TypeTuple* const range = TypeTuple::make(TypeFunc::Parms + 0, range_fields);

  return TypeFunc::make(domain, range);
}

#define XTOP LP64_ONLY(COMMA phase->top())

void BarrierSetC2::clone_in_runtime(PhaseMacroExpand* phase, ArrayCopyNode* ac,
                                    address clone_addr, const char* clone_name) const {
  Node* const ctrl = ac->in(TypeFunc::Control);
  Node* const mem  = ac->in(TypeFunc::Memory);
  Node* const src  = ac->in(ArrayCopyNode::Src);
  Node* const dst  = ac->in(ArrayCopyNode::Dest);
  Node* const size = ac->in(ArrayCopyNode::Length);

  assert(size->bottom_type()->base() == Type_X,
         "Should be of object size type (int for 32 bits, long for 64 bits)");

  // The native clone we are calling here expects the object size in words.
  // Add the header/offset size to the payload size to get the object size.
  Node* const base_offset = phase->MakeConX(arraycopy_payload_base_offset(ac->is_clone_array()) >> LogBytesPerLong);
  Node* const full_size = phase->transform_later(new AddXNode(size, base_offset));
  // HeapAccess<>::clone expects the size in heap words.
  // For 64-bit platforms, this is a no-op.
  // For 32-bit platforms, we need to multiply full_size by HeapWordsPerLong (2).
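  // LogHeapWordsPerLong is 0 on 64-bit platforms and 1 on 32-bit platforms,
  // so the shift below performs exactly that conversion.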
  Node* const full_size_in_heap_words = phase->transform_later(new LShiftXNode(full_size, phase->intcon(LogHeapWordsPerLong)));

  Node* const call = phase->make_leaf_call(ctrl,
                                           mem,
                                           clone_type(),
                                           clone_addr,
                                           clone_name,
                                           TypeRawPtr::BOTTOM,
                                           src, dst, full_size_in_heap_words XTOP);
  phase->transform_later(call);
  phase->igvn().replace_node(ac, call);
}

void BarrierSetC2::clone_at_expansion(PhaseMacroExpand* phase, ArrayCopyNode* ac) const {
  Node* ctrl = ac->in(TypeFunc::Control);
  Node* mem = ac->in(TypeFunc::Memory);
  Node* src = ac->in(ArrayCopyNode::Src);
  Node* src_offset = ac->in(ArrayCopyNode::SrcPos);
  Node* dest = ac->in(ArrayCopyNode::Dest);
  Node* dest_offset = ac->in(ArrayCopyNode::DestPos);
  Node* length = ac->in(ArrayCopyNode::Length);

  Node* payload_src = phase->basic_plus_adr(src, src_offset);
  Node* payload_dst = phase->basic_plus_adr(dest, dest_offset);

  const char* copyfunc_name = "arraycopy";
  address copyfunc_addr = phase->basictype2arraycopy(T_LONG, nullptr, nullptr, true, copyfunc_name, true);

  const TypePtr* raw_adr_type = TypeRawPtr::BOTTOM;
  const TypeFunc* call_type = OptoRuntime::fast_arraycopy_Type();

  Node* call = phase->make_leaf_call(ctrl, mem, call_type, copyfunc_addr, copyfunc_name, raw_adr_type, payload_src, payload_dst, length XTOP);
  phase->transform_later(call);

  phase->igvn().replace_node(ac, call);
}

#undef XTOP

void BarrierSetC2::compute_liveness_at_stubs() const {
  ResourceMark rm;
  Compile* const C = Compile::current();
  Arena* const A = Thread::current()->resource_area();
  PhaseCFG* const cfg = C->cfg();
  PhaseRegAlloc* const regalloc = C->regalloc();
  RegMask* const live = NEW_ARENA_ARRAY(A, RegMask, cfg->number_of_blocks() * sizeof(RegMask));
  BarrierSetAssembler* const bs = BarrierSet::barrier_set()->barrier_set_assembler();
  BarrierSetC2State* bs_state = barrier_set_state();
  Block_List worklist;

  for (uint i = 0; i < cfg->number_of_blocks(); ++i) {
    new ((void*)(live + i)) RegMask();
    worklist.push(cfg->get_block(i));
  }

  while (worklist.size() > 0) {
    const Block* const block = worklist.pop();
    RegMask& old_live = live[block->_pre_order];
    RegMask new_live;

    // Initialize to union of successors
    for (uint i = 0; i < block->_num_succs; i++) {
      const uint succ_id = block->_succs[i]->_pre_order;
      new_live.OR(live[succ_id]);
    }

    // Walk block backwards, computing liveness
    for (int i = block->number_of_nodes() - 1; i >= 0; --i) {
      const Node* const node = block->get_node(i);

      // If this node tracks out-liveness, update it
      if (!bs_state->needs_livein_data()) {
        RegMask* const regs = bs_state->live(node);
        if (regs != nullptr) {
          regs->OR(new_live);
        }
      }

      // Remove def bits
      const OptoReg::Name first = bs->refine_register(node, regalloc->get_reg_first(node));
      const OptoReg::Name second = bs->refine_register(node, regalloc->get_reg_second(node));
      if (first != OptoReg::Bad) {
        new_live.Remove(first);
      }
      if (second != OptoReg::Bad) {
        new_live.Remove(second);
      }

      // Add use bits
      for (uint j = 1; j < node->req(); ++j) {
        const Node* const use = node->in(j);
        const OptoReg::Name first = bs->refine_register(use, regalloc->get_reg_first(use));
        const OptoReg::Name second = bs->refine_register(use, regalloc->get_reg_second(use));
        if (first != OptoReg::Bad) {
          new_live.Insert(first);
        }
        if (second != OptoReg::Bad) {
          new_live.Insert(second);
        }
      }

      // If this node tracks in-liveness, update it
      if (bs_state->needs_livein_data()) {
        RegMask* const regs = bs_state->live(node);
        if (regs != nullptr) {
          regs->OR(new_live);
        }
      }
    }

    // Now at block top, see if we have any changes
    new_live.SUBTRACT(old_live);
    if (new_live.is_NotEmpty()) {
      // Liveness has refined, update and propagate to prior blocks
      old_live.OR(new_live);
      for (uint i = 1; i < block->num_preds(); ++i) {
        Block* const pred = cfg->get_block_for_node(block->pred(i));
        worklist.push(pred);
      }
    }
  }
}