/*
 * Copyright (c) 2018, 2023, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "code/vmreg.inline.hpp"
#include "gc/shared/barrierSet.hpp"
#include "gc/shared/tlab_globals.hpp"
#include "gc/shared/c2/barrierSetC2.hpp"
#include "opto/arraycopynode.hpp"
#include "opto/block.hpp"
#include "opto/convertnode.hpp"
#include "opto/graphKit.hpp"
#include "opto/idealKit.hpp"
#include "opto/macro.hpp"
#include "opto/narrowptrnode.hpp"
#include "opto/output.hpp"
#include "opto/regalloc.hpp"
#include "opto/runtime.hpp"
#include "utilities/macros.hpp"
#include CPU_HEADER(gc/shared/barrierSetAssembler)

// By default this is a no-op.
void BarrierSetC2::resolve_address(C2Access& access) const { }

void* C2ParseAccess::barrier_set_state() const {
  return _kit->barrier_set_state();
}

PhaseGVN& C2ParseAccess::gvn() const { return _kit->gvn(); }

Node* C2ParseAccess::control() const {
  return _ctl == nullptr ? _kit->control() : _ctl;
}

bool C2Access::needs_cpu_membar() const {
  bool mismatched = (_decorators & C2_MISMATCHED) != 0;
  bool is_unordered = (_decorators & MO_UNORDERED) != 0;

  bool anonymous = (_decorators & C2_UNSAFE_ACCESS) != 0;
  bool in_heap = (_decorators & IN_HEAP) != 0;
  bool in_native = (_decorators & IN_NATIVE) != 0;
  bool is_mixed = !in_heap && !in_native;

  bool is_write = (_decorators & C2_WRITE_ACCESS) != 0;
  bool is_read = (_decorators & C2_READ_ACCESS) != 0;
  bool is_atomic = is_read && is_write;

  if (is_atomic) {
    // Atomics always need to be wrapped in CPU membars
    return true;
  }

  if (anonymous) {
    // We will need memory barriers unless we can determine a unique
    // alias category for this reference. (Note: If for some reason
    // the barriers get omitted and the unsafe reference begins to "pollute"
    // the alias analysis of the rest of the graph, either Compile::can_alias
    // or Compile::must_alias will throw a diagnostic assert.)
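    // Illustrative (not exhaustive): a raw Unsafe access whose address is
    // neither clearly in-heap nor clearly off-heap (is_mixed), or one that
    // carries an ordering constraint, conservatively gets CPU membars.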
    if (is_mixed || !is_unordered || (mismatched && !_addr.type()->isa_aryptr())) {
      return true;
    }
  } else {
    assert(!is_mixed, "not unsafe");
  }

  return false;
}

static BarrierSetC2State* barrier_set_state() {
  return reinterpret_cast<BarrierSetC2State*>(Compile::current()->barrier_set_state());
}

RegMask& BarrierStubC2::live() const {
  return *barrier_set_state()->live(_node);
}

BarrierStubC2::BarrierStubC2(const MachNode* node)
  : _node(node),
    _entry(),
    _continuation(),
    _preserve(live()) {}

Label* BarrierStubC2::entry() {
  // The _entry will never be bound when in_scratch_emit_size() is true.
  // However, we still need to return a label that is not bound now, but
  // will eventually be bound. Any eventually bound label will do, as it
  // will only act as a placeholder, so we return the _continuation label.
  return Compile::current()->output()->in_scratch_emit_size() ? &_continuation : &_entry;
}

Label* BarrierStubC2::continuation() {
  return &_continuation;
}

uint8_t BarrierStubC2::barrier_data() const {
  return _node->barrier_data();
}

void BarrierStubC2::preserve(Register r) {
  const VMReg vm_reg = r->as_VMReg();
  assert(vm_reg->is_Register(), "r must be a general-purpose register");
  _preserve.Insert(OptoReg::as_OptoReg(vm_reg));
}

void BarrierStubC2::dont_preserve(Register r) {
  VMReg vm_reg = r->as_VMReg();
  assert(vm_reg->is_Register(), "r must be a general-purpose register");
  // Subtract the given register and all its sub-registers (e.g. {R11, R11_H}
  // for r11 in aarch64).
  do {
    _preserve.Remove(OptoReg::as_OptoReg(vm_reg));
    vm_reg = vm_reg->next();
  } while (vm_reg->is_Register() && !vm_reg->is_concrete());
}

const RegMask& BarrierStubC2::preserve_set() const {
  return _preserve;
}

Node* BarrierSetC2::store_at_resolved(C2Access& access, C2AccessValue& val) const {
  DecoratorSet decorators = access.decorators();

  bool mismatched = (decorators & C2_MISMATCHED) != 0;
  bool unaligned = (decorators & C2_UNALIGNED) != 0;
  bool unsafe = (decorators & C2_UNSAFE_ACCESS) != 0;
  bool requires_atomic_access = (decorators & MO_UNORDERED) == 0;

  MemNode::MemOrd mo = access.mem_node_mo();

  Node* store;
  BasicType bt = access.type();
  if (access.is_parse_access()) {
    C2ParseAccess& parse_access = static_cast<C2ParseAccess&>(access);

    GraphKit* kit = parse_access.kit();
    if (bt == T_DOUBLE) {
      Node* new_val = kit->dprecision_rounding(val.node());
      val.set_node(new_val);
    }

    store = kit->store_to_memory(kit->control(), access.addr().node(), val.node(), bt,
                                 access.addr().type(), mo, requires_atomic_access, unaligned,
                                 mismatched, unsafe, access.barrier_data());
  } else {
    assert(access.is_opt_access(), "either parse or opt access");
    C2OptAccess& opt_access = static_cast<C2OptAccess&>(access);
    Node* ctl = opt_access.ctl();
    MergeMemNode* mm = opt_access.mem();
    PhaseGVN& gvn = opt_access.gvn();
    const TypePtr* adr_type = access.addr().type();
    int alias = gvn.C->get_alias_index(adr_type);
    Node* mem = mm->memory_at(alias);

    StoreNode* st = StoreNode::make(gvn, ctl, mem, access.addr().node(), adr_type, val.node(), bt, mo, requires_atomic_access);
    if (unaligned) {
      st->set_unaligned_access();
    }
    if (mismatched) {
      st->set_mismatched_access();
    }
    st->set_barrier_data(access.barrier_data());
    store = gvn.transform(st);
    if (store == st) {
      mm->set_memory_at(alias, st);
    }
  }
  access.set_raw_access(store);

  return store;
}

Node* BarrierSetC2::load_at_resolved(C2Access& access, const Type* val_type) const {
  DecoratorSet decorators = access.decorators();

  Node* adr = access.addr().node();
  const TypePtr* adr_type = access.addr().type();

  bool mismatched = (decorators & C2_MISMATCHED) != 0;
  bool requires_atomic_access = (decorators & MO_UNORDERED) == 0;
  bool unaligned = (decorators & C2_UNALIGNED) != 0;
  bool control_dependent = (decorators & C2_CONTROL_DEPENDENT_LOAD) != 0;
  bool unknown_control = (decorators & C2_UNKNOWN_CONTROL_LOAD) != 0;
  bool unsafe = (decorators & C2_UNSAFE_ACCESS) != 0;
  bool immutable = (decorators & C2_IMMUTABLE_MEMORY) != 0;

  MemNode::MemOrd mo = access.mem_node_mo();
  LoadNode::ControlDependency dep = unknown_control ? LoadNode::UnknownControl : LoadNode::DependsOnlyOnTest;

  Node* load;
  if (access.is_parse_access()) {
    C2ParseAccess& parse_access = static_cast<C2ParseAccess&>(access);
    GraphKit* kit = parse_access.kit();
    Node* control = control_dependent ? parse_access.control() : nullptr;

    if (immutable) {
      Compile* C = Compile::current();
      Node* mem = kit->immutable_memory();
      load = LoadNode::make(kit->gvn(), control, mem, adr,
                            adr_type, val_type, access.type(), mo, dep, requires_atomic_access,
                            unaligned, mismatched, unsafe, access.barrier_data());
      load = kit->gvn().transform(load);
    } else {
      load = kit->make_load(control, adr, val_type, access.type(), adr_type, mo,
                            dep, requires_atomic_access, unaligned, mismatched, unsafe,
                            access.barrier_data());
    }
  } else {
    assert(access.is_opt_access(), "either parse or opt access");
    C2OptAccess& opt_access = static_cast<C2OptAccess&>(access);
    Node* control = control_dependent ? opt_access.ctl() : nullptr;
    MergeMemNode* mm = opt_access.mem();
    PhaseGVN& gvn = opt_access.gvn();
    Node* mem = mm->memory_at(gvn.C->get_alias_index(adr_type));
    load = LoadNode::make(gvn, control, mem, adr, adr_type, val_type, access.type(), mo, dep,
                          requires_atomic_access, unaligned, mismatched, unsafe, access.barrier_data());
    load = gvn.transform(load);
  }
  access.set_raw_access(load);

  return load;
}

class C2AccessFence: public StackObj {
  C2Access& _access;
  Node* _leading_membar;

public:
  C2AccessFence(C2Access& access) :
    _access(access), _leading_membar(nullptr) {
    GraphKit* kit = nullptr;
    if (access.is_parse_access()) {
      C2ParseAccess& parse_access = static_cast<C2ParseAccess&>(access);
      kit = parse_access.kit();
    }
    DecoratorSet decorators = access.decorators();

    bool is_write = (decorators & C2_WRITE_ACCESS) != 0;
    bool is_read = (decorators & C2_READ_ACCESS) != 0;
    bool is_atomic = is_read && is_write;

    bool is_volatile = (decorators & MO_SEQ_CST) != 0;
    bool is_release = (decorators & MO_RELEASE) != 0;

    if (is_atomic) {
      assert(kit != nullptr, "unsupported at optimization time");
      // Memory-model-wise, a LoadStore acts like a little synchronized
      // block, so it needs barriers on each side. These don't translate
      // into actual barriers on most machines, but we still need the rest
      // of the compiler to respect ordering.
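      // Leading barrier selection below, summarized for reference:
      //   MO_RELEASE                       -> MemBarRelease
      //   MO_SEQ_CST, IRIW-sensitive CPUs  -> MemBarVolatile
      //   MO_SEQ_CST, otherwise            -> MemBarRelease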
      if (is_release) {
        _leading_membar = kit->insert_mem_bar(Op_MemBarRelease);
      } else if (is_volatile) {
        if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
          _leading_membar = kit->insert_mem_bar(Op_MemBarVolatile);
        } else {
          _leading_membar = kit->insert_mem_bar(Op_MemBarRelease);
        }
      }
    } else if (is_write) {
      // If reference is volatile, prevent following memory ops from
      // floating down past the volatile write. Also prevents commoning
      // another volatile read.
      if (is_volatile || is_release) {
        assert(kit != nullptr, "unsupported at optimization time");
        _leading_membar = kit->insert_mem_bar(Op_MemBarRelease);
      }
    } else {
      // Memory barrier to prevent normal and 'unsafe' accesses from
      // bypassing each other. Happens after null checks, so the
      // exception paths do not take memory state from the memory barrier,
      // so there are no problems making a strong assert about mixing users
      // of safe & unsafe memory.
      if (is_volatile && support_IRIW_for_not_multiple_copy_atomic_cpu) {
        assert(kit != nullptr, "unsupported at optimization time");
        _leading_membar = kit->insert_mem_bar(Op_MemBarVolatile);
      }
    }

    if (access.needs_cpu_membar()) {
      assert(kit != nullptr, "unsupported at optimization time");
      kit->insert_mem_bar(Op_MemBarCPUOrder);
    }

    if (is_atomic) {
      // 4984716: MemBars must be inserted before this
      // memory node in order to avoid a false
      // dependency which will confuse the scheduler.
      access.set_memory();
    }
  }

  ~C2AccessFence() {
    GraphKit* kit = nullptr;
    if (_access.is_parse_access()) {
      C2ParseAccess& parse_access = static_cast<C2ParseAccess&>(_access);
      kit = parse_access.kit();
    }
    DecoratorSet decorators = _access.decorators();

    bool is_write = (decorators & C2_WRITE_ACCESS) != 0;
    bool is_read = (decorators & C2_READ_ACCESS) != 0;
    bool is_atomic = is_read && is_write;

    bool is_volatile = (decorators & MO_SEQ_CST) != 0;
    bool is_acquire = (decorators & MO_ACQUIRE) != 0;

    // If reference is volatile, prevent following volatile ops from
    // floating up before the volatile access.
    if (_access.needs_cpu_membar()) {
      kit->insert_mem_bar(Op_MemBarCPUOrder);
    }

    if (is_atomic) {
      assert(kit != nullptr, "unsupported at optimization time");
      if (is_acquire || is_volatile) {
        Node* n = _access.raw_access();
        Node* mb = kit->insert_mem_bar(Op_MemBarAcquire, n);
        if (_leading_membar != nullptr) {
          MemBarNode::set_load_store_pair(_leading_membar->as_MemBar(), mb->as_MemBar());
        }
      }
    } else if (is_write) {
      // If not multiple copy atomic, we do the MemBarVolatile before the load.
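      // (On CPUs where support_IRIW_for_not_multiple_copy_atomic_cpu is set,
      //  the volatile load side emits the MemBarVolatile instead, so no
      //  trailing fat membar is needed here.)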
      if (is_volatile && !support_IRIW_for_not_multiple_copy_atomic_cpu) {
        assert(kit != nullptr, "unsupported at optimization time");
        Node* n = _access.raw_access();
        Node* mb = kit->insert_mem_bar(Op_MemBarVolatile, n); // Use fat membar
        if (_leading_membar != nullptr) {
          MemBarNode::set_store_pair(_leading_membar->as_MemBar(), mb->as_MemBar());
        }
      }
    } else {
      if (is_volatile || is_acquire) {
        assert(kit != nullptr, "unsupported at optimization time");
        Node* n = _access.raw_access();
        assert(_leading_membar == nullptr || support_IRIW_for_not_multiple_copy_atomic_cpu, "no leading membar expected");
        Node* mb = kit->insert_mem_bar(Op_MemBarAcquire, n);
        mb->as_MemBar()->set_trailing_load();
      }
    }
  }
};

Node* BarrierSetC2::store_at(C2Access& access, C2AccessValue& val) const {
  C2AccessFence fence(access);
  resolve_address(access);
  return store_at_resolved(access, val);
}

Node* BarrierSetC2::load_at(C2Access& access, const Type* val_type) const {
  C2AccessFence fence(access);
  resolve_address(access);
  return load_at_resolved(access, val_type);
}

MemNode::MemOrd C2Access::mem_node_mo() const {
  bool is_write = (_decorators & C2_WRITE_ACCESS) != 0;
  bool is_read = (_decorators & C2_READ_ACCESS) != 0;
  if ((_decorators & MO_SEQ_CST) != 0) {
    if (is_write && is_read) {
      // For atomic operations
      return MemNode::seqcst;
    } else if (is_write) {
      return MemNode::release;
    } else {
      assert(is_read, "what else?");
      return MemNode::acquire;
    }
  } else if ((_decorators & MO_RELEASE) != 0) {
    return MemNode::release;
  } else if ((_decorators & MO_ACQUIRE) != 0) {
    return MemNode::acquire;
  } else if (is_write) {
    // Volatile fields need releasing stores.
    // Non-volatile fields also need releasing stores if they hold an
    // object reference, because the object reference might point to
    // a freshly created object.
    // Conservatively release stores of object references.
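    // E.g. an MO_UNORDERED store of an oop-typed value gets MemNode::release,
    // while an MO_UNORDERED store of a primitive gets MemNode::unordered.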
    return StoreNode::release_if_reference(_type);
  } else {
    return MemNode::unordered;
  }
}

void C2Access::fixup_decorators() {
  bool default_mo = (_decorators & MO_DECORATOR_MASK) == 0;
  bool is_unordered = (_decorators & MO_UNORDERED) != 0 || default_mo;
  bool anonymous = (_decorators & C2_UNSAFE_ACCESS) != 0;

  bool is_read = (_decorators & C2_READ_ACCESS) != 0;
  bool is_write = (_decorators & C2_WRITE_ACCESS) != 0;

  if (AlwaysAtomicAccesses && is_unordered) {
    _decorators &= ~MO_DECORATOR_MASK; // clear the MO bits
    _decorators |= MO_RELAXED; // Force the MO_RELAXED decorator with AlwaysAtomicAccesses
  }

  _decorators = AccessInternal::decorator_fixup(_decorators, _type);

  if (is_read && !is_write && anonymous) {
    // To be valid, unsafe loads may depend on other conditions than
    // the one that guards them: pin the Load node
    _decorators |= C2_CONTROL_DEPENDENT_LOAD;
    _decorators |= C2_UNKNOWN_CONTROL_LOAD;
    const TypePtr* adr_type = _addr.type();
    Node* adr = _addr.node();
    if (!needs_cpu_membar() && adr_type->isa_instptr()) {
      assert(adr_type->meet(TypePtr::NULL_PTR) != adr_type->remove_speculative(), "should be not null");
      intptr_t offset = Type::OffsetBot;
      AddPNode::Ideal_base_and_offset(adr, &gvn(), offset);
      if (offset >= 0) {
        int s = Klass::layout_helper_size_in_bytes(adr_type->isa_instptr()->instance_klass()->layout_helper());
        if (offset < s) {
          // Guaranteed to be a valid access, no need to pin it
          _decorators ^= C2_CONTROL_DEPENDENT_LOAD;
          _decorators ^= C2_UNKNOWN_CONTROL_LOAD;
        }
      }
    }
  }
}

//--------------------------- atomic operations---------------------------------

void BarrierSetC2::pin_atomic_op(C2AtomicParseAccess& access) const {
  // SCMemProjNodes represent the memory state of a LoadStore. Their
  // main role is to prevent LoadStore nodes from being optimized away
  // when their results aren't used.
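  // Concretely, the projection created below becomes the new memory state for
  // the access' alias category (LoadStore -> SCMemProj -> later memory users),
  // so the LoadStore cannot be dead-code eliminated even if its value result
  // is unused.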
  assert(access.is_parse_access(), "entry not supported at optimization time");
  C2ParseAccess& parse_access = static_cast<C2ParseAccess&>(access);
  GraphKit* kit = parse_access.kit();
  Node* load_store = access.raw_access();
  assert(load_store != nullptr, "must pin atomic op");
  Node* proj = kit->gvn().transform(new SCMemProjNode(load_store));
  kit->set_memory(proj, access.alias_idx());
}

void C2AtomicParseAccess::set_memory() {
  Node *mem = _kit->memory(_alias_idx);
  _memory = mem;
}

Node* BarrierSetC2::atomic_cmpxchg_val_at_resolved(C2AtomicParseAccess& access, Node* expected_val,
                                                   Node* new_val, const Type* value_type) const {
  GraphKit* kit = access.kit();
  MemNode::MemOrd mo = access.mem_node_mo();
  Node* mem = access.memory();

  Node* adr = access.addr().node();
  const TypePtr* adr_type = access.addr().type();

  Node* load_store = nullptr;

  if (access.is_oop()) {
#ifdef _LP64
    if (adr->bottom_type()->is_ptr_to_narrowoop()) {
      Node *newval_enc = kit->gvn().transform(new EncodePNode(new_val, new_val->bottom_type()->make_narrowoop()));
      Node *oldval_enc = kit->gvn().transform(new EncodePNode(expected_val, expected_val->bottom_type()->make_narrowoop()));
      load_store = new CompareAndExchangeNNode(kit->control(), mem, adr, newval_enc, oldval_enc, adr_type, value_type->make_narrowoop(), mo);
    } else
#endif
    {
      load_store = new CompareAndExchangePNode(kit->control(), mem, adr, new_val, expected_val, adr_type, value_type->is_oopptr(), mo);
    }
  } else {
    switch (access.type()) {
      case T_BYTE: {
        load_store = new CompareAndExchangeBNode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo);
        break;
      }
      case T_SHORT: {
        load_store = new CompareAndExchangeSNode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo);
        break;
      }
      case T_INT: {
        load_store = new CompareAndExchangeINode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo);
        break;
      }
      case T_LONG: {
        load_store = new CompareAndExchangeLNode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo);
        break;
      }
      default:
        ShouldNotReachHere();
    }
  }

  load_store->as_LoadStore()->set_barrier_data(access.barrier_data());
  load_store = kit->gvn().transform(load_store);

  access.set_raw_access(load_store);
  pin_atomic_op(access);

#ifdef _LP64
  if (access.is_oop() && adr->bottom_type()->is_ptr_to_narrowoop()) {
    return kit->gvn().transform(new DecodeNNode(load_store, load_store->get_ptr_type()));
  }
#endif

  return load_store;
}

Node* BarrierSetC2::atomic_cmpxchg_bool_at_resolved(C2AtomicParseAccess& access, Node* expected_val,
                                                    Node* new_val, const Type* value_type) const {
  GraphKit* kit = access.kit();
  DecoratorSet decorators = access.decorators();
  MemNode::MemOrd mo = access.mem_node_mo();
  Node* mem = access.memory();
  bool is_weak_cas = (decorators & C2_WEAK_CMPXCHG) != 0;
  Node* load_store = nullptr;
  Node* adr = access.addr().node();

  if (access.is_oop()) {
#ifdef _LP64
    if (adr->bottom_type()->is_ptr_to_narrowoop()) {
      Node *newval_enc = kit->gvn().transform(new EncodePNode(new_val, new_val->bottom_type()->make_narrowoop()));
      Node *oldval_enc = kit->gvn().transform(new EncodePNode(expected_val, expected_val->bottom_type()->make_narrowoop()));
      if (is_weak_cas) {
        load_store = new WeakCompareAndSwapNNode(kit->control(), mem, adr, newval_enc, oldval_enc, mo);
      } else {
        load_store = new CompareAndSwapNNode(kit->control(), mem, adr, newval_enc, oldval_enc, mo);
      }
    } else
#endif
    {
      if (is_weak_cas) {
        load_store = new WeakCompareAndSwapPNode(kit->control(), mem, adr, new_val, expected_val, mo);
      } else {
        load_store = new CompareAndSwapPNode(kit->control(), mem, adr, new_val, expected_val, mo);
      }
    }
  } else {
    switch(access.type()) {
      case T_BYTE: {
        if (is_weak_cas) {
          load_store = new WeakCompareAndSwapBNode(kit->control(), mem, adr, new_val, expected_val, mo);
        } else {
          load_store = new CompareAndSwapBNode(kit->control(), mem, adr, new_val, expected_val, mo);
        }
        break;
      }
      case T_SHORT: {
        if (is_weak_cas) {
          load_store = new WeakCompareAndSwapSNode(kit->control(), mem, adr, new_val, expected_val, mo);
        } else {
          load_store = new CompareAndSwapSNode(kit->control(), mem, adr, new_val, expected_val, mo);
        }
        break;
      }
      case T_INT: {
        if (is_weak_cas) {
          load_store = new WeakCompareAndSwapINode(kit->control(), mem, adr, new_val, expected_val, mo);
        } else {
          load_store = new CompareAndSwapINode(kit->control(), mem, adr, new_val, expected_val, mo);
        }
        break;
      }
      case T_LONG: {
        if (is_weak_cas) {
          load_store = new WeakCompareAndSwapLNode(kit->control(), mem, adr, new_val, expected_val, mo);
        } else {
          load_store = new CompareAndSwapLNode(kit->control(), mem, adr, new_val, expected_val, mo);
        }
        break;
      }
      default:
        ShouldNotReachHere();
    }
  }

  load_store->as_LoadStore()->set_barrier_data(access.barrier_data());
  load_store = kit->gvn().transform(load_store);

  access.set_raw_access(load_store);
  pin_atomic_op(access);

  return load_store;
}

Node* BarrierSetC2::atomic_xchg_at_resolved(C2AtomicParseAccess& access, Node* new_val, const Type* value_type) const {
  GraphKit* kit = access.kit();
  Node* mem = access.memory();
  Node* adr = access.addr().node();
  const TypePtr* adr_type = access.addr().type();
  Node* load_store = nullptr;

  if (access.is_oop()) {
#ifdef _LP64
    if (adr->bottom_type()->is_ptr_to_narrowoop()) {
      Node *newval_enc = kit->gvn().transform(new EncodePNode(new_val, new_val->bottom_type()->make_narrowoop()));
      load_store = kit->gvn().transform(new GetAndSetNNode(kit->control(), mem, adr, newval_enc, adr_type, value_type->make_narrowoop()));
    } else
#endif
    {
      load_store = new GetAndSetPNode(kit->control(), mem, adr, new_val, adr_type, value_type->is_oopptr());
    }
  } else {
    switch (access.type()) {
      case T_BYTE:
        load_store = new GetAndSetBNode(kit->control(), mem, adr, new_val, adr_type);
        break;
      case T_SHORT:
        load_store = new GetAndSetSNode(kit->control(), mem, adr, new_val, adr_type);
        break;
      case T_INT:
        load_store = new GetAndSetINode(kit->control(), mem, adr, new_val, adr_type);
        break;
      case T_LONG:
        load_store = new GetAndSetLNode(kit->control(), mem, adr, new_val, adr_type);
        break;
      default:
        ShouldNotReachHere();
    }
  }

  load_store->as_LoadStore()->set_barrier_data(access.barrier_data());
  load_store = kit->gvn().transform(load_store);

  access.set_raw_access(load_store);
  pin_atomic_op(access);

#ifdef _LP64
  if (access.is_oop() && adr->bottom_type()->is_ptr_to_narrowoop()) {
    return kit->gvn().transform(new DecodeNNode(load_store, load_store->get_ptr_type()));
  }
#endif

  return load_store;
}

Node* BarrierSetC2::atomic_add_at_resolved(C2AtomicParseAccess& access, Node* new_val, const Type* value_type) const {
  Node* load_store = nullptr;
  GraphKit* kit = access.kit();
  Node* adr = access.addr().node();
  const TypePtr* adr_type = access.addr().type();
  Node* mem = access.memory();

  switch(access.type()) {
    case T_BYTE:
      load_store = new GetAndAddBNode(kit->control(), mem, adr, new_val, adr_type);
      break;
    case T_SHORT:
      load_store = new GetAndAddSNode(kit->control(), mem, adr, new_val, adr_type);
      break;
    case T_INT:
      load_store = new GetAndAddINode(kit->control(), mem, adr, new_val, adr_type);
      break;
    case T_LONG:
      load_store = new GetAndAddLNode(kit->control(), mem, adr, new_val, adr_type);
      break;
    default:
      ShouldNotReachHere();
  }

  load_store->as_LoadStore()->set_barrier_data(access.barrier_data());
  load_store = kit->gvn().transform(load_store);

  access.set_raw_access(load_store);
  pin_atomic_op(access);

  return load_store;
}

Node* BarrierSetC2::atomic_cmpxchg_val_at(C2AtomicParseAccess& access, Node* expected_val,
                                          Node* new_val, const Type* value_type) const {
  C2AccessFence fence(access);
  resolve_address(access);
  return atomic_cmpxchg_val_at_resolved(access, expected_val, new_val, value_type);
}

Node* BarrierSetC2::atomic_cmpxchg_bool_at(C2AtomicParseAccess& access, Node* expected_val,
                                           Node* new_val, const Type* value_type) const {
  C2AccessFence fence(access);
  resolve_address(access);
  return atomic_cmpxchg_bool_at_resolved(access, expected_val, new_val, value_type);
}

Node* BarrierSetC2::atomic_xchg_at(C2AtomicParseAccess& access, Node* new_val, const Type* value_type) const {
  C2AccessFence fence(access);
  resolve_address(access);
  return atomic_xchg_at_resolved(access, new_val, value_type);
}

Node* BarrierSetC2::atomic_add_at(C2AtomicParseAccess& access, Node* new_val, const Type* value_type) const {
  C2AccessFence fence(access);
  resolve_address(access);
  return atomic_add_at_resolved(access, new_val, value_type);
}

int BarrierSetC2::arraycopy_payload_base_offset(bool is_array) {
  // Exclude the header but include the array length to copy by 8-byte words.
  // Can't use base_offset_in_bytes(bt) since basic type is unknown.
  int base_off = is_array ? arrayOopDesc::length_offset_in_bytes() :
                            instanceOopDesc::base_offset_in_bytes();
  // base_off:
  // 8  - 32-bit VM
  // 12 - 64-bit VM, compressed klass
  // 16 - 64-bit VM, normal klass
  if (base_off % BytesPerLong != 0) {
    assert(UseCompressedClassPointers, "");
    if (is_array) {
      // Exclude the length to copy by 8-byte words.
      base_off += sizeof(int);
    } else {
      // Include the klass to copy by 8-byte words.
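      // (Worked example, assuming a 64-bit VM with compressed class pointers:
      //  base_offset_in_bytes() == 12, which is not 8-byte aligned, so for
      //  instances we back up to klass_offset_in_bytes() == 8; for arrays we
      //  instead skip the 4-byte length field and start at 16.)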
      base_off = instanceOopDesc::klass_offset_in_bytes();
    }
    assert(base_off % BytesPerLong == 0, "expect 8 bytes alignment");
  }
  return base_off;
}

void BarrierSetC2::clone(GraphKit* kit, Node* src_base, Node* dst_base, Node* size, bool is_array) const {
  int base_off = arraycopy_payload_base_offset(is_array);
  Node* payload_size = size;
  Node* offset = kit->MakeConX(base_off);
  payload_size = kit->gvn().transform(new SubXNode(payload_size, offset));
  if (is_array) {
    // Ensure the array payload size is rounded up to the next BytesPerLong
    // multiple when converting to double-words. This is necessary because array
    // size does not include object alignment padding, so it might not be a
    // multiple of BytesPerLong for sub-long element types.
    payload_size = kit->gvn().transform(new AddXNode(payload_size, kit->MakeConX(BytesPerLong - 1)));
  }
  payload_size = kit->gvn().transform(new URShiftXNode(payload_size, kit->intcon(LogBytesPerLong)));
  ArrayCopyNode* ac = ArrayCopyNode::make(kit, false, src_base, offset, dst_base, offset, payload_size, true, false);
  if (is_array) {
    ac->set_clone_array();
  } else {
    ac->set_clone_inst();
  }
  Node* n = kit->gvn().transform(ac);
  if (n == ac) {
    const TypePtr* raw_adr_type = TypeRawPtr::BOTTOM;
    ac->set_adr_type(TypeRawPtr::BOTTOM);
    kit->set_predefined_output_for_runtime_call(ac, ac->in(TypeFunc::Memory), raw_adr_type);
  } else {
    kit->set_all_memory(n);
  }
}

Node* BarrierSetC2::obj_allocate(PhaseMacroExpand* macro, Node* mem, Node* toobig_false, Node* size_in_bytes,
                                 Node*& i_o, Node*& needgc_ctrl,
                                 Node*& fast_oop_ctrl, Node*& fast_oop_rawmem,
                                 intx prefetch_lines) const {
  assert(UseTLAB, "Only for TLAB enabled allocations");

  Node* thread = macro->transform_later(new ThreadLocalNode());
  Node* tlab_top_adr = macro->basic_plus_adr(macro->top()/*not oop*/, thread, in_bytes(JavaThread::tlab_top_offset()));
  Node* tlab_end_adr = macro->basic_plus_adr(macro->top()/*not oop*/, thread, in_bytes(JavaThread::tlab_end_offset()));

  // Load TLAB end.
  //
  // Note: We set the control input on "tlab_end" and "old_tlab_top" to work around
  // a bug where these values were being moved across
  // a safepoint. These are not oops, so they cannot be included in the oop
  // map, but they can be changed by a GC. The proper way to fix this would
  // be to set the raw memory state when generating a SafepointNode. However
  // this will require extensive changes to the loop optimization in order to
  // prevent a degradation of the optimization.
  // See comment in memnode.hpp, around line 227 in class LoadPNode.
  Node* tlab_end = macro->make_load(toobig_false, mem, tlab_end_adr, 0, TypeRawPtr::BOTTOM, T_ADDRESS);

  // Load the TLAB top.
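  // The nodes built below implement the usual TLAB bump-pointer fast path,
  // roughly:
  //   old_top = thread->tlab_top();
  //   new_top = old_top + size_in_bytes;
  //   if (new_top >= thread->tlab_end()) goto slow_path;  // needgc_ctrl
  //   thread->set_tlab_top(new_top);
  //   return old_top;                                     // the new object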
  Node* old_tlab_top = new LoadPNode(toobig_false, mem, tlab_top_adr, TypeRawPtr::BOTTOM, TypeRawPtr::BOTTOM, MemNode::unordered);
  macro->transform_later(old_tlab_top);

  // Add to heap top to get a new TLAB top
  Node* new_tlab_top = new AddPNode(macro->top(), old_tlab_top, size_in_bytes);
  macro->transform_later(new_tlab_top);

  // Check against TLAB end
  Node* tlab_full = new CmpPNode(new_tlab_top, tlab_end);
  macro->transform_later(tlab_full);

  Node* needgc_bol = new BoolNode(tlab_full, BoolTest::ge);
  macro->transform_later(needgc_bol);
  IfNode* needgc_iff = new IfNode(toobig_false, needgc_bol, PROB_UNLIKELY_MAG(4), COUNT_UNKNOWN);
  macro->transform_later(needgc_iff);

  // Plug the failing-heap-space-need-gc test into the slow-path region
  Node* needgc_true = new IfTrueNode(needgc_iff);
  macro->transform_later(needgc_true);
  needgc_ctrl = needgc_true;

  // No need for a GC.
  Node* needgc_false = new IfFalseNode(needgc_iff);
  macro->transform_later(needgc_false);

  // Fast path:
  i_o = macro->prefetch_allocation(i_o, needgc_false, mem,
                                   old_tlab_top, new_tlab_top, prefetch_lines);

  // Store the modified TLAB top back down.
  Node* store_tlab_top = new StorePNode(needgc_false, mem, tlab_top_adr,
                                        TypeRawPtr::BOTTOM, new_tlab_top, MemNode::unordered);
  macro->transform_later(store_tlab_top);

  fast_oop_ctrl = needgc_false;
  fast_oop_rawmem = store_tlab_top;
  return old_tlab_top;
}

static const TypeFunc* clone_type() {
  // Create input type (domain)
  int argcnt = NOT_LP64(3) LP64_ONLY(4);
  const Type** const domain_fields = TypeTuple::fields(argcnt);
  int argp = TypeFunc::Parms;
  domain_fields[argp++] = TypeInstPtr::NOTNULL;  // src
  domain_fields[argp++] = TypeInstPtr::NOTNULL;  // dst
  domain_fields[argp++] = TypeX_X;               // size lower
  LP64_ONLY(domain_fields[argp++] = Type::HALF); // size upper
  assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
  const TypeTuple* const domain = TypeTuple::make(TypeFunc::Parms + argcnt, domain_fields);

  // Create result type (range)
  const Type** const range_fields = TypeTuple::fields(0);
  const TypeTuple* const range = TypeTuple::make(TypeFunc::Parms + 0, range_fields);

  return TypeFunc::make(domain, range);
}

#define XTOP LP64_ONLY(COMMA phase->top())

void BarrierSetC2::clone_in_runtime(PhaseMacroExpand* phase, ArrayCopyNode* ac,
                                    address clone_addr, const char* clone_name) const {
  Node* const ctrl = ac->in(TypeFunc::Control);
  Node* const mem = ac->in(TypeFunc::Memory);
  Node* const src = ac->in(ArrayCopyNode::Src);
  Node* const dst = ac->in(ArrayCopyNode::Dest);
  Node* const size = ac->in(ArrayCopyNode::Length);

  assert(size->bottom_type()->base() == Type_X,
         "Should be of object size type (int for 32 bits, long for 64 bits)");

  // The native clone we are calling here expects the object size in words.
  // Add the header/offset size to the payload size to get the object size.
  Node* const base_offset = phase->MakeConX(arraycopy_payload_base_offset(ac->is_clone_array()) >> LogBytesPerLong);
  Node* const full_size = phase->transform_later(new AddXNode(size, base_offset));
  // HeapAccess<>::clone expects the size in heap words.
  // For 64-bit platforms, this is a no-operation.
  // For 32-bit platforms, we need to multiply full_size by HeapWordsPerLong (2).
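  // I.e. full_size_in_heap_words = full_size << LogHeapWordsPerLong,
  // where the shift amount is 0 on 64-bit and 1 on 32-bit platforms.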
  Node* const full_size_in_heap_words = phase->transform_later(new LShiftXNode(full_size, phase->intcon(LogHeapWordsPerLong)));

  Node* const call = phase->make_leaf_call(ctrl,
                                           mem,
                                           clone_type(),
                                           clone_addr,
                                           clone_name,
                                           TypeRawPtr::BOTTOM,
                                           src, dst, full_size_in_heap_words XTOP);
  phase->transform_later(call);
  phase->replace_node(ac, call);
}

void BarrierSetC2::clone_at_expansion(PhaseMacroExpand* phase, ArrayCopyNode* ac) const {
  Node* ctrl = ac->in(TypeFunc::Control);
  Node* mem = ac->in(TypeFunc::Memory);
  Node* src = ac->in(ArrayCopyNode::Src);
  Node* src_offset = ac->in(ArrayCopyNode::SrcPos);
  Node* dest = ac->in(ArrayCopyNode::Dest);
  Node* dest_offset = ac->in(ArrayCopyNode::DestPos);
  Node* length = ac->in(ArrayCopyNode::Length);

  Node* payload_src = phase->basic_plus_adr(src, src_offset);
  Node* payload_dst = phase->basic_plus_adr(dest, dest_offset);

  const char* copyfunc_name = "arraycopy";
  address copyfunc_addr = phase->basictype2arraycopy(T_LONG, nullptr, nullptr, true, copyfunc_name, true);

  const TypePtr* raw_adr_type = TypeRawPtr::BOTTOM;
  const TypeFunc* call_type = OptoRuntime::fast_arraycopy_Type();

  Node* call = phase->make_leaf_call(ctrl, mem, call_type, copyfunc_addr, copyfunc_name, raw_adr_type, payload_src, payload_dst, length XTOP);
  phase->transform_later(call);

  phase->replace_node(ac, call);
}

#undef XTOP

void BarrierSetC2::compute_liveness_at_stubs() const {
  ResourceMark rm;
  Compile* const C = Compile::current();
  Arena* const A = Thread::current()->resource_area();
  PhaseCFG* const cfg = C->cfg();
  PhaseRegAlloc* const regalloc = C->regalloc();
  RegMask* const live = NEW_ARENA_ARRAY(A, RegMask, cfg->number_of_blocks() * sizeof(RegMask));
  BarrierSetAssembler* const bs = BarrierSet::barrier_set()->barrier_set_assembler();
  BarrierSetC2State* bs_state = barrier_set_state();
  Block_List worklist;

  for (uint i = 0; i < cfg->number_of_blocks(); ++i) {
    new ((void*)(live + i)) RegMask();
    worklist.push(cfg->get_block(i));
  }

  while (worklist.size() > 0) {
    const Block* const block = worklist.pop();
    RegMask& old_live = live[block->_pre_order];
    RegMask new_live;

    // Initialize to union of successors
    for (uint i = 0; i < block->_num_succs; i++) {
      const uint succ_id = block->_succs[i]->_pre_order;
      new_live.OR(live[succ_id]);
    }

    // Walk block backwards, computing liveness
    for (int i = block->number_of_nodes() - 1; i >= 0; --i) {
      const Node* const node = block->get_node(i);

      // If this node tracks out-liveness, update it
      if (!bs_state->needs_livein_data()) {
        RegMask* const regs = bs_state->live(node);
        if (regs != nullptr) {
          regs->OR(new_live);
        }
      }

      // Remove def bits
      const OptoReg::Name first = bs->refine_register(node, regalloc->get_reg_first(node));
      const OptoReg::Name second = bs->refine_register(node, regalloc->get_reg_second(node));
      if (first != OptoReg::Bad) {
        new_live.Remove(first);
      }
      if (second != OptoReg::Bad) {
        new_live.Remove(second);
      }

      // Add use bits
      for (uint j = 1; j < node->req(); ++j) {
        const Node* const use = node->in(j);
        const OptoReg::Name first = bs->refine_register(use, regalloc->get_reg_first(use));
        const OptoReg::Name second = bs->refine_register(use, regalloc->get_reg_second(use));
        if (first != OptoReg::Bad) {
          new_live.Insert(first);
        }
        if (second != OptoReg::Bad) {
          new_live.Insert(second);
        }
      }

      // If this node tracks in-liveness, update it
      if (bs_state->needs_livein_data()) {
        RegMask* const regs = bs_state->live(node);
        if (regs != nullptr) {
          regs->OR(new_live);
        }
      }
    }

    // Now at block top, see if we have any changes
    new_live.SUBTRACT(old_live);
    if (new_live.is_NotEmpty()) {
      // Liveness has been refined: update and propagate to predecessor blocks
      old_live.OR(new_live);
      for (uint i = 1; i < block->num_preds(); ++i) {
        Block* const pred = cfg->get_block_for_node(block->pred(i));
        worklist.push(pred);
      }
    }
  }
}