/*
 * Copyright (c) 2018, 2023, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "code/vmreg.inline.hpp"
#include "gc/shared/barrierSet.hpp"
#include "gc/shared/tlab_globals.hpp"
#include "gc/shared/c2/barrierSetC2.hpp"
#include "opto/arraycopynode.hpp"
#include "opto/block.hpp"
#include "opto/convertnode.hpp"
#include "opto/graphKit.hpp"
#include "opto/idealKit.hpp"
#include "opto/macro.hpp"
#include "opto/narrowptrnode.hpp"
#include "opto/output.hpp"
#include "opto/regalloc.hpp"
#include "opto/runtime.hpp"
#include "utilities/macros.hpp"
#include CPU_HEADER(gc/shared/barrierSetAssembler)

// By default this is a no-op.
void BarrierSetC2::resolve_address(C2Access& access) const { }

void* C2ParseAccess::barrier_set_state() const {
  return _kit->barrier_set_state();
}

PhaseGVN& C2ParseAccess::gvn() const { return _kit->gvn(); }

bool C2Access::needs_cpu_membar() const {
  bool mismatched   = (_decorators & C2_MISMATCHED) != 0;
  bool is_unordered = (_decorators & MO_UNORDERED) != 0;

  bool anonymous = (_decorators & C2_UNSAFE_ACCESS) != 0;
  bool in_heap   = (_decorators & IN_HEAP) != 0;
  bool in_native = (_decorators & IN_NATIVE) != 0;
  bool is_mixed  = !in_heap && !in_native;

  bool is_write  = (_decorators & C2_WRITE_ACCESS) != 0;
  bool is_read   = (_decorators & C2_READ_ACCESS) != 0;
  bool is_atomic = is_read && is_write;

  if (is_atomic) {
    // Atomics always need to be wrapped in CPU membars
    return true;
  }

  if (anonymous) {
    // We will need memory barriers unless we can determine a unique
    // alias category for this reference. (Note: If for some reason
    // the barriers get omitted and the unsafe reference begins to "pollute"
    // the alias analysis of the rest of the graph, either Compile::can_alias
    // or Compile::must_alias will throw a diagnostic assert.)
    if (is_mixed || !is_unordered || (mismatched && !_addr.type()->isa_aryptr())) {
      return true;
    }
  } else {
    assert(!is_mixed, "not unsafe");
  }

  return false;
}

static BarrierSetC2State* barrier_set_state() {
  return reinterpret_cast<BarrierSetC2State*>(Compile::current()->barrier_set_state());
}

RegMask& BarrierStubC2::live() const {
  return *barrier_set_state()->live(_node);
}

BarrierStubC2::BarrierStubC2(const MachNode* node)
  : _node(node),
    _entry(),
    _continuation(),
    _preserve(live()) {}

Label* BarrierStubC2::entry() {
  // The _entry will never be bound when in_scratch_emit_size() is true.
  // However, we still need to return a label that is not bound now, but
  // will eventually be bound. Any eventually bound label will do, as it
  // will only act as a placeholder, so we return the _continuation label.
  return Compile::current()->output()->in_scratch_emit_size() ? &_continuation : &_entry;
}

Label* BarrierStubC2::continuation() {
  return &_continuation;
}

void BarrierStubC2::preserve(Register r) {
  const VMReg vm_reg = r->as_VMReg();
  assert(vm_reg->is_Register(), "r must be a general-purpose register");
  _preserve.Insert(OptoReg::as_OptoReg(vm_reg));
}

void BarrierStubC2::dont_preserve(Register r) {
  VMReg vm_reg = r->as_VMReg();
  assert(vm_reg->is_Register(), "r must be a general-purpose register");
  // Subtract the given register and all its sub-registers (e.g. {R11, R11_H}
  // for r11 in aarch64).
  do {
    _preserve.Remove(OptoReg::as_OptoReg(vm_reg));
    vm_reg = vm_reg->next();
  } while (vm_reg->is_Register() && !vm_reg->is_concrete());
}

const RegMask& BarrierStubC2::preserve_set() const {
  return _preserve;
}

Node* BarrierSetC2::store_at_resolved(C2Access& access, C2AccessValue& val) const {
  DecoratorSet decorators = access.decorators();

  bool mismatched = (decorators & C2_MISMATCHED) != 0;
  bool unaligned = (decorators & C2_UNALIGNED) != 0;
  bool unsafe = (decorators & C2_UNSAFE_ACCESS) != 0;
  bool requires_atomic_access = (decorators & MO_UNORDERED) == 0;

  MemNode::MemOrd mo = access.mem_node_mo();

  Node* store;
  BasicType bt = access.type();
  if (access.is_parse_access()) {
    C2ParseAccess& parse_access = static_cast<C2ParseAccess&>(access);

    GraphKit* kit = parse_access.kit();
    if (bt == T_DOUBLE) {
      Node* new_val = kit->dprecision_rounding(val.node());
      val.set_node(new_val);
    }

    store = kit->store_to_memory(kit->control(), access.addr().node(), val.node(), bt,
                                 access.addr().type(), mo, requires_atomic_access, unaligned,
                                 mismatched, unsafe, access.barrier_data());
  } else {
    assert(access.is_opt_access(), "either parse or opt access");
    C2OptAccess& opt_access = static_cast<C2OptAccess&>(access);
    Node* ctl = opt_access.ctl();
    MergeMemNode* mm = opt_access.mem();
    PhaseGVN& gvn = opt_access.gvn();
    const TypePtr* adr_type = access.addr().type();
    int alias = gvn.C->get_alias_index(adr_type);
    Node* mem = mm->memory_at(alias);

    StoreNode* st = StoreNode::make(gvn, ctl, mem, access.addr().node(), adr_type, val.node(), bt, mo, requires_atomic_access);
    if (unaligned) {
      st->set_unaligned_access();
    }
    if (mismatched) {
      st->set_mismatched_access();
    }
    st->set_barrier_data(access.barrier_data());
    store = gvn.transform(st);
    if (store == st) {
      mm->set_memory_at(alias, st);
    }
  }
  access.set_raw_access(store);

  return store;
}

Node* BarrierSetC2::load_at_resolved(C2Access& access, const Type* val_type) const {
  DecoratorSet decorators = access.decorators();

  Node* adr = access.addr().node();
  const TypePtr* adr_type = access.addr().type();

  bool mismatched = (decorators & C2_MISMATCHED) != 0;
  bool requires_atomic_access = (decorators & MO_UNORDERED) == 0;
  bool unaligned = (decorators & C2_UNALIGNED) != 0;
  bool control_dependent = (decorators & C2_CONTROL_DEPENDENT_LOAD) != 0;
  bool unknown_control = (decorators & C2_UNKNOWN_CONTROL_LOAD) != 0;
  bool unsafe = (decorators & C2_UNSAFE_ACCESS) != 0;
  bool immutable = (decorators & C2_IMMUTABLE_MEMORY) != 0;

  MemNode::MemOrd mo = access.mem_node_mo();
  LoadNode::ControlDependency dep = unknown_control ? LoadNode::UnknownControl : LoadNode::DependsOnlyOnTest;

  Node* load;
  if (access.is_parse_access()) {
    C2ParseAccess& parse_access = static_cast<C2ParseAccess&>(access);
    GraphKit* kit = parse_access.kit();
    Node* control = control_dependent ? kit->control() : nullptr;

    if (immutable) {
      Compile* C = Compile::current();
      Node* mem = kit->immutable_memory();
      load = LoadNode::make(kit->gvn(), control, mem, adr,
                            adr_type, val_type, access.type(), mo, dep, requires_atomic_access,
                            unaligned, mismatched, unsafe, access.barrier_data());
      load = kit->gvn().transform(load);
    } else {
      load = kit->make_load(control, adr, val_type, access.type(), adr_type, mo,
                            dep, requires_atomic_access, unaligned, mismatched, unsafe,
                            access.barrier_data());
    }
  } else {
    assert(access.is_opt_access(), "either parse or opt access");
    C2OptAccess& opt_access = static_cast<C2OptAccess&>(access);
    Node* control = control_dependent ? opt_access.ctl() : nullptr;
    MergeMemNode* mm = opt_access.mem();
    PhaseGVN& gvn = opt_access.gvn();
    Node* mem = mm->memory_at(gvn.C->get_alias_index(adr_type));
    load = LoadNode::make(gvn, control, mem, adr, adr_type, val_type, access.type(), mo, dep,
                          requires_atomic_access, unaligned, mismatched, unsafe, access.barrier_data());
    load = gvn.transform(load);
  }
  access.set_raw_access(load);

  return load;
}

class C2AccessFence: public StackObj {
  C2Access& _access;
  Node* _leading_membar;

public:
  C2AccessFence(C2Access& access) :
    _access(access), _leading_membar(nullptr) {
    GraphKit* kit = nullptr;
    if (access.is_parse_access()) {
      C2ParseAccess& parse_access = static_cast<C2ParseAccess&>(access);
      kit = parse_access.kit();
    }
    DecoratorSet decorators = access.decorators();

    bool is_write = (decorators & C2_WRITE_ACCESS) != 0;
    bool is_read = (decorators & C2_READ_ACCESS) != 0;
    bool is_atomic = is_read && is_write;

    bool is_volatile = (decorators & MO_SEQ_CST) != 0;
    bool is_release = (decorators & MO_RELEASE) != 0;

    if (is_atomic) {
      assert(kit != nullptr, "unsupported at optimization time");
      // Memory-model-wise, a LoadStore acts like a little synchronized
      // block, so needs barriers on each side. These don't translate
      // into actual barriers on most machines, but we still need rest of
      // compiler to respect ordering.
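      //
      // Descriptive note (not in the original source): the leading barrier is
      // chosen from the requested ordering. RELEASE gets a release barrier;
      // SEQ_CST gets a full volatile barrier on CPUs that are not
      // multiple-copy atomic (IRIW-sensitive), and a release barrier otherwise.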
      if (is_release) {
        _leading_membar = kit->insert_mem_bar(Op_MemBarRelease);
      } else if (is_volatile) {
        if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
          _leading_membar = kit->insert_mem_bar(Op_MemBarVolatile);
        } else {
          _leading_membar = kit->insert_mem_bar(Op_MemBarRelease);
        }
      }
    } else if (is_write) {
      // If reference is volatile, prevent following memory ops from
      // floating down past the volatile write. Also prevents commoning
      // another volatile read.
      if (is_volatile || is_release) {
        assert(kit != nullptr, "unsupported at optimization time");
        _leading_membar = kit->insert_mem_bar(Op_MemBarRelease);
      }
    } else {
      // Memory barrier to prevent normal and 'unsafe' accesses from
      // bypassing each other. Happens after null checks, so the
      // exception paths do not take memory state from the memory barrier,
      // so there's no problem making a strong assert about mixing users
      // of safe & unsafe memory.
      if (is_volatile && support_IRIW_for_not_multiple_copy_atomic_cpu) {
        assert(kit != nullptr, "unsupported at optimization time");
        _leading_membar = kit->insert_mem_bar(Op_MemBarVolatile);
      }
    }

    if (access.needs_cpu_membar()) {
      assert(kit != nullptr, "unsupported at optimization time");
      kit->insert_mem_bar(Op_MemBarCPUOrder);
    }

    if (is_atomic) {
      // 4984716: MemBars must be inserted before this
      //          memory node in order to avoid a false
      //          dependency which will confuse the scheduler.
      access.set_memory();
    }
  }

  ~C2AccessFence() {
    GraphKit* kit = nullptr;
    if (_access.is_parse_access()) {
      C2ParseAccess& parse_access = static_cast<C2ParseAccess&>(_access);
      kit = parse_access.kit();
    }
    DecoratorSet decorators = _access.decorators();

    bool is_write = (decorators & C2_WRITE_ACCESS) != 0;
    bool is_read = (decorators & C2_READ_ACCESS) != 0;
    bool is_atomic = is_read && is_write;

    bool is_volatile = (decorators & MO_SEQ_CST) != 0;
    bool is_acquire = (decorators & MO_ACQUIRE) != 0;

    // If reference is volatile, prevent following volatile ops from
    // floating up before the volatile access.
    if (_access.needs_cpu_membar()) {
      kit->insert_mem_bar(Op_MemBarCPUOrder);
    }

    if (is_atomic) {
      assert(kit != nullptr, "unsupported at optimization time");
      if (is_acquire || is_volatile) {
        Node* n = _access.raw_access();
        Node* mb = kit->insert_mem_bar(Op_MemBarAcquire, n);
        if (_leading_membar != nullptr) {
          MemBarNode::set_load_store_pair(_leading_membar->as_MemBar(), mb->as_MemBar());
        }
      }
    } else if (is_write) {
      // If not multiple copy atomic, we do the MemBarVolatile before the load.
      if (is_volatile && !support_IRIW_for_not_multiple_copy_atomic_cpu) {
        assert(kit != nullptr, "unsupported at optimization time");
        Node* n = _access.raw_access();
        Node* mb = kit->insert_mem_bar(Op_MemBarVolatile, n); // Use fat membar
        if (_leading_membar != nullptr) {
          MemBarNode::set_store_pair(_leading_membar->as_MemBar(), mb->as_MemBar());
        }
      }
    } else {
      if (is_volatile || is_acquire) {
        assert(kit != nullptr, "unsupported at optimization time");
        Node* n = _access.raw_access();
        assert(_leading_membar == nullptr || support_IRIW_for_not_multiple_copy_atomic_cpu, "no leading membar expected");
        Node* mb = kit->insert_mem_bar(Op_MemBarAcquire, n);
        mb->as_MemBar()->set_trailing_load();
      }
    }
  }
};

Node* BarrierSetC2::store_at(C2Access& access, C2AccessValue& val) const {
  C2AccessFence fence(access);
  resolve_address(access);
  return store_at_resolved(access, val);
}

Node* BarrierSetC2::load_at(C2Access& access, const Type* val_type) const {
  C2AccessFence fence(access);
  resolve_address(access);
  return load_at_resolved(access, val_type);
}

MemNode::MemOrd C2Access::mem_node_mo() const {
  bool is_write = (_decorators & C2_WRITE_ACCESS) != 0;
  bool is_read = (_decorators & C2_READ_ACCESS) != 0;
  if ((_decorators & MO_SEQ_CST) != 0) {
    if (is_write && is_read) {
      // For atomic operations
      return MemNode::seqcst;
    } else if (is_write) {
      return MemNode::release;
    } else {
      assert(is_read, "what else?");
      return MemNode::acquire;
    }
  } else if ((_decorators & MO_RELEASE) != 0) {
    return MemNode::release;
  } else if ((_decorators & MO_ACQUIRE) != 0) {
    return MemNode::acquire;
  } else if (is_write) {
    // Volatile fields need releasing stores.
    // Non-volatile fields also need releasing stores if they hold an
    // object reference, because the object reference might point to
    // a freshly created object.
    // Conservatively release stores of object references.
    return StoreNode::release_if_reference(_type);
  } else {
    return MemNode::unordered;
  }
}

void C2Access::fixup_decorators() {
  bool default_mo = (_decorators & MO_DECORATOR_MASK) == 0;
  bool is_unordered = (_decorators & MO_UNORDERED) != 0 || default_mo;
  bool anonymous = (_decorators & C2_UNSAFE_ACCESS) != 0;

  bool is_read = (_decorators & C2_READ_ACCESS) != 0;
  bool is_write = (_decorators & C2_WRITE_ACCESS) != 0;

  if (AlwaysAtomicAccesses && is_unordered) {
    _decorators &= ~MO_DECORATOR_MASK; // clear the MO bits
    _decorators |= MO_RELAXED;         // Force the MO_RELAXED decorator with AlwaysAtomicAccess
  }

  _decorators = AccessInternal::decorator_fixup(_decorators, _type);

  if (is_read && !is_write && anonymous) {
    // To be valid, unsafe loads may depend on other conditions than
    // the one that guards them: pin the Load node
    _decorators |= C2_CONTROL_DEPENDENT_LOAD;
    _decorators |= C2_UNKNOWN_CONTROL_LOAD;
    const TypePtr* adr_type = _addr.type();
    Node* adr = _addr.node();
    if (!needs_cpu_membar() && adr_type->isa_instptr()) {
      assert(adr_type->meet(TypePtr::NULL_PTR) != adr_type->remove_speculative(), "should be not null");
      intptr_t offset = Type::OffsetBot;
      AddPNode::Ideal_base_and_offset(adr, &gvn(), offset);
      if (offset >= 0) {
        int s = Klass::layout_helper_size_in_bytes(adr_type->isa_instptr()->instance_klass()->layout_helper());
        if (offset < s) {
          // Guaranteed to be a valid access, no need to pin it
          _decorators ^= C2_CONTROL_DEPENDENT_LOAD;
          _decorators ^= C2_UNKNOWN_CONTROL_LOAD;
        }
      }
    }
  }
}

//--------------------------- atomic operations---------------------------------

void BarrierSetC2::pin_atomic_op(C2AtomicParseAccess& access) const {
  // SCMemProjNodes represent the memory state of a LoadStore. Their
  // main role is to prevent LoadStore nodes from being optimized away
  // when their results aren't used.
  assert(access.is_parse_access(), "entry not supported at optimization time");
  C2ParseAccess& parse_access = static_cast<C2ParseAccess&>(access);
  GraphKit* kit = parse_access.kit();
  Node* load_store = access.raw_access();
  assert(load_store != nullptr, "must pin atomic op");
  Node* proj = kit->gvn().transform(new SCMemProjNode(load_store));
  kit->set_memory(proj, access.alias_idx());
}

void C2AtomicParseAccess::set_memory() {
  Node *mem = _kit->memory(_alias_idx);
  _memory = mem;
}

Node* BarrierSetC2::atomic_cmpxchg_val_at_resolved(C2AtomicParseAccess& access, Node* expected_val,
                                                   Node* new_val, const Type* value_type) const {
  GraphKit* kit = access.kit();
  MemNode::MemOrd mo = access.mem_node_mo();
  Node* mem = access.memory();

  Node* adr = access.addr().node();
  const TypePtr* adr_type = access.addr().type();

  Node* load_store = nullptr;

  if (access.is_oop()) {
#ifdef _LP64
    if (adr->bottom_type()->is_ptr_to_narrowoop()) {
      Node *newval_enc = kit->gvn().transform(new EncodePNode(new_val, new_val->bottom_type()->make_narrowoop()));
      Node *oldval_enc = kit->gvn().transform(new EncodePNode(expected_val, expected_val->bottom_type()->make_narrowoop()));
      load_store = new CompareAndExchangeNNode(kit->control(), mem, adr, newval_enc, oldval_enc, adr_type, value_type->make_narrowoop(), mo);
    } else
#endif
    {
      load_store = new CompareAndExchangePNode(kit->control(), mem, adr, new_val, expected_val, adr_type, value_type->is_oopptr(), mo);
    }
  } else {
    switch (access.type()) {
      case T_BYTE: {
        load_store = new CompareAndExchangeBNode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo);
        break;
      }
      case T_SHORT: {
        load_store = new CompareAndExchangeSNode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo);
        break;
      }
      case T_INT: {
        load_store = new CompareAndExchangeINode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo);
        break;
      }
      case T_LONG: {
        load_store = new CompareAndExchangeLNode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo);
        break;
      }
      default:
        ShouldNotReachHere();
    }
  }

  load_store->as_LoadStore()->set_barrier_data(access.barrier_data());
  load_store = kit->gvn().transform(load_store);

  access.set_raw_access(load_store);
  pin_atomic_op(access);

#ifdef _LP64
  if (access.is_oop() && adr->bottom_type()->is_ptr_to_narrowoop()) {
    return kit->gvn().transform(new DecodeNNode(load_store, load_store->get_ptr_type()));
  }
#endif

  return load_store;
}

Node* BarrierSetC2::atomic_cmpxchg_bool_at_resolved(C2AtomicParseAccess& access, Node* expected_val,
                                                    Node* new_val, const Type* value_type) const {
  GraphKit* kit = access.kit();
  DecoratorSet decorators = access.decorators();
  MemNode::MemOrd mo = access.mem_node_mo();
  Node* mem = access.memory();
  bool is_weak_cas = (decorators & C2_WEAK_CMPXCHG) != 0;
  Node* load_store = nullptr;
  Node* adr = access.addr().node();

  if (access.is_oop()) {
#ifdef _LP64
    if (adr->bottom_type()->is_ptr_to_narrowoop()) {
      Node *newval_enc = kit->gvn().transform(new EncodePNode(new_val, new_val->bottom_type()->make_narrowoop()));
      Node *oldval_enc = kit->gvn().transform(new EncodePNode(expected_val, expected_val->bottom_type()->make_narrowoop()));
      if (is_weak_cas) {
        load_store = new WeakCompareAndSwapNNode(kit->control(), mem, adr, newval_enc, oldval_enc, mo);
      } else {
        load_store = new CompareAndSwapNNode(kit->control(), mem, adr, newval_enc, oldval_enc, mo);
      }
    } else
#endif
    {
      if (is_weak_cas) {
        load_store = new WeakCompareAndSwapPNode(kit->control(), mem, adr, new_val, expected_val, mo);
      } else {
        load_store = new CompareAndSwapPNode(kit->control(), mem, adr, new_val, expected_val, mo);
      }
    }
  } else {
    switch(access.type()) {
      case T_BYTE: {
        if (is_weak_cas) {
          load_store = new WeakCompareAndSwapBNode(kit->control(), mem, adr, new_val, expected_val, mo);
        } else {
          load_store = new CompareAndSwapBNode(kit->control(), mem, adr, new_val, expected_val, mo);
        }
        break;
      }
      case T_SHORT: {
        if (is_weak_cas) {
          load_store = new WeakCompareAndSwapSNode(kit->control(), mem, adr, new_val, expected_val, mo);
        } else {
          load_store = new CompareAndSwapSNode(kit->control(), mem, adr, new_val, expected_val, mo);
        }
        break;
      }
      case T_INT: {
        if (is_weak_cas) {
          load_store = new WeakCompareAndSwapINode(kit->control(), mem, adr, new_val, expected_val, mo);
        } else {
          load_store = new CompareAndSwapINode(kit->control(), mem, adr, new_val, expected_val, mo);
        }
        break;
      }
      case T_LONG: {
        if (is_weak_cas) {
          load_store = new WeakCompareAndSwapLNode(kit->control(), mem, adr, new_val, expected_val, mo);
        } else {
          load_store = new CompareAndSwapLNode(kit->control(), mem, adr, new_val, expected_val, mo);
        }
        break;
      }
      default:
        ShouldNotReachHere();
    }
  }

  load_store->as_LoadStore()->set_barrier_data(access.barrier_data());
  load_store = kit->gvn().transform(load_store);

  access.set_raw_access(load_store);
  pin_atomic_op(access);

  return load_store;
}

Node* BarrierSetC2::atomic_xchg_at_resolved(C2AtomicParseAccess& access, Node* new_val, const Type* value_type) const {
  GraphKit* kit = access.kit();
  Node* mem = access.memory();
  Node* adr = access.addr().node();
  const TypePtr* adr_type = access.addr().type();
  Node* load_store = nullptr;

  if (access.is_oop()) {
#ifdef _LP64
    if (adr->bottom_type()->is_ptr_to_narrowoop()) {
      Node *newval_enc = kit->gvn().transform(new EncodePNode(new_val, new_val->bottom_type()->make_narrowoop()));
      load_store = kit->gvn().transform(new GetAndSetNNode(kit->control(), mem, adr, newval_enc, adr_type, value_type->make_narrowoop()));
    } else
#endif
    {
      load_store = new GetAndSetPNode(kit->control(), mem, adr, new_val, adr_type, value_type->is_oopptr());
    }
  } else {
    switch (access.type()) {
      case T_BYTE:
        load_store = new GetAndSetBNode(kit->control(), mem, adr, new_val, adr_type);
        break;
      case T_SHORT:
        load_store = new GetAndSetSNode(kit->control(), mem, adr, new_val, adr_type);
        break;
      case T_INT:
        load_store = new GetAndSetINode(kit->control(), mem, adr, new_val, adr_type);
        break;
      case T_LONG:
        load_store = new GetAndSetLNode(kit->control(), mem, adr, new_val, adr_type);
        break;
      default:
        ShouldNotReachHere();
    }
  }

  load_store->as_LoadStore()->set_barrier_data(access.barrier_data());
  load_store = kit->gvn().transform(load_store);

  access.set_raw_access(load_store);
  pin_atomic_op(access);

#ifdef _LP64
  if (access.is_oop() && adr->bottom_type()->is_ptr_to_narrowoop()) {
    return kit->gvn().transform(new DecodeNNode(load_store, load_store->get_ptr_type()));
  }
#endif

  return load_store;
}

Node* BarrierSetC2::atomic_add_at_resolved(C2AtomicParseAccess& access, Node* new_val, const Type* value_type) const {
  Node* load_store = nullptr;
  GraphKit* kit = access.kit();
  Node* adr = access.addr().node();
  const TypePtr* adr_type = access.addr().type();
  Node* mem = access.memory();

  switch(access.type()) {
    case T_BYTE:
      load_store = new GetAndAddBNode(kit->control(), mem, adr, new_val, adr_type);
      break;
    case T_SHORT:
      load_store = new GetAndAddSNode(kit->control(), mem, adr, new_val, adr_type);
      break;
    case T_INT:
      load_store = new GetAndAddINode(kit->control(), mem, adr, new_val, adr_type);
      break;
    case T_LONG:
      load_store = new GetAndAddLNode(kit->control(), mem, adr, new_val, adr_type);
      break;
    default:
      ShouldNotReachHere();
  }

  load_store->as_LoadStore()->set_barrier_data(access.barrier_data());
  load_store = kit->gvn().transform(load_store);

  access.set_raw_access(load_store);
  pin_atomic_op(access);

  return load_store;
}

Node* BarrierSetC2::atomic_cmpxchg_val_at(C2AtomicParseAccess& access, Node* expected_val,
                                          Node* new_val, const Type* value_type) const {
  C2AccessFence fence(access);
  resolve_address(access);
  return atomic_cmpxchg_val_at_resolved(access, expected_val, new_val, value_type);
}

Node* BarrierSetC2::atomic_cmpxchg_bool_at(C2AtomicParseAccess& access, Node* expected_val,
                                           Node* new_val, const Type* value_type) const {
  C2AccessFence fence(access);
  resolve_address(access);
  return atomic_cmpxchg_bool_at_resolved(access, expected_val, new_val, value_type);
}

Node* BarrierSetC2::atomic_xchg_at(C2AtomicParseAccess& access, Node* new_val, const Type* value_type) const {
  C2AccessFence fence(access);
  resolve_address(access);
  return atomic_xchg_at_resolved(access, new_val, value_type);
}

Node* BarrierSetC2::atomic_add_at(C2AtomicParseAccess& access, Node* new_val, const Type* value_type) const {
  C2AccessFence fence(access);
  resolve_address(access);
  return atomic_add_at_resolved(access, new_val, value_type);
}

int BarrierSetC2::arraycopy_payload_base_offset(bool is_array) {
  // Exclude the header but include the array length to copy by 8-byte words.
  // Can't use base_offset_in_bytes(bt) since basic type is unknown.
  int base_off = is_array ? arrayOopDesc::length_offset_in_bytes() :
                            instanceOopDesc::base_offset_in_bytes();
  // base_off:
  // 8  - 32-bit VM
  // 12 - 64-bit VM, compressed klass
  // 16 - 64-bit VM, normal klass
  if (base_off % BytesPerLong != 0) {
    assert(UseCompressedClassPointers, "");
    assert(!UseCompactObjectHeaders, "");
    if (is_array) {
      // Exclude length to copy by 8-byte words.
      base_off += sizeof(int);
    } else {
      // Include klass to copy by 8-byte words.
      base_off = instanceOopDesc::klass_offset_in_bytes();
    }
    assert(base_off % BytesPerLong == 0, "expect 8 bytes alignment");
  }
  return base_off;
}

void BarrierSetC2::clone(GraphKit* kit, Node* src_base, Node* dst_base, Node* size, bool is_array) const {
  int base_off = arraycopy_payload_base_offset(is_array);
  Node* payload_size = size;
  Node* offset = kit->MakeConX(base_off);
  payload_size = kit->gvn().transform(new SubXNode(payload_size, offset));
  if (is_array) {
    // Ensure the array payload size is rounded up to the next BytesPerLong
    // multiple when converting to double-words. This is necessary because array
    // size does not include object alignment padding, so it might not be a
    // multiple of BytesPerLong for sub-long element types.
    payload_size = kit->gvn().transform(new AddXNode(payload_size, kit->MakeConX(BytesPerLong - 1)));
  }
  payload_size = kit->gvn().transform(new URShiftXNode(payload_size, kit->intcon(LogBytesPerLong)));
  ArrayCopyNode* ac = ArrayCopyNode::make(kit, false, src_base, offset, dst_base, offset, payload_size, true, false);
  if (is_array) {
    ac->set_clone_array();
  } else {
    ac->set_clone_inst();
  }
  Node* n = kit->gvn().transform(ac);
  if (n == ac) {
    const TypePtr* raw_adr_type = TypeRawPtr::BOTTOM;
    ac->set_adr_type(TypeRawPtr::BOTTOM);
    kit->set_predefined_output_for_runtime_call(ac, ac->in(TypeFunc::Memory), raw_adr_type);
  } else {
    kit->set_all_memory(n);
  }
}

Node* BarrierSetC2::obj_allocate(PhaseMacroExpand* macro, Node* mem, Node* toobig_false, Node* size_in_bytes,
                                 Node*& i_o, Node*& needgc_ctrl,
                                 Node*& fast_oop_ctrl, Node*& fast_oop_rawmem,
                                 intx prefetch_lines) const {
  assert(UseTLAB, "Only for TLAB enabled allocations");

  Node* thread = macro->transform_later(new ThreadLocalNode());
  Node* tlab_top_adr = macro->basic_plus_adr(macro->top()/*not oop*/, thread, in_bytes(JavaThread::tlab_top_offset()));
  Node* tlab_end_adr = macro->basic_plus_adr(macro->top()/*not oop*/, thread, in_bytes(JavaThread::tlab_end_offset()));

  // Load TLAB end.
  //
  // Note: We set the control input on "tlab_end" and "old_tlab_top" to work around
  //       a bug where these values were being moved across
  //       a safepoint. These are not oops, so they cannot be included in the oop
  //       map, but they can be changed by a GC. The proper way to fix this would
  //       be to set the raw memory state when generating a SafepointNode. However
  //       this will require extensive changes to the loop optimization in order to
  //       prevent a degradation of the optimization.
  //       See comment in memnode.hpp, around line 227 in class LoadPNode.
  Node* tlab_end = macro->make_load(toobig_false, mem, tlab_end_adr, 0, TypeRawPtr::BOTTOM, T_ADDRESS);

  // Load the TLAB top.
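  // Descriptive note (not in the original source): this is a raw, unordered
  // load whose control input is pinned to toobig_false for the same safepoint
  // reason given for "tlab_end" above.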
  Node* old_tlab_top = new LoadPNode(toobig_false, mem, tlab_top_adr, TypeRawPtr::BOTTOM, TypeRawPtr::BOTTOM, MemNode::unordered);
  macro->transform_later(old_tlab_top);

  // Add to heap top to get a new TLAB top
  Node* new_tlab_top = new AddPNode(macro->top(), old_tlab_top, size_in_bytes);
  macro->transform_later(new_tlab_top);

  // Check against TLAB end
  Node* tlab_full = new CmpPNode(new_tlab_top, tlab_end);
  macro->transform_later(tlab_full);

  Node* needgc_bol = new BoolNode(tlab_full, BoolTest::ge);
  macro->transform_later(needgc_bol);
  IfNode* needgc_iff = new IfNode(toobig_false, needgc_bol, PROB_UNLIKELY_MAG(4), COUNT_UNKNOWN);
  macro->transform_later(needgc_iff);

  // Plug the failing-heap-space-need-gc test into the slow-path region
  Node* needgc_true = new IfTrueNode(needgc_iff);
  macro->transform_later(needgc_true);
  needgc_ctrl = needgc_true;

  // No need for a GC.
  Node* needgc_false = new IfFalseNode(needgc_iff);
  macro->transform_later(needgc_false);

  // Fast path:
  i_o = macro->prefetch_allocation(i_o, needgc_false, mem,
                                   old_tlab_top, new_tlab_top, prefetch_lines);

  // Store the modified TLAB top back down.
  Node* store_tlab_top = new StorePNode(needgc_false, mem, tlab_top_adr,
                                        TypeRawPtr::BOTTOM, new_tlab_top, MemNode::unordered);
  macro->transform_later(store_tlab_top);

  fast_oop_ctrl = needgc_false;
  fast_oop_rawmem = store_tlab_top;
  return old_tlab_top;
}

static const TypeFunc* clone_type() {
  // Create input type (domain)
  int argcnt = NOT_LP64(3) LP64_ONLY(4);
  const Type** const domain_fields = TypeTuple::fields(argcnt);
  int argp = TypeFunc::Parms;
  domain_fields[argp++] = TypeInstPtr::NOTNULL;  // src
  domain_fields[argp++] = TypeInstPtr::NOTNULL;  // dst
  domain_fields[argp++] = TypeX_X;               // size lower
  LP64_ONLY(domain_fields[argp++] = Type::HALF); // size upper
  assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
  const TypeTuple* const domain = TypeTuple::make(TypeFunc::Parms + argcnt, domain_fields);

  // Create result type (range)
  const Type** const range_fields = TypeTuple::fields(0);
  const TypeTuple* const range = TypeTuple::make(TypeFunc::Parms + 0, range_fields);

  return TypeFunc::make(domain, range);
}

#define XTOP LP64_ONLY(COMMA phase->top())

void BarrierSetC2::clone_in_runtime(PhaseMacroExpand* phase, ArrayCopyNode* ac,
                                    address clone_addr, const char* clone_name) const {
  Node* const ctrl = ac->in(TypeFunc::Control);
  Node* const mem  = ac->in(TypeFunc::Memory);
  Node* const src  = ac->in(ArrayCopyNode::Src);
  Node* const dst  = ac->in(ArrayCopyNode::Dest);
  Node* const size = ac->in(ArrayCopyNode::Length);

  assert(size->bottom_type()->base() == Type_X,
         "Should be of object size type (int for 32 bits, long for 64 bits)");

  // The native clone we are calling here expects the object size in words.
  // Add header/offset size to payload size to get object size.
  Node* const base_offset = phase->MakeConX(arraycopy_payload_base_offset(ac->is_clone_array()) >> LogBytesPerLong);
  Node* const full_size = phase->transform_later(new AddXNode(size, base_offset));
  // HeapAccess<>::clone expects size in heap words.
  // For 64-bit platforms, this is a no-operation.
  // For 32-bit platforms, we need to multiply full_size by HeapWordsPerLong (2).
  Node* const full_size_in_heap_words = phase->transform_later(new LShiftXNode(full_size, phase->intcon(LogHeapWordsPerLong)));

  Node* const call = phase->make_leaf_call(ctrl,
                                           mem,
                                           clone_type(),
                                           clone_addr,
                                           clone_name,
                                           TypeRawPtr::BOTTOM,
                                           src, dst, full_size_in_heap_words XTOP);
  phase->transform_later(call);
  phase->igvn().replace_node(ac, call);
}

void BarrierSetC2::clone_at_expansion(PhaseMacroExpand* phase, ArrayCopyNode* ac) const {
  Node* ctrl = ac->in(TypeFunc::Control);
  Node* mem = ac->in(TypeFunc::Memory);
  Node* src = ac->in(ArrayCopyNode::Src);
  Node* src_offset = ac->in(ArrayCopyNode::SrcPos);
  Node* dest = ac->in(ArrayCopyNode::Dest);
  Node* dest_offset = ac->in(ArrayCopyNode::DestPos);
  Node* length = ac->in(ArrayCopyNode::Length);

  Node* payload_src = phase->basic_plus_adr(src, src_offset);
  Node* payload_dst = phase->basic_plus_adr(dest, dest_offset);

  const char* copyfunc_name = "arraycopy";
  address copyfunc_addr = phase->basictype2arraycopy(T_LONG, nullptr, nullptr, true, copyfunc_name, true);

  const TypePtr* raw_adr_type = TypeRawPtr::BOTTOM;
  const TypeFunc* call_type = OptoRuntime::fast_arraycopy_Type();

  Node* call = phase->make_leaf_call(ctrl, mem, call_type, copyfunc_addr, copyfunc_name, raw_adr_type, payload_src, payload_dst, length XTOP);
  phase->transform_later(call);

  phase->igvn().replace_node(ac, call);
}

#undef XTOP

void BarrierSetC2::compute_liveness_at_stubs() const {
  ResourceMark rm;
  Compile* const C = Compile::current();
  Arena* const A = Thread::current()->resource_area();
  PhaseCFG* const cfg = C->cfg();
  PhaseRegAlloc* const regalloc = C->regalloc();
  RegMask* const live = NEW_ARENA_ARRAY(A, RegMask, cfg->number_of_blocks() * sizeof(RegMask));
  BarrierSetAssembler* const bs = BarrierSet::barrier_set()->barrier_set_assembler();
  BarrierSetC2State* bs_state = barrier_set_state();
  Block_List worklist;

  for (uint i = 0; i < cfg->number_of_blocks(); ++i) {
    new ((void*)(live + i)) RegMask();
    worklist.push(cfg->get_block(i));
  }

  while (worklist.size() > 0) {
    const Block* const block = worklist.pop();
    RegMask& old_live = live[block->_pre_order];
    RegMask new_live;

    // Initialize to union of successors
    for (uint i = 0; i < block->_num_succs; i++) {
      const uint succ_id = block->_succs[i]->_pre_order;
      new_live.OR(live[succ_id]);
    }

    // Walk block backwards, computing liveness
    for (int i = block->number_of_nodes() - 1; i >= 0; --i) {
      const Node* const node = block->get_node(i);

      // If this node tracks out-liveness, update it
      if (!bs_state->needs_livein_data()) {
        RegMask* const regs = bs_state->live(node);
        if (regs != nullptr) {
          regs->OR(new_live);
        }
      }

      // Remove def bits
      const OptoReg::Name first = bs->refine_register(node, regalloc->get_reg_first(node));
      const OptoReg::Name second = bs->refine_register(node, regalloc->get_reg_second(node));
      if (first != OptoReg::Bad) {
        new_live.Remove(first);
      }
      if (second != OptoReg::Bad) {
        new_live.Remove(second);
      }

      // Add use bits
      for (uint j = 1; j < node->req(); ++j) {
        const Node* const use = node->in(j);
        const OptoReg::Name first = bs->refine_register(use, regalloc->get_reg_first(use));
        const OptoReg::Name second = bs->refine_register(use, regalloc->get_reg_second(use));
        if (first != OptoReg::Bad) {
          new_live.Insert(first);
        }
        if (second != OptoReg::Bad) {
          new_live.Insert(second);
        }
      }

      // If this node tracks in-liveness, update it
      if (bs_state->needs_livein_data()) {
        RegMask* const regs = bs_state->live(node);
        if (regs != nullptr) {
          regs->OR(new_live);
        }
      }
    }

    // Now at block top, see if we have any changes
    new_live.SUBTRACT(old_live);
    if (new_live.is_NotEmpty()) {
      // Liveness has refined, update and propagate to prior blocks
      old_live.OR(new_live);
      for (uint i = 1; i < block->num_preds(); ++i) {
        Block* const pred = cfg->get_block_for_node(block->pred(i));
        worklist.push(pred);
      }
    }
  }
}