/*
 * Copyright (c) 2018, 2023, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "code/vmreg.inline.hpp"
#include "gc/shared/barrierSet.hpp"
#include "gc/shared/tlab_globals.hpp"
#include "gc/shared/c2/barrierSetC2.hpp"
#include "opto/arraycopynode.hpp"
#include "opto/block.hpp"
#include "opto/convertnode.hpp"
#include "opto/graphKit.hpp"
#include "opto/idealKit.hpp"
#include "opto/macro.hpp"
#include "opto/narrowptrnode.hpp"
#include "opto/output.hpp"
#include "opto/regalloc.hpp"
#include "opto/runtime.hpp"
#include "utilities/macros.hpp"
#include CPU_HEADER(gc/shared/barrierSetAssembler)

// By default this is a no-op.
void BarrierSetC2::resolve_address(C2Access& access) const { }

void* C2ParseAccess::barrier_set_state() const {
  return _kit->barrier_set_state();
}

PhaseGVN& C2ParseAccess::gvn() const { return _kit->gvn(); }

bool C2Access::needs_cpu_membar() const {
  bool mismatched = (_decorators & C2_MISMATCHED) != 0;
  bool is_unordered = (_decorators & MO_UNORDERED) != 0;

  bool anonymous = (_decorators & C2_UNSAFE_ACCESS) != 0;
  bool in_heap = (_decorators & IN_HEAP) != 0;
  bool in_native = (_decorators & IN_NATIVE) != 0;
  bool is_mixed = !in_heap && !in_native;

  bool is_write = (_decorators & C2_WRITE_ACCESS) != 0;
  bool is_read = (_decorators & C2_READ_ACCESS) != 0;
  bool is_atomic = is_read && is_write;

  if (is_atomic) {
    // Atomics always need to be wrapped in CPU membars
    return true;
  }

  if (anonymous) {
    // We will need memory barriers unless we can determine a unique
    // alias category for this reference. (Note: If for some reason
    // the barriers get omitted and the unsafe reference begins to "pollute"
    // the alias analysis of the rest of the graph, either Compile::can_alias
    // or Compile::must_alias will throw a diagnostic assert.)
    if (is_mixed || !is_unordered || (mismatched && !_addr.type()->isa_aryptr())) {
      return true;
    }
  } else {
    assert(!is_mixed, "not unsafe");
  }

  return false;
}

static BarrierSetC2State* barrier_set_state() {
  return reinterpret_cast<BarrierSetC2State*>(Compile::current()->barrier_set_state());
}

RegMask& BarrierStubC2::live() const {
  return *barrier_set_state()->live(_node);
}

BarrierStubC2::BarrierStubC2(const MachNode* node)
  : _node(node),
    _entry(),
    _continuation(),
    _preserve(live()) {}

Label* BarrierStubC2::entry() {
  // The _entry will never be bound when in_scratch_emit_size() is true.
  // However, we still need to return a label that is not bound now, but
  // will eventually be bound. Any eventually bound label will do, as it
  // will only act as a placeholder, so we return the _continuation label.
  return Compile::current()->output()->in_scratch_emit_size() ? &_continuation : &_entry;
}

Label* BarrierStubC2::continuation() {
  return &_continuation;
}

uint8_t BarrierStubC2::barrier_data() const {
  return _node->barrier_data();
}

void BarrierStubC2::preserve(Register r) {
  const VMReg vm_reg = r->as_VMReg();
  assert(vm_reg->is_Register(), "r must be a general-purpose register");
  _preserve.Insert(OptoReg::as_OptoReg(vm_reg));
}

void BarrierStubC2::dont_preserve(Register r) {
  VMReg vm_reg = r->as_VMReg();
  assert(vm_reg->is_Register(), "r must be a general-purpose register");
  // Subtract the given register and all its sub-registers (e.g. {R11, R11_H}
  // for r11 in aarch64).
  do {
    _preserve.Remove(OptoReg::as_OptoReg(vm_reg));
    vm_reg = vm_reg->next();
  } while (vm_reg->is_Register() && !vm_reg->is_concrete());
}

const RegMask& BarrierStubC2::preserve_set() const {
  return _preserve;
}

Node* BarrierSetC2::store_at_resolved(C2Access& access, C2AccessValue& val) const {
  DecoratorSet decorators = access.decorators();

  bool mismatched = (decorators & C2_MISMATCHED) != 0;
  bool unaligned = (decorators & C2_UNALIGNED) != 0;
  bool unsafe = (decorators & C2_UNSAFE_ACCESS) != 0;
  bool requires_atomic_access = (decorators & MO_UNORDERED) == 0;

  MemNode::MemOrd mo = access.mem_node_mo();

  Node* store;
  BasicType bt = access.type();
  if (access.is_parse_access()) {
    C2ParseAccess& parse_access = static_cast<C2ParseAccess&>(access);

    GraphKit* kit = parse_access.kit();
    if (bt == T_DOUBLE) {
      Node* new_val = kit->dprecision_rounding(val.node());
      val.set_node(new_val);
    }

    store = kit->store_to_memory(kit->control(), access.addr().node(), val.node(), bt,
                                 access.addr().type(), mo, requires_atomic_access, unaligned,
                                 mismatched, unsafe, access.barrier_data());
  } else {
    assert(access.is_opt_access(), "either parse or opt access");
    C2OptAccess& opt_access = static_cast<C2OptAccess&>(access);
    Node* ctl = opt_access.ctl();
    MergeMemNode* mm = opt_access.mem();
    PhaseGVN& gvn = opt_access.gvn();
    const TypePtr* adr_type = access.addr().type();
    int alias = gvn.C->get_alias_index(adr_type);
    Node* mem = mm->memory_at(alias);

    StoreNode* st = StoreNode::make(gvn, ctl, mem, access.addr().node(), adr_type, val.node(), bt, mo, requires_atomic_access);
    if (unaligned) {
      st->set_unaligned_access();
    }
    if (mismatched) {
      st->set_mismatched_access();
    }
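    // Note: the GC-specific barrier data is propagated to this
    // optimizer-created store as well, so later barrier analysis and
    // expansion can see it on this node too.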
    st->set_barrier_data(access.barrier_data());
    store = gvn.transform(st);
    if (store == st) {
      mm->set_memory_at(alias, st);
    }
  }
  access.set_raw_access(store);

  return store;
}

Node* BarrierSetC2::load_at_resolved(C2Access& access, const Type* val_type) const {
  DecoratorSet decorators = access.decorators();

  Node* adr = access.addr().node();
  const TypePtr* adr_type = access.addr().type();

  bool mismatched = (decorators & C2_MISMATCHED) != 0;
  bool requires_atomic_access = (decorators & MO_UNORDERED) == 0;
  bool unaligned = (decorators & C2_UNALIGNED) != 0;
  bool control_dependent = (decorators & C2_CONTROL_DEPENDENT_LOAD) != 0;
  bool unknown_control = (decorators & C2_UNKNOWN_CONTROL_LOAD) != 0;
  bool unsafe = (decorators & C2_UNSAFE_ACCESS) != 0;
  bool immutable = (decorators & C2_IMMUTABLE_MEMORY) != 0;

  MemNode::MemOrd mo = access.mem_node_mo();
  LoadNode::ControlDependency dep = unknown_control ? LoadNode::UnknownControl : LoadNode::DependsOnlyOnTest;

  Node* load;
  if (access.is_parse_access()) {
    C2ParseAccess& parse_access = static_cast<C2ParseAccess&>(access);
    GraphKit* kit = parse_access.kit();
    Node* control = control_dependent ? kit->control() : nullptr;

    if (immutable) {
      Compile* C = Compile::current();
      Node* mem = kit->immutable_memory();
      load = LoadNode::make(kit->gvn(), control, mem, adr,
                            adr_type, val_type, access.type(), mo, dep, requires_atomic_access,
                            unaligned, mismatched, unsafe, access.barrier_data());
      load = kit->gvn().transform(load);
    } else {
      load = kit->make_load(control, adr, val_type, access.type(), adr_type, mo,
                            dep, requires_atomic_access, unaligned, mismatched, unsafe,
                            access.barrier_data());
    }
  } else {
    assert(access.is_opt_access(), "either parse or opt access");
    C2OptAccess& opt_access = static_cast<C2OptAccess&>(access);
    Node* control = control_dependent ? opt_access.ctl() : nullptr;
    MergeMemNode* mm = opt_access.mem();
    PhaseGVN& gvn = opt_access.gvn();
    Node* mem = mm->memory_at(gvn.C->get_alias_index(adr_type));
    load = LoadNode::make(gvn, control, mem, adr, adr_type, val_type, access.type(), mo, dep,
                          requires_atomic_access, unaligned, mismatched, unsafe, access.barrier_data());
    load = gvn.transform(load);
  }
  access.set_raw_access(load);

  return load;
}

class C2AccessFence: public StackObj {
  C2Access& _access;
  Node* _leading_membar;

public:
  C2AccessFence(C2Access& access) :
    _access(access), _leading_membar(nullptr) {
    GraphKit* kit = nullptr;
    if (access.is_parse_access()) {
      C2ParseAccess& parse_access = static_cast<C2ParseAccess&>(access);
      kit = parse_access.kit();
    }
    DecoratorSet decorators = access.decorators();

    bool is_write = (decorators & C2_WRITE_ACCESS) != 0;
    bool is_read = (decorators & C2_READ_ACCESS) != 0;
    bool is_atomic = is_read && is_write;

    bool is_volatile = (decorators & MO_SEQ_CST) != 0;
    bool is_release = (decorators & MO_RELEASE) != 0;

    if (is_atomic) {
      assert(kit != nullptr, "unsupported at optimization time");
      // Memory-model-wise, a LoadStore acts like a little synchronized
      // block, so it needs barriers on each side. These don't translate
      // into actual barriers on most machines, but we still need the rest
      // of the compiler to respect the ordering.
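      // For example, a seq_cst compare-and-swap typically parses roughly as
      //   MemBarRelease ... CompareAndSwapX -> SCMemProj ... MemBarAcquire,
      // with the leading and trailing membars paired up in the destructor
      // below so later phases can treat the whole pattern as a unit.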
      if (is_release) {
        _leading_membar = kit->insert_mem_bar(Op_MemBarRelease);
      } else if (is_volatile) {
        if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
          _leading_membar = kit->insert_mem_bar(Op_MemBarVolatile);
        } else {
          _leading_membar = kit->insert_mem_bar(Op_MemBarRelease);
        }
      }
    } else if (is_write) {
      // If reference is volatile, prevent following memory ops from
      // floating down past the volatile write. Also prevents commoning
      // another volatile read.
      if (is_volatile || is_release) {
        assert(kit != nullptr, "unsupported at optimization time");
        _leading_membar = kit->insert_mem_bar(Op_MemBarRelease);
      }
    } else {
      // Memory barrier to prevent normal and 'unsafe' accesses from
      // bypassing each other. Happens after null checks, so the
      // exception paths do not take memory state from the memory barrier,
      // so there are no problems making a strong assert about mixing users
      // of safe & unsafe memory.
      if (is_volatile && support_IRIW_for_not_multiple_copy_atomic_cpu) {
        assert(kit != nullptr, "unsupported at optimization time");
        _leading_membar = kit->insert_mem_bar(Op_MemBarVolatile);
      }
    }

    if (access.needs_cpu_membar()) {
      assert(kit != nullptr, "unsupported at optimization time");
      kit->insert_mem_bar(Op_MemBarCPUOrder);
    }

    if (is_atomic) {
      // 4984716: MemBars must be inserted before this
      // memory node in order to avoid a false
      // dependency which will confuse the scheduler.
      access.set_memory();
    }
  }

  ~C2AccessFence() {
    GraphKit* kit = nullptr;
    if (_access.is_parse_access()) {
      C2ParseAccess& parse_access = static_cast<C2ParseAccess&>(_access);
      kit = parse_access.kit();
    }
    DecoratorSet decorators = _access.decorators();

    bool is_write = (decorators & C2_WRITE_ACCESS) != 0;
    bool is_read = (decorators & C2_READ_ACCESS) != 0;
    bool is_atomic = is_read && is_write;

    bool is_volatile = (decorators & MO_SEQ_CST) != 0;
    bool is_acquire = (decorators & MO_ACQUIRE) != 0;

    // If reference is volatile, prevent following volatile ops from
    // floating up before the volatile access.
    if (_access.needs_cpu_membar()) {
      kit->insert_mem_bar(Op_MemBarCPUOrder);
    }

    if (is_atomic) {
      assert(kit != nullptr, "unsupported at optimization time");
      if (is_acquire || is_volatile) {
        Node* n = _access.raw_access();
        Node* mb = kit->insert_mem_bar(Op_MemBarAcquire, n);
        if (_leading_membar != nullptr) {
          MemBarNode::set_load_store_pair(_leading_membar->as_MemBar(), mb->as_MemBar());
        }
      }
    } else if (is_write) {
      // If not multiple copy atomic, we do the MemBarVolatile before the load.
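      // That is, when support_IRIW_for_not_multiple_copy_atomic_cpu is false
      // the trailing MemBarVolatile is attached to the volatile store here;
      // otherwise it is emitted as a leading barrier on volatile loads
      // instead (see the read paths in the constructor above).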
      if (is_volatile && !support_IRIW_for_not_multiple_copy_atomic_cpu) {
        assert(kit != nullptr, "unsupported at optimization time");
        Node* n = _access.raw_access();
        Node* mb = kit->insert_mem_bar(Op_MemBarVolatile, n); // Use fat membar
        if (_leading_membar != nullptr) {
          MemBarNode::set_store_pair(_leading_membar->as_MemBar(), mb->as_MemBar());
        }
      }
    } else {
      if (is_volatile || is_acquire) {
        assert(kit != nullptr, "unsupported at optimization time");
        Node* n = _access.raw_access();
        assert(_leading_membar == nullptr || support_IRIW_for_not_multiple_copy_atomic_cpu, "no leading membar expected");
        Node* mb = kit->insert_mem_bar(Op_MemBarAcquire, n);
        mb->as_MemBar()->set_trailing_load();
      }
    }
  }
};

Node* BarrierSetC2::store_at(C2Access& access, C2AccessValue& val) const {
  C2AccessFence fence(access);
  resolve_address(access);
  return store_at_resolved(access, val);
}

Node* BarrierSetC2::load_at(C2Access& access, const Type* val_type) const {
  C2AccessFence fence(access);
  resolve_address(access);
  return load_at_resolved(access, val_type);
}

MemNode::MemOrd C2Access::mem_node_mo() const {
  bool is_write = (_decorators & C2_WRITE_ACCESS) != 0;
  bool is_read = (_decorators & C2_READ_ACCESS) != 0;
  if ((_decorators & MO_SEQ_CST) != 0) {
    if (is_write && is_read) {
      // For atomic operations
      return MemNode::seqcst;
    } else if (is_write) {
      return MemNode::release;
    } else {
      assert(is_read, "what else?");
      return MemNode::acquire;
    }
  } else if ((_decorators & MO_RELEASE) != 0) {
    return MemNode::release;
  } else if ((_decorators & MO_ACQUIRE) != 0) {
    return MemNode::acquire;
  } else if (is_write) {
    // Volatile fields need releasing stores.
    // Non-volatile fields also need releasing stores if they hold an
    // object reference, because the object reference might point to
    // a freshly created object.
    // Conservatively release stores of object references.
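    // For example, an unordered store of an oop field becomes
    // MemNode::release, while an unordered int store stays unordered.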
    return StoreNode::release_if_reference(_type);
  } else {
    return MemNode::unordered;
  }
}

void C2Access::fixup_decorators() {
  bool default_mo = (_decorators & MO_DECORATOR_MASK) == 0;
  bool is_unordered = (_decorators & MO_UNORDERED) != 0 || default_mo;
  bool anonymous = (_decorators & C2_UNSAFE_ACCESS) != 0;

  bool is_read = (_decorators & C2_READ_ACCESS) != 0;
  bool is_write = (_decorators & C2_WRITE_ACCESS) != 0;

  if (AlwaysAtomicAccesses && is_unordered) {
    _decorators &= ~MO_DECORATOR_MASK; // clear the MO bits
    _decorators |= MO_RELAXED; // Force the MO_RELAXED decorator with AlwaysAtomicAccesses
  }

  _decorators = AccessInternal::decorator_fixup(_decorators, _type);

  if (is_read && !is_write && anonymous) {
    // To be valid, unsafe loads may depend on other conditions than
    // the one that guards them: pin the Load node
    _decorators |= C2_CONTROL_DEPENDENT_LOAD;
    _decorators |= C2_UNKNOWN_CONTROL_LOAD;
    const TypePtr* adr_type = _addr.type();
    Node* adr = _addr.node();
    if (!needs_cpu_membar() && adr_type->isa_instptr()) {
      assert(adr_type->meet(TypePtr::NULL_PTR) != adr_type->remove_speculative(), "should be not null");
      intptr_t offset = Type::OffsetBot;
      AddPNode::Ideal_base_and_offset(adr, &gvn(), offset);
      if (offset >= 0) {
        int s = Klass::layout_helper_size_in_bytes(adr_type->isa_instptr()->instance_klass()->layout_helper());
        if (offset < s) {
          // Guaranteed to be a valid access, no need to pin it
          _decorators ^= C2_CONTROL_DEPENDENT_LOAD;
          _decorators ^= C2_UNKNOWN_CONTROL_LOAD;
        }
      }
    }
  }
}

//--------------------------- atomic operations---------------------------------

void BarrierSetC2::pin_atomic_op(C2AtomicParseAccess& access) const {
  // SCMemProjNodes represent the memory state of a LoadStore. Their
  // main role is to prevent LoadStore nodes from being optimized away
  // when their results aren't used.
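  // The resulting shape is roughly LoadStoreX -> SCMemProj, with the
  // projection installed as the current memory state for this alias index.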
  assert(access.is_parse_access(), "entry not supported at optimization time");
  C2ParseAccess& parse_access = static_cast<C2ParseAccess&>(access);
  GraphKit* kit = parse_access.kit();
  Node* load_store = access.raw_access();
  assert(load_store != nullptr, "must pin atomic op");
  Node* proj = kit->gvn().transform(new SCMemProjNode(load_store));
  kit->set_memory(proj, access.alias_idx());
}

void C2AtomicParseAccess::set_memory() {
  Node *mem = _kit->memory(_alias_idx);
  _memory = mem;
}

Node* BarrierSetC2::atomic_cmpxchg_val_at_resolved(C2AtomicParseAccess& access, Node* expected_val,
                                                   Node* new_val, const Type* value_type) const {
  GraphKit* kit = access.kit();
  MemNode::MemOrd mo = access.mem_node_mo();
  Node* mem = access.memory();

  Node* adr = access.addr().node();
  const TypePtr* adr_type = access.addr().type();

  Node* load_store = nullptr;

  if (access.is_oop()) {
#ifdef _LP64
    if (adr->bottom_type()->is_ptr_to_narrowoop()) {
      Node *newval_enc = kit->gvn().transform(new EncodePNode(new_val, new_val->bottom_type()->make_narrowoop()));
      Node *oldval_enc = kit->gvn().transform(new EncodePNode(expected_val, expected_val->bottom_type()->make_narrowoop()));
      load_store = new CompareAndExchangeNNode(kit->control(), mem, adr, newval_enc, oldval_enc, adr_type, value_type->make_narrowoop(), mo);
    } else
#endif
    {
      load_store = new CompareAndExchangePNode(kit->control(), mem, adr, new_val, expected_val, adr_type, value_type->is_oopptr(), mo);
    }
  } else {
    switch (access.type()) {
      case T_BYTE: {
        load_store = new CompareAndExchangeBNode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo);
        break;
      }
      case T_SHORT: {
        load_store = new CompareAndExchangeSNode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo);
        break;
      }
      case T_INT: {
        load_store = new CompareAndExchangeINode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo);
        break;
      }
      case T_LONG: {
        load_store = new CompareAndExchangeLNode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo);
        break;
      }
      default:
        ShouldNotReachHere();
    }
  }

  load_store->as_LoadStore()->set_barrier_data(access.barrier_data());
  load_store = kit->gvn().transform(load_store);

  access.set_raw_access(load_store);
  pin_atomic_op(access);

#ifdef _LP64
  if (access.is_oop() && adr->bottom_type()->is_ptr_to_narrowoop()) {
    return kit->gvn().transform(new DecodeNNode(load_store, load_store->get_ptr_type()));
  }
#endif

  return load_store;
}

Node* BarrierSetC2::atomic_cmpxchg_bool_at_resolved(C2AtomicParseAccess& access, Node* expected_val,
                                                    Node* new_val, const Type* value_type) const {
  GraphKit* kit = access.kit();
  DecoratorSet decorators = access.decorators();
  MemNode::MemOrd mo = access.mem_node_mo();
  Node* mem = access.memory();
  bool is_weak_cas = (decorators & C2_WEAK_CMPXCHG) != 0;
  Node* load_store = nullptr;
  Node* adr = access.addr().node();

  if (access.is_oop()) {
#ifdef _LP64
    if (adr->bottom_type()->is_ptr_to_narrowoop()) {
      Node *newval_enc = kit->gvn().transform(new EncodePNode(new_val, new_val->bottom_type()->make_narrowoop()));
      Node *oldval_enc = kit->gvn().transform(new EncodePNode(expected_val, expected_val->bottom_type()->make_narrowoop()));
      if (is_weak_cas) {
        load_store = new WeakCompareAndSwapNNode(kit->control(), mem, adr, newval_enc, oldval_enc, mo);
      } else {
        load_store = new CompareAndSwapNNode(kit->control(), mem, adr, newval_enc, oldval_enc, mo);
      }
    } else
#endif
    {
      if (is_weak_cas) {
        load_store = new WeakCompareAndSwapPNode(kit->control(), mem, adr, new_val, expected_val, mo);
      } else {
        load_store = new CompareAndSwapPNode(kit->control(), mem, adr, new_val, expected_val, mo);
      }
    }
  } else {
    switch(access.type()) {
      case T_BYTE: {
        if (is_weak_cas) {
          load_store = new WeakCompareAndSwapBNode(kit->control(), mem, adr, new_val, expected_val, mo);
        } else {
          load_store = new CompareAndSwapBNode(kit->control(), mem, adr, new_val, expected_val, mo);
        }
        break;
      }
      case T_SHORT: {
        if (is_weak_cas) {
          load_store = new WeakCompareAndSwapSNode(kit->control(), mem, adr, new_val, expected_val, mo);
        } else {
          load_store = new CompareAndSwapSNode(kit->control(), mem, adr, new_val, expected_val, mo);
        }
        break;
      }
      case T_INT: {
        if (is_weak_cas) {
          load_store = new WeakCompareAndSwapINode(kit->control(), mem, adr, new_val, expected_val, mo);
        } else {
          load_store = new CompareAndSwapINode(kit->control(), mem, adr, new_val, expected_val, mo);
        }
        break;
      }
      case T_LONG: {
        if (is_weak_cas) {
          load_store = new WeakCompareAndSwapLNode(kit->control(), mem, adr, new_val, expected_val, mo);
        } else {
          load_store = new CompareAndSwapLNode(kit->control(), mem, adr, new_val, expected_val, mo);
        }
        break;
      }
      default:
        ShouldNotReachHere();
    }
  }

  load_store->as_LoadStore()->set_barrier_data(access.barrier_data());
  load_store = kit->gvn().transform(load_store);

  access.set_raw_access(load_store);
  pin_atomic_op(access);

  return load_store;
}

Node* BarrierSetC2::atomic_xchg_at_resolved(C2AtomicParseAccess& access, Node* new_val, const Type* value_type) const {
  GraphKit* kit = access.kit();
  Node* mem = access.memory();
  Node* adr = access.addr().node();
  const TypePtr* adr_type = access.addr().type();
  Node* load_store = nullptr;

  if (access.is_oop()) {
#ifdef _LP64
    if (adr->bottom_type()->is_ptr_to_narrowoop()) {
      Node *newval_enc = kit->gvn().transform(new EncodePNode(new_val, new_val->bottom_type()->make_narrowoop()));
      load_store = kit->gvn().transform(new GetAndSetNNode(kit->control(), mem, adr, newval_enc, adr_type, value_type->make_narrowoop()));
    } else
#endif
    {
      load_store = new GetAndSetPNode(kit->control(), mem, adr, new_val, adr_type, value_type->is_oopptr());
    }
  } else {
    switch (access.type()) {
      case T_BYTE:
        load_store = new GetAndSetBNode(kit->control(), mem, adr, new_val, adr_type);
        break;
      case T_SHORT:
        load_store = new GetAndSetSNode(kit->control(), mem, adr, new_val, adr_type);
        break;
      case T_INT:
        load_store = new GetAndSetINode(kit->control(), mem, adr, new_val, adr_type);
        break;
      case T_LONG:
        load_store = new GetAndSetLNode(kit->control(), mem, adr, new_val, adr_type);
        break;
      default:
        ShouldNotReachHere();
    }
  }

  load_store->as_LoadStore()->set_barrier_data(access.barrier_data());
  load_store = kit->gvn().transform(load_store);

  access.set_raw_access(load_store);
  pin_atomic_op(access);

#ifdef _LP64
  if (access.is_oop() && adr->bottom_type()->is_ptr_to_narrowoop()) {
    return kit->gvn().transform(new DecodeNNode(load_store, load_store->get_ptr_type()));
  }
#endif

  return load_store;
}

Node* BarrierSetC2::atomic_add_at_resolved(C2AtomicParseAccess& access, Node* new_val, const Type* value_type) const {
  Node* load_store = nullptr;
  GraphKit* kit = access.kit();
  Node* adr = access.addr().node();
  const TypePtr* adr_type = access.addr().type();
  Node* mem = access.memory();

  switch(access.type()) {
    case T_BYTE:
      load_store = new GetAndAddBNode(kit->control(), mem, adr, new_val, adr_type);
      break;
    case T_SHORT:
      load_store = new GetAndAddSNode(kit->control(), mem, adr, new_val, adr_type);
      break;
    case T_INT:
      load_store = new GetAndAddINode(kit->control(), mem, adr, new_val, adr_type);
      break;
    case T_LONG:
      load_store = new GetAndAddLNode(kit->control(), mem, adr, new_val, adr_type);
      break;
    default:
      ShouldNotReachHere();
  }

  load_store->as_LoadStore()->set_barrier_data(access.barrier_data());
  load_store = kit->gvn().transform(load_store);

  access.set_raw_access(load_store);
  pin_atomic_op(access);

  return load_store;
}

Node* BarrierSetC2::atomic_cmpxchg_val_at(C2AtomicParseAccess& access, Node* expected_val,
                                          Node* new_val, const Type* value_type) const {
  C2AccessFence fence(access);
  resolve_address(access);
  return atomic_cmpxchg_val_at_resolved(access, expected_val, new_val, value_type);
}

Node* BarrierSetC2::atomic_cmpxchg_bool_at(C2AtomicParseAccess& access, Node* expected_val,
                                           Node* new_val, const Type* value_type) const {
  C2AccessFence fence(access);
  resolve_address(access);
  return atomic_cmpxchg_bool_at_resolved(access, expected_val, new_val, value_type);
}

Node* BarrierSetC2::atomic_xchg_at(C2AtomicParseAccess& access, Node* new_val, const Type* value_type) const {
  C2AccessFence fence(access);
  resolve_address(access);
  return atomic_xchg_at_resolved(access, new_val, value_type);
}

Node* BarrierSetC2::atomic_add_at(C2AtomicParseAccess& access, Node* new_val, const Type* value_type) const {
  C2AccessFence fence(access);
  resolve_address(access);
  return atomic_add_at_resolved(access, new_val, value_type);
}

int BarrierSetC2::arraycopy_payload_base_offset(bool is_array) {
  // Exclude the header but include the array length to copy by 8-byte words.
  // Can't use base_offset_in_bytes(bt) since basic type is unknown.
  int base_off = is_array ? arrayOopDesc::length_offset_in_bytes() :
                            instanceOopDesc::base_offset_in_bytes();
  // base_off:
  // 8  - 32-bit VM
  // 12 - 64-bit VM, compressed klass
  // 16 - 64-bit VM, normal klass
  if (base_off % BytesPerLong != 0) {
    assert(UseCompressedClassPointers, "");
    if (is_array) {
      // Exclude the length to copy by 8-byte words.
      base_off += sizeof(int);
    } else {
      // Include the klass to copy by 8-byte words.
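      // For instance, with compressed class pointers the instance payload
      // starts at offset 12; backing up to the klass offset (8) restores
      // 8-byte alignment, and copying the narrow klass word along with the
      // payload is harmless since source and destination share the same klass.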
      base_off = instanceOopDesc::klass_offset_in_bytes();
    }
    assert(base_off % BytesPerLong == 0, "expect 8 bytes alignment");
  }
  return base_off;
}

void BarrierSetC2::clone(GraphKit* kit, Node* src_base, Node* dst_base, Node* size, bool is_array) const {
  int base_off = arraycopy_payload_base_offset(is_array);
  Node* payload_size = size;
  Node* offset = kit->MakeConX(base_off);
  payload_size = kit->gvn().transform(new SubXNode(payload_size, offset));
  if (is_array) {
    // Ensure the array payload size is rounded up to the next BytesPerLong
    // multiple when converting to double-words. This is necessary because array
    // size does not include object alignment padding, so it might not be a
    // multiple of BytesPerLong for sub-long element types.
    payload_size = kit->gvn().transform(new AddXNode(payload_size, kit->MakeConX(BytesPerLong - 1)));
  }
  payload_size = kit->gvn().transform(new URShiftXNode(payload_size, kit->intcon(LogBytesPerLong)));
  ArrayCopyNode* ac = ArrayCopyNode::make(kit, false, src_base, offset, dst_base, offset, payload_size, true, false);
  if (is_array) {
    ac->set_clone_array();
  } else {
    ac->set_clone_inst();
  }
  Node* n = kit->gvn().transform(ac);
  if (n == ac) {
    const TypePtr* raw_adr_type = TypeRawPtr::BOTTOM;
    ac->set_adr_type(TypeRawPtr::BOTTOM);
    kit->set_predefined_output_for_runtime_call(ac, ac->in(TypeFunc::Memory), raw_adr_type);
  } else {
    kit->set_all_memory(n);
  }
}

Node* BarrierSetC2::obj_allocate(PhaseMacroExpand* macro, Node* mem, Node* toobig_false, Node* size_in_bytes,
                                 Node*& i_o, Node*& needgc_ctrl,
                                 Node*& fast_oop_ctrl, Node*& fast_oop_rawmem,
                                 intx prefetch_lines) const {
  assert(UseTLAB, "Only for TLAB enabled allocations");

  Node* thread = macro->transform_later(new ThreadLocalNode());
  Node* tlab_top_adr = macro->basic_plus_adr(macro->top()/*not oop*/, thread, in_bytes(JavaThread::tlab_top_offset()));
  Node* tlab_end_adr = macro->basic_plus_adr(macro->top()/*not oop*/, thread, in_bytes(JavaThread::tlab_end_offset()));

  // Load TLAB end.
  //
  // Note: We set the control input on "tlab_end" and "old_tlab_top" to work around
  //       a bug where these values were being moved across
  //       a safepoint. These are not oops, so they cannot be included in the oop
  //       map, but they can be changed by a GC. The proper way to fix this would
  //       be to set the raw memory state when generating a SafepointNode. However
  //       this will require extensive changes to the loop optimization in order to
  //       prevent a degradation of the optimization.
  //       See comment in memnode.hpp, around line 227 in class LoadPNode.
  Node* tlab_end = macro->make_load(toobig_false, mem, tlab_end_adr, 0, TypeRawPtr::BOTTOM, T_ADDRESS);

  // Load the TLAB top.
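  //
  // From here on this is a classic bump-the-pointer fast path, roughly:
  // read the old top, add the allocation size, and if the new top passes
  // the TLAB end take the slow (GC) path; otherwise store the new top back.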
  Node* old_tlab_top = new LoadPNode(toobig_false, mem, tlab_top_adr, TypeRawPtr::BOTTOM, TypeRawPtr::BOTTOM, MemNode::unordered);
  macro->transform_later(old_tlab_top);

  // Add the allocation size to the old TLAB top to get the new TLAB top.
  Node* new_tlab_top = new AddPNode(macro->top(), old_tlab_top, size_in_bytes);
  macro->transform_later(new_tlab_top);

  // Check against TLAB end.
  Node* tlab_full = new CmpPNode(new_tlab_top, tlab_end);
  macro->transform_later(tlab_full);

  Node* needgc_bol = new BoolNode(tlab_full, BoolTest::ge);
  macro->transform_later(needgc_bol);
  IfNode* needgc_iff = new IfNode(toobig_false, needgc_bol, PROB_UNLIKELY_MAG(4), COUNT_UNKNOWN);
  macro->transform_later(needgc_iff);

  // Plug the failing-heap-space-need-gc test into the slow-path region.
  Node* needgc_true = new IfTrueNode(needgc_iff);
  macro->transform_later(needgc_true);
  needgc_ctrl = needgc_true;

  // No need for a GC.
  Node* needgc_false = new IfFalseNode(needgc_iff);
  macro->transform_later(needgc_false);

  // Fast path:
  i_o = macro->prefetch_allocation(i_o, needgc_false, mem,
                                   old_tlab_top, new_tlab_top, prefetch_lines);

  // Store the modified TLAB top back down.
  Node* store_tlab_top = new StorePNode(needgc_false, mem, tlab_top_adr,
                                        TypeRawPtr::BOTTOM, new_tlab_top, MemNode::unordered);
  macro->transform_later(store_tlab_top);

  fast_oop_ctrl = needgc_false;
  fast_oop_rawmem = store_tlab_top;
  return old_tlab_top;
}

static const TypeFunc* clone_type() {
  // Create input type (domain)
  int argcnt = NOT_LP64(3) LP64_ONLY(4);
  const Type** const domain_fields = TypeTuple::fields(argcnt);
  int argp = TypeFunc::Parms;
  domain_fields[argp++] = TypeInstPtr::NOTNULL;  // src
  domain_fields[argp++] = TypeInstPtr::NOTNULL;  // dst
  domain_fields[argp++] = TypeX_X;               // size lower
  LP64_ONLY(domain_fields[argp++] = Type::HALF); // size upper
  assert(argp == TypeFunc::Parms + argcnt, "correct decoding");
  const TypeTuple* const domain = TypeTuple::make(TypeFunc::Parms + argcnt, domain_fields);

  // Create result type (range)
  const Type** const range_fields = TypeTuple::fields(0);
  const TypeTuple* const range = TypeTuple::make(TypeFunc::Parms + 0, range_fields);

  return TypeFunc::make(domain, range);
}

#define XTOP LP64_ONLY(COMMA phase->top())

void BarrierSetC2::clone_in_runtime(PhaseMacroExpand* phase, ArrayCopyNode* ac,
                                    address clone_addr, const char* clone_name) const {
  Node* const ctrl = ac->in(TypeFunc::Control);
  Node* const mem  = ac->in(TypeFunc::Memory);
  Node* const src  = ac->in(ArrayCopyNode::Src);
  Node* const dst  = ac->in(ArrayCopyNode::Dest);
  Node* const size = ac->in(ArrayCopyNode::Length);

  assert(size->bottom_type()->base() == Type_X,
         "Should be of object size type (int for 32 bits, long for 64 bits)");

  // The native clone we are calling here expects the object size in words.
  // Add the header/offset size to the payload size to get the object size.
  Node* const base_offset = phase->MakeConX(arraycopy_payload_base_offset(ac->is_clone_array()) >> LogBytesPerLong);
  Node* const full_size = phase->transform_later(new AddXNode(size, base_offset));
  // HeapAccess<>::clone expects the size in heap words.
  // For 64-bit platforms, this is a no-operation.
  // For 32-bit platforms, we need to multiply full_size by HeapWordsPerLong (2).
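  // For example, on 32-bit platforms full_size (counted in 8-byte words) is
  // shifted left by LogHeapWordsPerLong == 1, i.e. multiplied by 2 to get
  // 4-byte heap words; on 64-bit platforms LogHeapWordsPerLong == 0 and the
  // shift is a no-op.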
  Node* const full_size_in_heap_words = phase->transform_later(new LShiftXNode(full_size, phase->intcon(LogHeapWordsPerLong)));

  Node* const call = phase->make_leaf_call(ctrl,
                                           mem,
                                           clone_type(),
                                           clone_addr,
                                           clone_name,
                                           TypeRawPtr::BOTTOM,
                                           src, dst, full_size_in_heap_words XTOP);
  phase->transform_later(call);
  phase->igvn().replace_node(ac, call);
}

void BarrierSetC2::clone_at_expansion(PhaseMacroExpand* phase, ArrayCopyNode* ac) const {
  Node* ctrl = ac->in(TypeFunc::Control);
  Node* mem = ac->in(TypeFunc::Memory);
  Node* src = ac->in(ArrayCopyNode::Src);
  Node* src_offset = ac->in(ArrayCopyNode::SrcPos);
  Node* dest = ac->in(ArrayCopyNode::Dest);
  Node* dest_offset = ac->in(ArrayCopyNode::DestPos);
  Node* length = ac->in(ArrayCopyNode::Length);

  Node* payload_src = phase->basic_plus_adr(src, src_offset);
  Node* payload_dst = phase->basic_plus_adr(dest, dest_offset);

  const char* copyfunc_name = "arraycopy";
  address copyfunc_addr = phase->basictype2arraycopy(T_LONG, nullptr, nullptr, true, copyfunc_name, true);

  const TypePtr* raw_adr_type = TypeRawPtr::BOTTOM;
  const TypeFunc* call_type = OptoRuntime::fast_arraycopy_Type();

  Node* call = phase->make_leaf_call(ctrl, mem, call_type, copyfunc_addr, copyfunc_name, raw_adr_type, payload_src, payload_dst, length XTOP);
  phase->transform_later(call);

  phase->igvn().replace_node(ac, call);
}

#undef XTOP

void BarrierSetC2::compute_liveness_at_stubs() const {
  ResourceMark rm;
  Compile* const C = Compile::current();
  Arena* const A = Thread::current()->resource_area();
  PhaseCFG* const cfg = C->cfg();
  PhaseRegAlloc* const regalloc = C->regalloc();
  RegMask* const live = NEW_ARENA_ARRAY(A, RegMask, cfg->number_of_blocks() * sizeof(RegMask));
  BarrierSetAssembler* const bs = BarrierSet::barrier_set()->barrier_set_assembler();
  BarrierSetC2State* bs_state = barrier_set_state();
  Block_List worklist;

  for (uint i = 0; i < cfg->number_of_blocks(); ++i) {
    new ((void*)(live + i)) RegMask();
    worklist.push(cfg->get_block(i));
  }

  while (worklist.size() > 0) {
    const Block* const block = worklist.pop();
    RegMask& old_live = live[block->_pre_order];
    RegMask new_live;

    // Initialize to union of successors
    for (uint i = 0; i < block->_num_succs; i++) {
      const uint succ_id = block->_succs[i]->_pre_order;
      new_live.OR(live[succ_id]);
    }

    // Walk block backwards, computing liveness
    for (int i = block->number_of_nodes() - 1; i >= 0; --i) {
      const Node* const node = block->get_node(i);

      // If this node tracks out-liveness, update it
      if (!bs_state->needs_livein_data()) {
        RegMask* const regs = bs_state->live(node);
        if (regs != nullptr) {
          regs->OR(new_live);
        }
      }

      // Remove def bits
      const OptoReg::Name first = bs->refine_register(node, regalloc->get_reg_first(node));
      const OptoReg::Name second = bs->refine_register(node, regalloc->get_reg_second(node));
      if (first != OptoReg::Bad) {
        new_live.Remove(first);
      }
      if (second != OptoReg::Bad) {
        new_live.Remove(second);
      }

      // Add use bits
      for (uint j = 1; j < node->req(); ++j) {
        const Node* const use = node->in(j);
        const OptoReg::Name first = bs->refine_register(use, regalloc->get_reg_first(use));
        const OptoReg::Name second = bs->refine_register(use, regalloc->get_reg_second(use));
        if (first != OptoReg::Bad) {
          new_live.Insert(first);
        }
        if (second != OptoReg::Bad) {
          new_live.Insert(second);
        }
      }

      // If this node tracks in-liveness, update it
      if (bs_state->needs_livein_data()) {
        RegMask* const regs = bs_state->live(node);
        if (regs != nullptr) {
          regs->OR(new_live);
        }
      }
    }

    // Now at block top, see if we have any changes
    new_live.SUBTRACT(old_live);
    if (new_live.is_NotEmpty()) {
      // Liveness has refined, update and propagate to prior blocks
      old_live.OR(new_live);
      for (uint i = 1; i < block->num_preds(); ++i) {
        Block* const pred = cfg->get_block_for_node(block->pred(i));
        worklist.push(pred);
      }
    }
  }
}