1 /*
2 * Copyright (c) 2018, 2026, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "code/vmreg.inline.hpp"
26 #include "gc/shared/barrierSet.hpp"
27 #include "gc/shared/c2/barrierSetC2.hpp"
28 #include "gc/shared/tlab_globals.hpp"
29 #include "opto/arraycopynode.hpp"
30 #include "opto/block.hpp"
31 #include "opto/convertnode.hpp"
32 #include "opto/graphKit.hpp"
33 #include "opto/idealKit.hpp"
34 #include "opto/macro.hpp"
35 #include "opto/narrowptrnode.hpp"
36 #include "opto/output.hpp"
37 #include "opto/regalloc.hpp"
38 #include "opto/runtime.hpp"
39 #include "utilities/macros.hpp"
40 #include CPU_HEADER(gc/shared/barrierSetAssembler)
41
42 // By default this is a no-op.
43 void BarrierSetC2::resolve_address(C2Access& access) const { }
44
45 void* C2ParseAccess::barrier_set_state() const {
46 return _kit->barrier_set_state();
47 }
48
49 PhaseGVN& C2ParseAccess::gvn() const { return _kit->gvn(); }
50
51 bool C2Access::needs_cpu_membar() const {
52 bool mismatched = (_decorators & C2_MISMATCHED) != 0;
53 bool is_unordered = (_decorators & MO_UNORDERED) != 0;
54
55 bool anonymous = (_decorators & C2_UNSAFE_ACCESS) != 0;
56 bool in_heap = (_decorators & IN_HEAP) != 0;
57 bool in_native = (_decorators & IN_NATIVE) != 0;
58 bool is_mixed = !in_heap && !in_native;
59
60 bool is_write = (_decorators & C2_WRITE_ACCESS) != 0;
61 bool is_read = (_decorators & C2_READ_ACCESS) != 0;
62 bool is_atomic = is_read && is_write;
63
64 if (is_atomic) {
65 // Atomics always need to be wrapped in CPU membars
66 return true;
67 }
68
69 if (anonymous) {
70 // We will need memory barriers unless we can determine a unique
71 // alias category for this reference. (Note: If for some reason
72 // the barriers get omitted and the unsafe reference begins to "pollute"
73 // the alias analysis of the rest of the graph, either Compile::can_alias
74 // or Compile::must_alias will throw a diagnostic assert.)
75 if (is_mixed || !is_unordered || (mismatched && !_addr.type()->isa_aryptr())) {
76 return true;
77 }
78 } else {
79 assert(!is_mixed, "not unsafe");
80 }
81
82 return false;
83 }
84
85 static BarrierSetC2State* barrier_set_state() {
86 return reinterpret_cast<BarrierSetC2State*>(Compile::current()->barrier_set_state());
87 }
88
89 RegMask& BarrierStubC2::live() const {
90 return *barrier_set_state()->live(_node);
91 }
92
// Builds a stub for the given node. Both labels start out default-constructed
// and the preserve set is initialized from live(), i.e. from the mask the
// barrier set state holds for _node.
// NOTE(review): live() reads _node, so this relies on _node being declared
// (and therefore initialized) before _preserve — confirm against the class
// declaration.
BarrierStubC2::BarrierStubC2(const MachNode* node)
  : _node(node),
    _entry(),
    _continuation(),
    _preserve(live()) {}
98
99 Label* BarrierStubC2::entry() {
100 // The _entry will never be bound when in_scratch_emit_size() is true.
101 // However, we still need to return a label that is not bound now, but
102 // will eventually be bound. Any eventually bound label will do, as it
103 // will only act as a placeholder, so we return the _continuation label.
104 return Compile::current()->output()->in_scratch_emit_size() ? &_continuation : &_entry;
105 }
106
107 Label* BarrierStubC2::continuation() {
108 return &_continuation;
109 }
110
111 uint8_t BarrierStubC2::barrier_data() const {
112 return _node->barrier_data();
113 }
114
115 void BarrierStubC2::preserve(Register r) {
116 const VMReg vm_reg = r->as_VMReg();
117 assert(vm_reg->is_Register(), "r must be a general-purpose register");
118 _preserve.insert(OptoReg::as_OptoReg(vm_reg));
119 }
120
121 void BarrierStubC2::dont_preserve(Register r) {
122 VMReg vm_reg = r->as_VMReg();
123 assert(vm_reg->is_Register(), "r must be a general-purpose register");
124 // Subtract the given register and all its sub-registers (e.g. {R11, R11_H}
125 // for r11 in aarch64).
126 do {
127 _preserve.remove(OptoReg::as_OptoReg(vm_reg));
128 vm_reg = vm_reg->next();
129 } while (vm_reg->is_Register() && !vm_reg->is_concrete());
130 }
131
132 const RegMask& BarrierStubC2::preserve_set() const {
133 return _preserve;
134 }
135
136 Node* BarrierSetC2::store_at_resolved(C2Access& access, C2AccessValue& val) const {
137 DecoratorSet decorators = access.decorators();
138
139 bool mismatched = (decorators & C2_MISMATCHED) != 0;
140 bool unaligned = (decorators & C2_UNALIGNED) != 0;
141 bool unsafe = (decorators & C2_UNSAFE_ACCESS) != 0;
142 bool requires_atomic_access = (decorators & MO_UNORDERED) == 0;
143
144 MemNode::MemOrd mo = access.mem_node_mo();
145
146 Node* store;
147 BasicType bt = access.type();
148 if (access.is_parse_access()) {
149 C2ParseAccess& parse_access = static_cast<C2ParseAccess&>(access);
150
151 GraphKit* kit = parse_access.kit();
152 store = kit->store_to_memory(kit->control(), access.addr().node(), val.node(), bt,
153 mo, requires_atomic_access, unaligned, mismatched,
154 unsafe, access.barrier_data());
155 } else {
156 assert(access.is_opt_access(), "either parse or opt access");
157 C2OptAccess& opt_access = static_cast<C2OptAccess&>(access);
158 Node* ctl = opt_access.ctl();
159 MergeMemNode* mm = opt_access.mem();
160 PhaseGVN& gvn = opt_access.gvn();
161 const TypePtr* adr_type = access.addr().type();
162 int alias = gvn.C->get_alias_index(adr_type);
163 Node* mem = mm->memory_at(alias);
164
165 StoreNode* st = StoreNode::make(gvn, ctl, mem, access.addr().node(), adr_type, val.node(), bt, mo, requires_atomic_access);
166 if (unaligned) {
167 st->set_unaligned_access();
168 }
169 if (mismatched) {
170 st->set_mismatched_access();
171 }
172 st->set_barrier_data(access.barrier_data());
173 store = gvn.transform(st);
174 if (store == st) {
175 mm->set_memory_at(alias, st);
176 }
177 }
178 access.set_raw_access(store);
179
180 return store;
181 }
182
183 Node* BarrierSetC2::load_at_resolved(C2Access& access, const Type* val_type) const {
184 DecoratorSet decorators = access.decorators();
185
186 Node* adr = access.addr().node();
187 const TypePtr* adr_type = access.addr().type();
188
189 bool mismatched = (decorators & C2_MISMATCHED) != 0;
190 bool requires_atomic_access = (decorators & MO_UNORDERED) == 0;
191 bool unaligned = (decorators & C2_UNALIGNED) != 0;
192 bool control_dependent = (decorators & C2_CONTROL_DEPENDENT_LOAD) != 0;
193 bool unknown_control = (decorators & C2_UNKNOWN_CONTROL_LOAD) != 0;
194 bool unsafe = (decorators & C2_UNSAFE_ACCESS) != 0;
195 bool immutable = (decorators & C2_IMMUTABLE_MEMORY) != 0;
196
197 MemNode::MemOrd mo = access.mem_node_mo();
198 LoadNode::ControlDependency dep = unknown_control ? LoadNode::UnknownControl : LoadNode::DependsOnlyOnTest;
199
200 Node* load;
201 if (access.is_parse_access()) {
202 C2ParseAccess& parse_access = static_cast<C2ParseAccess&>(access);
203 GraphKit* kit = parse_access.kit();
204 Node* control = control_dependent ? kit->control() : nullptr;
205
206 if (immutable) {
207 Compile* C = Compile::current();
208 Node* mem = kit->immutable_memory();
209 load = LoadNode::make(kit->gvn(), control, mem, adr,
210 adr_type, val_type, access.type(), mo, dep, requires_atomic_access,
211 unaligned, mismatched, unsafe, access.barrier_data());
212 load = kit->gvn().transform(load);
213 } else {
214 load = kit->make_load(control, adr, val_type, access.type(), mo,
215 dep, requires_atomic_access, unaligned, mismatched, unsafe,
216 access.barrier_data());
217 }
218 } else {
219 assert(access.is_opt_access(), "either parse or opt access");
220 C2OptAccess& opt_access = static_cast<C2OptAccess&>(access);
221 Node* control = control_dependent ? opt_access.ctl() : nullptr;
222 MergeMemNode* mm = opt_access.mem();
223 PhaseGVN& gvn = opt_access.gvn();
224 Node* mem = mm->memory_at(gvn.C->get_alias_index(adr_type));
225 load = LoadNode::make(gvn, control, mem, adr, adr_type, val_type, access.type(), mo, dep,
226 requires_atomic_access, unaligned, mismatched, unsafe, access.barrier_data());
227 load = gvn.transform(load);
228 }
229 access.set_raw_access(load);
230
231 return load;
232 }
233
234 class C2AccessFence: public StackObj {
235 C2Access& _access;
236 Node* _leading_membar;
237
238 public:
239 C2AccessFence(C2Access& access) :
240 _access(access), _leading_membar(nullptr) {
241 GraphKit* kit = nullptr;
242 if (access.is_parse_access()) {
243 C2ParseAccess& parse_access = static_cast<C2ParseAccess&>(access);
244 kit = parse_access.kit();
245 }
246 DecoratorSet decorators = access.decorators();
247
248 bool is_write = (decorators & C2_WRITE_ACCESS) != 0;
249 bool is_read = (decorators & C2_READ_ACCESS) != 0;
250 bool is_atomic = is_read && is_write;
251
252 bool is_volatile = (decorators & MO_SEQ_CST) != 0;
253 bool is_release = (decorators & MO_RELEASE) != 0;
254
255 if (is_atomic) {
256 assert(kit != nullptr, "unsupported at optimization time");
257 // Memory-model-wise, a LoadStore acts like a little synchronized
258 // block, so needs barriers on each side. These don't translate
259 // into actual barriers on most machines, but we still need rest of
260 // compiler to respect ordering.
261 if (is_release) {
262 _leading_membar = kit->insert_mem_bar(Op_MemBarRelease);
263 } else if (is_volatile) {
264 if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
265 _leading_membar = kit->insert_mem_bar(Op_MemBarVolatile);
266 } else {
267 _leading_membar = kit->insert_mem_bar(Op_MemBarRelease);
268 }
269 }
270 } else if (is_write) {
271 // If reference is volatile, prevent following memory ops from
272 // floating down past the volatile write. Also prevents commoning
273 // another volatile read.
274 if (is_volatile || is_release) {
275 assert(kit != nullptr, "unsupported at optimization time");
276 _leading_membar = kit->insert_mem_bar(Op_MemBarRelease);
277 }
278 } else {
279 // Memory barrier to prevent normal and 'unsafe' accesses from
280 // bypassing each other. Happens after null checks, so the
281 // exception paths do not take memory state from the memory barrier,
282 // so there's no problems making a strong assert about mixing users
283 // of safe & unsafe memory.
284 if (is_volatile && support_IRIW_for_not_multiple_copy_atomic_cpu) {
285 assert(kit != nullptr, "unsupported at optimization time");
286 _leading_membar = kit->insert_mem_bar(Op_MemBarVolatile);
287 }
288 }
289
290 if (access.needs_cpu_membar()) {
291 assert(kit != nullptr, "unsupported at optimization time");
292 kit->insert_mem_bar(Op_MemBarCPUOrder);
293 }
294
295 if (is_atomic) {
296 // 4984716: MemBars must be inserted before this
297 // memory node in order to avoid a false
298 // dependency which will confuse the scheduler.
299 access.set_memory();
300 }
301 }
302
303 ~C2AccessFence() {
304 GraphKit* kit = nullptr;
305 if (_access.is_parse_access()) {
306 C2ParseAccess& parse_access = static_cast<C2ParseAccess&>(_access);
307 kit = parse_access.kit();
308 }
309 DecoratorSet decorators = _access.decorators();
310
311 bool is_write = (decorators & C2_WRITE_ACCESS) != 0;
312 bool is_read = (decorators & C2_READ_ACCESS) != 0;
313 bool is_atomic = is_read && is_write;
314
315 bool is_volatile = (decorators & MO_SEQ_CST) != 0;
316 bool is_acquire = (decorators & MO_ACQUIRE) != 0;
317
318 // If reference is volatile, prevent following volatiles ops from
319 // floating up before the volatile access.
320 if (_access.needs_cpu_membar()) {
321 kit->insert_mem_bar(Op_MemBarCPUOrder);
322 }
323
324 if (is_atomic) {
325 assert(kit != nullptr, "unsupported at optimization time");
326 if (is_acquire || is_volatile) {
327 Node* n = _access.raw_access();
328 Node* mb = kit->insert_mem_bar(Op_MemBarAcquire, n);
329 if (_leading_membar != nullptr) {
330 MemBarNode::set_load_store_pair(_leading_membar->as_MemBar(), mb->as_MemBar());
331 }
332 }
333 } else if (is_write) {
334 // If not multiple copy atomic, we do the MemBarVolatile before the load.
335 if (is_volatile && !support_IRIW_for_not_multiple_copy_atomic_cpu) {
336 assert(kit != nullptr, "unsupported at optimization time");
337 Node* n = _access.raw_access();
338 Node* mb = kit->insert_mem_bar(Op_MemBarVolatile, n); // Use fat membar
339 if (_leading_membar != nullptr) {
340 MemBarNode::set_store_pair(_leading_membar->as_MemBar(), mb->as_MemBar());
341 }
342 }
343 } else {
344 if (is_volatile || is_acquire) {
345 assert(kit != nullptr, "unsupported at optimization time");
346 Node* n = _access.raw_access();
347 assert(_leading_membar == nullptr || support_IRIW_for_not_multiple_copy_atomic_cpu, "no leading membar expected");
348 Node* mb = kit->insert_mem_bar(Op_MemBarAcquire, n);
349 mb->as_MemBar()->set_trailing_load();
350 }
351 }
352 }
353 };
354
355 Node* BarrierSetC2::store_at(C2Access& access, C2AccessValue& val) const {
356 C2AccessFence fence(access);
357 resolve_address(access);
358 return store_at_resolved(access, val);
359 }
360
361 Node* BarrierSetC2::load_at(C2Access& access, const Type* val_type) const {
362 C2AccessFence fence(access);
363 resolve_address(access);
364 return load_at_resolved(access, val_type);
365 }
366
367 MemNode::MemOrd C2Access::mem_node_mo() const {
368 bool is_write = (_decorators & C2_WRITE_ACCESS) != 0;
369 bool is_read = (_decorators & C2_READ_ACCESS) != 0;
370 if ((_decorators & MO_SEQ_CST) != 0) {
371 if (is_write && is_read) {
372 // For atomic operations
373 return MemNode::seqcst;
374 } else if (is_write) {
375 return MemNode::release;
376 } else {
377 assert(is_read, "what else?");
378 return MemNode::acquire;
379 }
380 } else if ((_decorators & MO_RELEASE) != 0) {
381 return MemNode::release;
382 } else if ((_decorators & MO_ACQUIRE) != 0) {
383 return MemNode::acquire;
384 } else if (is_write) {
385 // Volatile fields need releasing stores.
386 // Non-volatile fields also need releasing stores if they hold an
387 // object reference, because the object reference might point to
388 // a freshly created object.
389 // Conservatively release stores of object references.
390 return StoreNode::release_if_reference(_type);
391 } else {
392 return MemNode::unordered;
393 }
394 }
395
396 void C2Access::fixup_decorators() {
397 bool default_mo = (_decorators & MO_DECORATOR_MASK) == 0;
398 bool is_unordered = (_decorators & MO_UNORDERED) != 0 || default_mo;
399 bool anonymous = (_decorators & C2_UNSAFE_ACCESS) != 0;
400
401 bool is_read = (_decorators & C2_READ_ACCESS) != 0;
402 bool is_write = (_decorators & C2_WRITE_ACCESS) != 0;
403
404 if (AlwaysAtomicAccesses && is_unordered) {
405 _decorators &= ~MO_DECORATOR_MASK; // clear the MO bits
406 _decorators |= MO_RELAXED; // Force the MO_RELAXED decorator with AlwaysAtomicAccess
407 }
408
409 _decorators = AccessInternal::decorator_fixup(_decorators, _type);
410
411 if (is_read && !is_write && anonymous) {
412 // To be valid, unsafe loads may depend on other conditions than
413 // the one that guards them: pin the Load node
414 _decorators |= C2_CONTROL_DEPENDENT_LOAD;
415 _decorators |= C2_UNKNOWN_CONTROL_LOAD;
416 const TypePtr* adr_type = _addr.type();
417 Node* adr = _addr.node();
418 if (!needs_cpu_membar() && adr_type->isa_instptr()) {
419 assert(adr_type->meet(TypePtr::NULL_PTR) != adr_type->remove_speculative(), "should be not null");
420 intptr_t offset = Type::OffsetBot;
421 AddPNode::Ideal_base_and_offset(adr, &gvn(), offset);
422 if (offset >= 0) {
423 int s = Klass::layout_helper_size_in_bytes(adr_type->isa_instptr()->instance_klass()->layout_helper());
424 if (offset < s) {
425 // Guaranteed to be a valid access, no need to pin it
426 _decorators ^= C2_CONTROL_DEPENDENT_LOAD;
427 _decorators ^= C2_UNKNOWN_CONTROL_LOAD;
428 }
429 }
430 }
431 }
432 }
433
434 //--------------------------- atomic operations---------------------------------
435
436 void BarrierSetC2::pin_atomic_op(C2AtomicParseAccess& access) const {
437 // SCMemProjNodes represent the memory state of a LoadStore. Their
438 // main role is to prevent LoadStore nodes from being optimized away
439 // when their results aren't used.
440 assert(access.is_parse_access(), "entry not supported at optimization time");
441 C2ParseAccess& parse_access = static_cast<C2ParseAccess&>(access);
442 GraphKit* kit = parse_access.kit();
443 Node* load_store = access.raw_access();
444 assert(load_store != nullptr, "must pin atomic op");
445 Node* proj = kit->gvn().transform(new SCMemProjNode(load_store));
446 kit->set_memory(proj, access.alias_idx());
447 }
448
449 void C2AtomicParseAccess::set_memory() {
450 Node *mem = _kit->memory(_alias_idx);
451 _memory = mem;
452 }
453
454 Node* BarrierSetC2::atomic_cmpxchg_val_at_resolved(C2AtomicParseAccess& access, Node* expected_val,
455 Node* new_val, const Type* value_type) const {
456 GraphKit* kit = access.kit();
457 MemNode::MemOrd mo = access.mem_node_mo();
458 Node* mem = access.memory();
459
460 Node* adr = access.addr().node();
461 const TypePtr* adr_type = access.addr().type();
462
463 Node* load_store = nullptr;
464
465 if (access.is_oop()) {
466 #ifdef _LP64
467 if (adr->bottom_type()->is_ptr_to_narrowoop()) {
468 Node *newval_enc = kit->gvn().transform(new EncodePNode(new_val, new_val->bottom_type()->make_narrowoop()));
469 Node *oldval_enc = kit->gvn().transform(new EncodePNode(expected_val, expected_val->bottom_type()->make_narrowoop()));
470 load_store = new CompareAndExchangeNNode(kit->control(), mem, adr, newval_enc, oldval_enc, adr_type, value_type->make_narrowoop(), mo);
471 } else
472 #endif
473 {
474 load_store = new CompareAndExchangePNode(kit->control(), mem, adr, new_val, expected_val, adr_type, value_type->is_oopptr(), mo);
475 }
476 } else {
477 switch (access.type()) {
478 case T_BYTE: {
479 load_store = new CompareAndExchangeBNode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo);
480 break;
481 }
482 case T_SHORT: {
483 load_store = new CompareAndExchangeSNode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo);
484 break;
485 }
486 case T_INT: {
487 load_store = new CompareAndExchangeINode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo);
488 break;
489 }
490 case T_LONG: {
491 load_store = new CompareAndExchangeLNode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo);
492 break;
493 }
494 default:
495 ShouldNotReachHere();
496 }
497 }
498
499 load_store->as_LoadStore()->set_barrier_data(access.barrier_data());
500 load_store = kit->gvn().transform(load_store);
501
502 access.set_raw_access(load_store);
503 pin_atomic_op(access);
504
505 #ifdef _LP64
506 if (access.is_oop() && adr->bottom_type()->is_ptr_to_narrowoop()) {
507 return kit->gvn().transform(new DecodeNNode(load_store, load_store->get_ptr_type()));
508 }
509 #endif
510
511 return load_store;
512 }
513
514 Node* BarrierSetC2::atomic_cmpxchg_bool_at_resolved(C2AtomicParseAccess& access, Node* expected_val,
515 Node* new_val, const Type* value_type) const {
516 GraphKit* kit = access.kit();
517 DecoratorSet decorators = access.decorators();
518 MemNode::MemOrd mo = access.mem_node_mo();
519 Node* mem = access.memory();
520 bool is_weak_cas = (decorators & C2_WEAK_CMPXCHG) != 0;
521 Node* load_store = nullptr;
522 Node* adr = access.addr().node();
523
524 if (access.is_oop()) {
525 #ifdef _LP64
526 if (adr->bottom_type()->is_ptr_to_narrowoop()) {
527 Node *newval_enc = kit->gvn().transform(new EncodePNode(new_val, new_val->bottom_type()->make_narrowoop()));
528 Node *oldval_enc = kit->gvn().transform(new EncodePNode(expected_val, expected_val->bottom_type()->make_narrowoop()));
529 if (is_weak_cas) {
530 load_store = new WeakCompareAndSwapNNode(kit->control(), mem, adr, newval_enc, oldval_enc, mo);
531 } else {
532 load_store = new CompareAndSwapNNode(kit->control(), mem, adr, newval_enc, oldval_enc, mo);
533 }
534 } else
535 #endif
536 {
537 if (is_weak_cas) {
538 load_store = new WeakCompareAndSwapPNode(kit->control(), mem, adr, new_val, expected_val, mo);
539 } else {
540 load_store = new CompareAndSwapPNode(kit->control(), mem, adr, new_val, expected_val, mo);
541 }
542 }
543 } else {
544 switch(access.type()) {
545 case T_BYTE: {
546 if (is_weak_cas) {
547 load_store = new WeakCompareAndSwapBNode(kit->control(), mem, adr, new_val, expected_val, mo);
548 } else {
549 load_store = new CompareAndSwapBNode(kit->control(), mem, adr, new_val, expected_val, mo);
550 }
551 break;
552 }
553 case T_SHORT: {
554 if (is_weak_cas) {
555 load_store = new WeakCompareAndSwapSNode(kit->control(), mem, adr, new_val, expected_val, mo);
556 } else {
557 load_store = new CompareAndSwapSNode(kit->control(), mem, adr, new_val, expected_val, mo);
558 }
559 break;
560 }
561 case T_INT: {
562 if (is_weak_cas) {
563 load_store = new WeakCompareAndSwapINode(kit->control(), mem, adr, new_val, expected_val, mo);
564 } else {
565 load_store = new CompareAndSwapINode(kit->control(), mem, adr, new_val, expected_val, mo);
566 }
567 break;
568 }
569 case T_LONG: {
570 if (is_weak_cas) {
571 load_store = new WeakCompareAndSwapLNode(kit->control(), mem, adr, new_val, expected_val, mo);
572 } else {
573 load_store = new CompareAndSwapLNode(kit->control(), mem, adr, new_val, expected_val, mo);
574 }
575 break;
576 }
577 default:
578 ShouldNotReachHere();
579 }
580 }
581
582 load_store->as_LoadStore()->set_barrier_data(access.barrier_data());
583 load_store = kit->gvn().transform(load_store);
584
585 access.set_raw_access(load_store);
586 pin_atomic_op(access);
587
588 return load_store;
589 }
590
591 Node* BarrierSetC2::atomic_xchg_at_resolved(C2AtomicParseAccess& access, Node* new_val, const Type* value_type) const {
592 GraphKit* kit = access.kit();
593 Node* mem = access.memory();
594 Node* adr = access.addr().node();
595 const TypePtr* adr_type = access.addr().type();
596 Node* load_store = nullptr;
597
598 if (access.is_oop()) {
599 #ifdef _LP64
600 if (adr->bottom_type()->is_ptr_to_narrowoop()) {
601 Node *newval_enc = kit->gvn().transform(new EncodePNode(new_val, new_val->bottom_type()->make_narrowoop()));
602 load_store = kit->gvn().transform(new GetAndSetNNode(kit->control(), mem, adr, newval_enc, adr_type, value_type->make_narrowoop()));
603 } else
604 #endif
605 {
606 load_store = new GetAndSetPNode(kit->control(), mem, adr, new_val, adr_type, value_type->is_oopptr());
607 }
608 } else {
609 switch (access.type()) {
610 case T_BYTE:
611 load_store = new GetAndSetBNode(kit->control(), mem, adr, new_val, adr_type);
612 break;
613 case T_SHORT:
614 load_store = new GetAndSetSNode(kit->control(), mem, adr, new_val, adr_type);
615 break;
616 case T_INT:
617 load_store = new GetAndSetINode(kit->control(), mem, adr, new_val, adr_type);
618 break;
619 case T_LONG:
620 load_store = new GetAndSetLNode(kit->control(), mem, adr, new_val, adr_type);
621 break;
622 default:
623 ShouldNotReachHere();
624 }
625 }
626
627 load_store->as_LoadStore()->set_barrier_data(access.barrier_data());
628 load_store = kit->gvn().transform(load_store);
629
630 access.set_raw_access(load_store);
631 pin_atomic_op(access);
632
633 #ifdef _LP64
634 if (access.is_oop() && adr->bottom_type()->is_ptr_to_narrowoop()) {
635 return kit->gvn().transform(new DecodeNNode(load_store, load_store->get_ptr_type()));
636 }
637 #endif
638
639 return load_store;
640 }
641
642 Node* BarrierSetC2::atomic_add_at_resolved(C2AtomicParseAccess& access, Node* new_val, const Type* value_type) const {
643 Node* load_store = nullptr;
644 GraphKit* kit = access.kit();
645 Node* adr = access.addr().node();
646 const TypePtr* adr_type = access.addr().type();
647 Node* mem = access.memory();
648
649 switch(access.type()) {
650 case T_BYTE:
651 load_store = new GetAndAddBNode(kit->control(), mem, adr, new_val, adr_type);
652 break;
653 case T_SHORT:
654 load_store = new GetAndAddSNode(kit->control(), mem, adr, new_val, adr_type);
655 break;
656 case T_INT:
657 load_store = new GetAndAddINode(kit->control(), mem, adr, new_val, adr_type);
658 break;
659 case T_LONG:
660 load_store = new GetAndAddLNode(kit->control(), mem, adr, new_val, adr_type);
661 break;
662 default:
663 ShouldNotReachHere();
664 }
665
666 load_store->as_LoadStore()->set_barrier_data(access.barrier_data());
667 load_store = kit->gvn().transform(load_store);
668
669 access.set_raw_access(load_store);
670 pin_atomic_op(access);
671
672 return load_store;
673 }
674
675 Node* BarrierSetC2::atomic_cmpxchg_val_at(C2AtomicParseAccess& access, Node* expected_val,
676 Node* new_val, const Type* value_type) const {
677 C2AccessFence fence(access);
678 resolve_address(access);
679 return atomic_cmpxchg_val_at_resolved(access, expected_val, new_val, value_type);
680 }
681
682 Node* BarrierSetC2::atomic_cmpxchg_bool_at(C2AtomicParseAccess& access, Node* expected_val,
683 Node* new_val, const Type* value_type) const {
684 C2AccessFence fence(access);
685 resolve_address(access);
686 return atomic_cmpxchg_bool_at_resolved(access, expected_val, new_val, value_type);
687 }
688
689 Node* BarrierSetC2::atomic_xchg_at(C2AtomicParseAccess& access, Node* new_val, const Type* value_type) const {
690 C2AccessFence fence(access);
691 resolve_address(access);
692 return atomic_xchg_at_resolved(access, new_val, value_type);
693 }
694
695 Node* BarrierSetC2::atomic_add_at(C2AtomicParseAccess& access, Node* new_val, const Type* value_type) const {
696 C2AccessFence fence(access);
697 resolve_address(access);
698 return atomic_add_at_resolved(access, new_val, value_type);
699 }
700
701 int BarrierSetC2::arraycopy_payload_base_offset(bool is_array) {
702 // Exclude the header but include array length to copy by 8 bytes words.
703 // Can't use base_offset_in_bytes(bt) since basic type is unknown.
704 int base_off = is_array ? arrayOopDesc::length_offset_in_bytes() :
705 instanceOopDesc::base_offset_in_bytes();
706 // base_off:
707 // 4 - compact headers
708 // 8 - 32-bit VM
709 // 12 - 64-bit VM, compressed klass
710 // 16 - 64-bit VM, normal klass
711 if (base_off % BytesPerLong != 0) {
712 assert(UseCompressedClassPointers, "");
713 if (is_array) {
714 // Exclude length to copy by 8 bytes words.
715 base_off += sizeof(int);
716 } else {
717 if (!UseCompactObjectHeaders) {
718 // Include klass to copy by 8 bytes words.
719 base_off = instanceOopDesc::klass_offset_in_bytes();
720 }
721 }
722 assert(base_off % BytesPerLong == 0 || UseCompactObjectHeaders, "expect 8 bytes alignment");
723 }
724 return base_off;
725 }
726
727 void BarrierSetC2::clone(GraphKit* kit, Node* src_base, Node* dst_base, Node* size, bool is_array) const {
728 int base_off = arraycopy_payload_base_offset(is_array);
729
730 Node* payload_size = size;
731 Node* offset = kit->MakeConX(base_off);
732 payload_size = kit->gvn().transform(new SubXNode(payload_size, offset));
733 if (is_array) {
734 // Ensure the array payload size is rounded up to the next BytesPerLong
735 // multiple when converting to double-words. This is necessary because array
736 // size does not include object alignment padding, so it might not be a
737 // multiple of BytesPerLong for sub-long element types.
738 payload_size = kit->gvn().transform(new AddXNode(payload_size, kit->MakeConX(BytesPerLong - 1)));
739 }
740 payload_size = kit->gvn().transform(new URShiftXNode(payload_size, kit->intcon(LogBytesPerLong)));
741 ArrayCopyNode* ac = ArrayCopyNode::make(kit, false, src_base, offset, dst_base, offset, payload_size, true, false);
742 if (is_array) {
743 ac->set_clone_array();
744 } else {
745 ac->set_clone_inst();
746 }
747 Node* n = kit->gvn().transform(ac);
748 if (n == ac) {
749 const TypePtr* raw_adr_type = TypeRawPtr::BOTTOM;
750 ac->set_adr_type(TypeRawPtr::BOTTOM);
751 kit->set_predefined_output_for_runtime_call(ac, ac->in(TypeFunc::Memory), raw_adr_type);
752 } else {
753 kit->set_all_memory(n);
754 }
755 }
756
// Expands the TLAB fast path of an allocation during macro expansion.
//
// Loads the current thread's TLAB top and end, computes the new top as
// old top + size_in_bytes, and branches on "new top >= TLAB end".
//
// macro:            the macro expansion phase used to create and register nodes.
// mem:              memory state the TLAB loads and the store are built on.
// toobig_false:     control input for the TLAB loads and the need-GC test.
// size_in_bytes:    allocation size added to the old TLAB top.
// i_o:              in/out; replaced by the result of prefetch_allocation().
// needgc_ctrl:      out; control for the path where the TLAB is too full.
// fast_oop_ctrl:    out; control for the path where the allocation fits.
// fast_oop_rawmem:  out; the store that writes the new TLAB top.
// prefetch_lines:   forwarded to prefetch_allocation().
//
// Returns the old TLAB top.
Node* BarrierSetC2::obj_allocate(PhaseMacroExpand* macro, Node* mem, Node* toobig_false, Node* size_in_bytes,
                                 Node*& i_o, Node*& needgc_ctrl,
                                 Node*& fast_oop_ctrl, Node*& fast_oop_rawmem,
                                 intx prefetch_lines) const {
  assert(UseTLAB, "Only for TLAB enabled allocations");

  Node* thread = macro->transform_later(new ThreadLocalNode());
  Node* tlab_top_adr = macro->basic_plus_adr(macro->top()/*not oop*/, thread, in_bytes(JavaThread::tlab_top_offset()));
  Node* tlab_end_adr = macro->basic_plus_adr(macro->top()/*not oop*/, thread, in_bytes(JavaThread::tlab_end_offset()));

  // Load TLAB end.
  //
  // Note: We set the control input on "tlab_end" and "old_tlab_top" to work around
  //       a bug where these values were being moved across
  //       a safepoint.  These are not oops, so they cannot be included in the oop
  //       map, but they can be changed by a GC.   The proper way to fix this would
  //       be to set the raw memory state when generating a SafepointNode.  However
  //       this will require extensive changes to the loop optimization in order to
  //       prevent a degradation of the optimization.
  //       See comment in memnode.hpp, around line 227 in class LoadPNode.
  Node* tlab_end = macro->make_load_raw(toobig_false, mem, tlab_end_adr, 0, TypeRawPtr::BOTTOM, T_ADDRESS);

  // Load the TLAB top.
  Node* old_tlab_top = new LoadPNode(toobig_false, mem, tlab_top_adr, TypeRawPtr::BOTTOM, TypeRawPtr::BOTTOM, MemNode::unordered);
  macro->transform_later(old_tlab_top);

  // Add to heap top to get a new TLAB top
  Node* new_tlab_top = new AddPNode(macro->top(), old_tlab_top, size_in_bytes);
  macro->transform_later(new_tlab_top);

  // Check against TLAB end
  Node* tlab_full = new CmpPNode(new_tlab_top, tlab_end);
  macro->transform_later(tlab_full);

  // The need-GC path is taken when new_tlab_top >= tlab_end.
  Node* needgc_bol = new BoolNode(tlab_full, BoolTest::ge);
  macro->transform_later(needgc_bol);
  IfNode* needgc_iff = new IfNode(toobig_false, needgc_bol, PROB_UNLIKELY_MAG(4), COUNT_UNKNOWN);
  macro->transform_later(needgc_iff);

  // Plug the failing-heap-space-need-gc test into the slow-path region
  Node* needgc_true = new IfTrueNode(needgc_iff);
  macro->transform_later(needgc_true);
  needgc_ctrl = needgc_true;

  // No need for a GC.
  Node* needgc_false = new IfFalseNode(needgc_iff);
  macro->transform_later(needgc_false);

  // Fast path:
  i_o = macro->prefetch_allocation(i_o, needgc_false, mem,
                                   old_tlab_top, new_tlab_top, prefetch_lines);

  // Store the modified TLAB top back down.
  Node* store_tlab_top = new StorePNode(needgc_false, mem, tlab_top_adr,
                                        TypeRawPtr::BOTTOM, new_tlab_top, MemNode::unordered);
  macro->transform_later(store_tlab_top);

  fast_oop_ctrl = needgc_false;
  fast_oop_rawmem = store_tlab_top;
  return old_tlab_top;
}
818
819 const TypeFunc* BarrierSetC2::_clone_type_Type = nullptr;
820
821 void BarrierSetC2::make_clone_type() {
822 assert(BarrierSetC2::_clone_type_Type == nullptr, "should be");
823 // Create input type (domain)
824 int argcnt = NOT_LP64(3) LP64_ONLY(4);
825 const Type** const domain_fields = TypeTuple::fields(argcnt);
826 int argp = TypeFunc::Parms;
827 domain_fields[argp++] = TypeInstPtr::NOTNULL; // src
828 domain_fields[argp++] = TypeInstPtr::NOTNULL; // dst
829 domain_fields[argp++] = TypeX_X; // size lower
830 LP64_ONLY(domain_fields[argp++] = Type::HALF); // size upper
831 assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
832 const TypeTuple* const domain = TypeTuple::make(TypeFunc::Parms + argcnt, domain_fields);
833
834 // Create result type (range)
835 const Type** const range_fields = TypeTuple::fields(0);
836 const TypeTuple* const range = TypeTuple::make(TypeFunc::Parms + 0, range_fields);
837
838 BarrierSetC2::_clone_type_Type = TypeFunc::make(domain, range);
839 }
840
841 inline const TypeFunc* BarrierSetC2::clone_type() {
842 assert(BarrierSetC2::_clone_type_Type != nullptr, "should be initialized");
843 return BarrierSetC2::_clone_type_Type;
844 }
845
846 #define XTOP LP64_ONLY(COMMA phase->top())
847
848 void BarrierSetC2::clone_in_runtime(PhaseMacroExpand* phase, ArrayCopyNode* ac,
849 address clone_addr, const char* clone_name) const {
850 Node* const ctrl = ac->in(TypeFunc::Control);
851 Node* const mem = ac->in(TypeFunc::Memory);
852 Node* const src = ac->in(ArrayCopyNode::Src);
853 Node* const dst = ac->in(ArrayCopyNode::Dest);
854 Node* const size = ac->in(ArrayCopyNode::Length);
855
856 assert(size->bottom_type()->base() == Type_X,
857 "Should be of object size type (int for 32 bits, long for 64 bits)");
858
859 // The native clone we are calling here expects the object size in words.
860 // Add header/offset size to payload size to get object size.
861
862 // We need the full object size - payload (already aligned) plus base offset (which is not always aligned, so round *up*),
863 // because clone_in_runtime copies the whole object from 0 to end.
864 Node* const base_offset = phase->MakeConX((arraycopy_payload_base_offset(ac->is_clone_array()) + (BytesPerLong - 1)) >> LogBytesPerLong);
865 Node* const full_size = phase->transform_later(new AddXNode(size, base_offset));
866
867 // HeapAccess<>::clone expects size in heap words.
868 // For 64-bits platforms, this is a no-operation.
869 // For 32-bits platforms, we need to multiply full_size by HeapWordsPerLong (2).
870 Node* const full_size_in_heap_words = phase->transform_later(new LShiftXNode(full_size, phase->intcon(LogHeapWordsPerLong)));
871
872 Node* const call = phase->make_leaf_call(ctrl,
873 mem,
874 clone_type(),
875 clone_addr,
876 clone_name,
877 TypeRawPtr::BOTTOM,
878 src, dst, full_size_in_heap_words XTOP);
879 phase->transform_later(call);
880 phase->igvn().replace_node(ac, call);
881 }
882
883 void BarrierSetC2::clone_at_expansion(PhaseMacroExpand* phase, ArrayCopyNode* ac) const {
884 Node* ctrl = ac->in(TypeFunc::Control);
885 Node* mem = ac->in(TypeFunc::Memory);
886 Node* src = ac->in(ArrayCopyNode::Src);
887 Node* src_offset = ac->in(ArrayCopyNode::SrcPos);
888 Node* dest = ac->in(ArrayCopyNode::Dest);
889 Node* dest_offset = ac->in(ArrayCopyNode::DestPos);
890 Node* length = ac->in(ArrayCopyNode::Length);
891
892 Node* payload_src = phase->basic_plus_adr(src, src_offset);
893 Node* payload_dst = phase->basic_plus_adr(dest, dest_offset);
894
895 if (should_copy_int_prefix(phase, ac)) {
896 mem = arraycopy_copy_int_prefix(phase, ctrl, mem, payload_src, payload_dst);
897
898 // We've copied the prefix, bump the pointers.
899 payload_src = phase->basic_plus_adr(src, payload_src, BytesPerInt);
900 payload_dst = phase->basic_plus_adr(dest, payload_dst, BytesPerInt);
901 }
902
903 // Bulk copy.
904 const char* copyfunc_name = "arraycopy";
905 address copyfunc_addr = phase->basictype2arraycopy(T_LONG, nullptr, nullptr, true, copyfunc_name, true);
906
907 const TypePtr* raw_adr_type = TypeRawPtr::BOTTOM;
908 const TypeFunc* call_type = OptoRuntime::fast_arraycopy_Type();
909
910 Node* call = phase->make_leaf_call(ctrl, mem, call_type, copyfunc_addr, copyfunc_name, raw_adr_type, payload_src, payload_dst, length XTOP);
911 phase->transform_later(call);
912
913 phase->igvn().replace_node(ac, call);
914 }
915
916 bool BarrierSetC2::should_copy_int_prefix(PhaseMacroExpand* phase, ArrayCopyNode* ac) const {
917 // We do our bulk copy in longs. If base offset is not aligned, then we must copy the prefix separately.
918 // With CompactObjectHeaders, the base offset for an instance is 4 bytes.
919 // We cannot simply expand the copy to the previous long-alignment, as that will copy the object header,
920 // which is stateful with COH - it contains hash and lock bits that are specific to the instance.
921
922 // Skip this when src has an array type. With StressReflectiveCode, the
923 // instance path of the clone can be live in the IR even when the type system
924 // knows src is an array. The pre-copy is unnecessary on such paths (they
925 // are unreachable at runtime), and creating a LoadNode at the array length
926 // offset would assert (LoadRangeNode required).
927 Node* src = ac->in(ArrayCopyNode::Src);
928 if (phase->igvn().type(src)->isa_aryptr()) {
929 return false;
930 }
931
932 int base_off = arraycopy_payload_base_offset(ac->is_clone_array());
933 if (is_aligned(base_off, BytesPerLong)) {
934 // We're aligned, no need to copy anything separately.
935 return false;
936 }
937
938 assert(UseCompactObjectHeaders, "non-aligned base offset only possible with compact object headers");
939 assert(is_aligned(base_off, BytesPerInt), "must be 4-bytes aligned");
940 return true;
941 }
942
943 MergeMemNode* BarrierSetC2::arraycopy_copy_int_prefix(PhaseMacroExpand* phase, Node* ctrl, Node* mem, Node* src, Node* dst) const {
944 // Manual load/store of one int.
945 MergeMemNode* mm = phase->transform_later(MergeMemNode::make(mem))->as_MergeMem();
946 const TypePtr* s_adr_type = phase->igvn().type(src)->is_ptr();
947 const TypePtr* d_adr_type = phase->igvn().type(dst)->is_ptr();
948 uint s_alias_idx = phase->C->get_alias_index(s_adr_type);
949 uint d_alias_idx = phase->C->get_alias_index(d_adr_type);
950 // This copies the first 4 bytes after the compact header (hash field or first instance field) as a raw int.
951 // The actual field at this offset may be a narrowOop, so the load/store must be marked as mismatched to
952 // avoid StoreN-vs-StoreI assertion failures during IGVN.
953 Node* load_prefix = phase->transform_later(
954 LoadNode::make(phase->igvn(), ctrl, mm->memory_at(s_alias_idx), src, s_adr_type,
955 TypeInt::INT, T_INT, MemNode::unordered, LoadNode::DependsOnlyOnTest,
956 false /*require_atomic_access*/, false /*unaligned*/, true /*mismatched*/));
957 Node* store_prefix = phase->transform_later(
958 StoreNode::make(phase->igvn(), ctrl, mm->memory_at(d_alias_idx), dst, d_adr_type,
959 load_prefix, T_INT, MemNode::unordered));
960 store_prefix->as_Store()->set_mismatched_access();
961 mm->set_memory_at(d_alias_idx, store_prefix);
962 return mm;
963 }
964
965 #undef XTOP
966
967 static bool block_has_safepoint(const Block* block, uint from, uint to) {
968 for (uint i = from; i < to; i++) {
969 if (block->get_node(i)->is_MachSafePoint()) {
970 // Safepoint found
971 return true;
972 }
973 }
974
975 // Safepoint not found
976 return false;
977 }
978
979 static bool block_has_safepoint(const Block* block) {
980 return block_has_safepoint(block, 0, block->number_of_nodes());
981 }
982
983 static uint block_index(const Block* block, const Node* node) {
984 for (uint j = 0; j < block->number_of_nodes(); ++j) {
985 if (block->get_node(j) == node) {
986 return j;
987 }
988 }
989 ShouldNotReachHere();
990 return 0;
991 }
992
993 // Look through various node aliases
994 static const Node* look_through_node(const Node* node) {
995 while (node != nullptr) {
996 const Node* new_node = node;
997 if (node->is_Mach()) {
998 const MachNode* const node_mach = node->as_Mach();
999 if (node_mach->ideal_Opcode() == Op_CheckCastPP) {
1000 new_node = node->in(1);
1001 }
1002 if (node_mach->is_SpillCopy()) {
1003 new_node = node->in(1);
1004 }
1005 }
1006 if (new_node == node || new_node == nullptr) {
1007 break;
1008 } else {
1009 node = new_node;
1010 }
1011 }
1012
1013 return node;
1014 }
1015
1016 // Whether the given offset is undefined.
1017 static bool is_undefined(intptr_t offset) {
1018 return offset == Type::OffsetTop;
1019 }
1020
1021 // Whether the given offset is unknown.
1022 static bool is_unknown(intptr_t offset) {
1023 return offset == Type::OffsetBot;
1024 }
1025
1026 // Whether the given offset is concrete (defined and compile-time known).
1027 static bool is_concrete(intptr_t offset) {
1028 return !is_undefined(offset) && !is_unknown(offset);
1029 }
1030
1031 // Compute base + offset components of the memory address accessed by mach.
1032 // Return a node representing the base address, or null if the base cannot be
1033 // found or the offset is undefined or a concrete negative value. If a non-null
1034 // base is returned, the offset is a concrete, nonnegative value or unknown.
1035 static const Node* get_base_and_offset(const MachNode* mach, intptr_t& offset) {
1036 const TypePtr* adr_type = nullptr;
1037 offset = 0;
1038 const Node* base = mach->get_base_and_disp(offset, adr_type);
1039
1040 if (base == nullptr || base == NodeSentinel) {
1041 return nullptr;
1042 }
1043
1044 if (offset == 0 && base->is_Mach() && base->as_Mach()->ideal_Opcode() == Op_AddP) {
1045 // The memory address is computed by 'base' and fed to 'mach' via an
1046 // indirect memory operand (indicated by offset == 0). The ultimate base and
1047 // offset can be fetched directly from the inputs and Ideal type of 'base'.
1048 const TypeOopPtr* oopptr = base->bottom_type()->isa_oopptr();
1049 if (oopptr == nullptr) return nullptr;
1050 offset = oopptr->offset();
1051 // Even if 'base' is not an Ideal AddP node anymore, Matcher::ReduceInst()
1052 // guarantees that the base address is still available at the same slot.
1053 base = base->in(AddPNode::Base);
1054 assert(base != nullptr, "");
1055 }
1056
1057 if (is_undefined(offset) || (is_concrete(offset) && offset < 0)) {
1058 return nullptr;
1059 }
1060
1061 return look_through_node(base);
1062 }
1063
1064 // Whether a phi node corresponds to an array allocation.
1065 // This test is incomplete: in some edge cases, it might return false even
1066 // though the node does correspond to an array allocation.
1067 static bool is_array_allocation(const Node* phi) {
1068 precond(phi->is_Phi());
1069 // Check whether phi has a successor cast (CheckCastPP) to Java array pointer,
1070 // possibly below spill copies and other cast nodes. Limit the exploration to
1071 // a single path from the phi node consisting of these node types.
1072 const Node* current = phi;
1073 while (true) {
1074 const Node* next = nullptr;
1075 for (DUIterator_Fast imax, i = current->fast_outs(imax); i < imax; i++) {
1076 if (!current->fast_out(i)->isa_Mach()) {
1077 continue;
1078 }
1079 const MachNode* succ = current->fast_out(i)->as_Mach();
1080 if (succ->ideal_Opcode() == Op_CheckCastPP) {
1081 if (succ->get_ptr_type()->isa_aryptr()) {
1082 // Cast to Java array pointer: phi corresponds to an array allocation.
1083 return true;
1084 }
1085 // Other cast: record as candidate for further exploration.
1086 next = succ;
1087 } else if (succ->is_SpillCopy() && next == nullptr) {
1088 // Spill copy, and no better candidate found: record as candidate.
1089 next = succ;
1090 }
1091 }
1092 if (next == nullptr) {
1093 // No evidence found that phi corresponds to an array allocation, and no
1094 // candidates available to continue exploring.
1095 return false;
1096 }
1097 // Continue exploring from the best candidate found.
1098 current = next;
1099 }
1100 ShouldNotReachHere();
1101 }
1102
1103 bool BarrierSetC2::is_allocation(const Node* node) {
1104 assert(node->is_Phi(), "expected phi node");
1105 if (node->req() != 3) {
1106 return false;
1107 }
1108 const Node* const fast_node = node->in(2);
1109 if (!fast_node->is_Mach()) {
1110 return false;
1111 }
1112 const MachNode* const fast_mach = fast_node->as_Mach();
1113 if (fast_mach->ideal_Opcode() != Op_LoadP) {
1114 return false;
1115 }
1116 intptr_t offset;
1117 const Node* const base = get_base_and_offset(fast_mach, offset);
1118 if (base == nullptr || !base->is_Mach() || !is_concrete(offset)) {
1119 return false;
1120 }
1121 const MachNode* const base_mach = base->as_Mach();
1122 if (base_mach->ideal_Opcode() != Op_ThreadLocal) {
1123 return false;
1124 }
1125 return offset == in_bytes(Thread::tlab_top_offset());
1126 }
1127
1128 void BarrierSetC2::elide_dominated_barriers(Node_List& accesses, Node_List& access_dominators) const {
1129 Compile* const C = Compile::current();
1130 PhaseCFG* const cfg = C->cfg();
1131
1132 for (uint i = 0; i < accesses.size(); i++) {
1133 MachNode* const access = accesses.at(i)->as_Mach();
1134 intptr_t access_offset;
1135 const Node* const access_obj = get_base_and_offset(access, access_offset);
1136 Block* const access_block = cfg->get_block_for_node(access);
1137 const uint access_index = block_index(access_block, access);
1138
1139 if (access_obj == nullptr) {
1140 // No information available
1141 continue;
1142 }
1143
1144 for (uint j = 0; j < access_dominators.size(); j++) {
1145 const Node* const mem = access_dominators.at(j);
1146 if (mem->is_Phi()) {
1147 assert(is_allocation(mem), "expected allocation phi node");
1148 if (mem != access_obj) {
1149 continue;
1150 }
1151 if (is_unknown(access_offset) && !is_array_allocation(mem)) {
1152 // The accessed address has an unknown offset, but the allocated
1153 // object cannot be determined to be an array. Avoid eliding in this
1154 // case, to be on the safe side.
1155 continue;
1156 }
1157 assert((is_concrete(access_offset) && access_offset >= 0) || (is_unknown(access_offset) && is_array_allocation(mem)),
1158 "candidate allocation-dominated access offsets must be either concrete and nonnegative, or unknown (for array allocations only)");
1159 } else {
1160 // Access node
1161 const MachNode* const mem_mach = mem->as_Mach();
1162 intptr_t mem_offset;
1163 const Node* const mem_obj = get_base_and_offset(mem_mach, mem_offset);
1164
1165 if (mem_obj == nullptr ||
1166 !is_concrete(access_offset) ||
1167 !is_concrete(mem_offset)) {
1168 // No information available
1169 continue;
1170 }
1171
1172 if (mem_obj != access_obj || mem_offset != access_offset) {
1173 // Not the same addresses, not a candidate
1174 continue;
1175 }
1176 assert(is_concrete(access_offset) && access_offset >= 0,
1177 "candidate non-allocation-dominated access offsets must be concrete and nonnegative");
1178 }
1179
1180 Block* mem_block = cfg->get_block_for_node(mem);
1181 const uint mem_index = block_index(mem_block, mem);
1182
1183 if (access_block == mem_block) {
1184 // Earlier accesses in the same block
1185 if (mem_index < access_index && !block_has_safepoint(mem_block, mem_index + 1, access_index)) {
1186 elide_dominated_barrier(access);
1187 }
1188 } else if (mem_block->dominates(access_block)) {
1189 // Dominating block? Look around for safepoints
1190 ResourceMark rm;
1191 Block_List stack;
1192 VectorSet visited;
1193 stack.push(access_block);
1194 bool safepoint_found = block_has_safepoint(access_block);
1195 while (!safepoint_found && stack.size() > 0) {
1196 const Block* const block = stack.pop();
1197 if (visited.test_set(block->_pre_order)) {
1198 continue;
1199 }
1200 if (block_has_safepoint(block)) {
1201 safepoint_found = true;
1202 break;
1203 }
1204 if (block == mem_block) {
1205 continue;
1206 }
1207
1208 // Push predecessor blocks
1209 for (uint p = 1; p < block->num_preds(); ++p) {
1210 Block* const pred = cfg->get_block_for_node(block->pred(p));
1211 stack.push(pred);
1212 }
1213 }
1214
1215 if (!safepoint_found) {
1216 elide_dominated_barrier(access);
1217 }
1218 }
1219 }
1220 }
1221 }
1222
1223 void BarrierSetC2::compute_liveness_at_stubs() const {
1224 ResourceMark rm;
1225 Compile* const C = Compile::current();
1226 Arena* const A = Thread::current()->resource_area();
1227 PhaseCFG* const cfg = C->cfg();
1228 PhaseRegAlloc* const regalloc = C->regalloc();
1229 RegMask* const live = NEW_ARENA_ARRAY(A, RegMask, cfg->number_of_blocks() * sizeof(RegMask));
1230 BarrierSetAssembler* const bs = BarrierSet::barrier_set()->barrier_set_assembler();
1231 BarrierSetC2State* bs_state = barrier_set_state();
1232 Block_List worklist;
1233
1234 for (uint i = 0; i < cfg->number_of_blocks(); ++i) {
1235 new ((void*)(live + i)) RegMask();
1236 worklist.push(cfg->get_block(i));
1237 }
1238
1239 while (worklist.size() > 0) {
1240 const Block* const block = worklist.pop();
1241 RegMask& old_live = live[block->_pre_order];
1242 RegMask new_live;
1243
1244 // Initialize to union of successors
1245 for (uint i = 0; i < block->_num_succs; i++) {
1246 const uint succ_id = block->_succs[i]->_pre_order;
1247 new_live.or_with(live[succ_id]);
1248 }
1249
1250 // Walk block backwards, computing liveness
1251 for (int i = block->number_of_nodes() - 1; i >= 0; --i) {
1252 const Node* const node = block->get_node(i);
1253
1254 // If this node tracks out-liveness, update it
1255 if (!bs_state->needs_livein_data()) {
1256 RegMask* const regs = bs_state->live(node);
1257 if (regs != nullptr) {
1258 regs->or_with(new_live);
1259 }
1260 }
1261
1262 // Remove def bits
1263 const OptoReg::Name first = bs->refine_register(node, regalloc->get_reg_first(node));
1264 const OptoReg::Name second = bs->refine_register(node, regalloc->get_reg_second(node));
1265 if (first != OptoReg::Bad) {
1266 new_live.remove(first);
1267 }
1268 if (second != OptoReg::Bad) {
1269 new_live.remove(second);
1270 }
1271
1272 // Add use bits
1273 for (uint j = 1; j < node->req(); ++j) {
1274 const Node* const use = node->in(j);
1275 const OptoReg::Name first = bs->refine_register(use, regalloc->get_reg_first(use));
1276 const OptoReg::Name second = bs->refine_register(use, regalloc->get_reg_second(use));
1277 if (first != OptoReg::Bad) {
1278 new_live.insert(first);
1279 }
1280 if (second != OptoReg::Bad) {
1281 new_live.insert(second);
1282 }
1283 }
1284
1285 // If this node tracks in-liveness, update it
1286 if (bs_state->needs_livein_data()) {
1287 RegMask* const regs = bs_state->live(node);
1288 if (regs != nullptr) {
1289 regs->or_with(new_live);
1290 }
1291 }
1292 }
1293
1294 // Now at block top, see if we have any changes
1295 new_live.subtract(old_live);
1296 if (!new_live.is_empty()) {
1297 // Liveness has refined, update and propagate to prior blocks
1298 old_live.or_with(new_live);
1299 for (uint i = 1; i < block->num_preds(); ++i) {
1300 Block* const pred = cfg->get_block_for_node(block->pred(i));
1301 worklist.push(pred);
1302 }
1303 }
1304 }
1305 }