/*
 * Copyright (c) 2015, 2023, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

#include "precompiled.hpp"
#include "asm/macroAssembler.hpp"
#include "classfile/javaClasses.hpp"
#include "gc/z/c2/zBarrierSetC2.hpp"
#include "gc/z/zBarrierSet.hpp"
#include "gc/z/zBarrierSetAssembler.hpp"
#include "gc/z/zBarrierSetRuntime.hpp"
#include "opto/arraycopynode.hpp"
#include "opto/addnode.hpp"
#include "opto/block.hpp"
#include "opto/compile.hpp"
#include "opto/graphKit.hpp"
#include "opto/machnode.hpp"
#include "opto/macro.hpp"
#include "opto/memnode.hpp"
#include "opto/node.hpp"
#include "opto/output.hpp"
#include "opto/regalloc.hpp"
#include "opto/rootnode.hpp"
#include "opto/runtime.hpp"
#include "opto/type.hpp"
#include "utilities/debug.hpp"
#include "utilities/growableArray.hpp"
#include "utilities/macros.hpp"

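// Arena-allocated hash table with separate chaining. Buckets are selected with
// 'key & _table_mask', so _table_size is expected to be a power of two. Entries
// are allocated from the given arena and never removed; colliding keys are
// chained through ZArenaHashtableEntry::_next.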
template<typename K, typename V, size_t _table_size>
class ZArenaHashtable : public ResourceObj {
  class ZArenaHashtableEntry : public ResourceObj {
  public:
    ZArenaHashtableEntry* _next;
    K _key;
    V _value;
  };

  static const size_t _table_mask = _table_size - 1;

  Arena* _arena;
  ZArenaHashtableEntry* _table[_table_size];

public:
  class Iterator {
    ZArenaHashtable* _table;
    ZArenaHashtableEntry* _current_entry;
    size_t _current_index;

  public:
    Iterator(ZArenaHashtable* table)
      : _table(table),
        _current_entry(table->_table[0]),
        _current_index(0) {
      if (_current_entry == nullptr) {
        next();
      }
    }

    bool has_next() { return _current_entry != nullptr; }
    K key() { return _current_entry->_key; }
    V value() { return _current_entry->_value; }

    void next() {
      if (_current_entry != nullptr) {
        _current_entry = _current_entry->_next;
      }
      while (_current_entry == nullptr && ++_current_index < _table_size) {
        _current_entry = _table->_table[_current_index];
      }
    }
  };

  ZArenaHashtable(Arena* arena)
    : _arena(arena),
      _table() {
    Copy::zero_to_bytes(&_table, sizeof(_table));
  }

  void add(K key, V value) {
    ZArenaHashtableEntry* entry = new (_arena) ZArenaHashtableEntry();
    entry->_key = key;
    entry->_value = value;
    entry->_next = _table[key & _table_mask];
    _table[key & _table_mask] = entry;
  }

  V* get(K key) const {
    for (ZArenaHashtableEntry* e = _table[key & _table_mask]; e != nullptr; e = e->_next) {
      if (e->_key == key) {
        return &(e->_value);
      }
    }
    return nullptr;
  }

  Iterator iterator() {
    return Iterator(this);
  }
};

typedef ZArenaHashtable<intptr_t, bool, 4> ZOffsetTable;

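// Per-compilation state for the ZGC C2 barrier set: the list of barrier stubs
// to emit after the main code, the trampoline stub count, the start offset of
// the stub section, and a per-node register liveness map used to limit
// spilling around barrier stubs.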
class ZBarrierSetC2State : public ArenaObj {
private:
  GrowableArray<ZBarrierStubC2*>* _stubs;
  Node_Array _live;
  int _trampoline_stubs_count;
  int _stubs_start_offset;

public:
  ZBarrierSetC2State(Arena* arena)
    : _stubs(new (arena) GrowableArray<ZBarrierStubC2*>(arena, 8, 0, nullptr)),
      _live(arena),
      _trampoline_stubs_count(0),
      _stubs_start_offset(0) {}

  GrowableArray<ZBarrierStubC2*>* stubs() {
    return _stubs;
  }

  RegMask* live(const Node* node) {
    if (!node->is_Mach()) {
      // Don't need liveness for non-MachNodes
      return nullptr;
    }

    const MachNode* const mach = node->as_Mach();
    if (mach->barrier_data() == ZBarrierElided) {
      // Don't need liveness data for nodes without barriers
      return nullptr;
    }

    RegMask* live = (RegMask*)_live[node->_idx];
    if (live == nullptr) {
      live = new (Compile::current()->comp_arena()->AmallocWords(sizeof(RegMask))) RegMask();
      _live.map(node->_idx, (Node*)live);
    }

    return live;
  }

  void inc_trampoline_stubs_count() {
    assert(_trampoline_stubs_count != INT_MAX, "Overflow");
    ++_trampoline_stubs_count;
  }

  int trampoline_stubs_count() {
    return _trampoline_stubs_count;
  }

  void set_stubs_start_offset(int offset) {
    _stubs_start_offset = offset;
  }

  int stubs_start_offset() {
    return _stubs_start_offset;
  }
};

static ZBarrierSetC2State* barrier_set_state() {
  return reinterpret_cast<ZBarrierSetC2State*>(Compile::current()->barrier_set_state());
}

void ZBarrierStubC2::register_stub(ZBarrierStubC2* stub) {
  if (!Compile::current()->output()->in_scratch_emit_size()) {
    barrier_set_state()->stubs()->append(stub);
  }
}

void ZBarrierStubC2::inc_trampoline_stubs_count() {
  if (!Compile::current()->output()->in_scratch_emit_size()) {
    barrier_set_state()->inc_trampoline_stubs_count();
  }
}

int ZBarrierStubC2::trampoline_stubs_count() {
  return barrier_set_state()->trampoline_stubs_count();
}

int ZBarrierStubC2::stubs_start_offset() {
  return barrier_set_state()->stubs_start_offset();
}

ZBarrierStubC2::ZBarrierStubC2(const MachNode* node)
  : _node(node),
    _entry(),
    _continuation() {}

Register ZBarrierStubC2::result() const {
  return noreg;
}

RegMask& ZBarrierStubC2::live() const {
  return *barrier_set_state()->live(_node);
}

Label* ZBarrierStubC2::entry() {
  // The _entry will never be bound when in_scratch_emit_size() is true.
  // However, we still need to return a label that is not bound now, but
  // will eventually be bound. Any eventually bound label will do, as it
  // will only act as a placeholder, so we return the _continuation label.
  return Compile::current()->output()->in_scratch_emit_size() ? &_continuation : &_entry;
}

Label* ZBarrierStubC2::continuation() {
  return &_continuation;
}

ZLoadBarrierStubC2* ZLoadBarrierStubC2::create(const MachNode* node, Address ref_addr, Register ref) {
  AARCH64_ONLY(fatal("Should use ZLoadBarrierStubC2Aarch64::create"));
  ZLoadBarrierStubC2* const stub = new (Compile::current()->comp_arena()) ZLoadBarrierStubC2(node, ref_addr, ref);
  register_stub(stub);

  return stub;
}

ZLoadBarrierStubC2::ZLoadBarrierStubC2(const MachNode* node, Address ref_addr, Register ref)
  : ZBarrierStubC2(node),
    _ref_addr(ref_addr),
    _ref(ref) {
  assert_different_registers(ref, ref_addr.base());
  assert_different_registers(ref, ref_addr.index());
}

Address ZLoadBarrierStubC2::ref_addr() const {
  return _ref_addr;
}

Register ZLoadBarrierStubC2::ref() const {
  return _ref;
}

Register ZLoadBarrierStubC2::result() const {
  return ref();
}

address ZLoadBarrierStubC2::slow_path() const {
  const uint8_t barrier_data = _node->barrier_data();
  DecoratorSet decorators = DECORATORS_NONE;
  if (barrier_data & ZBarrierStrong) {
    decorators |= ON_STRONG_OOP_REF;
  }
  if (barrier_data & ZBarrierWeak) {
    decorators |= ON_WEAK_OOP_REF;
  }
  if (barrier_data & ZBarrierPhantom) {
    decorators |= ON_PHANTOM_OOP_REF;
  }
  if (barrier_data & ZBarrierNoKeepalive) {
    decorators |= AS_NO_KEEPALIVE;
  }
  return ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators);
}

void ZLoadBarrierStubC2::emit_code(MacroAssembler& masm) {
  ZBarrierSet::assembler()->generate_c2_load_barrier_stub(&masm, static_cast<ZLoadBarrierStubC2*>(this));
}

ZStoreBarrierStubC2* ZStoreBarrierStubC2::create(const MachNode* node, Address ref_addr, Register new_zaddress, Register new_zpointer, bool is_native, bool is_atomic) {
  AARCH64_ONLY(fatal("Should use ZStoreBarrierStubC2Aarch64::create"));
  ZStoreBarrierStubC2* const stub = new (Compile::current()->comp_arena()) ZStoreBarrierStubC2(node, ref_addr, new_zaddress, new_zpointer, is_native, is_atomic);
  register_stub(stub);

  return stub;
}

ZStoreBarrierStubC2::ZStoreBarrierStubC2(const MachNode* node, Address ref_addr, Register new_zaddress, Register new_zpointer, bool is_native, bool is_atomic)
  : ZBarrierStubC2(node),
    _ref_addr(ref_addr),
    _new_zaddress(new_zaddress),
    _new_zpointer(new_zpointer),
    _is_native(is_native),
    _is_atomic(is_atomic) {}

Address ZStoreBarrierStubC2::ref_addr() const {
  return _ref_addr;
}

Register ZStoreBarrierStubC2::new_zaddress() const {
  return _new_zaddress;
}

Register ZStoreBarrierStubC2::new_zpointer() const {
  return _new_zpointer;
}

bool ZStoreBarrierStubC2::is_native() const {
  return _is_native;
}

bool ZStoreBarrierStubC2::is_atomic() const {
  return _is_atomic;
}

Register ZStoreBarrierStubC2::result() const {
  return noreg;
}

void ZStoreBarrierStubC2::emit_code(MacroAssembler& masm) {
  ZBarrierSet::assembler()->generate_c2_store_barrier_stub(&masm, static_cast<ZStoreBarrierStubC2*>(this));
}

void* ZBarrierSetC2::create_barrier_state(Arena* comp_arena) const {
  return new (comp_arena) ZBarrierSetC2State(comp_arena);
}

void ZBarrierSetC2::late_barrier_analysis() const {
  compute_liveness_at_stubs();
  analyze_dominating_barriers();
}

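// Emit all registered barrier stubs after the main code, recording the offset
// at which the stub section starts. Compilation is failed if the code buffer
// cannot be expanded to hold the next stub.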
void ZBarrierSetC2::emit_stubs(CodeBuffer& cb) const {
  MacroAssembler masm(&cb);
  GrowableArray<ZBarrierStubC2*>* const stubs = barrier_set_state()->stubs();
  barrier_set_state()->set_stubs_start_offset(masm.offset());

  for (int i = 0; i < stubs->length(); i++) {
    // Make sure there is enough space in the code buffer
    if (cb.insts()->maybe_expand_to_ensure_remaining(PhaseOutput::MAX_inst_size) && cb.blob() == nullptr) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }

    stubs->at(i)->emit_code(masm);
  }

  masm.flush();
}

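// Estimate the total size of the barrier stubs by emitting each of them into
// the scratch buffer and summing up the resulting instruction sizes.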
int ZBarrierSetC2::estimate_stub_size() const {
  Compile* const C = Compile::current();
  BufferBlob* const blob = C->output()->scratch_buffer_blob();
  GrowableArray<ZBarrierStubC2*>* const stubs = barrier_set_state()->stubs();
  int size = 0;

  for (int i = 0; i < stubs->length(); i++) {
    CodeBuffer cb(blob->content_begin(), (address)C->output()->scratch_locs_memory() - blob->content_begin());
    MacroAssembler masm(&cb);
    stubs->at(i)->emit_code(masm);
    size += cb.insts_size();
  }

  return size;
}

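// Compute the barrier data bits for an access from its decorators and attach
// them to the access. Accesses to tightly coupled allocations have their
// barriers elided up front.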
static void set_barrier_data(C2Access& access) {
  if (!ZBarrierSet::barrier_needed(access.decorators(), access.type())) {
    return;
  }

  if (access.decorators() & C2_TIGHTLY_COUPLED_ALLOC) {
    access.set_barrier_data(ZBarrierElided);
    return;
  }

  uint8_t barrier_data = 0;

  if (access.decorators() & ON_PHANTOM_OOP_REF) {
    barrier_data |= ZBarrierPhantom;
  } else if (access.decorators() & ON_WEAK_OOP_REF) {
    barrier_data |= ZBarrierWeak;
  } else {
    barrier_data |= ZBarrierStrong;
  }

  if (access.decorators() & IN_NATIVE) {
    barrier_data |= ZBarrierNative;
  }

  if (access.decorators() & AS_NO_KEEPALIVE) {
    barrier_data |= ZBarrierNoKeepalive;
  }

  access.set_barrier_data(barrier_data);
}

Node* ZBarrierSetC2::store_at_resolved(C2Access& access, C2AccessValue& val) const {
  set_barrier_data(access);
  return BarrierSetC2::store_at_resolved(access, val);
}

Node* ZBarrierSetC2::load_at_resolved(C2Access& access, const Type* val_type) const {
  set_barrier_data(access);
  return BarrierSetC2::load_at_resolved(access, val_type);
}

Node* ZBarrierSetC2::atomic_cmpxchg_val_at_resolved(C2AtomicParseAccess& access, Node* expected_val,
                                                    Node* new_val, const Type* val_type) const {
  set_barrier_data(access);
  return BarrierSetC2::atomic_cmpxchg_val_at_resolved(access, expected_val, new_val, val_type);
}

Node* ZBarrierSetC2::atomic_cmpxchg_bool_at_resolved(C2AtomicParseAccess& access, Node* expected_val,
                                                     Node* new_val, const Type* value_type) const {
  set_barrier_data(access);
  return BarrierSetC2::atomic_cmpxchg_bool_at_resolved(access, expected_val, new_val, value_type);
}

Node* ZBarrierSetC2::atomic_xchg_at_resolved(C2AtomicParseAccess& access, Node* new_val, const Type* val_type) const {
  set_barrier_data(access);
  return BarrierSetC2::atomic_xchg_at_resolved(access, new_val, val_type);
}

bool ZBarrierSetC2::array_copy_requires_gc_barriers(bool tightly_coupled_alloc, BasicType type,
                                                    bool is_clone, bool is_clone_instance,
                                                    ArrayCopyPhase phase) const {
  if (phase == ArrayCopyPhase::Parsing) {
    return false;
  }
  if (phase == ArrayCopyPhase::Optimization) {
    return is_clone_instance;
  }
  // else ArrayCopyPhase::Expansion
  return type == T_OBJECT || type == T_ARRAY;
}

// This TypeFunc assumes a 64bit system
static const TypeFunc* clone_type() {
  // Create input type (domain)
  const Type** const domain_fields = TypeTuple::fields(4);
  domain_fields[TypeFunc::Parms + 0] = TypeInstPtr::NOTNULL; // src
  domain_fields[TypeFunc::Parms + 1] = TypeInstPtr::NOTNULL; // dst
  domain_fields[TypeFunc::Parms + 2] = TypeLong::LONG;       // size lower
  domain_fields[TypeFunc::Parms + 3] = Type::HALF;           // size upper
  const TypeTuple* const domain = TypeTuple::make(TypeFunc::Parms + 4, domain_fields);

  // Create result type (range)
  const Type** const range_fields = TypeTuple::fields(0);
  const TypeTuple* const range = TypeTuple::make(TypeFunc::Parms + 0, range_fields);

  return TypeFunc::make(domain, range);
}

#define XTOP LP64_ONLY(COMMA phase->top())

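// Expand a clone: array clones become a call to the raw arraycopy stub (with
// offsets adjusted so that oop arrays are copied from the first element), and
// instance clones become a call to ZBarrierSetRuntime::clone.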
void ZBarrierSetC2::clone_at_expansion(PhaseMacroExpand* phase, ArrayCopyNode* ac) const {
  Node* const src = ac->in(ArrayCopyNode::Src);
  const TypeAryPtr* const ary_ptr = src->get_ptr_type()->isa_aryptr();

  if (ac->is_clone_array() && ary_ptr != nullptr) {
    BasicType bt = ary_ptr->elem()->array_element_basic_type();
    if (is_reference_type(bt)) {
      // Clone object array
      bt = T_OBJECT;
    } else {
      // Clone primitive array
      bt = T_LONG;
    }

    Node* const ctrl = ac->in(TypeFunc::Control);
    Node* const mem = ac->in(TypeFunc::Memory);
    Node* const src = ac->in(ArrayCopyNode::Src);
    Node* src_offset = ac->in(ArrayCopyNode::SrcPos);
    Node* const dest = ac->in(ArrayCopyNode::Dest);
    Node* dest_offset = ac->in(ArrayCopyNode::DestPos);
    Node* length = ac->in(ArrayCopyNode::Length);

    if (bt == T_OBJECT) {
      // BarrierSetC2::clone sets the offsets via BarrierSetC2::arraycopy_payload_base_offset
      // which 8-byte aligns them to allow for word size copies. Make sure the offsets point
      // to the first element in the array when cloning object arrays. Otherwise, load
      // barriers are applied to parts of the header. Also adjust the length accordingly.
      assert(src_offset == dest_offset, "should be equal");
      const jlong offset = src_offset->get_long();
      if (offset != arrayOopDesc::base_offset_in_bytes(T_OBJECT)) {
        assert(!UseCompressedClassPointers || UseCompactObjectHeaders, "should only happen without compressed class pointers");
        assert((arrayOopDesc::base_offset_in_bytes(T_OBJECT) - offset) == BytesPerLong, "unexpected offset");
        length = phase->transform_later(new SubLNode(length, phase->longcon(1))); // Size is in longs
        src_offset = phase->longcon(arrayOopDesc::base_offset_in_bytes(T_OBJECT));
        dest_offset = src_offset;
      }
    }
    Node* const payload_src = phase->basic_plus_adr(src, src_offset);
    Node* const payload_dst = phase->basic_plus_adr(dest, dest_offset);

    const char* copyfunc_name = "arraycopy";
    const address copyfunc_addr = phase->basictype2arraycopy(bt, nullptr, nullptr, true, copyfunc_name, true);

    const TypePtr* const raw_adr_type = TypeRawPtr::BOTTOM;
    const TypeFunc* const call_type = OptoRuntime::fast_arraycopy_Type();

    Node* const call = phase->make_leaf_call(ctrl, mem, call_type, copyfunc_addr, copyfunc_name, raw_adr_type, payload_src, payload_dst, length XTOP);
    phase->transform_later(call);

    phase->igvn().replace_node(ac, call);
    return;
  }

  // Clone instance
  Node* const ctrl = ac->in(TypeFunc::Control);
  Node* const mem = ac->in(TypeFunc::Memory);
  Node* const dst = ac->in(ArrayCopyNode::Dest);
  Node* const size = ac->in(ArrayCopyNode::Length);

  assert(size->bottom_type()->is_long(), "Should be long");

  // The native clone we are calling here expects the instance size in words
  // Add header/offset size to payload size to get instance size.
  Node* const base_offset = phase->longcon(arraycopy_payload_base_offset(ac->is_clone_array()) >> LogBytesPerLong);
  Node* const full_size = phase->transform_later(new AddLNode(size, base_offset));

  Node* const call = phase->make_leaf_call(ctrl,
                                           mem,
                                           clone_type(),
                                           ZBarrierSetRuntime::clone_addr(),
                                           "ZBarrierSetRuntime::clone",
                                           TypeRawPtr::BOTTOM,
                                           src,
                                           dst,
                                           full_size,
                                           phase->top());
  phase->transform_later(call);
  phase->igvn().replace_node(ac, call);
}

#undef XTOP

// == Dominating barrier elision ==

static bool block_has_safepoint(const Block* block, uint from, uint to) {
  for (uint i = from; i < to; i++) {
    if (block->get_node(i)->is_MachSafePoint()) {
      // Safepoint found
      return true;
    }
  }

  // Safepoint not found
  return false;
}

static bool block_has_safepoint(const Block* block) {
  return block_has_safepoint(block, 0, block->number_of_nodes());
}

static uint block_index(const Block* block, const Node* node) {
  for (uint j = 0; j < block->number_of_nodes(); ++j) {
    if (block->get_node(j) == node) {
      return j;
    }
  }
  ShouldNotReachHere();
  return 0;
}

// Look through various node aliases
static const Node* look_through_node(const Node* node) {
  while (node != nullptr) {
    const Node* new_node = node;
    if (node->is_Mach()) {
      const MachNode* const node_mach = node->as_Mach();
      if (node_mach->ideal_Opcode() == Op_CheckCastPP) {
        new_node = node->in(1);
      }
      if (node_mach->is_SpillCopy()) {
        new_node = node->in(1);
      }
    }
    if (new_node == node || new_node == nullptr) {
      break;
    } else {
      node = new_node;
    }
  }

  return node;
}

// Whether the given offset is undefined.
static bool is_undefined(intptr_t offset) {
  return offset == Type::OffsetTop;
}

// Whether the given offset is unknown.
static bool is_unknown(intptr_t offset) {
  return offset == Type::OffsetBot;
}

// Whether the given offset is concrete (defined and compile-time known).
static bool is_concrete(intptr_t offset) {
  return !is_undefined(offset) && !is_unknown(offset);
}

// Compute base + offset components of the memory address accessed by mach.
// Return a node representing the base address, or null if the base cannot be
// found or the offset is undefined or a concrete negative value. If a non-null
// base is returned, the offset is a concrete, nonnegative value or unknown.
static const Node* get_base_and_offset(const MachNode* mach, intptr_t& offset) {
  const TypePtr* adr_type = nullptr;
  offset = 0;
  const Node* base = mach->get_base_and_disp(offset, adr_type);

  if (base == nullptr || base == NodeSentinel) {
    return nullptr;
  }

  if (offset == 0 && base->is_Mach() && base->as_Mach()->ideal_Opcode() == Op_AddP) {
    // The memory address is computed by 'base' and fed to 'mach' via an
    // indirect memory operand (indicated by offset == 0). The ultimate base and
    // offset can be fetched directly from the inputs and Ideal type of 'base'.
    offset = base->bottom_type()->isa_oopptr()->offset();
    // Even if 'base' is not an Ideal AddP node anymore, Matcher::ReduceInst()
    // guarantees that the base address is still available at the same slot.
    base = base->in(AddPNode::Base);
    assert(base != nullptr, "");
  }

  if (is_undefined(offset) || (is_concrete(offset) && offset < 0)) {
    return nullptr;
  }

  return look_through_node(base);
}

// Whether a phi node corresponds to an array allocation.
// This test is incomplete: in some edge cases, it might return false even
// though the node does correspond to an array allocation.
static bool is_array_allocation(const Node* phi) {
  precond(phi->is_Phi());
  // Check whether phi has a successor cast (CheckCastPP) to Java array pointer,
  // possibly below spill copies and other cast nodes. Limit the exploration to
  // a single path from the phi node consisting of these node types.
  const Node* current = phi;
  while (true) {
    const Node* next = nullptr;
    for (DUIterator_Fast imax, i = current->fast_outs(imax); i < imax; i++) {
      if (!current->fast_out(i)->isa_Mach()) {
        continue;
      }
      const MachNode* succ = current->fast_out(i)->as_Mach();
      if (succ->ideal_Opcode() == Op_CheckCastPP) {
        if (succ->get_ptr_type()->isa_aryptr()) {
          // Cast to Java array pointer: phi corresponds to an array allocation.
          return true;
        }
        // Other cast: record as candidate for further exploration.
        next = succ;
      } else if (succ->is_SpillCopy() && next == nullptr) {
        // Spill copy, and no better candidate found: record as candidate.
        next = succ;
      }
    }
    if (next == nullptr) {
      // No evidence found that phi corresponds to an array allocation, and no
      // candidates available to continue exploring.
      return false;
    }
    // Continue exploring from the best candidate found.
    current = next;
  }
  ShouldNotReachHere();
}

// Match the phi node that connects a TLAB allocation fast path with its slowpath
static bool is_allocation(const Node* node) {
  if (node->req() != 3) {
    return false;
  }
  const Node* const fast_node = node->in(2);
  if (!fast_node->is_Mach()) {
    return false;
  }
  const MachNode* const fast_mach = fast_node->as_Mach();
  if (fast_mach->ideal_Opcode() != Op_LoadP) {
    return false;
  }
  const TypePtr* const adr_type = nullptr;
  intptr_t offset;
  const Node* const base = get_base_and_offset(fast_mach, offset);
  if (base == nullptr || !base->is_Mach() || !is_concrete(offset)) {
    return false;
  }
  const MachNode* const base_mach = base->as_Mach();
  if (base_mach->ideal_Opcode() != Op_ThreadLocal) {
    return false;
  }
  return offset == in_bytes(Thread::tlab_top_offset());
}

static void elide_mach_barrier(MachNode* mach) {
  mach->set_barrier_data(ZBarrierElided);
}

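// For each access, search the candidate dominators (same-address accesses or
// the allocation of the accessed object) for one that dominates the access
// with no safepoint on any path in between; if found, the access barrier is
// elided.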
void ZBarrierSetC2::analyze_dominating_barriers_impl(Node_List& accesses, Node_List& access_dominators) const {
  Compile* const C = Compile::current();
  PhaseCFG* const cfg = C->cfg();

  for (uint i = 0; i < accesses.size(); i++) {
    MachNode* const access = accesses.at(i)->as_Mach();
    intptr_t access_offset;
    const Node* const access_obj = get_base_and_offset(access, access_offset);
    Block* const access_block = cfg->get_block_for_node(access);
    const uint access_index = block_index(access_block, access);

    if (access_obj == nullptr) {
      // No information available
      continue;
    }

    for (uint j = 0; j < access_dominators.size(); j++) {
      const Node* const mem = access_dominators.at(j);
      if (mem->is_Phi()) {
        // Allocation node
        if (mem != access_obj) {
          continue;
        }
        if (is_unknown(access_offset) && !is_array_allocation(mem)) {
          // The accessed address has an unknown offset, but the allocated
          // object cannot be determined to be an array. Avoid eliding in this
          // case, to be on the safe side.
          continue;
        }
        assert((is_concrete(access_offset) && access_offset >= 0) || (is_unknown(access_offset) && is_array_allocation(mem)),
               "candidate allocation-dominated access offsets must be either concrete and nonnegative, or unknown (for array allocations only)");
      } else {
        // Access node
        const MachNode* const mem_mach = mem->as_Mach();
        intptr_t mem_offset;
        const Node* const mem_obj = get_base_and_offset(mem_mach, mem_offset);

        if (mem_obj == nullptr ||
            !is_concrete(access_offset) ||
            !is_concrete(mem_offset)) {
          // No information available
          continue;
        }

        if (mem_obj != access_obj || mem_offset != access_offset) {
          // Not the same addresses, not a candidate
          continue;
        }
        assert(is_concrete(access_offset) && access_offset >= 0,
               "candidate non-allocation-dominated access offsets must be concrete and nonnegative");
      }

      Block* mem_block = cfg->get_block_for_node(mem);
      const uint mem_index = block_index(mem_block, mem);

      if (access_block == mem_block) {
        // Earlier accesses in the same block
        if (mem_index < access_index && !block_has_safepoint(mem_block, mem_index + 1, access_index)) {
          elide_mach_barrier(access);
        }
      } else if (mem_block->dominates(access_block)) {
        // Dominating block? Look around for safepoints
        ResourceMark rm;
        Block_List stack;
        VectorSet visited;
        stack.push(access_block);
        bool safepoint_found = block_has_safepoint(access_block);
        while (!safepoint_found && stack.size() > 0) {
          const Block* const block = stack.pop();
          if (visited.test_set(block->_pre_order)) {
            continue;
          }
          if (block_has_safepoint(block)) {
            safepoint_found = true;
            break;
          }
          if (block == mem_block) {
            continue;
          }

          // Push predecessor blocks
          for (uint p = 1; p < block->num_preds(); ++p) {
            Block* const pred = cfg->get_block_for_node(block->pred(p));
            stack.push(pred);
          }
        }

        if (!safepoint_found) {
          elide_mach_barrier(access);
        }
      }
    }
  }
}

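// Collect the loads, stores and atomics that carry barrier data, together with
// the nodes that may act as their dominators (allocations and other accesses),
// and elide barriers that are dominated by a suitable access or allocation
// without an intervening safepoint.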
void ZBarrierSetC2::analyze_dominating_barriers() const {
  ResourceMark rm;
  Compile* const C = Compile::current();
  PhaseCFG* const cfg = C->cfg();

  Node_List loads;
  Node_List load_dominators;

  Node_List stores;
  Node_List store_dominators;

  Node_List atomics;
  Node_List atomic_dominators;

  // Step 1 - Find accesses and allocations, and track them in lists
  for (uint i = 0; i < cfg->number_of_blocks(); ++i) {
    const Block* const block = cfg->get_block(i);
    for (uint j = 0; j < block->number_of_nodes(); ++j) {
      Node* const node = block->get_node(j);
      if (node->is_Phi()) {
        if (is_allocation(node)) {
          load_dominators.push(node);
          store_dominators.push(node);
          // An allocation can't be considered to "dominate" an atomic operation.
          // For example a CAS requires the memory location to be store-good.
          // When you have a dominating store or atomic instruction, that is
          // indeed ensured to be the case. However, as for allocations, the
          // initialized memory location could be raw null, which isn't store-good.
        }
        continue;
      } else if (!node->is_Mach()) {
        continue;
      }

      MachNode* const mach = node->as_Mach();
      switch (mach->ideal_Opcode()) {
      case Op_LoadP:
        if ((mach->barrier_data() & ZBarrierStrong) != 0 &&
            (mach->barrier_data() & ZBarrierNoKeepalive) == 0) {
          loads.push(mach);
          load_dominators.push(mach);
        }
        break;
      case Op_StoreP:
        if (mach->barrier_data() != 0) {
          stores.push(mach);
          load_dominators.push(mach);
          store_dominators.push(mach);
          atomic_dominators.push(mach);
        }
        break;
      case Op_CompareAndExchangeP:
      case Op_CompareAndSwapP:
      case Op_GetAndSetP:
        if (mach->barrier_data() != 0) {
          atomics.push(mach);
          load_dominators.push(mach);
          store_dominators.push(mach);
          atomic_dominators.push(mach);
        }
        break;

      default:
        break;
      }
    }
  }

  // Step 2 - Find dominating accesses or allocations for each access
  analyze_dominating_barriers_impl(loads, load_dominators);
  analyze_dominating_barriers_impl(stores, store_dominators);
  analyze_dominating_barriers_impl(atomics, atomic_dominators);
}

// == Reduced spilling optimization ==

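// Backward dataflow pass over the CFG that computes, for every node with a
// barrier, the set of registers live across it. The result is stored in the
// barrier set state, so that barrier stubs only need to preserve registers
// that are actually live.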
void ZBarrierSetC2::compute_liveness_at_stubs() const {
  ResourceMark rm;
  Compile* const C = Compile::current();
  Arena* const A = Thread::current()->resource_area();
  PhaseCFG* const cfg = C->cfg();
  PhaseRegAlloc* const regalloc = C->regalloc();
  RegMask* const live = NEW_ARENA_ARRAY(A, RegMask, cfg->number_of_blocks() * sizeof(RegMask));
  ZBarrierSetAssembler* const bs = ZBarrierSet::assembler();
  Block_List worklist;

  for (uint i = 0; i < cfg->number_of_blocks(); ++i) {
    new ((void*)(live + i)) RegMask();
    worklist.push(cfg->get_block(i));
  }

  while (worklist.size() > 0) {
    const Block* const block = worklist.pop();
    RegMask& old_live = live[block->_pre_order];
    RegMask new_live;

    // Initialize to union of successors
    for (uint i = 0; i < block->_num_succs; i++) {
      const uint succ_id = block->_succs[i]->_pre_order;
      new_live.OR(live[succ_id]);
    }

    // Walk block backwards, computing liveness
    for (int i = block->number_of_nodes() - 1; i >= 0; --i) {
      const Node* const node = block->get_node(i);

      // Remove def bits
      const OptoReg::Name first = bs->refine_register(node, regalloc->get_reg_first(node));
      const OptoReg::Name second = bs->refine_register(node, regalloc->get_reg_second(node));
      if (first != OptoReg::Bad) {
        new_live.Remove(first);
      }
      if (second != OptoReg::Bad) {
        new_live.Remove(second);
      }

      // Add use bits
      for (uint j = 1; j < node->req(); ++j) {
        const Node* const use = node->in(j);
        const OptoReg::Name first = bs->refine_register(use, regalloc->get_reg_first(use));
        const OptoReg::Name second = bs->refine_register(use, regalloc->get_reg_second(use));
        if (first != OptoReg::Bad) {
          new_live.Insert(first);
        }
        if (second != OptoReg::Bad) {
          new_live.Insert(second);
        }
      }

      // If this node tracks liveness, update it
      RegMask* const regs = barrier_set_state()->live(node);
      if (regs != nullptr) {
        regs->OR(new_live);
      }
    }

    // Now at block top, see if we have any changes
    new_live.SUBTRACT(old_live);
    if (new_live.is_NotEmpty()) {
      // Liveness has refined, update and propagate to prior blocks
      old_live.OR(new_live);
      for (uint i = 1; i < block->num_preds(); ++i) {
        Block* const pred = cfg->get_block_for_node(block->pred(i));
        worklist.push(pred);
      }
    }
  }
}

void ZBarrierSetC2::eliminate_gc_barrier(PhaseMacroExpand* macro, Node* node) const {
  eliminate_gc_barrier_data(node);
}

void ZBarrierSetC2::eliminate_gc_barrier_data(Node* node) const {
  if (node->is_LoadStore()) {
    LoadStoreNode* loadstore = node->as_LoadStore();
    loadstore->set_barrier_data(ZBarrierElided);
  } else if (node->is_Mem()) {
    MemNode* mem = node->as_Mem();
    mem->set_barrier_data(ZBarrierElided);
  }
}

#ifndef PRODUCT
void ZBarrierSetC2::dump_barrier_data(const MachNode* mach, outputStream* st) const {
  if ((mach->barrier_data() & ZBarrierStrong) != 0) {
    st->print("strong ");
  }
  if ((mach->barrier_data() & ZBarrierWeak) != 0) {
    st->print("weak ");
  }
  if ((mach->barrier_data() & ZBarrierPhantom) != 0) {
    st->print("phantom ");
  }
  if ((mach->barrier_data() & ZBarrierNoKeepalive) != 0) {
    st->print("nokeepalive ");
  }
  if ((mach->barrier_data() & ZBarrierNative) != 0) {
    st->print("native ");
  }
  if ((mach->barrier_data() & ZBarrierElided) != 0) {
    st->print("elided ");
  }
}
#endif // !PRODUCT