/*
 * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

#include "precompiled.hpp"
#include "classfile/javaClasses.hpp"
#include "gc/z/c2/zBarrierSetC2.hpp"
#include "gc/z/zBarrierSet.hpp"
#include "gc/z/zBarrierSetAssembler.hpp"
#include "gc/z/zBarrierSetRuntime.hpp"
#include "opto/arraycopynode.hpp"
#include "opto/addnode.hpp"
#include "opto/block.hpp"
#include "opto/compile.hpp"
#include "opto/graphKit.hpp"
#include "opto/machnode.hpp"
#include "opto/macro.hpp"
#include "opto/memnode.hpp"
#include "opto/node.hpp"
#include "opto/output.hpp"
#include "opto/regalloc.hpp"
#include "opto/rootnode.hpp"
#include "opto/runtime.hpp"
#include "opto/type.hpp"
#include "utilities/growableArray.hpp"
#include "utilities/macros.hpp"

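// Per-compilation, arena-allocated state for ZGC's C2 barrier support: the
// list of load barrier stubs that still need to be emitted, and a map from
// MachNode index to the set of registers live across that node's barrier.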
class ZBarrierSetC2State : public ArenaObj {
private:
  GrowableArray<ZLoadBarrierStubC2*>* _stubs;
  Node_Array _live;

public:
  ZBarrierSetC2State(Arena* arena) :
    _stubs(new (arena) GrowableArray<ZLoadBarrierStubC2*>(arena, 8, 0, NULL)),
    _live(arena) {}

  GrowableArray<ZLoadBarrierStubC2*>* stubs() {
    return _stubs;
  }

  RegMask* live(const Node* node) {
    if (!node->is_Mach()) {
      // Don't need liveness for non-MachNodes
      return NULL;
    }

    const MachNode* const mach = node->as_Mach();
    if (mach->barrier_data() == ZLoadBarrierElided) {
      // Don't need liveness data for nodes without barriers
      return NULL;
    }

    RegMask* live = (RegMask*)_live[node->_idx];
    if (live == NULL) {
      live = new (Compile::current()->comp_arena()->AmallocWords(sizeof(RegMask))) RegMask();
      _live.map(node->_idx, (Node*)live);
    }

    return live;
  }
};

static ZBarrierSetC2State* barrier_set_state() {
  return reinterpret_cast<ZBarrierSetC2State*>(Compile::current()->barrier_set_state());
}

ZLoadBarrierStubC2* ZLoadBarrierStubC2::create(const MachNode* node, Address ref_addr, Register ref, Register tmp, uint8_t barrier_data) {
  ZLoadBarrierStubC2* const stub = new (Compile::current()->comp_arena()) ZLoadBarrierStubC2(node, ref_addr, ref, tmp, barrier_data);
  if (!Compile::current()->output()->in_scratch_emit_size()) {
    barrier_set_state()->stubs()->append(stub);
  }

  return stub;
}

ZLoadBarrierStubC2::ZLoadBarrierStubC2(const MachNode* node, Address ref_addr, Register ref, Register tmp, uint8_t barrier_data) :
    _node(node),
    _ref_addr(ref_addr),
    _ref(ref),
    _tmp(tmp),
    _barrier_data(barrier_data),
    _entry(),
    _continuation() {
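  // The loaded reference must not share a register with the address base or
  // index, since the stub may still need the field address after ref has
  // been updated (for example to store back the healed oop).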
  assert_different_registers(ref, ref_addr.base());
  assert_different_registers(ref, ref_addr.index());
}

Address ZLoadBarrierStubC2::ref_addr() const {
  return _ref_addr;
}

Register ZLoadBarrierStubC2::ref() const {
  return _ref;
}

Register ZLoadBarrierStubC2::tmp() const {
  return _tmp;
}

address ZLoadBarrierStubC2::slow_path() const {
  DecoratorSet decorators = DECORATORS_NONE;
  if (_barrier_data & ZLoadBarrierStrong) {
    decorators |= ON_STRONG_OOP_REF;
  }
  if (_barrier_data & ZLoadBarrierWeak) {
    decorators |= ON_WEAK_OOP_REF;
  }
  if (_barrier_data & ZLoadBarrierPhantom) {
    decorators |= ON_PHANTOM_OOP_REF;
  }
  if (_barrier_data & ZLoadBarrierNoKeepalive) {
    decorators |= AS_NO_KEEPALIVE;
  }
  return ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators);
}

RegMask& ZLoadBarrierStubC2::live() const {
  RegMask* mask = barrier_set_state()->live(_node);
  assert(mask != NULL, "must be mach-node with barrier");
  return *mask;
}

Label* ZLoadBarrierStubC2::entry() {
  // The _entry will never be bound when in_scratch_emit_size() is true.
  // However, we still need to return a label that is not bound now, but
  // will eventually be bound. Any label will do, as it will only act as
  // a placeholder, so we return the _continuation label.
  return Compile::current()->output()->in_scratch_emit_size() ? &_continuation : &_entry;
}

Label* ZLoadBarrierStubC2::continuation() {
  return &_continuation;
}

void* ZBarrierSetC2::create_barrier_state(Arena* comp_arena) const {
  return new (comp_arena) ZBarrierSetC2State(comp_arena);
}

void ZBarrierSetC2::late_barrier_analysis() const {
  analyze_dominating_barriers();
  compute_liveness_at_stubs();
}

void ZBarrierSetC2::emit_stubs(CodeBuffer& cb) const {
  MacroAssembler masm(&cb);
  GrowableArray<ZLoadBarrierStubC2*>* const stubs = barrier_set_state()->stubs();

  for (int i = 0; i < stubs->length(); i++) {
    // Make sure there is enough space in the code buffer
    if (cb.insts()->maybe_expand_to_ensure_remaining(PhaseOutput::MAX_inst_size) && cb.blob() == NULL) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }

    ZBarrierSet::assembler()->generate_c2_load_barrier_stub(&masm, stubs->at(i));
  }

  masm.flush();
}

int ZBarrierSetC2::estimate_stub_size() const {
  Compile* const C = Compile::current();
  BufferBlob* const blob = C->output()->scratch_buffer_blob();
  GrowableArray<ZLoadBarrierStubC2*>* const stubs = barrier_set_state()->stubs();
  int size = 0;

  for (int i = 0; i < stubs->length(); i++) {
    CodeBuffer cb(blob->content_begin(), (address)C->output()->scratch_locs_memory() - blob->content_begin());
    MacroAssembler masm(&cb);
    ZBarrierSet::assembler()->generate_c2_load_barrier_stub(&masm, stubs->at(i));
    size += cb.insts_size();
  }

  return size;
}

static void set_barrier_data(C2Access& access) {
  if (ZBarrierSet::barrier_needed(access.decorators(), access.type())) {
    uint8_t barrier_data = 0;

    if (access.decorators() & ON_PHANTOM_OOP_REF) {
      barrier_data |= ZLoadBarrierPhantom;
    } else if (access.decorators() & ON_WEAK_OOP_REF) {
      barrier_data |= ZLoadBarrierWeak;
    } else {
      barrier_data |= ZLoadBarrierStrong;
    }

    if (access.decorators() & AS_NO_KEEPALIVE) {
      barrier_data |= ZLoadBarrierNoKeepalive;
    }

    access.set_barrier_data(barrier_data);
  }
}

Node* ZBarrierSetC2::load_at_resolved(C2Access& access, const Type* val_type) const {
  set_barrier_data(access);
  return BarrierSetC2::load_at_resolved(access, val_type);
}

Node* ZBarrierSetC2::atomic_cmpxchg_val_at_resolved(C2AtomicParseAccess& access, Node* expected_val,
                                                    Node* new_val, const Type* val_type) const {
  set_barrier_data(access);
  return BarrierSetC2::atomic_cmpxchg_val_at_resolved(access, expected_val, new_val, val_type);
}

Node* ZBarrierSetC2::atomic_cmpxchg_bool_at_resolved(C2AtomicParseAccess& access, Node* expected_val,
                                                     Node* new_val, const Type* value_type) const {
  set_barrier_data(access);
  return BarrierSetC2::atomic_cmpxchg_bool_at_resolved(access, expected_val, new_val, value_type);
}

Node* ZBarrierSetC2::atomic_xchg_at_resolved(C2AtomicParseAccess& access, Node* new_val, const Type* val_type) const {
  set_barrier_data(access);
  return BarrierSetC2::atomic_xchg_at_resolved(access, new_val, val_type);
}

bool ZBarrierSetC2::array_copy_requires_gc_barriers(bool tightly_coupled_alloc, BasicType type,
                                                    bool is_clone, bool is_clone_instance,
                                                    ArrayCopyPhase phase) const {
  if (phase == ArrayCopyPhase::Parsing) {
    return false;
  }
  if (phase == ArrayCopyPhase::Optimization) {
    return is_clone_instance;
  }
  // else ArrayCopyPhase::Expansion
  return type == T_OBJECT || type == T_ARRAY;
}

// This TypeFunc assumes a 64bit system
static const TypeFunc* clone_type() {
  // Create input type (domain)
  const Type** domain_fields = TypeTuple::fields(4);
  domain_fields[TypeFunc::Parms + 0] = TypeInstPtr::NOTNULL;  // src
  domain_fields[TypeFunc::Parms + 1] = TypeInstPtr::NOTNULL;  // dst
  domain_fields[TypeFunc::Parms + 2] = TypeLong::LONG;        // size lower
  domain_fields[TypeFunc::Parms + 3] = Type::HALF;            // size upper
  const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms + 4, domain_fields);

  // Create result type (range)
  const Type** range_fields = TypeTuple::fields(0);
  const TypeTuple* range = TypeTuple::make(TypeFunc::Parms + 0, range_fields);

  return TypeFunc::make(domain, range);
}

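// On LP64 a jlong call argument occupies two argument slots (see Type::HALF
// in clone_type() above); XTOP supplies the top node for the unused high
// half when building the leaf calls below.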
#define XTOP LP64_ONLY(COMMA phase->top())

void ZBarrierSetC2::clone_at_expansion(PhaseMacroExpand* phase, ArrayCopyNode* ac) const {
  Node* const src = ac->in(ArrayCopyNode::Src);
  const TypeAryPtr* ary_ptr = src->get_ptr_type()->isa_aryptr();

  if (ac->is_clone_array() && ary_ptr != NULL) {
    BasicType bt = ary_ptr->elem()->array_element_basic_type();
    if (is_reference_type(bt)) {
      // Clone object array
      bt = T_OBJECT;
    } else {
      // Clone primitive array
      bt = T_LONG;
    }

    Node* ctrl = ac->in(TypeFunc::Control);
    Node* mem = ac->in(TypeFunc::Memory);
    Node* src = ac->in(ArrayCopyNode::Src);
    Node* src_offset = ac->in(ArrayCopyNode::SrcPos);
    Node* dest = ac->in(ArrayCopyNode::Dest);
    Node* dest_offset = ac->in(ArrayCopyNode::DestPos);
    Node* length = ac->in(ArrayCopyNode::Length);

    if (bt == T_OBJECT) {
      // BarrierSetC2::clone sets the offsets via BarrierSetC2::arraycopy_payload_base_offset
      // which 8-byte aligns them to allow for word size copies. Make sure the offsets point
      // to the first element in the array when cloning object arrays. Otherwise, load
      // barriers are applied to parts of the header. Also adjust the length accordingly.
      assert(src_offset == dest_offset, "should be equal");
      jlong offset = src_offset->get_long();
      if (offset != arrayOopDesc::base_offset_in_bytes(T_OBJECT)) {
        assert(!UseCompressedClassPointers, "should only happen without compressed class pointers");
        assert((arrayOopDesc::base_offset_in_bytes(T_OBJECT) - offset) == BytesPerLong, "unexpected offset");
        length = phase->transform_later(new SubLNode(length, phase->longcon(1))); // Size is in longs
        src_offset = phase->longcon(arrayOopDesc::base_offset_in_bytes(T_OBJECT));
        dest_offset = src_offset;
      }
    }
    Node* payload_src = phase->basic_plus_adr(src, src_offset);
    Node* payload_dst = phase->basic_plus_adr(dest, dest_offset);

    const char* copyfunc_name = "arraycopy";
    address copyfunc_addr = phase->basictype2arraycopy(bt, NULL, NULL, true, copyfunc_name, true);

    const TypePtr* raw_adr_type = TypeRawPtr::BOTTOM;
    const TypeFunc* call_type = OptoRuntime::fast_arraycopy_Type();

    Node* call = phase->make_leaf_call(ctrl, mem, call_type, copyfunc_addr, copyfunc_name, raw_adr_type, payload_src, payload_dst, length XTOP);
    phase->transform_later(call);

    phase->igvn().replace_node(ac, call);
    return;
  }

  // Clone instance
  Node* const ctrl = ac->in(TypeFunc::Control);
  Node* const mem = ac->in(TypeFunc::Memory);
  Node* const dst = ac->in(ArrayCopyNode::Dest);
  Node* const size = ac->in(ArrayCopyNode::Length);

  assert(size->bottom_type()->is_long(), "Should be long");

  // The native clone we are calling here expects the instance size in words
  // Add header/offset size to payload size to get instance size.
  Node* const base_offset = phase->longcon(arraycopy_payload_base_offset(ac->is_clone_array()) >> LogBytesPerLong);
  Node* const full_size = phase->transform_later(new AddLNode(size, base_offset));

  Node* const call = phase->make_leaf_call(ctrl,
                                           mem,
                                           clone_type(),
                                           ZBarrierSetRuntime::clone_addr(),
                                           "ZBarrierSetRuntime::clone",
                                           TypeRawPtr::BOTTOM,
                                           src,
                                           dst,
                                           full_size,
                                           phase->top());
  phase->transform_later(call);
  phase->igvn().replace_node(ac, call);
}

#undef XTOP

// == Dominating barrier elision ==
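//
// A load that carries a strong load barrier can have the barrier elided if
// the same address (same base and offset) is known to have been accessed by
// a dominating load, store or atomic access, with no safepoint possible in
// between. After the dominating access the field is known to hold a good
// oop, and that guarantee only expires at a safepoint, where a new GC phase
// may begin.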

static bool block_has_safepoint(const Block* block, uint from, uint to) {
  for (uint i = from; i < to; i++) {
    if (block->get_node(i)->is_MachSafePoint()) {
      // Safepoint found
      return true;
    }
  }

  // Safepoint not found
  return false;
}

static bool block_has_safepoint(const Block* block) {
  return block_has_safepoint(block, 0, block->number_of_nodes());
}

static uint block_index(const Block* block, const Node* node) {
  for (uint j = 0; j < block->number_of_nodes(); ++j) {
    if (block->get_node(j) == node) {
      return j;
    }
  }
  ShouldNotReachHere();
  return 0;
}

void ZBarrierSetC2::analyze_dominating_barriers() const {
  ResourceMark rm;
  Compile* const C = Compile::current();
  PhaseCFG* const cfg = C->cfg();
  Block_List worklist;
  Node_List mem_ops;
  Node_List barrier_loads;

  // Step 1 - Find accesses, and track them in lists
  for (uint i = 0; i < cfg->number_of_blocks(); ++i) {
    const Block* const block = cfg->get_block(i);
    for (uint j = 0; j < block->number_of_nodes(); ++j) {
      const Node* const node = block->get_node(j);
      if (!node->is_Mach()) {
        continue;
      }

      MachNode* const mach = node->as_Mach();
      switch (mach->ideal_Opcode()) {
      case Op_LoadP:
        if ((mach->barrier_data() & ZLoadBarrierStrong) != 0) {
          barrier_loads.push(mach);
        }
        if ((mach->barrier_data() & (ZLoadBarrierStrong | ZLoadBarrierNoKeepalive)) ==
            ZLoadBarrierStrong) {
          mem_ops.push(mach);
        }
        break;
      case Op_CompareAndExchangeP:
      case Op_CompareAndSwapP:
      case Op_GetAndSetP:
        if ((mach->barrier_data() & ZLoadBarrierStrong) != 0) {
          barrier_loads.push(mach);
        }
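        // Fall through: the atomic accesses above are also tracked as memory
        // accesses that can allow a later load of the same address to have
        // its barrier elided.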
      case Op_StoreP:
        mem_ops.push(mach);
        break;

      default:
        break;
      }
    }
  }

  // Step 2 - Find dominating accesses for each load
  for (uint i = 0; i < barrier_loads.size(); i++) {
    MachNode* const load = barrier_loads.at(i)->as_Mach();
    const TypePtr* load_adr_type = NULL;
    intptr_t load_offset = 0;
    const Node* const load_obj = load->get_base_and_disp(load_offset, load_adr_type);
    Block* const load_block = cfg->get_block_for_node(load);
    const uint load_index = block_index(load_block, load);

    for (uint j = 0; j < mem_ops.size(); j++) {
      MachNode* mem = mem_ops.at(j)->as_Mach();
      const TypePtr* mem_adr_type = NULL;
      intptr_t mem_offset = 0;
      const Node* mem_obj = mem->get_base_and_disp(mem_offset, mem_adr_type);
      Block* mem_block = cfg->get_block_for_node(mem);
      uint mem_index = block_index(mem_block, mem);

      if (load_obj == NodeSentinel || mem_obj == NodeSentinel ||
          load_obj == NULL || mem_obj == NULL ||
          load_offset < 0 || mem_offset < 0) {
        continue;
      }

      if (mem_obj != load_obj || mem_offset != load_offset) {
        // Not the same addresses, not a candidate
        continue;
      }

      if (load_block == mem_block) {
        // Earlier accesses in the same block
        if (mem_index < load_index && !block_has_safepoint(mem_block, mem_index + 1, load_index)) {
          load->set_barrier_data(ZLoadBarrierElided);
        }
      } else if (mem_block->dominates(load_block)) {
        // Dominating block? Look around for safepoints
        ResourceMark rm;
        Block_List stack;
        VectorSet visited;
        stack.push(load_block);
        bool safepoint_found = block_has_safepoint(load_block);
        while (!safepoint_found && stack.size() > 0) {
          Block* block = stack.pop();
          if (visited.test_set(block->_pre_order)) {
            continue;
          }
          if (block_has_safepoint(block)) {
            safepoint_found = true;
            break;
          }
          if (block == mem_block) {
            continue;
          }

          // Push predecessor blocks
          for (uint p = 1; p < block->num_preds(); ++p) {
            Block* pred = cfg->get_block_for_node(block->pred(p));
            stack.push(pred);
          }
        }

        if (!safepoint_found) {
          load->set_barrier_data(ZLoadBarrierElided);
        }
      }
    }
  }
}

// == Reduced spilling optimization ==
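//
// The load barrier stub for a node only needs to preserve the registers that
// are actually live across that node. Compute liveness with a backward
// dataflow pass over the CFG and record the result per barrier node (see
// ZBarrierSetC2State::live()), so the stubs can save and restore a minimal
// set of registers instead of conservatively saving all of them.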

void ZBarrierSetC2::compute_liveness_at_stubs() const {
  ResourceMark rm;
  Compile* const C = Compile::current();
  Arena* const A = Thread::current()->resource_area();
  PhaseCFG* const cfg = C->cfg();
  PhaseRegAlloc* const regalloc = C->regalloc();
  RegMask* const live = NEW_ARENA_ARRAY(A, RegMask, cfg->number_of_blocks() * sizeof(RegMask));
  ZBarrierSetAssembler* const bs = ZBarrierSet::assembler();
  Block_List worklist;

  for (uint i = 0; i < cfg->number_of_blocks(); ++i) {
    new ((void*)(live + i)) RegMask();
    worklist.push(cfg->get_block(i));
  }

  while (worklist.size() > 0) {
    const Block* const block = worklist.pop();
    RegMask& old_live = live[block->_pre_order];
    RegMask new_live;

    // Initialize to union of successors
    for (uint i = 0; i < block->_num_succs; i++) {
      const uint succ_id = block->_succs[i]->_pre_order;
      new_live.OR(live[succ_id]);
    }

    // Walk block backwards, computing liveness
    for (int i = block->number_of_nodes() - 1; i >= 0; --i) {
      const Node* const node = block->get_node(i);

      // Remove def bits
      const OptoReg::Name first = bs->refine_register(node, regalloc->get_reg_first(node));
      const OptoReg::Name second = bs->refine_register(node, regalloc->get_reg_second(node));
      if (first != OptoReg::Bad) {
        new_live.Remove(first);
      }
      if (second != OptoReg::Bad) {
        new_live.Remove(second);
      }

      // Add use bits
      for (uint j = 1; j < node->req(); ++j) {
        const Node* const use = node->in(j);
        const OptoReg::Name first = bs->refine_register(use, regalloc->get_reg_first(use));
        const OptoReg::Name second = bs->refine_register(use, regalloc->get_reg_second(use));
        if (first != OptoReg::Bad) {
          new_live.Insert(first);
        }
        if (second != OptoReg::Bad) {
          new_live.Insert(second);
        }
      }

      // If this node tracks liveness, update it
      RegMask* const regs = barrier_set_state()->live(node);
      if (regs != NULL) {
        regs->OR(new_live);
      }
    }

    // Now at block top, see if we have any changes
    new_live.SUBTRACT(old_live);
    if (new_live.is_NotEmpty()) {
      // Liveness has been refined, update and propagate to prior blocks
      old_live.OR(new_live);
      for (uint i = 1; i < block->num_preds(); ++i) {
        Block* const pred = cfg->get_block_for_node(block->pred(i));
        worklist.push(pred);
      }
    }
  }
}