1 /*
2 * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "gc/shared/barrierSet.hpp"
26 #include "gc/shared/c2/barrierSetC2.hpp"
27 #include "memory/allocation.inline.hpp"
28 #include "memory/resourceArea.hpp"
29 #include "oops/compressedOops.hpp"
30 #include "opto/ad.hpp"
31 #include "opto/addnode.hpp"
32 #include "opto/callnode.hpp"
33 #include "opto/idealGraphPrinter.hpp"
34 #include "opto/matcher.hpp"
35 #include "opto/memnode.hpp"
36 #include "opto/movenode.hpp"
37 #include "opto/opcodes.hpp"
38 #include "opto/regmask.hpp"
39 #include "opto/rootnode.hpp"
40 #include "opto/runtime.hpp"
41 #include "opto/type.hpp"
42 #include "opto/vectornode.hpp"
43 #include "runtime/os.inline.hpp"
44 #include "runtime/sharedRuntime.hpp"
45 #include "utilities/align.hpp"
46
47 OptoReg::Name OptoReg::c_frame_pointer;
48
49 const RegMask *Matcher::idealreg2regmask[_last_machine_leaf];
50 RegMask Matcher::mreg2regmask[_last_Mach_Reg];
51 RegMask Matcher::caller_save_regmask;
52 RegMask Matcher::caller_save_regmask_exclude_soe;
53 RegMask Matcher::STACK_ONLY_mask;
54 RegMask Matcher::c_frame_ptr_mask;
55 const uint Matcher::_begin_rematerialize = _BEGIN_REMATERIALIZE;
56 const uint Matcher::_end_rematerialize = _END_REMATERIALIZE;
57
58 //---------------------------Matcher-------------------------------------------
59 Matcher::Matcher()
60 : PhaseTransform( Phase::Ins_Select ),
61 _states_arena(Chunk::medium_size, mtCompiler, Arena::Tag::tag_states),
62 _new_nodes(C->comp_arena()),
63 _visited(&_states_arena),
64 _shared(&_states_arena),
65 _dontcare(&_states_arena),
66 _reduceOp(reduceOp), _leftOp(leftOp), _rightOp(rightOp),
67 _swallowed(swallowed),
68 _begin_inst_chain_rule(_BEGIN_INST_CHAIN_RULE),
69 _end_inst_chain_rule(_END_INST_CHAIN_RULE),
70 _must_clone(must_clone),
71 _shared_nodes(C->comp_arena()),
72 #ifndef PRODUCT
73 _old2new_map(C->comp_arena()),
74 _new2old_map(C->comp_arena()),
75 _reused(C->comp_arena()),
76 #endif // !PRODUCT
77 _allocation_started(false),
78 _ruleName(ruleName),
79 _register_save_policy(register_save_policy),
80 _c_reg_save_policy(c_reg_save_policy),
81 _register_save_type(register_save_type),
82 _return_addr_mask(C->comp_arena()) {
83 C->set_matcher(this);
84
85 idealreg2spillmask [Op_RegI] = nullptr;
86 idealreg2spillmask [Op_RegN] = nullptr;
87 idealreg2spillmask [Op_RegL] = nullptr;
88 idealreg2spillmask [Op_RegF] = nullptr;
89 idealreg2spillmask [Op_RegD] = nullptr;
90 idealreg2spillmask [Op_RegP] = nullptr;
91 idealreg2spillmask [Op_VecA] = nullptr;
92 idealreg2spillmask [Op_VecS] = nullptr;
93 idealreg2spillmask [Op_VecD] = nullptr;
94 idealreg2spillmask [Op_VecX] = nullptr;
95 idealreg2spillmask [Op_VecY] = nullptr;
96 idealreg2spillmask [Op_VecZ] = nullptr;
97 idealreg2spillmask [Op_RegFlags] = nullptr;
98 idealreg2spillmask [Op_RegVectMask] = nullptr;
99
100 idealreg2debugmask [Op_RegI] = nullptr;
101 idealreg2debugmask [Op_RegN] = nullptr;
102 idealreg2debugmask [Op_RegL] = nullptr;
103 idealreg2debugmask [Op_RegF] = nullptr;
104 idealreg2debugmask [Op_RegD] = nullptr;
105 idealreg2debugmask [Op_RegP] = nullptr;
106 idealreg2debugmask [Op_VecA] = nullptr;
107 idealreg2debugmask [Op_VecS] = nullptr;
108 idealreg2debugmask [Op_VecD] = nullptr;
109 idealreg2debugmask [Op_VecX] = nullptr;
110 idealreg2debugmask [Op_VecY] = nullptr;
111 idealreg2debugmask [Op_VecZ] = nullptr;
112 idealreg2debugmask [Op_RegFlags] = nullptr;
113 idealreg2debugmask [Op_RegVectMask] = nullptr;
114
115 DEBUG_ONLY(_mem_node = nullptr;) // Ideal memory node consumed by mach node
116 }
117
118 //------------------------------warp_incoming_stk_arg------------------------
119 // This warps a VMReg into an OptoReg::Name
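// Stack arguments arrive from the calling convention as a slot count above
// VMRegImpl::stack0; biasing them by _old_SP plus the out-preserve area
// expresses them in the allocator's absolute stack-slot numbering (see the
// corresponding comment in Matcher::match()).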
120 OptoReg::Name Matcher::warp_incoming_stk_arg( VMReg reg ) {
121 OptoReg::Name warped;
122 if( reg->is_stack() ) { // Stack slot argument?
123 warped = OptoReg::add(_old_SP, reg->reg2stack() );
124 warped = OptoReg::add(warped, C->out_preserve_stack_slots());
125 if( warped >= _in_arg_limit )
126 _in_arg_limit = OptoReg::add(warped, 1); // Bump max stack slot seen
127 return warped;
128 }
129 return OptoReg::as_OptoReg(reg);
130 }
131
132 //---------------------------compute_old_SP------------------------------------
133 OptoReg::Name Compile::compute_old_SP() {
134 int fixed = fixed_slots();
135 int preserve = in_preserve_stack_slots();
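  // Illustrative example (numbers are not from any particular platform): with
  // fixed_slots() == 5, 2 preserve slots and a 2-slot stack alignment, the old
  // SP ends up at stack slot align_up(5 + 2, 2) == 8.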
136 return OptoReg::stack2reg(align_up(fixed + preserve, (int)Matcher::stack_alignment_in_slots()));
137 }
138
139
140
141 #ifdef ASSERT
142 void Matcher::verify_new_nodes_only(Node* xroot) {
143 // Make sure that the new graph only references new nodes
144 ResourceMark rm;
145 Unique_Node_List worklist;
146 VectorSet visited;
147 worklist.push(xroot);
148 while (worklist.size() > 0) {
149 Node* n = worklist.pop();
150 if (visited.test_set(n->_idx)) {
151 continue;
152 }
153 assert(C->node_arena()->contains(n), "dead node");
154 assert(!n->is_Initialize() || n->as_Initialize()->number_of_projs(TypeFunc::Memory) == 1,
155 "after matching, Initialize should have a single memory projection");
156 for (uint j = 0; j < n->req(); j++) {
157 Node* in = n->in(j);
158 if (in != nullptr) {
159 worklist.push(in);
160 }
161 }
162 for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
163 worklist.push(n->fast_out(j));
164 }
165 }
166 }
167 #endif
168
169 // Array of RegMask, one per returned value (inline type instances can
170 // be returned as multiple return values, one per field)
171 RegMask* Matcher::return_values_mask(const TypeFunc* tf) {
172 const TypeTuple* range = tf->range_cc();
173 uint cnt = range->cnt() - TypeFunc::Parms;
174 if (cnt == 0) {
175 return nullptr;
176 }
177 RegMask* mask = NEW_RESOURCE_ARRAY(RegMask, cnt);
178 BasicType* sig_bt = NEW_RESOURCE_ARRAY(BasicType, cnt);
179 VMRegPair* vm_parm_regs = NEW_RESOURCE_ARRAY(VMRegPair, cnt);
180 for (uint i = 0; i < cnt; i++) {
181 sig_bt[i] = range->field_at(i+TypeFunc::Parms)->basic_type();
182 new (mask + i) RegMask();
183 }
184
185 int regs = SharedRuntime::java_return_convention(sig_bt, vm_parm_regs, cnt);
186 if (regs <= 0) {
187 // We ran out of registers to store the null marker for a nullable inline type return.
188 // Since it is only set in the 'call_epilog', we can simply put it on the stack.
189 assert(tf->returns_inline_type_as_fields(), "should have been tested during graph construction");
190 // TODO 8284443 Can we teach the register allocator to reserve a stack slot instead?
191 // mask[--cnt] = STACK_ONLY_mask does not work (test with -XX:+StressGCM)
192 int slot = C->fixed_slots() - 2;
193 if (C->needs_stack_repair()) {
194 slot -= 2; // Account for stack increment value
195 }
196 mask[--cnt].clear();
197 mask[cnt].insert(OptoReg::stack2reg(slot));
198 }
199 for (uint i = 0; i < cnt; i++) {
200 mask[i].clear();
201
202 OptoReg::Name reg1 = OptoReg::as_OptoReg(vm_parm_regs[i].first());
203 if (OptoReg::is_valid(reg1)) {
204 mask[i].insert(reg1);
205 }
206 OptoReg::Name reg2 = OptoReg::as_OptoReg(vm_parm_regs[i].second());
207 if (OptoReg::is_valid(reg2)) {
208 mask[i].insert(reg2);
209 }
210 }
211
212 return mask;
213 }
214
215 //---------------------------match---------------------------------------------
216 void Matcher::match( ) {
217 if( MaxLabelRootDepth < 100 ) { // Too small?
218 assert(false, "invalid MaxLabelRootDepth, increase it to 100 minimum");
219 MaxLabelRootDepth = 100;
220 }
221 // One-time initialization of some register masks.
222 init_spill_mask( C->root()->in(1) );
223 if (C->failing()) {
224 return;
225 }
226 assert(_return_addr_mask.is_empty(),
227 "return address mask must be empty initially");
228 _return_addr_mask.insert(return_addr());
229 #ifdef _LP64
230 // Pointers take 2 slots in 64-bit land
231 _return_addr_mask.insert(OptoReg::add(return_addr(), 1));
232 #endif
233
234 // Map Java-signature return types into return register-value
235 // machine registers.
236 _return_values_mask = return_values_mask(C->tf());
237
238 // ---------------
239 // Frame Layout
240
241 // Need the method signature to determine the incoming argument types,
242 // because the types determine which registers the incoming arguments are
243 // in, and this affects the matched code.
244 const TypeTuple *domain = C->tf()->domain_cc();
245 uint argcnt = domain->cnt() - TypeFunc::Parms;
246 BasicType *sig_bt = NEW_RESOURCE_ARRAY( BasicType, argcnt );
247 VMRegPair *vm_parm_regs = NEW_RESOURCE_ARRAY( VMRegPair, argcnt );
248 _parm_regs = NEW_RESOURCE_ARRAY( OptoRegPair, argcnt );
249 _calling_convention_mask = NEW_RESOURCE_ARRAY( RegMask, argcnt );
250 uint i;
251 for( i = 0; i<argcnt; i++ ) {
252 sig_bt[i] = domain->field_at(i+TypeFunc::Parms)->basic_type();
253 new (_calling_convention_mask + i) RegMask(C->comp_arena());
254 }
255
256 // Pass array of ideal registers and length to USER code (from the AD file)
257 // that will convert this to an array of register numbers.
258 const StartNode *start = C->start();
259 start->calling_convention( sig_bt, vm_parm_regs, argcnt );
260 #ifdef ASSERT
261 // Sanity check users' calling convention. Real handy while trying to
262 // get the initial port correct.
263 { for (uint i = 0; i<argcnt; i++) {
264 if( !vm_parm_regs[i].first()->is_valid() && !vm_parm_regs[i].second()->is_valid() ) {
265         assert(domain->field_at(i+TypeFunc::Parms)==Type::HALF, "only allowed on halves" );
266 _parm_regs[i].set_bad();
267 continue;
268 }
269 VMReg parm_reg = vm_parm_regs[i].first();
270 assert(parm_reg->is_valid(), "invalid arg?");
271 if (parm_reg->is_reg()) {
272 OptoReg::Name opto_parm_reg = OptoReg::as_OptoReg(parm_reg);
273 assert(can_be_java_arg(opto_parm_reg) ||
274 C->stub_function() == CAST_FROM_FN_PTR(address, OptoRuntime::rethrow_C) ||
275 opto_parm_reg == inline_cache_reg(),
276 "parameters in register must be preserved by runtime stubs");
277 }
278 for (uint j = 0; j < i; j++) {
279 assert(parm_reg != vm_parm_regs[j].first(),
280 "calling conv. must produce distinct regs");
281 }
282 }
283 }
284 #endif
285
286 // Do some initial frame layout.
287
288 // Compute the old incoming SP (may be called FP) as
289 // OptoReg::stack0() + locks + in_preserve_stack_slots + pad2.
290 _old_SP = C->compute_old_SP();
291 assert( is_even(_old_SP), "must be even" );
292
293 // Compute highest incoming stack argument as
294 // _old_SP + out_preserve_stack_slots + incoming argument size.
295 _in_arg_limit = OptoReg::add(_old_SP, C->out_preserve_stack_slots());
296 assert( is_even(_in_arg_limit), "out_preserve must be even" );
297 for( i = 0; i < argcnt; i++ ) {
298 // Permit args to have no register
299 _calling_convention_mask[i].clear();
300 if( !vm_parm_regs[i].first()->is_valid() && !vm_parm_regs[i].second()->is_valid() ) {
301 _parm_regs[i].set_bad();
302 continue;
303 }
304 // calling_convention returns stack arguments as a count of
305 // slots beyond OptoReg::stack0()/VMRegImpl::stack0. We need to convert this to
306     // the allocator's point of view, taking into account all the
307 // preserve area, locks & pad2.
308
309 OptoReg::Name reg1 = warp_incoming_stk_arg(vm_parm_regs[i].first());
310 if( OptoReg::is_valid(reg1))
311 _calling_convention_mask[i].insert(reg1);
312
313 OptoReg::Name reg2 = warp_incoming_stk_arg(vm_parm_regs[i].second());
314 if( OptoReg::is_valid(reg2))
315 _calling_convention_mask[i].insert(reg2);
316
317 // Saved biased stack-slot register number
318 _parm_regs[i].set_pair(reg2, reg1);
319 }
320
321 // Finally, make sure the incoming arguments take up an even number of
322 // words, in case the arguments or locals need to contain doubleword stack
323 // slots. The rest of the system assumes that stack slot pairs (in
324 // particular, in the spill area) which look aligned will in fact be
325 // aligned relative to the stack pointer in the target machine. Double
326 // stack slots will always be allocated aligned.
327 _new_SP = OptoReg::Name(align_up(_in_arg_limit, (int)RegMask::SlotsPerLong));
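  // E.g. with RegMask::SlotsPerLong == 2 (two 32-bit slots per long), an odd
  // _in_arg_limit is simply rounded up to the next even slot number.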
328
329 // Compute highest outgoing stack argument as
330 // _new_SP + out_preserve_stack_slots + max(outgoing argument size).
331 _out_arg_limit = OptoReg::add(_new_SP, C->out_preserve_stack_slots());
332 assert( is_even(_out_arg_limit), "out_preserve must be even" );
333
334 // ---------------
335 // Collect roots of matcher trees. Every node for which
336 // _shared[_idx] is cleared is guaranteed to not be shared, and thus
337 // can be a valid interior of some tree.
338 find_shared( C->root() );
339 find_shared( C->top() );
340
341 C->print_method(PHASE_BEFORE_MATCHING, 1);
342
343   // Create new ideal node ConP #null even if it already exists in old space
344 // to avoid false sharing if the corresponding mach node is not used.
345 // The corresponding mach node is only used in rare cases for derived
346 // pointers.
347 Node* new_ideal_null = ConNode::make(TypePtr::NULL_PTR);
348
349 // Swap out to old-space; emptying new-space
350 Arena* old = C->swap_old_and_new();
351
352 // Save debug and profile information for nodes in old space:
353 _old_node_note_array = C->node_note_array();
354 if (_old_node_note_array != nullptr) {
355 C->set_node_note_array(new(C->comp_arena()) GrowableArray<Node_Notes*>
356 (C->comp_arena(), _old_node_note_array->length(),
357 0, nullptr));
358 }
359
360 // Pre-size the new_node table to avoid the need for range checks.
361 grow_new_node_array(C->unique());
362
363 // Reset node counter so MachNodes start with _idx at 0
364 int live_nodes = C->live_nodes();
365 C->set_unique(0);
366 C->reset_dead_node_list();
367
368 // Recursively match trees from old space into new space.
369 // Correct leaves of new-space Nodes; they point to old-space.
370 _visited.clear();
371 Node* const n = xform(C->top(), live_nodes);
372 if (C->failing()) return;
373 C->set_cached_top_node(n);
374 if (!C->failing()) {
375 Node* xroot = xform( C->root(), 1 );
376 if (C->failing()) return;
377 if (xroot == nullptr) {
378 Matcher::soft_match_failure(); // recursive matching process failed
379 assert(false, "instruction match failed");
380 C->record_method_not_compilable("instruction match failed");
381 } else {
382 // During matching shared constants were attached to C->root()
383 // because xroot wasn't available yet, so transfer the uses to
384 // the xroot.
385 for( DUIterator_Fast jmax, j = C->root()->fast_outs(jmax); j < jmax; j++ ) {
386 Node* n = C->root()->fast_out(j);
387 if (C->node_arena()->contains(n)) {
388 assert(n->in(0) == C->root(), "should be control user");
389 n->set_req(0, xroot);
390 --j;
391 --jmax;
392 }
393 }
394
395 // Generate new mach node for ConP #null
396 assert(new_ideal_null != nullptr, "sanity");
397 _mach_null = match_tree(new_ideal_null);
398 // Don't set control, it will confuse GCM since there are no uses.
399 // The control will be set when this node is used first time
400 // in find_base_for_derived().
401 assert(_mach_null != nullptr || C->failure_is_artificial(), ""); // bailouts are handled below.
402
403 C->set_root(xroot->is_Root() ? xroot->as_Root() : nullptr);
404
405 #ifdef ASSERT
406 verify_new_nodes_only(xroot);
407 #endif
408 }
409 }
410 if (C->top() == nullptr || C->root() == nullptr) {
411 // New graph lost. This is due to a compilation failure we encountered earlier.
412 stringStream ss;
413 if (C->failure_reason() != nullptr) {
414 ss.print("graph lost: %s", C->failure_reason());
415 } else {
416 assert(C->failure_reason() != nullptr, "graph lost: reason unknown");
417 ss.print("graph lost: reason unknown");
418 }
419 C->record_method_not_compilable(ss.as_string() DEBUG_ONLY(COMMA true));
420 }
421 if (C->failing()) {
422 // delete old;
423 old->destruct_contents();
424 return;
425 }
426 assert( C->top(), "" );
427 assert( C->root(), "" );
428 validate_null_checks();
429
430 // Now smoke old-space
431 NOT_DEBUG( old->destruct_contents() );
432
433 // ------------------------
434 // Set up save-on-entry registers.
435 Fixup_Save_On_Entry( );
436
437 { // Cleanup mach IR after selection phase is over.
438 Compile::TracePhase tp(_t_postselect_cleanup);
439 do_postselect_cleanup();
440 if (C->failing()) return;
441 assert(verify_after_postselect_cleanup(), "");
442 }
443 }
444
445 //------------------------------Fixup_Save_On_Entry----------------------------
446 // The stated purpose of this routine is to take care of save-on-entry
447 // registers. However, the overall goal of the Match phase is to convert the graph
448 // into machine-specific instructions which have RegMasks to guide allocation.
449 // So what this procedure really does is put a valid RegMask on each input
450 // to the machine-specific variations of all Return, TailCall and Halt
451 // instructions. It also adds edges to define the save-on-entry values (and of
452 // course gives them a mask).
453
454 static RegMask *init_input_masks( uint size, RegMask &ret_adr, RegMask &fp ) {
455 RegMask *rms = NEW_RESOURCE_ARRAY( RegMask, size );
456 for (unsigned int i = 0; i < size; ++i) {
457 new (rms + i) RegMask(Compile::current()->comp_arena());
458 }
459 // Do all the pre-defined register masks
460 rms[TypeFunc::Control ].assignFrom(RegMask::EMPTY);
461 rms[TypeFunc::I_O ].assignFrom(RegMask::EMPTY);
462 rms[TypeFunc::Memory ].assignFrom(RegMask::EMPTY);
463 rms[TypeFunc::ReturnAdr].assignFrom(ret_adr);
464 rms[TypeFunc::FramePtr ].assignFrom(fp);
465 return rms;
466 }
467
468 int Matcher::scalable_predicate_reg_slots() {
469 assert(Matcher::has_predicated_vectors() && Matcher::supports_scalable_vector(),
470 "scalable predicate vector should be supported");
471 int vector_reg_bit_size = Matcher::scalable_vector_reg_size(T_BYTE) << LogBitsPerByte;
472   // We assume each predicate register is one-eighth the size of the
473   // scalable vector register, i.e. one mask bit per vector byte.
474 int predicate_reg_bit_size = vector_reg_bit_size >> 3;
475   // Compute the number of slots required when a scalable predicate
476   // register is spilled. E.g. if the scalable vector register is 640 bits,
477   // the predicate register is 80 bits, which is 2.5 slots.
478   // We round the slot count up to a power of 2, as required
479   // by find_first_set().
480 int slots = predicate_reg_bit_size & (BitsPerInt - 1)
481 ? (predicate_reg_bit_size >> LogBitsPerInt) + 1
482 : predicate_reg_bit_size >> LogBitsPerInt;
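  // With the 640-bit example above: 80 bits is (80 >> LogBitsPerInt) + 1 == 3
  // slots, rounded up to 4 so that find_first_set() sees a power-of-2 set size.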
483 return round_up_power_of_2(slots);
484 }
485
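// One spill mask and one debug mask for each of the 13 ideal register classes
// handled in init_first_stack_mask() (Op_RegN through Op_RegVectMask).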
486 #define NOF_STACK_MASKS (2*13)
487
488 // Create the initial stack mask used by values spilling to the stack.
489 // Disallow any debug info in outgoing argument areas by setting the
490 // initial mask accordingly.
491 void Matcher::init_first_stack_mask() {
492
493 // Allocate storage for spill masks as masks for the appropriate load type.
494 RegMask *rms = (RegMask*)C->comp_arena()->AmallocWords(sizeof(RegMask) * NOF_STACK_MASKS);
495
496 // Initialize empty placeholder masks into the newly allocated arena
497 for (int i = 0; i < NOF_STACK_MASKS; i++) {
498 new (rms + i) RegMask(C->comp_arena());
499 }
500
501 int index = 0;
502 for (int i = Op_RegN; i <= Op_RegVectMask; ++i) {
503 idealreg2spillmask[i] = &rms[index++];
504 idealreg2debugmask[i] = &rms[index++];
505 }
506 assert(index == NOF_STACK_MASKS, "wrong size");
507
508 // At first, start with the empty mask
509 C->FIRST_STACK_mask().clear();
510
511 // Add in the incoming argument area
512 OptoReg::Name init_in = OptoReg::add(_old_SP, C->out_preserve_stack_slots());
513 for (OptoReg::Name i = init_in; i < _in_arg_limit; i = OptoReg::add(i, 1)) {
514 C->FIRST_STACK_mask().insert(i);
515 }
516
517 // Add in all bits past the outgoing argument area
518 C->FIRST_STACK_mask().set_all_from(_out_arg_limit);
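  // The resulting mask is the incoming argument area plus every slot at or
  // above _out_arg_limit; the slots in between are deliberately left out so
  // that spills and debug info never land in the outgoing argument area.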
519
520 // Make spill masks. Registers for their class, plus FIRST_STACK_mask.
521 RegMask aligned_stack_mask(C->FIRST_STACK_mask(), C->comp_arena());
522 // Keep spill masks aligned.
523 aligned_stack_mask.clear_to_pairs();
524 assert(aligned_stack_mask.is_infinite_stack(), "should be infinite stack");
525 RegMask scalable_stack_mask(aligned_stack_mask, C->comp_arena());
526
527 idealreg2spillmask[Op_RegP]->assignFrom(*idealreg2regmask[Op_RegP]);
528 #ifdef _LP64
529 idealreg2spillmask[Op_RegN]->assignFrom(*idealreg2regmask[Op_RegN]);
530 idealreg2spillmask[Op_RegN]->or_with(C->FIRST_STACK_mask());
531 idealreg2spillmask[Op_RegP]->or_with(aligned_stack_mask);
532 #else
533 idealreg2spillmask[Op_RegP]->or_with(C->FIRST_STACK_mask());
534 #endif
535 idealreg2spillmask[Op_RegI]->assignFrom(*idealreg2regmask[Op_RegI]);
536 idealreg2spillmask[Op_RegI]->or_with(C->FIRST_STACK_mask());
537 idealreg2spillmask[Op_RegL]->assignFrom(*idealreg2regmask[Op_RegL]);
538 idealreg2spillmask[Op_RegL]->or_with(aligned_stack_mask);
539 idealreg2spillmask[Op_RegF]->assignFrom(*idealreg2regmask[Op_RegF]);
540 idealreg2spillmask[Op_RegF]->or_with(C->FIRST_STACK_mask());
541 idealreg2spillmask[Op_RegD]->assignFrom(*idealreg2regmask[Op_RegD]);
542 idealreg2spillmask[Op_RegD]->or_with(aligned_stack_mask);
543
544 if (Matcher::has_predicated_vectors()) {
545 idealreg2spillmask[Op_RegVectMask]->assignFrom(*idealreg2regmask[Op_RegVectMask]);
546 idealreg2spillmask[Op_RegVectMask]->or_with(aligned_stack_mask);
547 } else {
548 idealreg2spillmask[Op_RegVectMask]->assignFrom(RegMask::EMPTY);
549 }
550
551 if (Matcher::vector_size_supported(T_BYTE,4)) {
552 idealreg2spillmask[Op_VecS]->assignFrom(*idealreg2regmask[Op_VecS]);
553 idealreg2spillmask[Op_VecS]->or_with(C->FIRST_STACK_mask());
554 } else {
555 idealreg2spillmask[Op_VecS]->assignFrom(RegMask::EMPTY);
556 }
557
558 if (Matcher::vector_size_supported(T_FLOAT,2)) {
559 // For VecD we need dual alignment and 8 bytes (2 slots) for spills.
560 // RA guarantees such alignment since it is needed for Double and Long values.
561 idealreg2spillmask[Op_VecD]->assignFrom(*idealreg2regmask[Op_VecD]);
562 idealreg2spillmask[Op_VecD]->or_with(aligned_stack_mask);
563 } else {
564 idealreg2spillmask[Op_VecD]->assignFrom(RegMask::EMPTY);
565 }
566
567 if (Matcher::vector_size_supported(T_FLOAT,4)) {
568     // For VecX we need quad alignment and 16 bytes (4 slots) for spills.
569 //
570     // RA can use input argument stack slots for spills, but until RA runs
571     // we don't know the frame size or the offsets of the input arg stack slots.
572     //
573     // Exclude the last input arg stack slots to avoid spilling vectors there,
574     // otherwise vector spills could stomp over stack slots in the caller frame.
575 OptoReg::Name in = OptoReg::add(_in_arg_limit, -1);
576 for (int k = 1; (in >= init_in) && (k < RegMask::SlotsPerVecX); k++) {
577 aligned_stack_mask.remove(in);
578 in = OptoReg::add(in, -1);
579 }
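    // Dropping the last SlotsPerVecX-1 input-arg slots means a 4-slot vector
    // spilled to any remaining input-arg slot still ends inside the incoming
    // argument area; the VecY, VecZ and scalable cases below apply the same
    // idea with wider sets.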
580 aligned_stack_mask.clear_to_sets(RegMask::SlotsPerVecX);
581 assert(aligned_stack_mask.is_infinite_stack(), "should be infinite stack");
582 idealreg2spillmask[Op_VecX]->assignFrom(*idealreg2regmask[Op_VecX]);
583 idealreg2spillmask[Op_VecX]->or_with(aligned_stack_mask);
584 } else {
585 idealreg2spillmask[Op_VecX]->assignFrom(RegMask::EMPTY);
586 }
587
588 if (Matcher::vector_size_supported(T_FLOAT,8)) {
589 // For VecY we need octo alignment and 32 bytes (8 slots) for spills.
590 OptoReg::Name in = OptoReg::add(_in_arg_limit, -1);
591 for (int k = 1; (in >= init_in) && (k < RegMask::SlotsPerVecY); k++) {
592 aligned_stack_mask.remove(in);
593 in = OptoReg::add(in, -1);
594 }
595 aligned_stack_mask.clear_to_sets(RegMask::SlotsPerVecY);
596 assert(aligned_stack_mask.is_infinite_stack(), "should be infinite stack");
597 idealreg2spillmask[Op_VecY]->assignFrom(*idealreg2regmask[Op_VecY]);
598 idealreg2spillmask[Op_VecY]->or_with(aligned_stack_mask);
599 } else {
600 idealreg2spillmask[Op_VecY]->assignFrom(RegMask::EMPTY);
601 }
602
603 if (Matcher::vector_size_supported(T_FLOAT,16)) {
604 // For VecZ we need enough alignment and 64 bytes (16 slots) for spills.
605 OptoReg::Name in = OptoReg::add(_in_arg_limit, -1);
606 for (int k = 1; (in >= init_in) && (k < RegMask::SlotsPerVecZ); k++) {
607 aligned_stack_mask.remove(in);
608 in = OptoReg::add(in, -1);
609 }
610 aligned_stack_mask.clear_to_sets(RegMask::SlotsPerVecZ);
611 assert(aligned_stack_mask.is_infinite_stack(), "should be infinite stack");
612 idealreg2spillmask[Op_VecZ]->assignFrom(*idealreg2regmask[Op_VecZ]);
613 idealreg2spillmask[Op_VecZ]->or_with(aligned_stack_mask);
614 } else {
615 idealreg2spillmask[Op_VecZ]->assignFrom(RegMask::EMPTY);
616 }
617
618 if (Matcher::supports_scalable_vector()) {
619 int k = 1;
620 OptoReg::Name in = OptoReg::add(_in_arg_limit, -1);
621 if (Matcher::has_predicated_vectors()) {
622       // Exclude the last input arg stack slots to avoid spilling the mask register there,
623       // otherwise RegVectMask spills could stomp over stack slots in the caller frame.
624 for (; (in >= init_in) && (k < scalable_predicate_reg_slots()); k++) {
625 scalable_stack_mask.remove(in);
626 in = OptoReg::add(in, -1);
627 }
628
629 // For RegVectMask
630 scalable_stack_mask.clear_to_sets(scalable_predicate_reg_slots());
631 assert(scalable_stack_mask.is_infinite_stack(), "should be infinite stack");
632 idealreg2spillmask[Op_RegVectMask]->assignFrom(*idealreg2regmask[Op_RegVectMask]);
633 idealreg2spillmask[Op_RegVectMask]->or_with(scalable_stack_mask);
634 }
635
636     // Exclude the last input arg stack slots to avoid spilling vector registers there,
637     // otherwise vector spills could stomp over stack slots in the caller frame.
638 for (; (in >= init_in) && (k < scalable_vector_reg_size(T_FLOAT)); k++) {
639 scalable_stack_mask.remove(in);
640 in = OptoReg::add(in, -1);
641 }
642
643 // For VecA
644 scalable_stack_mask.clear_to_sets(RegMask::SlotsPerVecA);
645 assert(scalable_stack_mask.is_infinite_stack(), "should be infinite stack");
646 idealreg2spillmask[Op_VecA]->assignFrom(*idealreg2regmask[Op_VecA]);
647 idealreg2spillmask[Op_VecA]->or_with(scalable_stack_mask);
648 } else {
649 idealreg2spillmask[Op_VecA]->assignFrom(RegMask::EMPTY);
650 }
651
652 if (UseFPUForSpilling) {
653 // This mask logic assumes that the spill operations are
654 // symmetric and that the registers involved are the same size.
655     // On SPARC, for instance, we may have to use 64-bit moves that will
656     // kill 2 registers when used with F0-F31.
657 idealreg2spillmask[Op_RegI]->or_with(*idealreg2regmask[Op_RegF]);
658 idealreg2spillmask[Op_RegF]->or_with(*idealreg2regmask[Op_RegI]);
659 #ifdef _LP64
660 idealreg2spillmask[Op_RegN]->or_with(*idealreg2regmask[Op_RegF]);
661 idealreg2spillmask[Op_RegL]->or_with(*idealreg2regmask[Op_RegD]);
662 idealreg2spillmask[Op_RegD]->or_with(*idealreg2regmask[Op_RegL]);
663 idealreg2spillmask[Op_RegP]->or_with(*idealreg2regmask[Op_RegD]);
664 #else
665 idealreg2spillmask[Op_RegP]->or_with(*idealreg2regmask[Op_RegF]);
666 #ifdef ARM
667 // ARM has support for moving 64bit values between a pair of
668 // integer registers and a double register
669 idealreg2spillmask[Op_RegL]->or_with(*idealreg2regmask[Op_RegD]);
670 idealreg2spillmask[Op_RegD]->or_with(*idealreg2regmask[Op_RegL]);
671 #endif
672 #endif
673 }
674
675 // Make up debug masks. Any spill slot plus callee-save (SOE) registers.
676 // Caller-save (SOC, AS) registers are assumed to be trashable by the various
677 // inline-cache fixup routines.
678 idealreg2debugmask[Op_RegN]->assignFrom(*idealreg2spillmask[Op_RegN]);
679 idealreg2debugmask[Op_RegI]->assignFrom(*idealreg2spillmask[Op_RegI]);
680 idealreg2debugmask[Op_RegL]->assignFrom(*idealreg2spillmask[Op_RegL]);
681 idealreg2debugmask[Op_RegF]->assignFrom(*idealreg2spillmask[Op_RegF]);
682 idealreg2debugmask[Op_RegD]->assignFrom(*idealreg2spillmask[Op_RegD]);
683 idealreg2debugmask[Op_RegP]->assignFrom(*idealreg2spillmask[Op_RegP]);
684 idealreg2debugmask[Op_RegVectMask]->assignFrom(*idealreg2spillmask[Op_RegVectMask]);
685
686 idealreg2debugmask[Op_VecA]->assignFrom(*idealreg2spillmask[Op_VecA]);
687 idealreg2debugmask[Op_VecS]->assignFrom(*idealreg2spillmask[Op_VecS]);
688 idealreg2debugmask[Op_VecD]->assignFrom(*idealreg2spillmask[Op_VecD]);
689 idealreg2debugmask[Op_VecX]->assignFrom(*idealreg2spillmask[Op_VecX]);
690 idealreg2debugmask[Op_VecY]->assignFrom(*idealreg2spillmask[Op_VecY]);
691 idealreg2debugmask[Op_VecZ]->assignFrom(*idealreg2spillmask[Op_VecZ]);
692
693 // Prevent stub compilations from attempting to reference
694 // callee-saved (SOE) registers from debug info
695 bool exclude_soe = !Compile::current()->is_method_compilation();
696 RegMask* caller_save_mask = exclude_soe ? &caller_save_regmask_exclude_soe : &caller_save_regmask;
697
698 idealreg2debugmask[Op_RegN]->subtract(*caller_save_mask);
699 idealreg2debugmask[Op_RegI]->subtract(*caller_save_mask);
700 idealreg2debugmask[Op_RegL]->subtract(*caller_save_mask);
701 idealreg2debugmask[Op_RegF]->subtract(*caller_save_mask);
702 idealreg2debugmask[Op_RegD]->subtract(*caller_save_mask);
703 idealreg2debugmask[Op_RegP]->subtract(*caller_save_mask);
704 idealreg2debugmask[Op_RegVectMask]->subtract(*caller_save_mask);
705
706 idealreg2debugmask[Op_VecA]->subtract(*caller_save_mask);
707 idealreg2debugmask[Op_VecS]->subtract(*caller_save_mask);
708 idealreg2debugmask[Op_VecD]->subtract(*caller_save_mask);
709 idealreg2debugmask[Op_VecX]->subtract(*caller_save_mask);
710 idealreg2debugmask[Op_VecY]->subtract(*caller_save_mask);
711 idealreg2debugmask[Op_VecZ]->subtract(*caller_save_mask);
712 }
713
714 //---------------------------is_save_on_entry----------------------------------
715 bool Matcher::is_save_on_entry(int reg) {
716 return
717 _register_save_policy[reg] == 'E' ||
718 _register_save_policy[reg] == 'A'; // Save-on-entry register?
719 }
720
721 //---------------------------Fixup_Save_On_Entry-------------------------------
722 void Matcher::Fixup_Save_On_Entry( ) {
723 init_first_stack_mask();
724
725 Node *root = C->root(); // Short name for root
726 // Count number of save-on-entry registers.
727 uint soe_cnt = number_of_saved_registers();
728 uint i;
729
730 // Find the procedure Start Node
731 StartNode *start = C->start();
732 assert( start, "Expect a start node" );
733
734 // Input RegMask array shared by all Returns.
735 // The type for doubles and longs has a count of 2, but
736 // there is only 1 returned value
737 uint ret_edge_cnt = C->tf()->range_cc()->cnt();
738 RegMask *ret_rms = init_input_masks( ret_edge_cnt + soe_cnt, _return_addr_mask, c_frame_ptr_mask );
739 for (i = TypeFunc::Parms; i < ret_edge_cnt; i++) {
740 ret_rms[i].assignFrom(_return_values_mask[i-TypeFunc::Parms]);
741 }
742
743 // Input RegMask array shared by all ForwardExceptions
744 uint forw_exc_edge_cnt = TypeFunc::Parms;
745 RegMask* forw_exc_rms = init_input_masks( forw_exc_edge_cnt + soe_cnt, _return_addr_mask, c_frame_ptr_mask );
746
747 // Input RegMask array shared by all Rethrows.
748 uint reth_edge_cnt = TypeFunc::Parms+1;
749 RegMask *reth_rms = init_input_masks( reth_edge_cnt + soe_cnt, _return_addr_mask, c_frame_ptr_mask );
750 // Rethrow takes exception oop only, but in the argument 0 slot.
751 OptoReg::Name reg = find_receiver();
752 if (reg >= 0) {
753 reth_rms[TypeFunc::Parms].assignFrom(mreg2regmask[reg]);
754 #ifdef _LP64
755 // Need two slots for ptrs in 64-bit land
756 reth_rms[TypeFunc::Parms].insert(OptoReg::add(OptoReg::Name(reg), 1));
757 #endif
758 }
759
760 // Input RegMask array shared by all TailCalls
761 uint tail_call_edge_cnt = TypeFunc::Parms+2;
762 RegMask *tail_call_rms = init_input_masks( tail_call_edge_cnt + soe_cnt, _return_addr_mask, c_frame_ptr_mask );
763
764 // Input RegMask array shared by all TailJumps
765 uint tail_jump_edge_cnt = TypeFunc::Parms+2;
766 RegMask *tail_jump_rms = init_input_masks( tail_jump_edge_cnt + soe_cnt, _return_addr_mask, c_frame_ptr_mask );
767
768 // TailCalls have 2 returned values (target & moop), whose masks come
769 // from the usual MachNode/MachOper mechanism. Find a sample
770 // TailCall to extract these masks and put the correct masks into
771 // the tail_call_rms array.
772 for( i=1; i < root->req(); i++ ) {
773 MachReturnNode *m = root->in(i)->as_MachReturn();
774 if( m->ideal_Opcode() == Op_TailCall ) {
775 tail_call_rms[TypeFunc::Parms + 0].assignFrom(m->MachNode::in_RegMask(TypeFunc::Parms + 0));
776 tail_call_rms[TypeFunc::Parms + 1].assignFrom(m->MachNode::in_RegMask(TypeFunc::Parms + 1));
777 break;
778 }
779 }
780
781 // TailJumps have 2 returned values (target & ex_oop), whose masks come
782 // from the usual MachNode/MachOper mechanism. Find a sample
783 // TailJump to extract these masks and put the correct masks into
784 // the tail_jump_rms array.
785 for( i=1; i < root->req(); i++ ) {
786 MachReturnNode *m = root->in(i)->as_MachReturn();
787 if( m->ideal_Opcode() == Op_TailJump ) {
788 tail_jump_rms[TypeFunc::Parms + 0].assignFrom(m->MachNode::in_RegMask(TypeFunc::Parms + 0));
789 tail_jump_rms[TypeFunc::Parms + 1].assignFrom(m->MachNode::in_RegMask(TypeFunc::Parms + 1));
790 break;
791 }
792 }
793
794 // Input RegMask array shared by all Halts
795 uint halt_edge_cnt = TypeFunc::Parms;
796 RegMask *halt_rms = init_input_masks( halt_edge_cnt + soe_cnt, _return_addr_mask, c_frame_ptr_mask );
797
798 // Capture the return input masks into each exit flavor
799 for( i=1; i < root->req(); i++ ) {
800 MachReturnNode *exit = root->in(i)->as_MachReturn();
801 switch( exit->ideal_Opcode() ) {
802 case Op_Return : exit->_in_rms = ret_rms; break;
803 case Op_Rethrow : exit->_in_rms = reth_rms; break;
804 case Op_TailCall : exit->_in_rms = tail_call_rms; break;
805 case Op_TailJump : exit->_in_rms = tail_jump_rms; break;
806 case Op_ForwardException: exit->_in_rms = forw_exc_rms; break;
807 case Op_Halt : exit->_in_rms = halt_rms; break;
808 default : ShouldNotReachHere();
809 }
810 }
811
812 // Next unused projection number from Start.
813 int proj_cnt = C->tf()->domain_cc()->cnt();
814
815 // Do all the save-on-entry registers. Make projections from Start for
816 // them, and give them a use at the exit points. To the allocator, they
817 // look like incoming register arguments.
818 for( i = 0; i < _last_Mach_Reg; i++ ) {
819 if( is_save_on_entry(i) ) {
820
821 // Add the save-on-entry to the mask array
822 ret_rms [ ret_edge_cnt].assignFrom(mreg2regmask[i]);
823 reth_rms [ reth_edge_cnt].assignFrom(mreg2regmask[i]);
824 tail_call_rms[tail_call_edge_cnt].assignFrom(mreg2regmask[i]);
825 tail_jump_rms[tail_jump_edge_cnt].assignFrom(mreg2regmask[i]);
826 forw_exc_rms [ forw_exc_edge_cnt].assignFrom(mreg2regmask[i]);
827 // Halts need the SOE registers, but only in the stack as debug info.
828 // A just-prior uncommon-trap or deoptimization will use the SOE regs.
829 halt_rms [ halt_edge_cnt].assignFrom(*idealreg2spillmask[_register_save_type[i]]);
830
831 Node *mproj;
832
833 // Is this a RegF low half of a RegD? Double up 2 adjacent RegF's
834 // into a single RegD.
835 if( (i&1) == 0 &&
836 _register_save_type[i ] == Op_RegF &&
837 _register_save_type[i+1] == Op_RegF &&
838 is_save_on_entry(i+1) ) {
839 // Add other bit for double
840 ret_rms [ ret_edge_cnt].insert(OptoReg::Name(i+1));
841 reth_rms [ reth_edge_cnt].insert(OptoReg::Name(i+1));
842 tail_call_rms[tail_call_edge_cnt].insert(OptoReg::Name(i+1));
843 tail_jump_rms[tail_jump_edge_cnt].insert(OptoReg::Name(i+1));
844 forw_exc_rms [ forw_exc_edge_cnt].insert(OptoReg::Name(i+1));
845 halt_rms [ halt_edge_cnt].insert(OptoReg::Name(i+1));
846 mproj = new MachProjNode( start, proj_cnt, ret_rms[ret_edge_cnt], Op_RegD );
847 proj_cnt += 2; // Skip 2 for doubles
848 }
849 else if( (i&1) == 1 && // Else check for high half of double
850 _register_save_type[i-1] == Op_RegF &&
851 _register_save_type[i ] == Op_RegF &&
852 is_save_on_entry(i-1) ) {
853 ret_rms [ ret_edge_cnt].assignFrom(RegMask::EMPTY);
854 reth_rms [ reth_edge_cnt].assignFrom(RegMask::EMPTY);
855 tail_call_rms[tail_call_edge_cnt].assignFrom(RegMask::EMPTY);
856 tail_jump_rms[tail_jump_edge_cnt].assignFrom(RegMask::EMPTY);
857 forw_exc_rms [ forw_exc_edge_cnt].assignFrom(RegMask::EMPTY);
858 halt_rms [ halt_edge_cnt].assignFrom(RegMask::EMPTY);
859 mproj = C->top();
860 }
861 // Is this a RegI low half of a RegL? Double up 2 adjacent RegI's
862 // into a single RegL.
863 else if( (i&1) == 0 &&
864 _register_save_type[i ] == Op_RegI &&
865 _register_save_type[i+1] == Op_RegI &&
866 is_save_on_entry(i+1) ) {
867 // Add other bit for long
868 ret_rms [ ret_edge_cnt].insert(OptoReg::Name(i+1));
869 reth_rms [ reth_edge_cnt].insert(OptoReg::Name(i+1));
870 tail_call_rms[tail_call_edge_cnt].insert(OptoReg::Name(i+1));
871 tail_jump_rms[tail_jump_edge_cnt].insert(OptoReg::Name(i+1));
872 forw_exc_rms [ forw_exc_edge_cnt].insert(OptoReg::Name(i+1));
873 halt_rms [ halt_edge_cnt].insert(OptoReg::Name(i+1));
874 mproj = new MachProjNode( start, proj_cnt, ret_rms[ret_edge_cnt], Op_RegL );
875 proj_cnt += 2; // Skip 2 for longs
876 }
877 else if( (i&1) == 1 && // Else check for high half of long
878 _register_save_type[i-1] == Op_RegI &&
879 _register_save_type[i ] == Op_RegI &&
880 is_save_on_entry(i-1) ) {
881 ret_rms [ ret_edge_cnt].assignFrom(RegMask::EMPTY);
882 reth_rms [ reth_edge_cnt].assignFrom(RegMask::EMPTY);
883 tail_call_rms[tail_call_edge_cnt].assignFrom(RegMask::EMPTY);
884 tail_jump_rms[tail_jump_edge_cnt].assignFrom(RegMask::EMPTY);
885 forw_exc_rms [ forw_exc_edge_cnt].assignFrom(RegMask::EMPTY);
886 halt_rms [ halt_edge_cnt].assignFrom(RegMask::EMPTY);
887 mproj = C->top();
888 } else {
889 // Make a projection for it off the Start
890 mproj = new MachProjNode( start, proj_cnt++, ret_rms[ret_edge_cnt], _register_save_type[i] );
891 }
892
893 ret_edge_cnt ++;
894 reth_edge_cnt ++;
895 tail_call_edge_cnt ++;
896 tail_jump_edge_cnt ++;
897 forw_exc_edge_cnt++;
898 halt_edge_cnt ++;
899
900 // Add a use of the SOE register to all exit paths
901 for (uint j=1; j < root->req(); j++) {
902 root->in(j)->add_req(mproj);
903 }
904 } // End of if a save-on-entry register
905 } // End of for all machine registers
906 }
907
908 //------------------------------init_spill_mask--------------------------------
909 void Matcher::init_spill_mask( Node *ret ) {
910 if( idealreg2regmask[Op_RegI] ) return; // One time only init
911
912 OptoReg::c_frame_pointer = c_frame_pointer();
913 c_frame_ptr_mask.assignFrom(RegMask(c_frame_pointer()));
914 #ifdef _LP64
915 // pointers are twice as big
916 c_frame_ptr_mask.insert(OptoReg::add(c_frame_pointer(), 1));
917 #endif
918
919 // Start at OptoReg::stack0()
920 STACK_ONLY_mask.clear();
921 // STACK_ONLY_mask is all stack bits
922 STACK_ONLY_mask.set_all_from(OptoReg::stack2reg(0));
923
924 for (OptoReg::Name i = OptoReg::Name(0); i < OptoReg::Name(_last_Mach_Reg);
925 i = OptoReg::add(i, 1)) {
926 // Copy the register names over into the shared world.
927 // SharedInfo::regName[i] = regName[i];
928 // Handy RegMasks per machine register
929 mreg2regmask[i].insert(i);
930
931 // Set up regmasks used to exclude save-on-call (and always-save) registers from debug masks.
932 if (_register_save_policy[i] == 'C' ||
933 _register_save_policy[i] == 'A') {
934 caller_save_regmask.insert(i);
935 }
936 // Exclude save-on-entry registers from debug masks for stub compilations.
937 if (_register_save_policy[i] == 'C' ||
938 _register_save_policy[i] == 'A' ||
939 _register_save_policy[i] == 'E') {
940 caller_save_regmask_exclude_soe.insert(i);
941 }
942 }
943
944 // Grab the Frame Pointer
945 Node *fp = ret->in(TypeFunc::FramePtr);
946 // Share frame pointer while making spill ops
947 set_shared(fp);
948
949 // Get the ADLC notion of the right regmask, for each basic type.
950 #ifdef _LP64
951 idealreg2regmask[Op_RegN] = regmask_for_ideal_register(Op_RegN, ret);
952 #endif
953 idealreg2regmask[Op_RegI] = regmask_for_ideal_register(Op_RegI, ret);
954 idealreg2regmask[Op_RegP] = regmask_for_ideal_register(Op_RegP, ret);
955 idealreg2regmask[Op_RegF] = regmask_for_ideal_register(Op_RegF, ret);
956 idealreg2regmask[Op_RegD] = regmask_for_ideal_register(Op_RegD, ret);
957 idealreg2regmask[Op_RegL] = regmask_for_ideal_register(Op_RegL, ret);
958 idealreg2regmask[Op_VecA] = regmask_for_ideal_register(Op_VecA, ret);
959 idealreg2regmask[Op_VecS] = regmask_for_ideal_register(Op_VecS, ret);
960 idealreg2regmask[Op_VecD] = regmask_for_ideal_register(Op_VecD, ret);
961 idealreg2regmask[Op_VecX] = regmask_for_ideal_register(Op_VecX, ret);
962 idealreg2regmask[Op_VecY] = regmask_for_ideal_register(Op_VecY, ret);
963 idealreg2regmask[Op_VecZ] = regmask_for_ideal_register(Op_VecZ, ret);
964 idealreg2regmask[Op_RegVectMask] = regmask_for_ideal_register(Op_RegVectMask, ret);
965 }
966
967 #ifdef ASSERT
968 static void match_alias_type(Compile* C, Node* n, Node* m) {
969 if (!VerifyAliases) return; // do not go looking for trouble by default
970 const TypePtr* nat = n->adr_type();
971 const TypePtr* mat = m->adr_type();
972 int nidx = C->get_alias_index(nat);
973 int midx = C->get_alias_index(mat);
974 // Detune the assert for cases like (AndI 0xFF (LoadB p)).
975 if (nidx == Compile::AliasIdxTop && midx >= Compile::AliasIdxRaw) {
976 for (uint i = 1; i < n->req(); i++) {
977 Node* n1 = n->in(i);
978 const TypePtr* n1at = n1->adr_type();
979 if (n1at != nullptr) {
980 nat = n1at;
981 nidx = C->get_alias_index(n1at);
982 }
983 }
984 }
985 // %%% Kludgery. Instead, fix ideal adr_type methods for all these cases:
986 if (nidx == Compile::AliasIdxTop && midx == Compile::AliasIdxRaw) {
987 switch (n->Opcode()) {
988 case Op_PrefetchAllocation:
989 nidx = Compile::AliasIdxRaw;
990 nat = TypeRawPtr::BOTTOM;
991 break;
992 }
993 }
994 if (nidx == Compile::AliasIdxRaw && midx == Compile::AliasIdxTop) {
995 switch (n->Opcode()) {
996 case Op_ClearArray:
997 midx = Compile::AliasIdxRaw;
998 mat = TypeRawPtr::BOTTOM;
999 break;
1000 }
1001 }
1002 if (nidx == Compile::AliasIdxTop && midx == Compile::AliasIdxBot) {
1003 switch (n->Opcode()) {
1004 case Op_Return:
1005 case Op_Rethrow:
1006 case Op_Halt:
1007 case Op_TailCall:
1008 case Op_TailJump:
1009 case Op_ForwardException:
1010 nidx = Compile::AliasIdxBot;
1011 nat = TypePtr::BOTTOM;
1012 break;
1013 }
1014 }
1015 if (nidx == Compile::AliasIdxBot && midx == Compile::AliasIdxTop) {
1016 switch (n->Opcode()) {
1017 case Op_StrComp:
1018 case Op_StrEquals:
1019 case Op_StrIndexOf:
1020 case Op_StrIndexOfChar:
1021 case Op_AryEq:
1022 case Op_VectorizedHashCode:
1023 case Op_CountPositives:
1024 case Op_MemBarVolatile:
1025 case Op_MemBarCPUOrder: // %%% these ideals should have narrower adr_type?
1026 case Op_StrInflatedCopy:
1027 case Op_StrCompressedCopy:
1028 case Op_OnSpinWait:
1029 case Op_EncodeISOArray:
1030 nidx = Compile::AliasIdxTop;
1031 nat = nullptr;
1032 break;
1033 }
1034 }
1035 if (nidx != midx) {
1036 if (PrintOpto || (PrintMiscellaneous && (WizardMode || Verbose))) {
1037 tty->print_cr("==== Matcher alias shift %d => %d", nidx, midx);
1038 n->dump();
1039 m->dump();
1040 }
1041 assert(C->subsume_loads() && C->must_alias(nat, midx),
1042 "must not lose alias info when matching");
1043 }
1044 }
1045 #endif
1046
1047 //------------------------------xform------------------------------------------
1048 // Given a Node in old-space, Match him (Label/Reduce) to produce a machine
1049 // Node in new-space. Given a new-space Node, recursively walk his children.
1050 Node *Matcher::transform( Node *n ) { ShouldNotCallThis(); return n; }
1051 Node *Matcher::xform( Node *n, int max_stack ) {
1052 // Use one stack to keep both: child's node/state and parent's node/index
1053 MStack mstack(max_stack * 2 * 2); // usually: C->live_nodes() * 2 * 2
1054 mstack.push(n, Visit, nullptr, -1); // set null as parent to indicate root
1055 while (mstack.is_nonempty()) {
1056 C->check_node_count(NodeLimitFudgeFactor, "too many nodes matching instructions");
1057 if (C->failing()) return nullptr;
1058 n = mstack.node(); // Leave node on stack
1059 Node_State nstate = mstack.state();
1060 if (nstate == Visit) {
1061 mstack.set_state(Post_Visit);
1062 Node *oldn = n;
1063 // Old-space or new-space check
1064 if (!C->node_arena()->contains(n)) {
1065 // Old space!
1066 Node* m = nullptr;
1067 if (has_new_node(n)) { // Not yet Label/Reduced
1068 m = new_node(n);
1069 } else {
1070 if (!is_dontcare(n)) { // Matcher can match this guy
1071 // Calls match special. They match alone with no children.
1072 // Their children, the incoming arguments, match normally.
1073 m = n->is_SafePoint() ? match_sfpt(n->as_SafePoint()):match_tree(n);
1074 if (C->failing()) return nullptr;
1075 if (m == nullptr) { Matcher::soft_match_failure(); return nullptr; }
1076 if (n->is_MemBar()) {
1077 m->as_MachMemBar()->set_adr_type(n->adr_type());
1078 }
1079 } else { // Nothing the matcher cares about
1080 if (n->is_Proj() && n->in(0) != nullptr && n->in(0)->is_Multi()) { // Projections?
1081 if (n->in(0)->is_Initialize() && n->as_Proj()->_con == TypeFunc::Memory) {
1082 // Initialize may have multiple NarrowMem projections. They would all match to identical raw mem MachProjs.
1083 // We don't need multiple MachProjs. Create one if none already exist, otherwise use existing one.
1084 m = n->in(0)->as_Initialize()->mem_mach_proj();
1085 if (m == nullptr && has_new_node(n->in(0))) {
1086 InitializeNode* new_init = new_node(n->in(0))->as_Initialize();
1087 m = new_init->mem_mach_proj();
1088 }
1089 assert(m == nullptr || m->is_MachProj(), "no mem projection yet or a MachProj created during matching");
1090 }
1091 if (m == nullptr) {
1092 // Convert to machine-dependent projection
1093 RegMask* mask = nullptr;
1094 if (n->in(0)->is_Call() && n->in(0)->as_Call()->tf()->returns_inline_type_as_fields()) {
1095 mask = return_values_mask(n->in(0)->as_Call()->tf());
1096 }
1097 m = n->in(0)->as_Multi()->match(n->as_Proj(), this, mask);
1098 NOT_PRODUCT(record_new2old(m, n);)
1099 }
1100 if (m->in(0) != nullptr) // m might be top
1101 collect_null_checks(m, n);
1102 } else { // Else just a regular 'ol guy
1103 m = n->clone(); // So just clone into new-space
1104 NOT_PRODUCT(record_new2old(m, n);)
1105 // Def-Use edges will be added incrementally as Uses
1106 // of this node are matched.
1107 assert(m->outcnt() == 0, "no Uses of this clone yet");
1108 }
1109 }
1110
1111 set_new_node(n, m); // Map old to new
1112 if (_old_node_note_array != nullptr) {
1113 Node_Notes* nn = C->locate_node_notes(_old_node_note_array,
1114 n->_idx);
1115 C->set_node_notes_at(m->_idx, nn);
1116 }
1117 DEBUG_ONLY(match_alias_type(C, n, m));
1118 }
1119 n = m; // n is now a new-space node
1120 mstack.set_node(n);
1121 }
1122
1123 // New space!
1124 if (_visited.test_set(n->_idx)) continue; // while(mstack.is_nonempty())
1125
1126 int i;
1127 // Put precedence edges on stack first (match them last).
1128 for (i = oldn->req(); (uint)i < oldn->len(); i++) {
1129 Node *m = oldn->in(i);
1130 if (m == nullptr) break;
1131 // set -1 to call add_prec() instead of set_req() during Step1
1132 mstack.push(m, Visit, n, -1);
1133 }
1134
1135 // Handle precedence edges for interior nodes
1136 for (i = n->len()-1; (uint)i >= n->req(); i--) {
1137 Node *m = n->in(i);
1138 if (m == nullptr || C->node_arena()->contains(m)) continue;
1139 n->rm_prec(i);
1140 // set -1 to call add_prec() instead of set_req() during Step1
1141 mstack.push(m, Visit, n, -1);
1142 }
1143
1144 // For constant debug info, I'd rather have unmatched constants.
1145 int cnt = n->req();
1146 JVMState* jvms = n->jvms();
1147 int debug_cnt = jvms ? jvms->debug_start() : cnt;
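      // Inputs with index in [debug_cnt, cnt) are the JVMState debug inputs
      // handled by the next loop; inputs below debug_cnt are normal inputs.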
1148
1149 // Now do only debug info. Clone constants rather than matching.
1150 // Constants are represented directly in the debug info without
1151 // the need for executable machine instructions.
1152 // Monitor boxes are also represented directly.
1153 for (i = cnt - 1; i >= debug_cnt; --i) { // For all debug inputs do
1154 Node *m = n->in(i); // Get input
1155 int op = m->Opcode();
1156 assert((op == Op_BoxLock) == jvms->is_monitor_use(i), "boxes only at monitor sites");
1157 if( op == Op_ConI || op == Op_ConP || op == Op_ConN || op == Op_ConNKlass ||
1158 op == Op_ConF || op == Op_ConD || op == Op_ConL
1159 // || op == Op_BoxLock // %%%% enable this and remove (+++) in chaitin.cpp
1160 ) {
1161 m = m->clone();
1162 NOT_PRODUCT(record_new2old(m, n));
1163 mstack.push(m, Post_Visit, n, i); // Don't need to visit
1164 mstack.push(m->in(0), Visit, m, 0);
1165 } else {
1166 mstack.push(m, Visit, n, i);
1167 }
1168 }
1169
1170 // And now walk his children, and convert his inputs to new-space.
1171 for( ; i >= 0; --i ) { // For all normal inputs do
1172 Node *m = n->in(i); // Get input
1173 if(m != nullptr)
1174 mstack.push(m, Visit, n, i);
1175 }
1176
1177 }
1178 else if (nstate == Post_Visit) {
1179 // Set xformed input
1180 Node *p = mstack.parent();
1181 if (p != nullptr) { // root doesn't have parent
1182 int i = (int)mstack.index();
1183 if (i >= 0)
1184 p->set_req(i, n); // required input
1185 else if (i == -1)
1186 p->add_prec(n); // precedence input
1187 else
1188 ShouldNotReachHere();
1189 }
1190 mstack.pop(); // remove processed node from stack
1191 }
1192 else {
1193 ShouldNotReachHere();
1194 }
1195 } // while (mstack.is_nonempty())
1196 return n; // Return new-space Node
1197 }
1198
1199 //------------------------------warp_outgoing_stk_arg------------------------
1200 OptoReg::Name Matcher::warp_outgoing_stk_arg( VMReg reg, OptoReg::Name begin_out_arg_area, OptoReg::Name &out_arg_limit_per_call ) {
1201 // Convert outgoing argument location to a pre-biased stack offset
1202 if (reg->is_stack()) {
1203 OptoReg::Name warped = reg->reg2stack();
1204 // Adjust the stack slot offset to be the register number used
1205 // by the allocator.
1206 warped = OptoReg::add(begin_out_arg_area, warped);
1207 // Keep track of the largest numbered stack slot used for an arg.
1208 // Largest used slot per call-site indicates the amount of stack
1209 // that is killed by the call.
1210 if (warped >= out_arg_limit_per_call) {
1211 out_arg_limit_per_call = OptoReg::add(warped, 1);
1212 }
1213 return warped;
1214 }
1215 return OptoReg::as_OptoReg(reg);
1216 }
1217
1218
1219 //------------------------------match_sfpt-------------------------------------
1220 // Helper function to match call instructions. Calls match special.
1221 // They match alone with no children. Their children, the incoming
1222 // arguments, match normally.
1223 MachNode *Matcher::match_sfpt( SafePointNode *sfpt ) {
1224 MachSafePointNode *msfpt = nullptr;
1225 MachCallNode *mcall = nullptr;
1226 uint cnt;
1227 // Split out case for SafePoint vs Call
1228 CallNode *call;
1229 const TypeTuple *domain;
1230 ciMethod* method = nullptr;
1231 if( sfpt->is_Call() ) {
1232 call = sfpt->as_Call();
1233 domain = call->tf()->domain_cc();
1234 cnt = domain->cnt();
1235
1236 // Match just the call, nothing else
1237 MachNode *m = match_tree(call);
1238 if (C->failing()) return nullptr;
1239 if( m == nullptr ) { Matcher::soft_match_failure(); return nullptr; }
1240
1241 // Copy data from the Ideal SafePoint to the machine version
1242 mcall = m->as_MachCall();
1243
1244 mcall->set_tf( call->tf());
1245 mcall->set_entry_point( call->entry_point());
1246 mcall->set_cnt( call->cnt());
1247 mcall->set_guaranteed_safepoint(call->guaranteed_safepoint());
1248
1249 if( mcall->is_MachCallJava() ) {
1250 MachCallJavaNode *mcall_java = mcall->as_MachCallJava();
1251 const CallJavaNode *call_java = call->as_CallJava();
1252 assert(call_java->validate_symbolic_info(), "inconsistent info");
1253 method = call_java->method();
1254 mcall_java->_method = method;
1255 mcall_java->_optimized_virtual = call_java->is_optimized_virtual();
1256 mcall_java->_override_symbolic_info = call_java->override_symbolic_info();
1257 mcall_java->_arg_escape = call_java->arg_escape();
1258 if( mcall_java->is_MachCallStaticJava() )
1259 mcall_java->as_MachCallStaticJava()->_name =
1260 call_java->as_CallStaticJava()->_name;
1261 if( mcall_java->is_MachCallDynamicJava() )
1262 mcall_java->as_MachCallDynamicJava()->_vtable_index =
1263 call_java->as_CallDynamicJava()->_vtable_index;
1264 }
1265 else if( mcall->is_MachCallRuntime() ) {
1266 MachCallRuntimeNode* mach_call_rt = mcall->as_MachCallRuntime();
1267 mach_call_rt->_name = call->as_CallRuntime()->_name;
1268 mach_call_rt->_leaf_no_fp = call->is_CallLeafNoFP();
1269 }
1270 msfpt = mcall;
1271 }
1272 // This is a non-call safepoint
1273 else {
1274 call = nullptr;
1275 domain = nullptr;
1276 MachNode *mn = match_tree(sfpt);
1277 if (C->failing()) return nullptr;
1278 msfpt = mn->as_MachSafePoint();
1279 cnt = TypeFunc::Parms;
1280 }
1281 msfpt->_has_ea_local_in_scope = sfpt->has_ea_local_in_scope();
1282
1283 // Advertise the correct memory effects (for anti-dependence computation).
1284 msfpt->set_adr_type(sfpt->adr_type());
1285
1286 // Allocate a private array of RegMasks. These RegMasks are not shared.
1287 msfpt->_in_rms = NEW_RESOURCE_ARRAY( RegMask, cnt );
1288 // Empty them all.
1289 for (uint i = 0; i < cnt; i++) {
1290 ::new (msfpt->_in_rms + i) RegMask(C->comp_arena());
1291 }
1292
1293 // Do all the pre-defined non-Empty register masks
1294 msfpt->_in_rms[TypeFunc::ReturnAdr].assignFrom(_return_addr_mask);
1295 msfpt->_in_rms[TypeFunc::FramePtr ].assignFrom(c_frame_ptr_mask);
1296
1297   // Place where the first outgoing argument can possibly be put.
1298 OptoReg::Name begin_out_arg_area = OptoReg::add(_new_SP, C->out_preserve_stack_slots());
1299 assert( is_even(begin_out_arg_area), "" );
1300 // Compute max outgoing register number per call site.
1301 OptoReg::Name out_arg_limit_per_call = begin_out_arg_area;
1302 // Calls to C may hammer extra stack slots above and beyond any arguments.
1303 // These are usually backing store for register arguments for varargs.
1304 if( call != nullptr && call->is_CallRuntime() )
1305 out_arg_limit_per_call = OptoReg::add(out_arg_limit_per_call,C->varargs_C_out_slots_killed());
1306
1307
1308 // Do the normal argument list (parameters) register masks
1309   // A null entry point is a special case where the target of the call
1310 // is in a register.
1311 int adj = (call != nullptr && call->entry_point() == nullptr) ? 1 : 0;
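  // When adj == 1 the first Parms slot (presumably carrying the in-register
  // call target) gets no outgoing argument mask; the indices below are shifted
  // by adj to skip it.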
1312 int argcnt = cnt - TypeFunc::Parms - adj;
1313 if( argcnt > 0 ) { // Skip it all if we have no args
1314 BasicType *sig_bt = NEW_RESOURCE_ARRAY( BasicType, argcnt );
1315 VMRegPair *parm_regs = NEW_RESOURCE_ARRAY( VMRegPair, argcnt );
1316 int i;
1317 for( i = 0; i < argcnt; i++ ) {
1318 sig_bt[i] = domain->field_at(i+TypeFunc::Parms+adj)->basic_type();
1319 }
1320 // V-call to pick proper calling convention
1321 call->calling_convention( sig_bt, parm_regs, argcnt );
1322
1323 #ifdef ASSERT
1324 // Sanity check users' calling convention. Really handy during
1325 // the initial porting effort. Fairly expensive otherwise.
1326 { for (int i = 0; i<argcnt; i++) {
1327 if( !parm_regs[i].first()->is_valid() &&
1328 !parm_regs[i].second()->is_valid() ) continue;
1329 VMReg reg1 = parm_regs[i].first();
1330 VMReg reg2 = parm_regs[i].second();
1331 for (int j = 0; j < i; j++) {
1332 if( !parm_regs[j].first()->is_valid() &&
1333 !parm_regs[j].second()->is_valid() ) continue;
1334 VMReg reg3 = parm_regs[j].first();
1335 VMReg reg4 = parm_regs[j].second();
1336 if( !reg1->is_valid() ) {
1337 assert( !reg2->is_valid(), "valid halvsies" );
1338 } else if( !reg3->is_valid() ) {
1339 assert( !reg4->is_valid(), "valid halvsies" );
1340 } else {
1341 assert( reg1 != reg2, "calling conv. must produce distinct regs");
1342 assert( reg1 != reg3, "calling conv. must produce distinct regs");
1343 assert( reg1 != reg4, "calling conv. must produce distinct regs");
1344 assert( reg2 != reg3, "calling conv. must produce distinct regs");
1345 assert( reg2 != reg4 || !reg2->is_valid(), "calling conv. must produce distinct regs");
1346 assert( reg3 != reg4, "calling conv. must produce distinct regs");
1347 }
1348 }
1349 }
1350 }
1351 #endif
1352
1353 // Visit each argument. Compute its outgoing register mask.
1354 // Return results now can have 2 bits returned.
1355 // Compute max over all outgoing arguments both per call-site
1356 // and over the entire method.
1357 for( i = 0; i < argcnt; i++ ) {
1358 // Address of incoming argument mask to fill in
1359 RegMask *rm = &mcall->_in_rms[i+TypeFunc::Parms+adj];
1360 VMReg first = parm_regs[i].first();
1361 VMReg second = parm_regs[i].second();
1362 if(!first->is_valid() &&
1363 !second->is_valid()) {
1364 continue; // Avoid Halves
1365 }
1366 // Handle case where arguments are in vector registers.
1367 if(call->in(TypeFunc::Parms + i)->bottom_type()->isa_vect()) {
1368 OptoReg::Name reg_fst = OptoReg::as_OptoReg(first);
1369 OptoReg::Name reg_snd = OptoReg::as_OptoReg(second);
1370 assert (reg_fst <= reg_snd, "fst=%d snd=%d", reg_fst, reg_snd);
1371 for (OptoReg::Name r = reg_fst; r <= reg_snd; r++) {
1372 rm->insert(r);
1373 }
1374 }
1375 // Grab first register, adjust stack slots and insert in mask.
1376 OptoReg::Name reg1 = warp_outgoing_stk_arg(first, begin_out_arg_area, out_arg_limit_per_call );
1377 if (OptoReg::is_valid(reg1)) {
1378 rm->insert( reg1 );
1379 }
1380 // Grab second register (if any), adjust stack slots and insert in mask.
1381 OptoReg::Name reg2 = warp_outgoing_stk_arg(second, begin_out_arg_area, out_arg_limit_per_call );
1382 if (OptoReg::is_valid(reg2)) {
1383 rm->insert( reg2 );
1384 }
1385 } // End of for all arguments
1386 }
1387
1388 // Compute the max stack slot killed by any call. These will not be
1389 // available for debug info, and will be used to adjust FIRST_STACK_mask
1390 // after all call sites have been visited.
1391 if( _out_arg_limit < out_arg_limit_per_call)
1392 _out_arg_limit = out_arg_limit_per_call;
1393
1394 if (mcall) {
1395 // Kill the outgoing argument area, including any non-argument holes and
1396 // any legacy C-killed slots. Use Fat-Projections to do the killing.
1397 // Since the max-per-method covers the max-per-call-site and debug info
1398 // is excluded on the max-per-method basis, debug info cannot land in
1399 // this killed area.
1400 uint r_cnt = mcall->tf()->range_sig()->cnt();
1401 MachProjNode *proj = new MachProjNode( mcall, r_cnt+10000, RegMask::EMPTY, MachProjNode::fat_proj );
1402 for (int i = begin_out_arg_area; i < out_arg_limit_per_call; i++) {
1403 proj->_rout.insert(OptoReg::Name(i));
1404 }
1405 if (!proj->_rout.is_empty()) {
1406 push_projection(proj);
1407 }
1408 }
1409 // Transfer the safepoint information from the call to the mcall
1410 // Move the JVMState list
1411 msfpt->set_jvms(sfpt->jvms());
1412 for (JVMState* jvms = msfpt->jvms(); jvms; jvms = jvms->caller()) {
1413 jvms->set_map(sfpt);
1414 }
1415
1416 // Debug inputs begin just after the last incoming parameter
1417 assert((mcall == nullptr) || (mcall->jvms() == nullptr) ||
1418 (mcall->jvms()->debug_start() + mcall->_jvmadj == mcall->tf()->domain_cc()->cnt()), "");
1419
1420 // Add additional edges.
1421 if (msfpt->mach_constant_base_node_input() != (uint)-1 && !msfpt->is_MachCallLeaf()) {
1422 // For these calls we cannot add MachConstantBase in expand(), as the
1423 // ins are not complete then.
1424 msfpt->ins_req(msfpt->mach_constant_base_node_input(), C->mach_constant_base_node());
1425 if (msfpt->jvms() &&
1426 msfpt->mach_constant_base_node_input() <= msfpt->jvms()->debug_start() + msfpt->_jvmadj) {
1427 // We added an edge before jvms, so we must adapt the position of the ins.
1428 msfpt->jvms()->adapt_position(+1);
1429 }
1430 }
1431
1432 // Registers killed by the call are set in the local scheduling pass
1433 // of Global Code Motion.
1434 return msfpt;
1435 }
1436
1437 //---------------------------match_tree----------------------------------------
1438 // Match an Ideal Node DAG - turn it into a tree; Label & Reduce. Used as part
1439 // of the wholesale conversion from Ideal to Mach Nodes. Also used for
1440 // making GotoNodes while building the CFG and in init_spill_mask() to identify
1441 // a Load's result RegMask for memoization in idealreg2regmask[]
1442 MachNode *Matcher::match_tree( const Node *n ) {
1443 assert( n->Opcode() != Op_Phi, "cannot match" );
1444 assert( !n->is_block_start(), "cannot match" );
1445 // Set the mark for all locally allocated State objects.
1446 // When this call returns, the _states_arena will be reset,
1447 // freeing all State objects.
1448 ResourceMark rm( &_states_arena );
1449
1450 LabelRootDepth = 0;
1451
1452 // StoreNodes require their Memory input to match any LoadNodes
1453 Node *mem = n->is_Store() ? n->in(MemNode::Memory) : (Node*)1 ;
1454 #ifdef ASSERT
1455 Node* save_mem_node = _mem_node;
1456 _mem_node = n->is_Store() ? (Node*)n : nullptr;
1457 #endif
1458 // State object for root node of match tree
1459 // Allocate it on _states_arena - stack allocation can cause stack overflow.
1460 State *s = new (&_states_arena) State;
1461 s->_kids[0] = nullptr;
1462 s->_kids[1] = nullptr;
1463 s->_leaf = (Node*)n;
1464 // Label the input tree, allocating labels from top-level arena
1465 Node* root_mem = mem;
1466 Label_Root(n, s, n->in(0), root_mem);
1467 if (C->failing()) return nullptr;
1468
1469 // The minimum cost match for the whole tree is found at the root State
1470 uint mincost = max_juint;
1471 uint cost = max_juint;
1472 uint i;
1473 for (i = 0; i < NUM_OPERANDS; i++) {
1474 if (s->valid(i) && // valid entry and
1475 s->cost(i) < cost && // low cost and
1476 s->rule(i) >= NUM_OPERANDS) {// not an operand
1477 mincost = i;
1478 cost = s->cost(i);
1479 }
1480 }
1481 if (mincost == max_juint) {
1482 #ifndef PRODUCT
1483 tty->print("No matching rule for:");
1484 s->dump();
1485 #endif
1486 Matcher::soft_match_failure();
1487 return nullptr;
1488 }
1489 // Reduce input tree based upon the state labels to machine Nodes
1490 MachNode *m = ReduceInst(s, s->rule(mincost), mem);
1491 // New-to-old mapping is done in ReduceInst, to cover complex instructions.
1492 NOT_PRODUCT(_old2new_map.map(n->_idx, m);)
1493
1494 // Add any Matcher-ignored edges
1495 uint cnt = n->req();
1496 uint start = 1;
1497 if( mem != (Node*)1 ) start = MemNode::Memory+1;
1498 if( n->is_AddP() ) {
1499 assert( mem == (Node*)1, "" );
1500 start = AddPNode::Base+1;
1501 }
1502 for( i = start; i < cnt; i++ ) {
1503 if( !n->match_edge(i) ) {
1504 if( i < m->req() )
1505 m->ins_req( i, n->in(i) );
1506 else
1507 m->add_req( n->in(i) );
1508 }
1509 }
1510
1511 DEBUG_ONLY( _mem_node = save_mem_node; )
1512 return m;
1513 }
1514
1515
1516 //------------------------------match_into_reg---------------------------------
1517 // Choose to either match this Node in a register or part of the current
1518 // match tree. Return true for requiring a register and false for matching
1519 // as part of the current match tree.
1520 static bool match_into_reg( const Node *n, Node *m, Node *control, int i, bool shared ) {
1521
1522 const Type *t = m->bottom_type();
1523
1524 if (t->singleton()) {
1525 // Never force constants into registers. Allow them to match as
1526 // constants or registers. Copies of the same value will share
1527 // the same register. See find_shared_node.
1528 return false;
1529 } else { // Not a constant
1530 if (!shared && Matcher::is_encode_and_store_pattern(n, m)) {
1531 // Make it possible to match "encode and store" patterns with non-shared
1532 // encode operations that are pinned to a control node (e.g. by CastPP
1533 // node removal in final graph reshaping). The matched instruction cannot
1534 // float above the encode's control node because it is pinned to the
1535 // store's control node.
1536 return false;
1537 }
1538 // Stop recursion if they have different Controls.
1539 Node* m_control = m->in(0);
1540 // The control of the load's memory can post-dominate the load's control.
1541 // So use it since load can't float above its memory.
1542 Node* mem_control = (m->is_Load()) ? m->in(MemNode::Memory)->in(0) : nullptr;
1543 if (control && m_control && control != m_control && control != mem_control) {
1544
1545 // Actually, we can live with the most conservative control we
1546 // find, if it post-dominates the others. This allows us to
1547 // pick up load/op/store trees where the load can float a little
1548 // above the store.
1549 Node *x = control;
1550 const uint max_scan = 6; // Arbitrary scan cutoff
1551 uint j;
1552 for (j=0; j<max_scan; j++) {
1553 if (x->is_Region()) // Bail out at merge points
1554 return true;
1555 x = x->in(0);
1556 if (x == m_control) // Does 'control' post-dominate
1557 break; // m->in(0)? If so, we can use it
1558 if (x == mem_control) // Does 'control' post-dominate
1559 break; // mem_control? If so, we can use it
1560 }
1561 if (j == max_scan) // No post-domination before scan end?
1562 return true; // Then break the match tree up
1563 }
1564 if ((m->is_DecodeN() && Matcher::narrow_oop_use_complex_address()) ||
1565 (m->is_DecodeNKlass() && Matcher::narrow_klass_use_complex_address())) {
1566 // These are commonly used in address expressions and can
1567 // efficiently fold into them on X64 in some cases.
1568 return false;
1569 }
1570 }
1571
1572 // Not forceable cloning. If shared, put it into a register.
1573 return shared;
1574 }
1575
1576
1577 //------------------------------Instruction Selection--------------------------
1578 // Label method walks a "tree" of nodes, using the ADLC generated DFA to match
1579 // ideal nodes to machine instructions. Trees are delimited by shared Nodes,
1580 // things the Matcher does not match (e.g., Memory), and things with different
1581 // Controls (hence forced into different blocks). We pass in the Control
1582 // selected for this entire State tree.
1583
1584 // The Matcher works on Trees, but an Intel add-to-memory requires a DAG: the
1585 // Store and the Load must have identical Memories (as well as identical
1586 // pointers). Since the Matcher does not have anything for Memory (and
1587 // does not handle DAGs), I have to match the Memory input myself. If the
1588 // Tree root is a Store or if there are multiple Loads in the tree, I require
1589 // all Loads to have the identical memory.
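// For example, an x86 add-to-memory instruction covers a pattern of the shape
// (StoreI mem adr (AddI (LoadI mem adr) val)); the Store at the root and the
// Load in its subtree must agree on 'mem' for the match to be legal.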
1590 Node* Matcher::Label_Root(const Node* n, State* svec, Node* control, Node*& mem) {
1591 // Since Label_Root is a recursive function, it's possible that we might run
1592 // out of stack space. See bugs 6272980 & 6227033 for more info.
1593 LabelRootDepth++;
1594 if (LabelRootDepth > MaxLabelRootDepth) {
1595 // Bailout. Can for example be hit with a deep chain of operations.
1596 C->record_method_not_compilable("Out of stack space, increase MaxLabelRootDepth");
1597 return nullptr;
1598 }
1599 uint care = 0; // Edges matcher cares about
1600 uint cnt = n->req();
1601 uint i = 0;
1602
1603 // Examine children for memory state
1604 // Can only subsume a child into your match-tree if that child's memory state
1605 // is not modified along the path to another input.
1606 // It is unsafe even if the other inputs are separate roots.
1607 Node *input_mem = nullptr;
1608 for( i = 1; i < cnt; i++ ) {
1609 if( !n->match_edge(i) ) continue;
1610 Node *m = n->in(i); // Get ith input
1611 assert( m, "expect non-null children" );
1612 if( m->is_Load() ) {
1613 if( input_mem == nullptr ) {
1614 input_mem = m->in(MemNode::Memory);
1615 if (mem == (Node*)1) {
1616 // Save this memory to bail out if there's another memory access
1617 // to a different memory location in the same tree.
1618 mem = input_mem;
1619 }
1620 } else if( input_mem != m->in(MemNode::Memory) ) {
1621 input_mem = NodeSentinel;
1622 }
1623 }
1624 }
1625
1626 for( i = 1; i < cnt; i++ ) { // For my children
1627 if( !n->match_edge(i) ) continue;
1628 Node *m = n->in(i); // Get ith input
1629 // Allocate states out of a private arena
1630 State *s = new (&_states_arena) State;
1631 svec->_kids[care++] = s;
1632 assert( care <= 2, "binary only for now" );
1633
1634 // Recursively label the State tree.
1635 s->_kids[0] = nullptr;
1636 s->_kids[1] = nullptr;
1637 s->_leaf = m;
1638
1639 // Check for leaves of the State Tree; things that cannot be a part of
1640 // the current tree. If it finds any, that value is matched as a
1641 // register operand. If not, then the normal matching is used.
1642 if( match_into_reg(n, m, control, i, is_shared(m)) ||
1643 // Stop recursion if this is a LoadNode and there is another memory access
1644 // to a different memory location in the same tree (for example, a StoreNode
1645 // at the root of this tree or another LoadNode in one of the children).
1646 ((mem!=(Node*)1) && m->is_Load() && m->in(MemNode::Memory) != mem) ||
1647 // Can NOT include the match of a subtree when its memory state
1648 // is used by any of the other subtrees
1649 (input_mem == NodeSentinel) ) {
1650 // Print when we exclude matching due to different memory states at input-loads
1651 if (PrintOpto && (Verbose && WizardMode) && (input_mem == NodeSentinel)
1652 && !((mem!=(Node*)1) && m->is_Load() && m->in(MemNode::Memory) != mem)) {
1653 tty->print_cr("invalid input_mem");
1654 }
1655 // Switch to a register-only opcode; this value must be in a register
1656 // and cannot be subsumed as part of a larger instruction.
1657 s->DFA( m->ideal_reg(), m );
1658
1659 } else {
1660 // If match tree has no control and we do, adopt it for entire tree
1661 if( control == nullptr && m->in(0) != nullptr && m->req() > 1 )
1662 control = m->in(0); // Pick up control
1663 // Else match as a normal part of the match tree.
1664 control = Label_Root(m, s, control, mem);
1665 if (C->failing()) return nullptr;
1666 }
1667 }
1668
1669 // Call DFA to match this node, and return
1670 svec->DFA( n->Opcode(), n );
1671
1672 uint x;
1673 for( x = 0; x < _LAST_MACH_OPER; x++ )
1674 if( svec->valid(x) )
1675 break;
1676
1677 if (x >= _LAST_MACH_OPER) {
1678 #ifdef ASSERT
1679 n->dump();
1680 svec->dump();
1681 #endif
1682 assert( false, "bad AD file" );
1683 C->record_failure("bad AD file");
1684 }
1685 return control;
1686 }
1687
1688
1689 // Con nodes reduced using the same rule can share their MachNode
1690 // which reduces the number of copies of a constant in the final
1691 // program. The register allocator is free to split uses later to
1692 // split live ranges.
1693 MachNode* Matcher::find_shared_node(Node* leaf, uint rule) {
1694 if (!leaf->is_Con() && !leaf->is_DecodeNarrowPtr()) return nullptr;
1695
1696 // See if this Con has already been reduced using this rule.
1697 if (_shared_nodes.max() <= leaf->_idx) return nullptr;
1698 MachNode* last = (MachNode*)_shared_nodes.at(leaf->_idx);
1699 if (last != nullptr && rule == last->rule()) {
1700 // Don't expect control change for DecodeN
1701 if (leaf->is_DecodeNarrowPtr())
1702 return last;
1703 // Get the new space root.
1704 Node* xroot = new_node(C->root());
1705 if (xroot == nullptr) {
1706 // This shouldn't happen given the order of matching.
1707 return nullptr;
1708 }
1709
1710 // Shared constants need to have their control be root so they
1711 // can be scheduled properly.
1712 Node* control = last->in(0);
1713 if (control != xroot) {
1714 if (control == nullptr || control == C->root()) {
1715 last->set_req(0, xroot);
1716 } else {
1717 assert(false, "unexpected control");
1718 return nullptr;
1719 }
1720 }
1721 return last;
1722 }
1723 return nullptr;
1724 }
1725
1726
1727 //------------------------------ReduceInst-------------------------------------
1728 // Reduce a State tree (with given Control) into a tree of MachNodes.
1729 // This routine (and its cohort ReduceOper) converts Ideal Nodes into
1730 // complicated machine Nodes. Each MachNode covers some tree of Ideal Nodes.
1731 // Each MachNode has a number of complicated MachOper operands; each
1732 // MachOper also covers a further tree of Ideal Nodes.
1733
1734 // The root of the Ideal match tree is always an instruction, so we enter
1735 // the recursion here. After building the MachNode, we need to recurse
1736 // the tree checking for these cases:
1737 // (1) Child is an instruction -
1738 // Build the instruction (recursively), add it as an edge.
1739 // Build a simple operand (register) to hold the result of the instruction.
1740 // (2) Child is an interior part of an instruction -
1741 // Skip over it (do nothing)
1742 // (3) Child is the start of an operand -
1743 // Build the operand, place it inside the instruction
1744 // Call ReduceOper.
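// For example, a memory instruction with a complex addressing-mode operand
// lets a single MachNode cover an AddP address subtree: the AddP is absorbed
// into a MachOper instead of being reduced to a separate instruction.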
1745 MachNode *Matcher::ReduceInst( State *s, int rule, Node *&mem ) {
1746 assert( rule >= NUM_OPERANDS, "called with operand rule" );
1747
1748 MachNode* shared_node = find_shared_node(s->_leaf, rule);
1749 if (shared_node != nullptr) {
1750 return shared_node;
1751 }
1752
1753 // Build the object to represent this state & prepare for recursive calls
1754 MachNode *mach = s->MachNodeGenerator(rule);
1755 guarantee(mach != nullptr, "Missing MachNode");
1756 mach->_opnds[0] = s->MachOperGenerator(_reduceOp[rule]);
1757 assert( mach->_opnds[0] != nullptr, "Missing result operand" );
1758 Node *leaf = s->_leaf;
1759 NOT_PRODUCT(record_new2old(mach, leaf);)
1760 // Check for instruction or instruction chain rule
1761 if( rule >= _END_INST_CHAIN_RULE || rule < _BEGIN_INST_CHAIN_RULE ) {
1762 assert(C->node_arena()->contains(s->_leaf) || !has_new_node(s->_leaf),
1763 "duplicating node that's already been matched");
1764 // Instruction
1765 mach->add_req( leaf->in(0) ); // Set initial control
1766 // Reduce interior of complex instruction
1767 ReduceInst_Interior( s, rule, mem, mach, 1 );
1768 } else {
1769 // Instruction chain rules are data-dependent on their inputs
1770 mach->add_req(nullptr); // Set initial control to none
1771 ReduceInst_Chain_Rule( s, rule, mem, mach );
1772 }
1773
1774 // If a Memory was used, insert a Memory edge
1775 if( mem != (Node*)1 ) {
1776 mach->ins_req(MemNode::Memory,mem);
1777 #ifdef ASSERT
1778 // Verify adr type after matching memory operation
1779 const MachOper* oper = mach->memory_operand();
1780 if (oper != nullptr && oper != (MachOper*)-1) {
1781 // It has a unique memory operand. Find corresponding ideal mem node.
1782 Node* m = nullptr;
1783 if (leaf->is_Mem()) {
1784 m = leaf;
1785 } else {
1786 m = _mem_node;
1787 assert(m != nullptr && m->is_Mem(), "expecting memory node");
1788 }
1789 const Type* mach_at = mach->adr_type();
1790 // DecodeN node consumed by an address may have different type
1791 // than its input. Don't compare types for such case.
1792 if (m->adr_type() != mach_at &&
1793 (m->in(MemNode::Address)->is_DecodeNarrowPtr() ||
1794 (m->in(MemNode::Address)->is_AddP() &&
1795 m->in(MemNode::Address)->in(AddPNode::Address)->is_DecodeNarrowPtr()) ||
1796 (m->in(MemNode::Address)->is_AddP() &&
1797 m->in(MemNode::Address)->in(AddPNode::Address)->is_AddP() &&
1798 m->in(MemNode::Address)->in(AddPNode::Address)->in(AddPNode::Address)->is_DecodeNarrowPtr()))) {
1799 mach_at = m->adr_type();
1800 }
1801 if (m->adr_type() != mach_at) {
1802 m->dump();
1803 tty->print_cr("mach:");
1804 mach->dump(1);
1805 }
1806 assert(m->adr_type() == mach_at, "matcher should not change adr type");
1807 }
1808 #endif
1809 }
1810
1811 // If the _leaf is an AddP, insert the base edge
1812 if (leaf->is_AddP()) {
1813 mach->ins_req(AddPNode::Base,leaf->in(AddPNode::Base));
1814 }
1815
1816 uint number_of_projections_prior = number_of_projections();
1817
1818 // Perform any 1-to-many expansions required
1819 MachNode *ex = mach->Expand(s, _projection_list, mem);
1820 if (ex != mach) {
1821 assert(ex->ideal_reg() == mach->ideal_reg(), "ideal types should match");
1822 if( ex->in(1)->is_Con() )
1823 ex->in(1)->set_req(0, C->root());
1824 // Remove old node from the graph
1825 for( uint i=0; i<mach->req(); i++ ) {
1826 mach->set_req(i,nullptr);
1827 }
1828 NOT_PRODUCT(record_new2old(ex, s->_leaf);)
1829 }
1830
1831 // PhaseChaitin::fixup_spills will sometimes generate spill code
1832 // via the matcher. By this time, nodes have been wired into the CFG,
1833 // and any further nodes generated by expand rules will be left hanging
1834 // in space, and will not get emitted as output code. Catch this.
1835 // Also, catch any new register allocation constraints ("projections")
1836 // generated belatedly during spill code generation.
1837 if (_allocation_started) {
1838 guarantee(ex == mach, "no expand rules during spill generation");
1839 guarantee(number_of_projections_prior == number_of_projections(), "no allocation during spill generation");
1840 }
1841
1842 if (leaf->is_Con() || leaf->is_DecodeNarrowPtr()) {
1843 // Record the con for sharing
1844 _shared_nodes.map(leaf->_idx, ex);
1845 }
1846
1847 // Have mach nodes inherit GC barrier data
1848 mach->set_barrier_data(MemNode::barrier_data(leaf));
1849
1850 return ex;
1851 }
1852
1853 void Matcher::handle_precedence_edges(Node* n, MachNode *mach) {
1854 for (uint i = n->req(); i < n->len(); i++) {
1855 if (n->in(i) != nullptr) {
1856 mach->add_prec(n->in(i));
1857 }
1858 }
1859 }
1860
1861 void Matcher::ReduceInst_Chain_Rule(State* s, int rule, Node* &mem, MachNode* mach) {
1862 // 'op' is what I am expecting to receive
1863 int op = _leftOp[rule];
1864 // Operand type to catch the child's result.
1865 // This is what my child will give me.
1866 unsigned int opnd_class_instance = s->rule(op);
1867 // Choose between operand class or not.
1868 // This is what I will receive.
1869 int catch_op = (FIRST_OPERAND_CLASS <= op && op < NUM_OPERANDS) ? opnd_class_instance : op;
1870 // New rule for child. Chase operand classes to get the actual rule.
1871 unsigned int newrule = s->rule(catch_op);
1872
1873 if (newrule < NUM_OPERANDS) {
1874 // Chain from operand or operand class, may be output of shared node
1875 assert(opnd_class_instance < NUM_OPERANDS, "Bad AD file: Instruction chain rule must chain from operand");
1876 // Insert operand into array of operands for this instruction
1877 mach->_opnds[1] = s->MachOperGenerator(opnd_class_instance);
1878
1879 ReduceOper(s, newrule, mem, mach);
1880 } else {
1881 // Chain from the result of an instruction
1882 assert(newrule >= _LAST_MACH_OPER, "Do NOT chain from internal operand");
1883 mach->_opnds[1] = s->MachOperGenerator(_reduceOp[catch_op]);
1884 Node *mem1 = (Node*)1;
1885 DEBUG_ONLY(Node *save_mem_node = _mem_node;)
1886 mach->add_req( ReduceInst(s, newrule, mem1) );
1887 DEBUG_ONLY(_mem_node = save_mem_node;)
1888 }
1889 return;
1890 }
1891
1892
1893 uint Matcher::ReduceInst_Interior( State *s, int rule, Node *&mem, MachNode *mach, uint num_opnds ) {
1894 handle_precedence_edges(s->_leaf, mach);
1895
1896 if( s->_leaf->is_Load() ) {
1897 Node *mem2 = s->_leaf->in(MemNode::Memory);
1898 assert( mem == (Node*)1 || mem == mem2, "multiple Memories being matched at once?" );
1899 DEBUG_ONLY( if( mem == (Node*)1 ) _mem_node = s->_leaf;)
1900 mem = mem2;
1901 }
1902 if( s->_leaf->in(0) != nullptr && s->_leaf->req() > 1) {
1903 if( mach->in(0) == nullptr )
1904 mach->set_req(0, s->_leaf->in(0));
1905 }
1906
1907 // Now recursively walk the state tree & add operand list.
1908 for( uint i=0; i<2; i++ ) { // binary tree
1909 State *newstate = s->_kids[i];
1910 if( newstate == nullptr ) break; // Might only have 1 child
1911 // 'op' is what I am expecting to receive
1912 int op;
1913 if( i == 0 ) {
1914 op = _leftOp[rule];
1915 } else {
1916 op = _rightOp[rule];
1917 }
1918 // Operand type to catch the child's result.
1919 // This is what my child will give me.
1920 int opnd_class_instance = newstate->rule(op);
1921 // Choose between operand class or not.
1922 // This is what I will receive.
1923 int catch_op = (op >= FIRST_OPERAND_CLASS && op < NUM_OPERANDS) ? opnd_class_instance : op;
1924 // New rule for child. Chase operand classes to get the actual rule.
1925 int newrule = newstate->rule(catch_op);
1926
1927 if (newrule < NUM_OPERANDS) { // Operand/operandClass or internalOp/instruction?
1928 // Operand/operandClass
1929 // Insert operand into array of operands for this instruction
1930 mach->_opnds[num_opnds++] = newstate->MachOperGenerator(opnd_class_instance);
1931 ReduceOper(newstate, newrule, mem, mach);
1932
1933 } else { // Child is internal operand or new instruction
1934 if (newrule < _LAST_MACH_OPER) { // internal operand or instruction?
1935 // internal operand --> call ReduceInst_Interior
1936 // Interior of complex instruction. Do nothing but recurse.
1937 num_opnds = ReduceInst_Interior(newstate, newrule, mem, mach, num_opnds);
1938 } else {
1939 // instruction --> call build operand( ) to catch result
1940 // --> ReduceInst( newrule )
1941 mach->_opnds[num_opnds++] = s->MachOperGenerator(_reduceOp[catch_op]);
1942 Node *mem1 = (Node*)1;
1943 DEBUG_ONLY(Node *save_mem_node = _mem_node;)
1944 mach->add_req( ReduceInst( newstate, newrule, mem1 ) );
1945 DEBUG_ONLY(_mem_node = save_mem_node;)
1946 }
1947 }
1948 assert( mach->_opnds[num_opnds-1], "" );
1949 }
1950 return num_opnds;
1951 }
1952
1953 // This routine walks the interior of possible complex operands.
1954 // At each point we check our children in the match tree:
1955 // (1) No children -
1956 // We are a leaf; add _leaf field as an input to the MachNode
1957 // (2) Child is an internal operand -
1958 // Skip over it ( do nothing )
1959 // (3) Child is an instruction -
1960 // Call ReduceInst recursively and add the resulting
1961 // instruction as an input to the MachNode
1962 void Matcher::ReduceOper( State *s, int rule, Node *&mem, MachNode *mach ) {
1963 assert( rule < _LAST_MACH_OPER, "called with operand rule" );
1964 State *kid = s->_kids[0];
1965 assert( kid == nullptr || s->_leaf->in(0) == nullptr, "internal operands have no control" );
1966
1967 // Leaf? And not subsumed?
1968 if( kid == nullptr && !_swallowed[rule] ) {
1969 mach->add_req( s->_leaf ); // Add leaf pointer
1970 return; // Bail out
1971 }
1972
1973 if( s->_leaf->is_Load() ) {
1974 assert( mem == (Node*)1, "multiple Memories being matched at once?" );
1975 mem = s->_leaf->in(MemNode::Memory);
1976 DEBUG_ONLY(_mem_node = s->_leaf;)
1977 }
1978
1979 handle_precedence_edges(s->_leaf, mach);
1980
1981 if( s->_leaf->in(0) && s->_leaf->req() > 1) {
1982 if( !mach->in(0) )
1983 mach->set_req(0,s->_leaf->in(0));
1984 else {
1985 assert( s->_leaf->in(0) == mach->in(0), "same instruction, differing controls?" );
1986 }
1987 }
1988
1989 for (uint i = 0; kid != nullptr && i < 2; kid = s->_kids[1], i++) { // binary tree
1990 int newrule;
1991 if( i == 0) {
1992 newrule = kid->rule(_leftOp[rule]);
1993 } else {
1994 newrule = kid->rule(_rightOp[rule]);
1995 }
1996
1997 if (newrule < _LAST_MACH_OPER) { // Operand or instruction?
1998 // Internal operand; recurse but do nothing else
1999 ReduceOper(kid, newrule, mem, mach);
2000
2001 } else { // Child is a new instruction
2002 // Reduce the instruction, and add a direct pointer from this
2003 // machine instruction to the newly reduced one.
2004 Node *mem1 = (Node*)1;
2005 DEBUG_ONLY(Node *save_mem_node = _mem_node;)
2006 mach->add_req( ReduceInst( kid, newrule, mem1 ) );
2007 DEBUG_ONLY(_mem_node = save_mem_node;)
2008 }
2009 }
2010 }
2011
2012
2013 // -------------------------------------------------------------------------
2014 // Java-Java calling convention
2015 // (what you use when Java calls Java)
2016
2017 //------------------------------find_receiver----------------------------------
2018 // For a given signature, return the OptoReg for parameter 0.
2019 OptoReg::Name Matcher::find_receiver() {
2020 VMRegPair regs;
2021 BasicType sig_bt = T_OBJECT;
2022 SharedRuntime::java_calling_convention(&sig_bt, &regs, 1);
2023 // Return argument 0 register. In the LP64 build pointers
2024 // take 2 registers, but the VM wants only the 'main' name.
2025 return OptoReg::as_OptoReg(regs.first());
2026 }
2027
2028 bool Matcher::is_vshift_con_pattern(Node* n, Node* m) {
2029 if (n != nullptr && m != nullptr) {
2030 return VectorNode::is_vector_shift(n) &&
2031 VectorNode::is_vector_shift_count(m) && m->in(1)->is_Con();
2032 }
2033 return false;
2034 }
2035
2036 bool Matcher::clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
2037 // Must clone all producers of flags, or we will not match correctly.
2038 // Suppose a compare setting int-flags is shared (e.g., a switch-tree)
2039 // then it will match into an ideal Op_RegFlags. Alas, the fp-flags
2040 // are also there, so we may match a float-branch to int-flags and
2041 // expect the allocator to haul the flags from the int-side to the
2042 // fp-side. No can do.
2043 if (_must_clone[m->Opcode()]) {
2044 mstack.push(m, Visit);
2045 return true;
2046 }
2047 return pd_clone_node(n, m, mstack);
2048 }
2049
2050 bool Matcher::clone_base_plus_offset_address(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
2051 Node *off = m->in(AddPNode::Offset);
2052 if (off->is_Con()) {
2053 address_visited.test_set(m->_idx); // Flag as address_visited
2054 mstack.push(m->in(AddPNode::Address), Pre_Visit);
2055 // Clone X+offset as it also folds into most addressing expressions
2056 mstack.push(off, Visit);
2057 mstack.push(m->in(AddPNode::Base), Pre_Visit);
2058 return true;
2059 }
2060 return false;
2061 }
2062
2063 // A method-klass-holder may be passed in the inline_cache_reg
2064 // and then expanded into the inline_cache_reg and a method_ptr register
2065 // defined in ad_<arch>.cpp
2066
2067 //------------------------------find_shared------------------------------------
2068 // Set bits if Node is shared or otherwise a root
2069 void Matcher::find_shared(Node* n) {
2070 // Allocate stack of size C->live_nodes() * 2 to avoid frequent realloc
2071 MStack mstack(C->live_nodes() * 2);
2072 // Mark nodes as address_visited if they are inputs to an address expression
2073 VectorSet address_visited;
2074 mstack.push(n, Visit); // Don't need to pre-visit root node
2075 while (mstack.is_nonempty()) {
2076 n = mstack.node(); // Leave node on stack
2077 Node_State nstate = mstack.state();
2078 uint nop = n->Opcode();
2079 if (nstate == Pre_Visit) {
2080 if (address_visited.test(n->_idx)) { // Visited in address already?
2081 // Flag as visited and shared now.
2082 set_visited(n);
2083 }
2084 if (is_visited(n)) { // Visited already?
2085 // Node is shared and has no reason to clone. Flag it as shared.
2086 // This causes it to match into a register for the sharing.
2087 set_shared(n); // Flag as shared and
2088 if (n->is_DecodeNarrowPtr()) {
2089 // Oop field/array element loads must be shared but since
2090 // they are shared through a DecodeN they may appear to have
2091 // a single use so force sharing here.
2092 set_shared(n->in(1));
2093 }
2094 mstack.pop(); // remove node from stack
2095 continue;
2096 }
2097 nstate = Visit; // Not already visited; so visit now
2098 }
2099 if (nstate == Visit) {
2100 mstack.set_state(Post_Visit);
2101 set_visited(n); // Flag as visited now
2102 bool mem_op = false;
2103 int mem_addr_idx = MemNode::Address;
2104 if (find_shared_visit(mstack, n, nop, mem_op, mem_addr_idx)) {
2105 continue;
2106 }
2107 for (int i = n->len() - 1; i >= 0; --i) { // For my children
2108 Node* m = n->in(i); // Get ith input
2109 if (m == nullptr) {
2110 continue; // Ignore nulls
2111 }
2112 if (clone_node(n, m, mstack)) {
2113 continue;
2114 }
2115
2116 // Clone addressing expressions as they are "free" in memory access instructions
2117 if (mem_op && i == mem_addr_idx && m->is_AddP() &&
2118 // When there are other uses besides address expressions
2119 // put it on stack and mark as shared.
2120 !is_visited(m)) {
2121 // Some inputs for address expression are not put on stack
2122 // to avoid marking them as shared and forcing them into register
2123 // if they are used only in address expressions.
2124 // But they should be marked as shared if there are other uses
2125 // besides address expressions.
2126
2127 if (pd_clone_address_expressions(m->as_AddP(), mstack, address_visited)) {
2128 continue;
2129 }
2130 } // if( mem_op &&
2131 mstack.push(m, Pre_Visit);
2132 } // for(int i = ...)
2133 }
2134 else if (nstate == Alt_Post_Visit) {
2135 mstack.pop(); // Remove node from stack
2136 // We cannot remove the Cmp input from the Bool here, as the Bool may be
2137 // shared and all users of the Bool need to move the Cmp in parallel.
2138 // This leaves both the Bool and the If pointing at the Cmp. To
2139 // prevent the Matcher from trying to Match the Cmp along both paths
2140 // BoolNode::match_edge always returns a zero.
2141
2142 // We reorder the Op_If in a pre-order manner, so we can visit without
2143 // accidentally sharing the Cmp (the Bool and the If make 2 users).
2144 n->add_req( n->in(1)->in(1) ); // Add the Cmp next to the Bool
2145 }
2146 else if (nstate == Post_Visit) {
2147 mstack.pop(); // Remove node from stack
2148
2149 // Now hack a few special opcodes
2150 uint opcode = n->Opcode();
2151 bool gc_handled = BarrierSet::barrier_set()->barrier_set_c2()->matcher_find_shared_post_visit(this, n, opcode);
2152 if (!gc_handled) {
2153 find_shared_post_visit(n, opcode);
2154 }
2155 }
2156 else {
2157 ShouldNotReachHere();
2158 }
2159 } // end of while (mstack.is_nonempty())
2160 }
2161
2162 bool Matcher::find_shared_visit(MStack& mstack, Node* n, uint opcode, bool& mem_op, int& mem_addr_idx) {
2163 switch(opcode) { // Handle some opcodes special
2164 case Op_Phi: // Treat Phis as shared roots
2165 case Op_Parm:
2166 case Op_Proj: // All handled specially during matching
2167 case Op_SafePointScalarObject:
2168 set_shared(n);
2169 set_dontcare(n);
2170 break;
2171 case Op_If:
2172 case Op_CountedLoopEnd:
2173 mstack.set_state(Alt_Post_Visit); // Alternative way
2174 // Convert (If (Bool (CmpX A B))) into (If (Bool) (CmpX A B)). Helps
2175 // with matching cmp/branch in 1 instruction. The Matcher needs the
2176 // Bool and CmpX side-by-side, because it can only get at constants
2177 // that are at the leaves of Match trees, and the Bool's condition acts
2178 // as a constant here.
2179 mstack.push(n->in(1), Visit); // Clone the Bool
2180 mstack.push(n->in(0), Pre_Visit); // Visit control input
2181 return true; // while (mstack.is_nonempty())
2182 case Op_ConvI2D: // These forms efficiently match with a prior
2183 case Op_ConvI2F: // Load but not a following Store
2184 if( n->in(1)->is_Load() && // Prior load
2185 n->outcnt() == 1 && // Not already shared
2186 n->unique_out()->is_Store() ) // Following store
2187 set_shared(n); // Force it to be a root
2188 break;
2189 case Op_ReverseBytesI:
2190 case Op_ReverseBytesL:
2191 if( n->in(1)->is_Load() && // Prior load
2192 n->outcnt() == 1 ) // Not already shared
2193 set_shared(n); // Force it to be a root
2194 break;
2195 case Op_BoxLock: // Can't match until we get stack-regs in ADLC
2196 case Op_IfFalse:
2197 case Op_IfTrue:
2198 case Op_MachProj:
2199 case Op_MergeMem:
2200 case Op_Catch:
2201 case Op_CatchProj:
2202 case Op_CProj:
2203 case Op_JumpProj:
2204 case Op_JProj:
2205 case Op_NeverBranch:
2206 set_dontcare(n);
2207 break;
2208 case Op_Jump:
2209 mstack.push(n->in(1), Pre_Visit); // Switch Value (could be shared)
2210 mstack.push(n->in(0), Pre_Visit); // Visit Control input
2211 return true; // while (mstack.is_nonempty())
2212 case Op_StrComp:
2213 case Op_StrEquals:
2214 case Op_StrIndexOf:
2215 case Op_StrIndexOfChar:
2216 case Op_AryEq:
2217 case Op_VectorizedHashCode:
2218 case Op_CountPositives:
2219 case Op_StrInflatedCopy:
2220 case Op_StrCompressedCopy:
2221 case Op_EncodeISOArray:
2222 case Op_FmaD:
2223 case Op_FmaF:
2224 case Op_FmaHF:
2225 case Op_FmaVD:
2226 case Op_FmaVF:
2227 case Op_FmaVHF:
2228 case Op_MacroLogicV:
2229 case Op_VectorCmpMasked:
2230 case Op_CompressV:
2231 case Op_CompressM:
2232 case Op_ExpandV:
2233 case Op_VectorLoadMask:
2234 set_shared(n); // Force result into register (it will be anyways)
2235 break;
2236 case Op_ConP: { // Convert pointers above the centerline to null
2237 TypeNode *tn = n->as_Type(); // Constants derive from type nodes
2238 const TypePtr* tp = tn->type()->is_ptr();
2239 if (tp->_ptr == TypePtr::AnyNull) {
2240 tn->set_type(TypePtr::NULL_PTR);
2241 }
2242 break;
2243 }
2244 case Op_ConN: { // Convert narrow pointers above the centerline to null
2245 TypeNode *tn = n->as_Type(); // Constants derive from type nodes
2246 const TypePtr* tp = tn->type()->make_ptr();
2247 if (tp && tp->_ptr == TypePtr::AnyNull) {
2248 tn->set_type(TypeNarrowOop::NULL_PTR);
2249 }
2250 break;
2251 }
2252 case Op_Binary: // These are introduced in the Post_Visit state.
2253 ShouldNotReachHere();
2254 break;
2255 case Op_ClearArray:
2256 case Op_SafePoint:
2257 mem_op = true;
2258 break;
2259 default:
2260 if( n->is_Store() ) {
2261 // Do match stores, despite no ideal reg
2262 mem_op = true;
2263 break;
2264 }
2265 if( n->is_Mem() ) { // Loads and LoadStores
2266 mem_op = true;
2267 // Loads must be root of match tree due to prior load conflict
2268 if( C->subsume_loads() == false )
2269 set_shared(n);
2270 }
2271 // Fall into default case
2272 if( !n->ideal_reg() )
2273 set_dontcare(n); // Unmatchable Nodes
2274 } // end_switch
2275 return false;
2276 }
2277
2278 void Matcher::find_shared_post_visit(Node* n, uint opcode) {
2279 if (n->is_predicated_vector()) {
2280 // Restructure into binary trees for Matching.
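// e.g. a source-source-mask node (op src1 src2 mask) becomes
// (op (Binary src1 src2) mask); larger arities pair the remaining
// inputs into nested Binary nodes.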
2281 if (n->req() == 4) {
2282 n->set_req(1, new BinaryNode(n->in(1), n->in(2)));
2283 n->set_req(2, n->in(3));
2284 n->del_req(3);
2285 } else if (n->req() == 5) {
2286 n->set_req(1, new BinaryNode(n->in(1), n->in(2)));
2287 n->set_req(2, new BinaryNode(n->in(3), n->in(4)));
2288 n->del_req(4);
2289 n->del_req(3);
2290 } else if (n->req() == 6) {
2291 Node* b3 = new BinaryNode(n->in(4), n->in(5));
2292 Node* b2 = new BinaryNode(n->in(3), b3);
2293 Node* b1 = new BinaryNode(n->in(2), b2);
2294 n->set_req(2, b1);
2295 n->del_req(5);
2296 n->del_req(4);
2297 n->del_req(3);
2298 }
2299 return;
2300 }
2301
2302 switch(opcode) { // Handle some opcodes special
2303 case Op_CompareAndExchangeB:
2304 case Op_CompareAndExchangeS:
2305 case Op_CompareAndExchangeI:
2306 case Op_CompareAndExchangeL:
2307 case Op_CompareAndExchangeP:
2308 case Op_CompareAndExchangeN:
2309 case Op_WeakCompareAndSwapB:
2310 case Op_WeakCompareAndSwapS:
2311 case Op_WeakCompareAndSwapI:
2312 case Op_WeakCompareAndSwapL:
2313 case Op_WeakCompareAndSwapP:
2314 case Op_WeakCompareAndSwapN:
2315 case Op_CompareAndSwapB:
2316 case Op_CompareAndSwapS:
2317 case Op_CompareAndSwapI:
2318 case Op_CompareAndSwapL:
2319 case Op_CompareAndSwapP:
2320 case Op_CompareAndSwapN: { // Convert trinary to binary-tree
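// Pair the expected and new values into a single Binary edge so the node
// presents a binary tree to the matcher: in(MemNode::ValueIn) becomes
// (Binary oldval newval) and the separate ExpectedIn edge goes away.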
2321 Node* newval = n->in(MemNode::ValueIn);
2322 Node* oldval = n->in(LoadStoreConditionalNode::ExpectedIn);
2323 Node* pair = new BinaryNode(oldval, newval);
2324 n->set_req(MemNode::ValueIn, pair);
2325 n->del_req(LoadStoreConditionalNode::ExpectedIn);
2326 break;
2327 }
2328 case Op_CMoveD: // Convert trinary to binary-tree
2329 case Op_CMoveF:
2330 case Op_CMoveI:
2331 case Op_CMoveL:
2332 case Op_CMoveN:
2333 case Op_CMoveP: {
2334 // Restructure into a binary tree for Matching. It's possible that
2335 // we could move this code up next to the graph reshaping for IfNodes
2336 // or vice-versa, but I do not want to debug this for Ladybird.
2337 // 10/2/2000 CNC.
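// The condition and its compare are paired, as are the two data inputs:
// in(1) becomes (Binary bool (Cmp ...)) and in(2) becomes (Binary v1 v2).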
2338 Node* pair1 = new BinaryNode(n->in(1), n->in(1)->in(1));
2339 n->set_req(1, pair1);
2340 Node* pair2 = new BinaryNode(n->in(2), n->in(3));
2341 n->set_req(2, pair2);
2342 n->del_req(3);
2343 break;
2344 }
2345 case Op_MacroLogicV: {
2346 Node* pair1 = new BinaryNode(n->in(1), n->in(2));
2347 Node* pair2 = new BinaryNode(n->in(3), n->in(4));
2348 n->set_req(1, pair1);
2349 n->set_req(2, pair2);
2350 n->del_req(4);
2351 n->del_req(3);
2352 break;
2353 }
2354 case Op_StoreVectorMasked: {
2355 Node* pair = new BinaryNode(n->in(3), n->in(4));
2356 n->set_req(3, pair);
2357 n->del_req(4);
2358 break;
2359 }
2360 case Op_SelectFromTwoVector:
2361 case Op_LoopLimit: {
2362 Node* pair1 = new BinaryNode(n->in(1), n->in(2));
2363 n->set_req(1, pair1);
2364 n->set_req(2, n->in(3));
2365 n->del_req(3);
2366 break;
2367 }
2368 case Op_StrEquals:
2369 case Op_StrIndexOfChar: {
2370 Node* pair1 = new BinaryNode(n->in(2), n->in(3));
2371 n->set_req(2, pair1);
2372 n->set_req(3, n->in(4));
2373 n->del_req(4);
2374 break;
2375 }
2376 case Op_StrComp:
2377 case Op_StrIndexOf:
2378 case Op_VectorizedHashCode: {
2379 Node* pair1 = new BinaryNode(n->in(2), n->in(3));
2380 n->set_req(2, pair1);
2381 Node* pair2 = new BinaryNode(n->in(4),n->in(5));
2382 n->set_req(3, pair2);
2383 n->del_req(5);
2384 n->del_req(4);
2385 break;
2386 }
2387 case Op_EncodeISOArray:
2388 case Op_StrCompressedCopy:
2389 case Op_StrInflatedCopy: {
2390 // Restructure into a binary tree for Matching.
2391 Node* pair = new BinaryNode(n->in(3), n->in(4));
2392 n->set_req(3, pair);
2393 n->del_req(4);
2394 break;
2395 }
2396 case Op_FmaD:
2397 case Op_FmaF:
2398 case Op_FmaHF:
2399 case Op_FmaVD:
2400 case Op_FmaVF:
2401 case Op_FmaVHF: {
2402 // Restructure into a binary tree for Matching.
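// The first two inputs are folded into a Binary pair that becomes in(2),
// and the old third input moves down to in(1).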
2403 Node* pair = new BinaryNode(n->in(1), n->in(2));
2404 n->set_req(2, pair);
2405 n->set_req(1, n->in(3));
2406 n->del_req(3);
2407 break;
2408 }
2409 case Op_MulAddS2I: {
2410 Node* pair1 = new BinaryNode(n->in(1), n->in(2));
2411 Node* pair2 = new BinaryNode(n->in(3), n->in(4));
2412 n->set_req(1, pair1);
2413 n->set_req(2, pair2);
2414 n->del_req(4);
2415 n->del_req(3);
2416 break;
2417 }
2418 case Op_ClearArray: {
2419 Node* pair = new BinaryNode(n->in(2), n->in(3));
2420 n->set_req(2, pair);
2421 n->set_req(3, n->in(4));
2422 n->del_req(4);
2423 break;
2424 }
2425 case Op_VectorCmpMasked:
2426 case Op_CopySignD:
2427 case Op_SignumVF:
2428 case Op_SignumVD:
2429 case Op_SignumF:
2430 case Op_SignumD: {
2431 Node* pair = new BinaryNode(n->in(2), n->in(3));
2432 n->set_req(2, pair);
2433 n->del_req(3);
2434 break;
2435 }
2436 case Op_VectorBlend:
2437 case Op_VectorInsert: {
2438 Node* pair = new BinaryNode(n->in(1), n->in(2));
2439 n->set_req(1, pair);
2440 n->set_req(2, n->in(3));
2441 n->del_req(3);
2442 break;
2443 }
2444 case Op_LoadVectorGatherMasked: // fall-through
2445 case Op_StoreVectorScatter: {
2446 Node* pair = new BinaryNode(n->in(MemNode::ValueIn), n->in(MemNode::ValueIn+1));
2447 n->set_req(MemNode::ValueIn, pair);
2448 n->del_req(MemNode::ValueIn+1);
2449 break;
2450 }
2451 case Op_StoreVectorScatterMasked: {
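// Nest the three trailing inputs into Binary pairs rooted at ValueIn:
// in(ValueIn) becomes (Binary v0 (Binary v1 v2)).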
2452 Node* pair = new BinaryNode(n->in(MemNode::ValueIn+1), n->in(MemNode::ValueIn+2));
2453 n->set_req(MemNode::ValueIn+1, pair);
2454 n->del_req(MemNode::ValueIn+2);
2455 pair = new BinaryNode(n->in(MemNode::ValueIn), n->in(MemNode::ValueIn+1));
2456 n->set_req(MemNode::ValueIn, pair);
2457 n->del_req(MemNode::ValueIn+1);
2458 break;
2459 }
2460 case Op_VectorMaskCmp: {
2461 n->set_req(1, new BinaryNode(n->in(1), n->in(2)));
2462 n->set_req(2, n->in(3));
2463 n->del_req(3);
2464 break;
2465 }
2466 case Op_PartialSubtypeCheck: {
2467 if (UseSecondarySupersTable && n->in(2)->is_Con()) {
2468 // PartialSubtypeCheck uses both constant and register operands for superclass input.
2469 n->set_req(2, new BinaryNode(n->in(2), n->in(2)));
2470 break;
2471 }
2472 break;
2473 }
2474 case Op_StoreLSpecial: {
2475 if (n->req() > (MemNode::ValueIn + 1) && n->in(MemNode::ValueIn + 1) != nullptr) {
2476 Node* pair = new BinaryNode(n->in(MemNode::ValueIn), n->in(MemNode::ValueIn + 1));
2477 n->set_req(MemNode::ValueIn, pair);
2478 n->del_req(MemNode::ValueIn + 1);
2479 }
2480 break;
2481 }
2482 default:
2483 break;
2484 }
2485 }
2486
2487 #ifndef PRODUCT
2488 void Matcher::record_new2old(Node* newn, Node* old) {
2489 _new2old_map.map(newn->_idx, old);
2490 if (!_reused.test_set(old->_igv_idx)) {
2491 // Reuse the Ideal-level IGV identifier so that the node can be tracked
2492 // across matching. If there are multiple machine nodes expanded from the
2493 // same Ideal node, only one will reuse its IGV identifier.
2494 newn->_igv_idx = old->_igv_idx;
2495 }
2496 }
2497
2498 // machine-independent root to machine-dependent root
2499 void Matcher::dump_old2new_map() {
2500 _old2new_map.dump();
2501 }
2502 #endif // !PRODUCT
2503
2504 //---------------------------collect_null_checks-------------------------------
2505 // Find null checks in the ideal graph; write a machine-specific node for
2506 // it. Used by later implicit-null-check handling. Actually collects
2507 // either an IfTrue or IfFalse for the common NOT-null path, AND the ideal
2508 // value being tested.
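// Entries are pushed onto _null_check_tests in pairs: the not-null projection
// followed by the value being tested (consumed pairwise in validate_null_checks).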
2509 void Matcher::collect_null_checks( Node *proj, Node *orig_proj ) {
2510 Node *iff = proj->in(0);
2511 if( iff->Opcode() == Op_If ) {
2512 // During matching If's have Bool & Cmp side-by-side
2513 BoolNode *b = iff->in(1)->as_Bool();
2514 Node *cmp = iff->in(2);
2515 int opc = cmp->Opcode();
2516 if (opc != Op_CmpP && opc != Op_CmpN) return;
2517
2518 const Type* ct = cmp->in(2)->bottom_type();
2519 if (ct == TypePtr::NULL_PTR ||
2520 (opc == Op_CmpN && ct == TypeNarrowOop::NULL_PTR)) {
2521
2522 bool push_it = false;
2523 if( proj->Opcode() == Op_IfTrue ) {
2524 #ifndef PRODUCT
2525 extern uint all_null_checks_found;
2526 all_null_checks_found++;
2527 #endif
2528 if( b->_test._test == BoolTest::ne ) {
2529 push_it = true;
2530 }
2531 } else {
2532 assert( proj->Opcode() == Op_IfFalse, "" );
2533 if( b->_test._test == BoolTest::eq ) {
2534 push_it = true;
2535 }
2536 }
2537 if( push_it ) {
2538 _null_check_tests.push(proj);
2539 Node* val = cmp->in(1);
2540 #ifdef _LP64
2541 if (val->bottom_type()->isa_narrowoop() &&
2542 !Matcher::narrow_oop_use_complex_address()) {
2543 //
2544 // Look for DecodeN node which should be pinned to orig_proj.
2545 // On platforms (SPARC) which cannot handle 2 adds
2546 // in addressing mode we have to keep a DecodeN node and
2547 // use it to do implicit null check in address.
2548 //
2549 // DecodeN node was pinned to non-null path (orig_proj) during
2550 // CastPP transformation in final_graph_reshaping_impl().
2551 //
2552 uint cnt = orig_proj->outcnt();
2553 for (uint i = 0; i < orig_proj->outcnt(); i++) {
2554 Node* d = orig_proj->raw_out(i);
2555 if (d->is_DecodeN() && d->in(1) == val) {
2556 val = d;
2557 val->set_req(0, nullptr); // Unpin now.
2558 // Mark this as special case to distinguish from
2559 // a regular case: CmpP(DecodeN, null).
2560 val = (Node*)(((intptr_t)val) | 1);
2561 break;
2562 }
2563 }
2564 }
2565 #endif
2566 _null_check_tests.push(val);
2567 }
2568 }
2569 }
2570 }
2571
2572 //---------------------------validate_null_checks------------------------------
2573 // It's possible that the value being null-checked is not the root of a match
2574 // tree. If so, I cannot use the value in an implicit null check.
2575 void Matcher::validate_null_checks( ) {
2576 uint cnt = _null_check_tests.size();
2577 for( uint i=0; i < cnt; i+=2 ) {
2578 Node *test = _null_check_tests[i];
2579 Node *val = _null_check_tests[i+1];
2580 bool is_decoden = ((intptr_t)val) & 1;
2581 val = (Node*)(((intptr_t)val) & ~1);
2582 if (has_new_node(val)) {
2583 Node* new_val = new_node(val);
2584 if (is_decoden) {
2585 assert(val->is_DecodeNarrowPtr() && val->in(0) == nullptr, "sanity");
2586 // Note: new_val may have a control edge if
2587 // the original ideal node DecodeN was matched before
2588 // it was unpinned in Matcher::collect_null_checks().
2589 // Unpin the mach node and mark it.
2590 new_val->set_req(0, nullptr);
2591 new_val = (Node*)(((intptr_t)new_val) | 1);
2592 }
2593 // Is a match-tree root, so replace with the matched value
2594 _null_check_tests.map(i+1, new_val);
2595 } else {
2596 // Yank from candidate list
2597 _null_check_tests.map(i+1,_null_check_tests[--cnt]);
2598 _null_check_tests.map(i,_null_check_tests[--cnt]);
2599 _null_check_tests.pop();
2600 _null_check_tests.pop();
2601 i-=2;
2602 }
2603 }
2604 }
2605
2606 bool Matcher::gen_narrow_oop_implicit_null_checks() {
2607 // Advise the matcher to perform null checks on the narrow oop side.
2608 // Implicit checks are not possible on the uncompressed oop side anyway
2609 // (at least not for read accesses).
2610 // Performs significantly better (especially on Power 6).
2611 if (!os::zero_page_read_protected()) {
2612 return true;
2613 }
2614 return CompressedOops::use_implicit_null_checks() &&
2615 (narrow_oop_use_complex_address() ||
2616 CompressedOops::base() != nullptr);
2617 }
2618
2619 // Compute RegMask for an ideal register.
2620 const RegMask* Matcher::regmask_for_ideal_register(uint ideal_reg, Node* ret) {
2621 assert(!C->failing_internal() || C->failure_is_artificial(), "already failing.");
2622 if (C->failing()) {
2623 return nullptr;
2624 }
2625 const Type* t = Type::mreg2type[ideal_reg];
2626 if (t == nullptr) {
2627 assert(ideal_reg >= Op_VecA && ideal_reg <= Op_VecZ, "not a vector: %d", ideal_reg);
2628 return nullptr; // not supported
2629 }
2630 Node* fp = ret->in(TypeFunc::FramePtr);
2631 Node* mem = ret->in(TypeFunc::Memory);
2632 const TypePtr* atp = TypePtr::BOTTOM;
2633 MemNode::MemOrd mo = MemNode::unordered;
2634
2635 Node* spill;
2636 switch (ideal_reg) {
2637 case Op_RegN: spill = new LoadNNode(nullptr, mem, fp, atp, t->is_narrowoop(), mo); break;
2638 case Op_RegI: spill = new LoadINode(nullptr, mem, fp, atp, t->is_int(), mo); break;
2639 case Op_RegP: spill = new LoadPNode(nullptr, mem, fp, atp, t->is_ptr(), mo); break;
2640 case Op_RegF: spill = new LoadFNode(nullptr, mem, fp, atp, t, mo); break;
2641 case Op_RegD: spill = new LoadDNode(nullptr, mem, fp, atp, t, mo); break;
2642 case Op_RegL: spill = new LoadLNode(nullptr, mem, fp, atp, t->is_long(), mo); break;
2643
2644 case Op_VecA: // fall-through
2645 case Op_VecS: // fall-through
2646 case Op_VecD: // fall-through
2647 case Op_VecX: // fall-through
2648 case Op_VecY: // fall-through
2649 case Op_VecZ: spill = new LoadVectorNode(nullptr, mem, fp, atp, t->is_vect()); break;
2650 case Op_RegVectMask: return Matcher::predicate_reg_mask();
2651
2652 default: ShouldNotReachHere();
2653 }
2654 MachNode* mspill = match_tree(spill);
2655 assert(mspill != nullptr || C->failure_is_artificial(), "matching failed: %d", ideal_reg);
2656 if (C->failing()) {
2657 return nullptr;
2658 }
2659 // Handle generic vector operand case
2660 if (Matcher::supports_generic_vector_operands && t->isa_vect()) {
2661 specialize_mach_node(mspill);
2662 }
2663 return &mspill->out_RegMask();
2664 }
2665
2666 // Process Mach IR right after selection phase is over.
2667 void Matcher::do_postselect_cleanup() {
2668 if (supports_generic_vector_operands) {
2669 specialize_generic_vector_operands();
2670 if (C->failing()) return;
2671 }
2672 }
2673
2674 //----------------------------------------------------------------------
2675 // Generic machine operands elision.
2676 //----------------------------------------------------------------------
2677
2678 // Compute concrete vector operand for a generic TEMP vector mach node based on its user info.
2679 void Matcher::specialize_temp_node(MachTempNode* tmp, MachNode* use, uint idx) {
2680 assert(use->in(idx) == tmp, "not a user");
2681 assert(!Matcher::is_generic_vector(use->_opnds[0]), "use not processed yet");
2682
2683 if ((uint)idx == use->two_adr()) { // DEF_TEMP case
2684 tmp->_opnds[0] = use->_opnds[0]->clone();
2685 } else {
2686 uint ideal_vreg = vector_ideal_reg(C->max_vector_size());
2687 tmp->_opnds[0] = Matcher::pd_specialize_generic_vector_operand(tmp->_opnds[0], ideal_vreg, true /*is_temp*/);
2688 }
2689 }
2690
2691 // Compute concrete vector operand for a generic DEF/USE vector operand (of mach node m at index idx).
2692 MachOper* Matcher::specialize_vector_operand(MachNode* m, uint opnd_idx) {
2693 assert(Matcher::is_generic_vector(m->_opnds[opnd_idx]), "repeated updates");
2694 Node* def = nullptr;
2695 if (opnd_idx == 0) { // DEF
2696 def = m; // use mach node itself to compute vector operand type
2697 } else {
2698 int base_idx = m->operand_index(opnd_idx);
2699 def = m->in(base_idx);
2700 if (def->is_Mach()) {
2701 if (def->is_MachTemp() && Matcher::is_generic_vector(def->as_Mach()->_opnds[0])) {
2702 specialize_temp_node(def->as_MachTemp(), m, base_idx); // MachTemp node use site
2703 } else if (is_reg2reg_move(def->as_Mach())) {
2704 def = def->in(1); // skip over generic reg-to-reg moves
2705 }
2706 }
2707 }
2708 assert(def->bottom_type()->isa_vect(), "not a vector");
2709 uint ideal_vreg = def->bottom_type()->ideal_reg();
2710 return Matcher::pd_specialize_generic_vector_operand(m->_opnds[opnd_idx], ideal_vreg, false /*is_temp*/);
2711 }
2712
2713 void Matcher::specialize_mach_node(MachNode* m) {
2714 assert(!m->is_MachTemp(), "processed along with its user");
2715 // For generic use operands pull specific register class operands from
2716 // its def instruction's output operand (def operand).
2717 for (uint i = 0; i < m->num_opnds(); i++) {
2718 if (Matcher::is_generic_vector(m->_opnds[i])) {
2719 m->_opnds[i] = specialize_vector_operand(m, i);
2720 }
2721 }
2722 }
2723
2724 // Replace generic vector operands with concrete vector operands and eliminate generic reg-to-reg moves from the graph.
2725 void Matcher::specialize_generic_vector_operands() {
2726 assert(supports_generic_vector_operands, "sanity");
2727 ResourceMark rm;
2728
2729 // Replace generic vector operands (vec/legVec) with concrete ones (vec[SDXYZ]/legVec[SDXYZ])
2730 // and remove reg-to-reg vector moves (MoveVec2Leg and MoveLeg2Vec).
2731 Unique_Node_List live_nodes;
2732 C->identify_useful_nodes(live_nodes);
2733
2734 while (live_nodes.size() > 0) {
2735 MachNode* m = live_nodes.pop()->isa_Mach();
2736 if (m != nullptr) {
2737 if (Matcher::is_reg2reg_move(m)) {
2738 // Register allocator properly handles vec <=> leg moves using register masks.
2739 int opnd_idx = m->operand_index(1);
2740 Node* def = m->in(opnd_idx);
2741 m->subsume_by(def, C);
2742 } else if (m->is_MachTemp()) {
2743 // process MachTemp nodes at use site (see Matcher::specialize_vector_operand)
2744 } else {
2745 specialize_mach_node(m);
2746 }
2747 }
2748 }
2749 }
2750
2751 uint Matcher::vector_length(const Node* n) {
2752 const TypeVect* vt = n->bottom_type()->is_vect();
2753 return vt->length();
2754 }
2755
2756 uint Matcher::vector_length(const MachNode* use, const MachOper* opnd) {
2757 int def_idx = use->operand_index(opnd);
2758 Node* def = use->in(def_idx);
2759 return def->bottom_type()->is_vect()->length();
2760 }
2761
2762 uint Matcher::vector_length_in_bytes(const Node* n) {
2763 const TypeVect* vt = n->bottom_type()->is_vect();
2764 return vt->length_in_bytes();
2765 }
2766
2767 uint Matcher::vector_length_in_bytes(const MachNode* use, const MachOper* opnd) {
2768 uint def_idx = use->operand_index(opnd);
2769 Node* def = use->in(def_idx);
2770 return def->bottom_type()->is_vect()->length_in_bytes();
2771 }
2772
2773 BasicType Matcher::vector_element_basic_type(const Node* n) {
2774 const TypeVect* vt = n->bottom_type()->is_vect();
2775 return vt->element_basic_type();
2776 }
2777
2778 BasicType Matcher::vector_element_basic_type(const MachNode* use, const MachOper* opnd) {
2779 int def_idx = use->operand_index(opnd);
2780 Node* def = use->in(def_idx);
2781 return def->bottom_type()->is_vect()->element_basic_type();
2782 }
2783
2784 bool Matcher::is_non_long_integral_vector(const Node* n) {
2785 BasicType bt = vector_element_basic_type(n);
2786 assert(bt != T_CHAR, "char is not allowed in vector");
2787 return is_subword_type(bt) || bt == T_INT;
2788 }
2789
2790 bool Matcher::is_encode_and_store_pattern(const Node* n, const Node* m) {
2791 if (n == nullptr ||
2792 m == nullptr ||
2793 n->Opcode() != Op_StoreN ||
2794 !m->is_EncodeP() ||
2795 n->as_Store()->barrier_data() == 0) {
2796 return false;
2797 }
2798 assert(m == n->in(MemNode::ValueIn), "m should be input to n");
2799 return true;
2800 }
2801
2802 #ifdef ASSERT
2803 bool Matcher::verify_after_postselect_cleanup() {
2804 assert(!C->failing_internal() || C->failure_is_artificial(), "sanity");
2805 if (supports_generic_vector_operands) {
2806 Unique_Node_List useful;
2807 C->identify_useful_nodes(useful);
2808 for (uint i = 0; i < useful.size(); i++) {
2809 MachNode* m = useful.at(i)->isa_Mach();
2810 if (m != nullptr) {
2811 assert(!Matcher::is_reg2reg_move(m), "no MoveVec nodes allowed");
2812 for (uint j = 0; j < m->num_opnds(); j++) {
2813 assert(!Matcher::is_generic_vector(m->_opnds[j]), "no generic vector operands allowed");
2814 }
2815 }
2816 }
2817 }
2818 return true;
2819 }
2820 #endif // ASSERT
2821
2822 // Used by the DFA in dfa_xxx.cpp. Check for a following barrier or
2823 // atomic instruction acting as a store_load barrier without any
2824 // intervening volatile load, and thus we don't need a barrier here.
2825 // We retain the Node to act as a compiler ordering barrier.
bool Matcher::post_store_load_barrier(const Node* vmb) {
  Compile* C = Compile::current();
  assert(vmb->is_MemBar(), "");
  assert(vmb->Opcode() != Op_MemBarAcquire && vmb->Opcode() != Op_LoadFence, "");
  const MemBarNode* membar = vmb->as_MemBar();

  // Get the Ideal Proj node, ctrl, that can be used to iterate forward
  Node* ctrl = nullptr;
  for (DUIterator_Fast imax, i = membar->fast_outs(imax); i < imax; i++) {
    Node* p = membar->fast_out(i);
    assert(p->is_Proj(), "only projections here");
    if ((p->as_Proj()->_con == TypeFunc::Control) &&
        !C->node_arena()->contains(p)) { // Unmatched old-space only
      ctrl = p;
      break;
    }
  }
  assert((ctrl != nullptr), "missing control projection");

  for (DUIterator_Fast jmax, j = ctrl->fast_outs(jmax); j < jmax; j++) {
    Node *x = ctrl->fast_out(j);
    int xop = x->Opcode();

    // We don't need the current barrier if we see another barrier or a lock
    // before seeing a volatile load.
    //
    // Op_FastUnlock previously appeared in the Op_* list below.
    // With the advent of 1-0 lock operations we're no longer guaranteed
    // that a monitor exit operation contains a serializing instruction.

    if (xop == Op_MemBarVolatile ||
        xop == Op_CompareAndExchangeB ||
        xop == Op_CompareAndExchangeS ||
        xop == Op_CompareAndExchangeI ||
        xop == Op_CompareAndExchangeL ||
        xop == Op_CompareAndExchangeP ||
        xop == Op_CompareAndExchangeN ||
        xop == Op_WeakCompareAndSwapB ||
        xop == Op_WeakCompareAndSwapS ||
        xop == Op_WeakCompareAndSwapL ||
        xop == Op_WeakCompareAndSwapP ||
        xop == Op_WeakCompareAndSwapN ||
        xop == Op_WeakCompareAndSwapI ||
        xop == Op_CompareAndSwapB ||
        xop == Op_CompareAndSwapS ||
        xop == Op_CompareAndSwapL ||
        xop == Op_CompareAndSwapP ||
        xop == Op_CompareAndSwapN ||
        xop == Op_CompareAndSwapI ||
        BarrierSet::barrier_set()->barrier_set_c2()->matcher_is_store_load_barrier(x, xop)) {
      return true;
    }

    // Op_FastLock previously appeared in the Op_* list above.
    if (xop == Op_FastLock) {
      return true;
    }

    if (x->is_MemBar()) {
      // We must retain this membar if there is an upcoming volatile
      // load, which will be followed by an acquire membar.
      if (xop == Op_MemBarAcquire || xop == Op_LoadFence) {
        return false;
      } else {
        // For other kinds of barriers, check by pretending we
        // are them, and seeing if we can be removed.
        return post_store_load_barrier(x->as_MemBar());
      }
    }

    // Probably not necessary to check for these.
    if (x->is_Call() || x->is_SafePoint() || x->is_block_proj()) {
      return false;
    }
  }
  return false;
}

// Check whether node n is a branch to an uncommon trap that we could
// optimize as a test with a very high branch cost on the path into
// the uncommon trap.  The code must be able to be recompiled to use
// a cheaper test.
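//
// A sketch of the subgraph this walks (illustrative only): starting from the
// IfFalse projection, follow at most four Region nodes looking for a call
// into the uncommon trap blob, then inspect its constant trap request:
//
//   If --> IfFalse --> [Region -->]* Call(entry == uncommon_trap_blob)
//
// The trap is only interesting if its reason is still allowed to deopt and
// its action is not Action_none, i.e. hitting it would trigger recompilation.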
bool Matcher::branches_to_uncommon_trap(const Node *n) {
  // Don't do it for natives, adapters, or runtime stubs
  Compile *C = Compile::current();
  if (!C->is_method_compilation()) return false;

  assert(n->is_If(), "You should only call this on if nodes.");
  IfNode *ifn = n->as_If();

  Node *ifFalse = nullptr;
  for (DUIterator_Fast imax, i = ifn->fast_outs(imax); i < imax; i++) {
    if (ifn->fast_out(i)->is_IfFalse()) {
      ifFalse = ifn->fast_out(i);
      break;
    }
  }
  assert(ifFalse, "An If should have an ifFalse. Graph is broken.");

  Node *reg = ifFalse;
  int cnt = 4; // We must protect against cycles. Limit to 4 iterations.
               // Alternatively use visited set? Seems too expensive.
  while (reg != nullptr && cnt > 0) {
    CallNode *call = nullptr;
    RegionNode *nxt_reg = nullptr;
    for (DUIterator_Fast imax, i = reg->fast_outs(imax); i < imax; i++) {
      Node *o = reg->fast_out(i);
      if (o->is_Call()) {
        call = o->as_Call();
      }
      if (o->is_Region()) {
        nxt_reg = o->as_Region();
      }
    }

    if (call &&
        call->entry_point() == OptoRuntime::uncommon_trap_blob()->entry_point()) {
      const Type* trtype = call->in(TypeFunc::Parms)->bottom_type();
      if (trtype->isa_int() && trtype->is_int()->is_con()) {
        jint tr_con = trtype->is_int()->get_con();
        Deoptimization::DeoptReason reason = Deoptimization::trap_request_reason(tr_con);
        Deoptimization::DeoptAction action = Deoptimization::trap_request_action(tr_con);
        assert((int)reason < (int)BitsPerInt, "recode bit map");

        if (is_set_nth_bit(C->allowed_deopt_reasons(), (int)reason)
            && action != Deoptimization::Action_none) {
          // This uncommon trap is sure to recompile, eventually.
          // When that happens, C->too_many_traps will prevent
          // this transformation from happening again.
          return true;
        }
      }
    }

    reg = nxt_reg;
    cnt--;
  }

  return false;
}

//=============================================================================
//---------------------------State---------------------------------------------
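// In ASSERT builds the constructor and destructor below poison the State's
// fields with recognizable patterns (0xcafebabe..., 0xbaadf00d..., -3) so
// that use of an uninitialized or already-destroyed State stands out in a
// debugger.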
State::State(void) : _rule() {
#ifdef ASSERT
  _id = 0;
  _kids[0] = _kids[1] = (State*)(intptr_t) CONST64(0xcafebabecafebabe);
  _leaf = (Node*)(intptr_t) CONST64(0xbaadf00dbaadf00d);
#endif
}

#ifdef ASSERT
State::~State() {
  _id = 99;
  _kids[0] = _kids[1] = (State*)(intptr_t) CONST64(0xcafebabecafebabe);
  _leaf = (Node*)(intptr_t) CONST64(0xbaadf00dbaadf00d);
  memset(_cost, -3, sizeof(_cost));
  memset(_rule, -3, sizeof(_rule));
}
#endif

#ifndef PRODUCT
//---------------------------dump----------------------------------------------
void State::dump() {
  tty->print("\n");
  dump(0);
}

void State::dump(int depth) {
  for (int j = 0; j < depth; j++) {
    tty->print(" ");
  }
  tty->print("--N: ");
  _leaf->dump();
  uint i;
  for (i = 0; i < _LAST_MACH_OPER; i++) {
    // Check for valid entry
    if (valid(i)) {
      for (int j = 0; j < depth; j++) {
        tty->print(" ");
      }
      assert(cost(i) != max_juint, "cost must be a valid value");
      assert(rule(i) < _last_Mach_Node, "rule[i] must be valid rule");
      tty->print_cr("%s %d %s",
                    ruleName[i], cost(i), ruleName[rule(i)]);
    }
  }
  tty->cr();

  for (i = 0; i < 2; i++) {
    if (_kids[i]) {
      _kids[i]->dump(depth + 1);
    }
  }
}
#endif