1 /*
2 * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "gc/shared/barrierSet.hpp"
26 #include "gc/shared/c2/barrierSetC2.hpp"
27 #include "memory/allocation.inline.hpp"
28 #include "memory/resourceArea.hpp"
29 #include "oops/compressedOops.hpp"
30 #include "opto/ad.hpp"
31 #include "opto/addnode.hpp"
32 #include "opto/callnode.hpp"
33 #include "opto/idealGraphPrinter.hpp"
34 #include "opto/matcher.hpp"
35 #include "opto/memnode.hpp"
36 #include "opto/movenode.hpp"
37 #include "opto/opcodes.hpp"
38 #include "opto/regmask.hpp"
39 #include "opto/rootnode.hpp"
40 #include "opto/runtime.hpp"
41 #include "opto/type.hpp"
42 #include "opto/vectornode.hpp"
43 #include "runtime/os.inline.hpp"
44 #include "runtime/sharedRuntime.hpp"
45 #include "utilities/align.hpp"
46
47 OptoReg::Name OptoReg::c_frame_pointer;
48
49 const RegMask *Matcher::idealreg2regmask[_last_machine_leaf];
50 RegMask Matcher::mreg2regmask[_last_Mach_Reg];
51 RegMask Matcher::caller_save_regmask;
52 RegMask Matcher::caller_save_regmask_exclude_soe;
53 RegMask Matcher::STACK_ONLY_mask;
54 RegMask Matcher::c_frame_ptr_mask;
55 const uint Matcher::_begin_rematerialize = _BEGIN_REMATERIALIZE;
56 const uint Matcher::_end_rematerialize = _END_REMATERIALIZE;
57
58 //---------------------------Matcher-------------------------------------------
59 Matcher::Matcher()
60 : PhaseTransform( Phase::Ins_Select ),
61 _states_arena(Chunk::medium_size, mtCompiler, Arena::Tag::tag_states),
62 _new_nodes(C->comp_arena()),
63 _visited(&_states_arena),
64 _shared(&_states_arena),
65 _dontcare(&_states_arena),
66 _reduceOp(reduceOp), _leftOp(leftOp), _rightOp(rightOp),
67 _swallowed(swallowed),
68 _begin_inst_chain_rule(_BEGIN_INST_CHAIN_RULE),
69 _end_inst_chain_rule(_END_INST_CHAIN_RULE),
70 _must_clone(must_clone),
71 _shared_nodes(C->comp_arena()),
72 #ifndef PRODUCT
73 _old2new_map(C->comp_arena()),
74 _new2old_map(C->comp_arena()),
75 _reused(C->comp_arena()),
76 #endif // !PRODUCT
77 _allocation_started(false),
78 _ruleName(ruleName),
79 _register_save_policy(register_save_policy),
80 _c_reg_save_policy(c_reg_save_policy),
81 _register_save_type(register_save_type),
82 _return_addr_mask(C->comp_arena()) {
83 C->set_matcher(this);
84
85 idealreg2spillmask [Op_RegI] = nullptr;
86 idealreg2spillmask [Op_RegN] = nullptr;
87 idealreg2spillmask [Op_RegL] = nullptr;
88 idealreg2spillmask [Op_RegF] = nullptr;
89 idealreg2spillmask [Op_RegD] = nullptr;
90 idealreg2spillmask [Op_RegP] = nullptr;
91 idealreg2spillmask [Op_VecA] = nullptr;
92 idealreg2spillmask [Op_VecS] = nullptr;
93 idealreg2spillmask [Op_VecD] = nullptr;
94 idealreg2spillmask [Op_VecX] = nullptr;
95 idealreg2spillmask [Op_VecY] = nullptr;
96 idealreg2spillmask [Op_VecZ] = nullptr;
97 idealreg2spillmask [Op_RegFlags] = nullptr;
98 idealreg2spillmask [Op_RegVectMask] = nullptr;
99
100 idealreg2debugmask [Op_RegI] = nullptr;
101 idealreg2debugmask [Op_RegN] = nullptr;
102 idealreg2debugmask [Op_RegL] = nullptr;
103 idealreg2debugmask [Op_RegF] = nullptr;
104 idealreg2debugmask [Op_RegD] = nullptr;
105 idealreg2debugmask [Op_RegP] = nullptr;
106 idealreg2debugmask [Op_VecA] = nullptr;
107 idealreg2debugmask [Op_VecS] = nullptr;
108 idealreg2debugmask [Op_VecD] = nullptr;
109 idealreg2debugmask [Op_VecX] = nullptr;
110 idealreg2debugmask [Op_VecY] = nullptr;
111 idealreg2debugmask [Op_VecZ] = nullptr;
112 idealreg2debugmask [Op_RegFlags] = nullptr;
113 idealreg2debugmask [Op_RegVectMask] = nullptr;
114
115 DEBUG_ONLY(_mem_node = nullptr;) // Ideal memory node consumed by mach node
116 }
117
118 //------------------------------warp_incoming_stk_arg------------------------
119 // This warps a VMReg into an OptoReg::Name
120 OptoReg::Name Matcher::warp_incoming_stk_arg( VMReg reg ) {
121 OptoReg::Name warped;
122 if( reg->is_stack() ) { // Stack slot argument?
123 warped = OptoReg::add(_old_SP, reg->reg2stack() );
124 warped = OptoReg::add(warped, C->out_preserve_stack_slots());
125 if( warped >= _in_arg_limit )
126 _in_arg_limit = OptoReg::add(warped, 1); // Bump max stack slot seen
127 return warped;
128 }
129 return OptoReg::as_OptoReg(reg);
130 }
131
132 //---------------------------compute_old_SP------------------------------------
133 OptoReg::Name Compile::compute_old_SP() {
134 int fixed = fixed_slots();
135 int preserve = in_preserve_stack_slots();
136 return OptoReg::stack2reg(align_up(fixed + preserve, (int)Matcher::stack_alignment_in_slots()));
137 }
138
139
140
141 #ifdef ASSERT
142 void Matcher::verify_new_nodes_only(Node* xroot) {
143 // Make sure that the new graph only references new nodes
144 ResourceMark rm;
145 Unique_Node_List worklist;
146 VectorSet visited;
147 worklist.push(xroot);
148 while (worklist.size() > 0) {
149 Node* n = worklist.pop();
150 if (visited.test_set(n->_idx)) {
151 continue;
152 }
153 assert(C->node_arena()->contains(n), "dead node");
154 assert(!n->is_Initialize() || n->as_Initialize()->number_of_projs(TypeFunc::Memory) == 1,
155 "after matching, Initialize should have a single memory projection");
156 for (uint j = 0; j < n->req(); j++) {
157 Node* in = n->in(j);
158 if (in != nullptr) {
159 worklist.push(in);
160 }
161 }
162 for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
163 worklist.push(n->fast_out(j));
164 }
165 }
166 }
167 #endif
168
169
170 //---------------------------match---------------------------------------------
171 void Matcher::match( ) {
172 if( MaxLabelRootDepth < 100 ) { // Too small?
173 assert(false, "invalid MaxLabelRootDepth, increase it to 100 minimum");
174 MaxLabelRootDepth = 100;
175 }
176 // One-time initialization of some register masks.
177 init_spill_mask( C->root()->in(1) );
178 if (C->failing()) {
179 return;
180 }
181 assert(_return_addr_mask.is_empty(),
182 "return address mask must be empty initially");
183 _return_addr_mask.insert(return_addr());
184 #ifdef _LP64
185 // Pointers take 2 slots in 64-bit land
186 _return_addr_mask.insert(OptoReg::add(return_addr(), 1));
187 #endif
188
189 // Map a Java-signature return type into return register-value
190 // machine registers for 0, 1 and 2 returned values.
191 const TypeTuple *range = C->tf()->range();
192 if( range->cnt() > TypeFunc::Parms ) { // If not a void function
193 // Get ideal-register return type
194 uint ireg = range->field_at(TypeFunc::Parms)->ideal_reg();
195 // Get machine return register
196 uint sop = C->start()->Opcode();
197 OptoRegPair regs = return_value(ireg);
198
199 // And mask for same
200 _return_value_mask.assignFrom(RegMask(regs.first()));
201 if( OptoReg::is_valid(regs.second()) )
202 _return_value_mask.insert(regs.second());
203 }
204
205 // ---------------
206 // Frame Layout
207
208 // Need the method signature to determine the incoming argument types,
209 // because the types determine which registers the incoming arguments are
210 // in, and this affects the matched code.
211 const TypeTuple *domain = C->tf()->domain();
212 uint argcnt = domain->cnt() - TypeFunc::Parms;
213 BasicType *sig_bt = NEW_RESOURCE_ARRAY( BasicType, argcnt );
214 VMRegPair *vm_parm_regs = NEW_RESOURCE_ARRAY( VMRegPair, argcnt );
215 _parm_regs = NEW_RESOURCE_ARRAY( OptoRegPair, argcnt );
216 _calling_convention_mask = NEW_RESOURCE_ARRAY( RegMask, argcnt );
217 uint i;
218 for( i = 0; i<argcnt; i++ ) {
219 sig_bt[i] = domain->field_at(i+TypeFunc::Parms)->basic_type();
220 new (_calling_convention_mask + i) RegMask(C->comp_arena());
221 }
222
223 // Pass array of ideal registers and length to USER code (from the AD file)
224 // that will convert this to an array of register numbers.
225 const StartNode *start = C->start();
226 start->calling_convention( sig_bt, vm_parm_regs, argcnt );
227 #ifdef ASSERT
228 // Sanity check users' calling convention. Real handy while trying to
229 // get the initial port correct.
230 { for (uint i = 0; i<argcnt; i++) {
231 if( !vm_parm_regs[i].first()->is_valid() && !vm_parm_regs[i].second()->is_valid() ) {
232 assert(domain->field_at(i+TypeFunc::Parms)==Type::HALF, "only allowed on halve" );
233 _parm_regs[i].set_bad();
234 continue;
235 }
236 VMReg parm_reg = vm_parm_regs[i].first();
237 assert(parm_reg->is_valid(), "invalid arg?");
238 if (parm_reg->is_reg()) {
239 OptoReg::Name opto_parm_reg = OptoReg::as_OptoReg(parm_reg);
240 assert(can_be_java_arg(opto_parm_reg) ||
241 C->stub_function() == CAST_FROM_FN_PTR(address, OptoRuntime::rethrow_C) ||
242 opto_parm_reg == inline_cache_reg(),
243 "parameters in register must be preserved by runtime stubs");
244 }
245 for (uint j = 0; j < i; j++) {
246 assert(parm_reg != vm_parm_regs[j].first(),
247 "calling conv. must produce distinct regs");
248 }
249 }
250 }
251 #endif
252
253 // Do some initial frame layout.
254
255 // Compute the old incoming SP (may be called FP) as
256 // OptoReg::stack0() + locks + in_preserve_stack_slots + pad2.
257 _old_SP = C->compute_old_SP();
258 assert( is_even(_old_SP), "must be even" );
259
260 // Compute highest incoming stack argument as
261 // _old_SP + out_preserve_stack_slots + incoming argument size.
262 _in_arg_limit = OptoReg::add(_old_SP, C->out_preserve_stack_slots());
263 assert( is_even(_in_arg_limit), "out_preserve must be even" );
264 for( i = 0; i < argcnt; i++ ) {
265 // Permit args to have no register
266 _calling_convention_mask[i].clear();
267 if( !vm_parm_regs[i].first()->is_valid() && !vm_parm_regs[i].second()->is_valid() ) {
268 _parm_regs[i].set_bad();
269 continue;
270 }
271 // calling_convention returns stack arguments as a count of
272 // slots beyond OptoReg::stack0()/VMRegImpl::stack0. We need to convert this to
273 // the allocators point of view, taking into account all the
274 // preserve area, locks & pad2.
275
276 OptoReg::Name reg1 = warp_incoming_stk_arg(vm_parm_regs[i].first());
277 if( OptoReg::is_valid(reg1))
278 _calling_convention_mask[i].insert(reg1);
279
280 OptoReg::Name reg2 = warp_incoming_stk_arg(vm_parm_regs[i].second());
281 if( OptoReg::is_valid(reg2))
282 _calling_convention_mask[i].insert(reg2);
283
284 // Saved biased stack-slot register number
285 _parm_regs[i].set_pair(reg2, reg1);
286 }
287
288 // Finally, make sure the incoming arguments take up an even number of
289 // words, in case the arguments or locals need to contain doubleword stack
290 // slots. The rest of the system assumes that stack slot pairs (in
291 // particular, in the spill area) which look aligned will in fact be
292 // aligned relative to the stack pointer in the target machine. Double
293 // stack slots will always be allocated aligned.
294 _new_SP = OptoReg::Name(align_up(_in_arg_limit, (int)RegMask::SlotsPerLong));
295
296 // Compute highest outgoing stack argument as
297 // _new_SP + out_preserve_stack_slots + max(outgoing argument size).
298 _out_arg_limit = OptoReg::add(_new_SP, C->out_preserve_stack_slots());
299 assert( is_even(_out_arg_limit), "out_preserve must be even" );
300
301 // ---------------
302 // Collect roots of matcher trees. Every node for which
303 // _shared[_idx] is cleared is guaranteed to not be shared, and thus
304 // can be a valid interior of some tree.
305 find_shared( C->root() );
306 find_shared( C->top() );
307
308 C->print_method(PHASE_BEFORE_MATCHING, 1);
309
310 // Create new ideal node ConP #null even if it does exist in old space
311 // to avoid false sharing if the corresponding mach node is not used.
312 // The corresponding mach node is only used in rare cases for derived
313 // pointers.
314 Node* new_ideal_null = ConNode::make(TypePtr::NULL_PTR);
315
316 // Swap out to old-space; emptying new-space
317 Arena* old = C->swap_old_and_new();
318
319 // Save debug and profile information for nodes in old space:
320 _old_node_note_array = C->node_note_array();
321 if (_old_node_note_array != nullptr) {
322 C->set_node_note_array(new(C->comp_arena()) GrowableArray<Node_Notes*>
323 (C->comp_arena(), _old_node_note_array->length(),
324 0, nullptr));
325 }
326
327 // Pre-size the new_node table to avoid the need for range checks.
328 grow_new_node_array(C->unique());
329
330 // Reset node counter so MachNodes start with _idx at 0
331 int live_nodes = C->live_nodes();
332 C->set_unique(0);
333 C->reset_dead_node_list();
334
335 // Recursively match trees from old space into new space.
336 // Correct leaves of new-space Nodes; they point to old-space.
337 _visited.clear();
338 Node* const n = xform(C->top(), live_nodes);
339 if (C->failing()) return;
340 C->set_cached_top_node(n);
341 if (!C->failing()) {
342 Node* xroot = xform( C->root(), 1 );
343 if (C->failing()) return;
344 if (xroot == nullptr) {
345 Matcher::soft_match_failure(); // recursive matching process failed
346 assert(false, "instruction match failed");
347 C->record_method_not_compilable("instruction match failed");
348 } else {
349 // During matching shared constants were attached to C->root()
350 // because xroot wasn't available yet, so transfer the uses to
351 // the xroot.
352 for( DUIterator_Fast jmax, j = C->root()->fast_outs(jmax); j < jmax; j++ ) {
353 Node* n = C->root()->fast_out(j);
354 if (C->node_arena()->contains(n)) {
355 assert(n->in(0) == C->root(), "should be control user");
356 n->set_req(0, xroot);
357 --j;
358 --jmax;
359 }
360 }
361
362 // Generate new mach node for ConP #null
363 assert(new_ideal_null != nullptr, "sanity");
364 _mach_null = match_tree(new_ideal_null);
365 // Don't set control, it will confuse GCM since there are no uses.
366 // The control will be set when this node is used first time
367 // in find_base_for_derived().
368 assert(_mach_null != nullptr || C->failure_is_artificial(), ""); // bailouts are handled below.
369
370 C->set_root(xroot->is_Root() ? xroot->as_Root() : nullptr);
371
372 #ifdef ASSERT
373 verify_new_nodes_only(xroot);
374 #endif
375 }
376 }
377 if (C->top() == nullptr || C->root() == nullptr) {
378 // New graph lost. This is due to a compilation failure we encountered earlier.
379 stringStream ss;
380 if (C->failure_reason() != nullptr) {
381 ss.print("graph lost: %s", C->failure_reason());
382 } else {
383 assert(C->failure_reason() != nullptr, "graph lost: reason unknown");
384 ss.print("graph lost: reason unknown");
385 }
386 C->record_method_not_compilable(ss.as_string() DEBUG_ONLY(COMMA true));
387 }
388 if (C->failing()) {
389 // delete old;
390 old->destruct_contents();
391 return;
392 }
393 assert( C->top(), "" );
394 assert( C->root(), "" );
395 validate_null_checks();
396
397 // Now smoke old-space
398 NOT_DEBUG( old->destruct_contents() );
399
400 // ------------------------
401 // Set up save-on-entry registers.
402 Fixup_Save_On_Entry( );
403
404 { // Cleanup mach IR after selection phase is over.
405 Compile::TracePhase tp(_t_postselect_cleanup);
406 do_postselect_cleanup();
407 if (C->failing()) return;
408 assert(verify_after_postselect_cleanup(), "");
409 }
410 }
411
412 //------------------------------Fixup_Save_On_Entry----------------------------
413 // The stated purpose of this routine is to take care of save-on-entry
414 // registers. However, the overall goal of the Match phase is to convert into
415 // machine-specific instructions which have RegMasks to guide allocation.
416 // So what this procedure really does is put a valid RegMask on each input
417 // to the machine-specific variations of all Return, TailCall and Halt
418 // instructions. It also adds edgs to define the save-on-entry values (and of
419 // course gives them a mask).
420
421 static RegMask *init_input_masks( uint size, RegMask &ret_adr, RegMask &fp ) {
422 RegMask *rms = NEW_RESOURCE_ARRAY( RegMask, size );
423 for (unsigned int i = 0; i < size; ++i) {
424 new (rms + i) RegMask(Compile::current()->comp_arena());
425 }
426 // Do all the pre-defined register masks
427 rms[TypeFunc::Control ].assignFrom(RegMask::EMPTY);
428 rms[TypeFunc::I_O ].assignFrom(RegMask::EMPTY);
429 rms[TypeFunc::Memory ].assignFrom(RegMask::EMPTY);
430 rms[TypeFunc::ReturnAdr].assignFrom(ret_adr);
431 rms[TypeFunc::FramePtr ].assignFrom(fp);
432 return rms;
433 }
434
435 int Matcher::scalable_predicate_reg_slots() {
436 assert(Matcher::has_predicated_vectors() && Matcher::supports_scalable_vector(),
437 "scalable predicate vector should be supported");
438 int vector_reg_bit_size = Matcher::scalable_vector_reg_size(T_BYTE) << LogBitsPerByte;
439 // We assume each predicate register is one-eighth of the size of
440 // scalable vector register, one mask bit per vector byte.
441 int predicate_reg_bit_size = vector_reg_bit_size >> 3;
442 // Compute number of slots which is required when scalable predicate
443 // register is spilled. E.g. if scalable vector register is 640 bits,
444 // predicate register is 80 bits, which is 2.5 * slots.
445 // We will round up the slot number to power of 2, which is required
446 // by find_first_set().
447 int slots = predicate_reg_bit_size & (BitsPerInt - 1)
448 ? (predicate_reg_bit_size >> LogBitsPerInt) + 1
449 : predicate_reg_bit_size >> LogBitsPerInt;
450 return round_up_power_of_2(slots);
451 }
452
453 #define NOF_STACK_MASKS (2*13)
454
455 // Create the initial stack mask used by values spilling to the stack.
456 // Disallow any debug info in outgoing argument areas by setting the
457 // initial mask accordingly.
458 void Matcher::init_first_stack_mask() {
459
460 // Allocate storage for spill masks as masks for the appropriate load type.
461 RegMask *rms = (RegMask*)C->comp_arena()->AmallocWords(sizeof(RegMask) * NOF_STACK_MASKS);
462
463 // Initialize empty placeholder masks into the newly allocated arena
464 for (int i = 0; i < NOF_STACK_MASKS; i++) {
465 new (rms + i) RegMask(C->comp_arena());
466 }
467
468 int index = 0;
469 for (int i = Op_RegN; i <= Op_RegVectMask; ++i) {
470 idealreg2spillmask[i] = &rms[index++];
471 idealreg2debugmask[i] = &rms[index++];
472 }
473 assert(index == NOF_STACK_MASKS, "wrong size");
474
475 // At first, start with the empty mask
476 C->FIRST_STACK_mask().clear();
477
478 // Add in the incoming argument area
479 OptoReg::Name init_in = OptoReg::add(_old_SP, C->out_preserve_stack_slots());
480 for (OptoReg::Name i = init_in; i < _in_arg_limit; i = OptoReg::add(i, 1)) {
481 C->FIRST_STACK_mask().insert(i);
482 }
483 // Add in all bits past the outgoing argument area
484 C->FIRST_STACK_mask().set_all_from(_out_arg_limit);
485
486 // Make spill masks. Registers for their class, plus FIRST_STACK_mask.
487 RegMask aligned_stack_mask(C->FIRST_STACK_mask(), C->comp_arena());
488 // Keep spill masks aligned.
489 aligned_stack_mask.clear_to_pairs();
490 assert(aligned_stack_mask.is_infinite_stack(), "should be infinite stack");
491 RegMask scalable_stack_mask(aligned_stack_mask, C->comp_arena());
492
493 idealreg2spillmask[Op_RegP]->assignFrom(*idealreg2regmask[Op_RegP]);
494 #ifdef _LP64
495 idealreg2spillmask[Op_RegN]->assignFrom(*idealreg2regmask[Op_RegN]);
496 idealreg2spillmask[Op_RegN]->or_with(C->FIRST_STACK_mask());
497 idealreg2spillmask[Op_RegP]->or_with(aligned_stack_mask);
498 #else
499 idealreg2spillmask[Op_RegP]->or_with(C->FIRST_STACK_mask());
500 #endif
501 idealreg2spillmask[Op_RegI]->assignFrom(*idealreg2regmask[Op_RegI]);
502 idealreg2spillmask[Op_RegI]->or_with(C->FIRST_STACK_mask());
503 idealreg2spillmask[Op_RegL]->assignFrom(*idealreg2regmask[Op_RegL]);
504 idealreg2spillmask[Op_RegL]->or_with(aligned_stack_mask);
505 idealreg2spillmask[Op_RegF]->assignFrom(*idealreg2regmask[Op_RegF]);
506 idealreg2spillmask[Op_RegF]->or_with(C->FIRST_STACK_mask());
507 idealreg2spillmask[Op_RegD]->assignFrom(*idealreg2regmask[Op_RegD]);
508 idealreg2spillmask[Op_RegD]->or_with(aligned_stack_mask);
509
510 if (Matcher::has_predicated_vectors()) {
511 idealreg2spillmask[Op_RegVectMask]->assignFrom(*idealreg2regmask[Op_RegVectMask]);
512 idealreg2spillmask[Op_RegVectMask]->or_with(aligned_stack_mask);
513 } else {
514 idealreg2spillmask[Op_RegVectMask]->assignFrom(RegMask::EMPTY);
515 }
516
517 if (Matcher::vector_size_supported(T_BYTE,4)) {
518 idealreg2spillmask[Op_VecS]->assignFrom(*idealreg2regmask[Op_VecS]);
519 idealreg2spillmask[Op_VecS]->or_with(C->FIRST_STACK_mask());
520 } else {
521 idealreg2spillmask[Op_VecS]->assignFrom(RegMask::EMPTY);
522 }
523
524 if (Matcher::vector_size_supported(T_FLOAT,2)) {
525 // For VecD we need dual alignment and 8 bytes (2 slots) for spills.
526 // RA guarantees such alignment since it is needed for Double and Long values.
527 idealreg2spillmask[Op_VecD]->assignFrom(*idealreg2regmask[Op_VecD]);
528 idealreg2spillmask[Op_VecD]->or_with(aligned_stack_mask);
529 } else {
530 idealreg2spillmask[Op_VecD]->assignFrom(RegMask::EMPTY);
531 }
532
533 if (Matcher::vector_size_supported(T_FLOAT,4)) {
534 // For VecX we need quadro alignment and 16 bytes (4 slots) for spills.
535 //
536 // RA can use input arguments stack slots for spills but until RA
537 // we don't know frame size and offset of input arg stack slots.
538 //
539 // Exclude last input arg stack slots to avoid spilling vectors there
540 // otherwise vector spills could stomp over stack slots in caller frame.
541 OptoReg::Name in = OptoReg::add(_in_arg_limit, -1);
542 for (int k = 1; (in >= init_in) && (k < RegMask::SlotsPerVecX); k++) {
543 aligned_stack_mask.remove(in);
544 in = OptoReg::add(in, -1);
545 }
546 aligned_stack_mask.clear_to_sets(RegMask::SlotsPerVecX);
547 assert(aligned_stack_mask.is_infinite_stack(), "should be infinite stack");
548 idealreg2spillmask[Op_VecX]->assignFrom(*idealreg2regmask[Op_VecX]);
549 idealreg2spillmask[Op_VecX]->or_with(aligned_stack_mask);
550 } else {
551 idealreg2spillmask[Op_VecX]->assignFrom(RegMask::EMPTY);
552 }
553
554 if (Matcher::vector_size_supported(T_FLOAT,8)) {
555 // For VecY we need octo alignment and 32 bytes (8 slots) for spills.
556 OptoReg::Name in = OptoReg::add(_in_arg_limit, -1);
557 for (int k = 1; (in >= init_in) && (k < RegMask::SlotsPerVecY); k++) {
558 aligned_stack_mask.remove(in);
559 in = OptoReg::add(in, -1);
560 }
561 aligned_stack_mask.clear_to_sets(RegMask::SlotsPerVecY);
562 assert(aligned_stack_mask.is_infinite_stack(), "should be infinite stack");
563 idealreg2spillmask[Op_VecY]->assignFrom(*idealreg2regmask[Op_VecY]);
564 idealreg2spillmask[Op_VecY]->or_with(aligned_stack_mask);
565 } else {
566 idealreg2spillmask[Op_VecY]->assignFrom(RegMask::EMPTY);
567 }
568
569 if (Matcher::vector_size_supported(T_FLOAT,16)) {
570 // For VecZ we need enough alignment and 64 bytes (16 slots) for spills.
571 OptoReg::Name in = OptoReg::add(_in_arg_limit, -1);
572 for (int k = 1; (in >= init_in) && (k < RegMask::SlotsPerVecZ); k++) {
573 aligned_stack_mask.remove(in);
574 in = OptoReg::add(in, -1);
575 }
576 aligned_stack_mask.clear_to_sets(RegMask::SlotsPerVecZ);
577 assert(aligned_stack_mask.is_infinite_stack(), "should be infinite stack");
578 idealreg2spillmask[Op_VecZ]->assignFrom(*idealreg2regmask[Op_VecZ]);
579 idealreg2spillmask[Op_VecZ]->or_with(aligned_stack_mask);
580 } else {
581 idealreg2spillmask[Op_VecZ]->assignFrom(RegMask::EMPTY);
582 }
583
584 if (Matcher::supports_scalable_vector()) {
585 int k = 1;
586 OptoReg::Name in = OptoReg::add(_in_arg_limit, -1);
587 if (Matcher::has_predicated_vectors()) {
588 // Exclude last input arg stack slots to avoid spilling vector register there,
589 // otherwise RegVectMask spills could stomp over stack slots in caller frame.
590 for (; (in >= init_in) && (k < scalable_predicate_reg_slots()); k++) {
591 scalable_stack_mask.remove(in);
592 in = OptoReg::add(in, -1);
593 }
594
595 // For RegVectMask
596 scalable_stack_mask.clear_to_sets(scalable_predicate_reg_slots());
597 assert(scalable_stack_mask.is_infinite_stack(), "should be infinite stack");
598 idealreg2spillmask[Op_RegVectMask]->assignFrom(*idealreg2regmask[Op_RegVectMask]);
599 idealreg2spillmask[Op_RegVectMask]->or_with(scalable_stack_mask);
600 }
601
602 // Exclude last input arg stack slots to avoid spilling vector register there,
603 // otherwise vector spills could stomp over stack slots in caller frame.
604 for (; (in >= init_in) && (k < scalable_vector_reg_size(T_FLOAT)); k++) {
605 scalable_stack_mask.remove(in);
606 in = OptoReg::add(in, -1);
607 }
608
609 // For VecA
610 scalable_stack_mask.clear_to_sets(RegMask::SlotsPerVecA);
611 assert(scalable_stack_mask.is_infinite_stack(), "should be infinite stack");
612 idealreg2spillmask[Op_VecA]->assignFrom(*idealreg2regmask[Op_VecA]);
613 idealreg2spillmask[Op_VecA]->or_with(scalable_stack_mask);
614 } else {
615 idealreg2spillmask[Op_VecA]->assignFrom(RegMask::EMPTY);
616 }
617
618 if (UseFPUForSpilling) {
619 // This mask logic assumes that the spill operations are
620 // symmetric and that the registers involved are the same size.
621 // On sparc for instance we may have to use 64 bit moves will
622 // kill 2 registers when used with F0-F31.
623 idealreg2spillmask[Op_RegI]->or_with(*idealreg2regmask[Op_RegF]);
624 idealreg2spillmask[Op_RegF]->or_with(*idealreg2regmask[Op_RegI]);
625 #ifdef _LP64
626 idealreg2spillmask[Op_RegN]->or_with(*idealreg2regmask[Op_RegF]);
627 idealreg2spillmask[Op_RegL]->or_with(*idealreg2regmask[Op_RegD]);
628 idealreg2spillmask[Op_RegD]->or_with(*idealreg2regmask[Op_RegL]);
629 idealreg2spillmask[Op_RegP]->or_with(*idealreg2regmask[Op_RegD]);
630 #else
631 idealreg2spillmask[Op_RegP]->or_with(*idealreg2regmask[Op_RegF]);
632 #ifdef ARM
633 // ARM has support for moving 64bit values between a pair of
634 // integer registers and a double register
635 idealreg2spillmask[Op_RegL]->or_with(*idealreg2regmask[Op_RegD]);
636 idealreg2spillmask[Op_RegD]->or_with(*idealreg2regmask[Op_RegL]);
637 #endif
638 #endif
639 }
640
641 // Make up debug masks. Any spill slot plus callee-save (SOE) registers.
642 // Caller-save (SOC, AS) registers are assumed to be trashable by the various
643 // inline-cache fixup routines.
644 idealreg2debugmask[Op_RegN]->assignFrom(*idealreg2spillmask[Op_RegN]);
645 idealreg2debugmask[Op_RegI]->assignFrom(*idealreg2spillmask[Op_RegI]);
646 idealreg2debugmask[Op_RegL]->assignFrom(*idealreg2spillmask[Op_RegL]);
647 idealreg2debugmask[Op_RegF]->assignFrom(*idealreg2spillmask[Op_RegF]);
648 idealreg2debugmask[Op_RegD]->assignFrom(*idealreg2spillmask[Op_RegD]);
649 idealreg2debugmask[Op_RegP]->assignFrom(*idealreg2spillmask[Op_RegP]);
650 idealreg2debugmask[Op_RegVectMask]->assignFrom(*idealreg2spillmask[Op_RegVectMask]);
651
652 idealreg2debugmask[Op_VecA]->assignFrom(*idealreg2spillmask[Op_VecA]);
653 idealreg2debugmask[Op_VecS]->assignFrom(*idealreg2spillmask[Op_VecS]);
654 idealreg2debugmask[Op_VecD]->assignFrom(*idealreg2spillmask[Op_VecD]);
655 idealreg2debugmask[Op_VecX]->assignFrom(*idealreg2spillmask[Op_VecX]);
656 idealreg2debugmask[Op_VecY]->assignFrom(*idealreg2spillmask[Op_VecY]);
657 idealreg2debugmask[Op_VecZ]->assignFrom(*idealreg2spillmask[Op_VecZ]);
658
659 // Prevent stub compilations from attempting to reference
660 // callee-saved (SOE) registers from debug info
661 bool exclude_soe = !Compile::current()->is_method_compilation();
662 RegMask* caller_save_mask = exclude_soe ? &caller_save_regmask_exclude_soe : &caller_save_regmask;
663
664 idealreg2debugmask[Op_RegN]->subtract(*caller_save_mask);
665 idealreg2debugmask[Op_RegI]->subtract(*caller_save_mask);
666 idealreg2debugmask[Op_RegL]->subtract(*caller_save_mask);
667 idealreg2debugmask[Op_RegF]->subtract(*caller_save_mask);
668 idealreg2debugmask[Op_RegD]->subtract(*caller_save_mask);
669 idealreg2debugmask[Op_RegP]->subtract(*caller_save_mask);
670 idealreg2debugmask[Op_RegVectMask]->subtract(*caller_save_mask);
671
672 idealreg2debugmask[Op_VecA]->subtract(*caller_save_mask);
673 idealreg2debugmask[Op_VecS]->subtract(*caller_save_mask);
674 idealreg2debugmask[Op_VecD]->subtract(*caller_save_mask);
675 idealreg2debugmask[Op_VecX]->subtract(*caller_save_mask);
676 idealreg2debugmask[Op_VecY]->subtract(*caller_save_mask);
677 idealreg2debugmask[Op_VecZ]->subtract(*caller_save_mask);
678 }
679
680 //---------------------------is_save_on_entry----------------------------------
681 bool Matcher::is_save_on_entry(int reg) {
682 return
683 _register_save_policy[reg] == 'E' ||
684 _register_save_policy[reg] == 'A'; // Save-on-entry register?
685 }
686
687 //---------------------------Fixup_Save_On_Entry-------------------------------
688 void Matcher::Fixup_Save_On_Entry( ) {
689 init_first_stack_mask();
690
691 Node *root = C->root(); // Short name for root
692 // Count number of save-on-entry registers.
693 uint soe_cnt = number_of_saved_registers();
694 uint i;
695
696 // Find the procedure Start Node
697 StartNode *start = C->start();
698 assert( start, "Expect a start node" );
699
700 // Input RegMask array shared by all Returns.
701 // The type for doubles and longs has a count of 2, but
702 // there is only 1 returned value
703 uint ret_edge_cnt = TypeFunc::Parms + ((C->tf()->range()->cnt() == TypeFunc::Parms) ? 0 : 1);
704 RegMask *ret_rms = init_input_masks( ret_edge_cnt + soe_cnt, _return_addr_mask, c_frame_ptr_mask );
705 // Returns have 0 or 1 returned values depending on call signature.
706 // Return register is specified by return_value in the AD file.
707 if (ret_edge_cnt > TypeFunc::Parms) {
708 ret_rms[TypeFunc::Parms + 0].assignFrom(_return_value_mask);
709 }
710
711 // Input RegMask array shared by all ForwardExceptions
712 uint forw_exc_edge_cnt = TypeFunc::Parms;
713 RegMask* forw_exc_rms = init_input_masks( forw_exc_edge_cnt + soe_cnt, _return_addr_mask, c_frame_ptr_mask );
714
715 // Input RegMask array shared by all Rethrows.
716 uint reth_edge_cnt = TypeFunc::Parms+1;
717 RegMask *reth_rms = init_input_masks( reth_edge_cnt + soe_cnt, _return_addr_mask, c_frame_ptr_mask );
718 // Rethrow takes exception oop only, but in the argument 0 slot.
719 OptoReg::Name reg = find_receiver();
720 if (reg >= 0) {
721 reth_rms[TypeFunc::Parms].assignFrom(mreg2regmask[reg]);
722 #ifdef _LP64
723 // Need two slots for ptrs in 64-bit land
724 reth_rms[TypeFunc::Parms].insert(OptoReg::add(OptoReg::Name(reg), 1));
725 #endif
726 }
727
728 // Input RegMask array shared by all TailCalls
729 uint tail_call_edge_cnt = TypeFunc::Parms+2;
730 RegMask *tail_call_rms = init_input_masks( tail_call_edge_cnt + soe_cnt, _return_addr_mask, c_frame_ptr_mask );
731
732 // Input RegMask array shared by all TailJumps
733 uint tail_jump_edge_cnt = TypeFunc::Parms+2;
734 RegMask *tail_jump_rms = init_input_masks( tail_jump_edge_cnt + soe_cnt, _return_addr_mask, c_frame_ptr_mask );
735
736 // TailCalls have 2 returned values (target & moop), whose masks come
737 // from the usual MachNode/MachOper mechanism. Find a sample
738 // TailCall to extract these masks and put the correct masks into
739 // the tail_call_rms array.
740 for( i=1; i < root->req(); i++ ) {
741 MachReturnNode *m = root->in(i)->as_MachReturn();
742 if( m->ideal_Opcode() == Op_TailCall ) {
743 tail_call_rms[TypeFunc::Parms + 0].assignFrom(m->MachNode::in_RegMask(TypeFunc::Parms + 0));
744 tail_call_rms[TypeFunc::Parms + 1].assignFrom(m->MachNode::in_RegMask(TypeFunc::Parms + 1));
745 break;
746 }
747 }
748
749 // TailJumps have 2 returned values (target & ex_oop), whose masks come
750 // from the usual MachNode/MachOper mechanism. Find a sample
751 // TailJump to extract these masks and put the correct masks into
752 // the tail_jump_rms array.
753 for( i=1; i < root->req(); i++ ) {
754 MachReturnNode *m = root->in(i)->as_MachReturn();
755 if( m->ideal_Opcode() == Op_TailJump ) {
756 tail_jump_rms[TypeFunc::Parms + 0].assignFrom(m->MachNode::in_RegMask(TypeFunc::Parms + 0));
757 tail_jump_rms[TypeFunc::Parms + 1].assignFrom(m->MachNode::in_RegMask(TypeFunc::Parms + 1));
758 break;
759 }
760 }
761
762 // Input RegMask array shared by all Halts
763 uint halt_edge_cnt = TypeFunc::Parms;
764 RegMask *halt_rms = init_input_masks( halt_edge_cnt + soe_cnt, _return_addr_mask, c_frame_ptr_mask );
765
766 // Capture the return input masks into each exit flavor
767 for( i=1; i < root->req(); i++ ) {
768 MachReturnNode *exit = root->in(i)->as_MachReturn();
769 switch( exit->ideal_Opcode() ) {
770 case Op_Return : exit->_in_rms = ret_rms; break;
771 case Op_Rethrow : exit->_in_rms = reth_rms; break;
772 case Op_TailCall : exit->_in_rms = tail_call_rms; break;
773 case Op_TailJump : exit->_in_rms = tail_jump_rms; break;
774 case Op_ForwardException: exit->_in_rms = forw_exc_rms; break;
775 case Op_Halt : exit->_in_rms = halt_rms; break;
776 default : ShouldNotReachHere();
777 }
778 }
779
780 // Next unused projection number from Start.
781 int proj_cnt = C->tf()->domain()->cnt();
782
783 // Do all the save-on-entry registers. Make projections from Start for
784 // them, and give them a use at the exit points. To the allocator, they
785 // look like incoming register arguments.
786 for( i = 0; i < _last_Mach_Reg; i++ ) {
787 if( is_save_on_entry(i) ) {
788
789 // Add the save-on-entry to the mask array
790 ret_rms [ ret_edge_cnt].assignFrom(mreg2regmask[i]);
791 reth_rms [ reth_edge_cnt].assignFrom(mreg2regmask[i]);
792 tail_call_rms[tail_call_edge_cnt].assignFrom(mreg2regmask[i]);
793 tail_jump_rms[tail_jump_edge_cnt].assignFrom(mreg2regmask[i]);
794 forw_exc_rms [ forw_exc_edge_cnt].assignFrom(mreg2regmask[i]);
795 // Halts need the SOE registers, but only in the stack as debug info.
796 // A just-prior uncommon-trap or deoptimization will use the SOE regs.
797 halt_rms [ halt_edge_cnt].assignFrom(*idealreg2spillmask[_register_save_type[i]]);
798
799 Node *mproj;
800
801 // Is this a RegF low half of a RegD? Double up 2 adjacent RegF's
802 // into a single RegD.
803 if( (i&1) == 0 &&
804 _register_save_type[i ] == Op_RegF &&
805 _register_save_type[i+1] == Op_RegF &&
806 is_save_on_entry(i+1) ) {
807 // Add other bit for double
808 ret_rms [ ret_edge_cnt].insert(OptoReg::Name(i+1));
809 reth_rms [ reth_edge_cnt].insert(OptoReg::Name(i+1));
810 tail_call_rms[tail_call_edge_cnt].insert(OptoReg::Name(i+1));
811 tail_jump_rms[tail_jump_edge_cnt].insert(OptoReg::Name(i+1));
812 forw_exc_rms [ forw_exc_edge_cnt].insert(OptoReg::Name(i+1));
813 halt_rms [ halt_edge_cnt].insert(OptoReg::Name(i+1));
814 mproj = new MachProjNode( start, proj_cnt, ret_rms[ret_edge_cnt], Op_RegD );
815 proj_cnt += 2; // Skip 2 for doubles
816 }
817 else if( (i&1) == 1 && // Else check for high half of double
818 _register_save_type[i-1] == Op_RegF &&
819 _register_save_type[i ] == Op_RegF &&
820 is_save_on_entry(i-1) ) {
821 ret_rms [ ret_edge_cnt].assignFrom(RegMask::EMPTY);
822 reth_rms [ reth_edge_cnt].assignFrom(RegMask::EMPTY);
823 tail_call_rms[tail_call_edge_cnt].assignFrom(RegMask::EMPTY);
824 tail_jump_rms[tail_jump_edge_cnt].assignFrom(RegMask::EMPTY);
825 forw_exc_rms [ forw_exc_edge_cnt].assignFrom(RegMask::EMPTY);
826 halt_rms [ halt_edge_cnt].assignFrom(RegMask::EMPTY);
827 mproj = C->top();
828 }
829 // Is this a RegI low half of a RegL? Double up 2 adjacent RegI's
830 // into a single RegL.
831 else if( (i&1) == 0 &&
832 _register_save_type[i ] == Op_RegI &&
833 _register_save_type[i+1] == Op_RegI &&
834 is_save_on_entry(i+1) ) {
835 // Add other bit for long
836 ret_rms [ ret_edge_cnt].insert(OptoReg::Name(i+1));
837 reth_rms [ reth_edge_cnt].insert(OptoReg::Name(i+1));
838 tail_call_rms[tail_call_edge_cnt].insert(OptoReg::Name(i+1));
839 tail_jump_rms[tail_jump_edge_cnt].insert(OptoReg::Name(i+1));
840 forw_exc_rms [ forw_exc_edge_cnt].insert(OptoReg::Name(i+1));
841 halt_rms [ halt_edge_cnt].insert(OptoReg::Name(i+1));
842 mproj = new MachProjNode( start, proj_cnt, ret_rms[ret_edge_cnt], Op_RegL );
843 proj_cnt += 2; // Skip 2 for longs
844 }
845 else if( (i&1) == 1 && // Else check for high half of long
846 _register_save_type[i-1] == Op_RegI &&
847 _register_save_type[i ] == Op_RegI &&
848 is_save_on_entry(i-1) ) {
849 ret_rms [ ret_edge_cnt].assignFrom(RegMask::EMPTY);
850 reth_rms [ reth_edge_cnt].assignFrom(RegMask::EMPTY);
851 tail_call_rms[tail_call_edge_cnt].assignFrom(RegMask::EMPTY);
852 tail_jump_rms[tail_jump_edge_cnt].assignFrom(RegMask::EMPTY);
853 forw_exc_rms [ forw_exc_edge_cnt].assignFrom(RegMask::EMPTY);
854 halt_rms [ halt_edge_cnt].assignFrom(RegMask::EMPTY);
855 mproj = C->top();
856 } else {
857 // Make a projection for it off the Start
858 mproj = new MachProjNode( start, proj_cnt++, ret_rms[ret_edge_cnt], _register_save_type[i] );
859 }
860
861 ret_edge_cnt ++;
862 reth_edge_cnt ++;
863 tail_call_edge_cnt ++;
864 tail_jump_edge_cnt ++;
865 forw_exc_edge_cnt++;
866 halt_edge_cnt ++;
867
868 // Add a use of the SOE register to all exit paths
869 for (uint j=1; j < root->req(); j++) {
870 root->in(j)->add_req(mproj);
871 }
872 } // End of if a save-on-entry register
873 } // End of for all machine registers
874 }
875
876 //------------------------------init_spill_mask--------------------------------
877 void Matcher::init_spill_mask( Node *ret ) {
878 if( idealreg2regmask[Op_RegI] ) return; // One time only init
879
880 OptoReg::c_frame_pointer = c_frame_pointer();
881 c_frame_ptr_mask.assignFrom(RegMask(c_frame_pointer()));
882 #ifdef _LP64
883 // pointers are twice as big
884 c_frame_ptr_mask.insert(OptoReg::add(c_frame_pointer(), 1));
885 #endif
886
887 // Start at OptoReg::stack0()
888 STACK_ONLY_mask.clear();
889 // STACK_ONLY_mask is all stack bits
890 STACK_ONLY_mask.set_all_from(OptoReg::stack2reg(0));
891
892 for (OptoReg::Name i = OptoReg::Name(0); i < OptoReg::Name(_last_Mach_Reg);
893 i = OptoReg::add(i, 1)) {
894 // Copy the register names over into the shared world.
895 // SharedInfo::regName[i] = regName[i];
896 // Handy RegMasks per machine register
897 mreg2regmask[i].insert(i);
898
899 // Set up regmasks used to exclude save-on-call (and always-save) registers from debug masks.
900 if (_register_save_policy[i] == 'C' ||
901 _register_save_policy[i] == 'A') {
902 caller_save_regmask.insert(i);
903 }
904 // Exclude save-on-entry registers from debug masks for stub compilations.
905 if (_register_save_policy[i] == 'C' ||
906 _register_save_policy[i] == 'A' ||
907 _register_save_policy[i] == 'E') {
908 caller_save_regmask_exclude_soe.insert(i);
909 }
910 }
911
912 // Grab the Frame Pointer
913 Node *fp = ret->in(TypeFunc::FramePtr);
914 // Share frame pointer while making spill ops
915 set_shared(fp);
916
917 // Get the ADLC notion of the right regmask, for each basic type.
918 #ifdef _LP64
919 idealreg2regmask[Op_RegN] = regmask_for_ideal_register(Op_RegN, ret);
920 #endif
921 idealreg2regmask[Op_RegI] = regmask_for_ideal_register(Op_RegI, ret);
922 idealreg2regmask[Op_RegP] = regmask_for_ideal_register(Op_RegP, ret);
923 idealreg2regmask[Op_RegF] = regmask_for_ideal_register(Op_RegF, ret);
924 idealreg2regmask[Op_RegD] = regmask_for_ideal_register(Op_RegD, ret);
925 idealreg2regmask[Op_RegL] = regmask_for_ideal_register(Op_RegL, ret);
926 idealreg2regmask[Op_VecA] = regmask_for_ideal_register(Op_VecA, ret);
927 idealreg2regmask[Op_VecS] = regmask_for_ideal_register(Op_VecS, ret);
928 idealreg2regmask[Op_VecD] = regmask_for_ideal_register(Op_VecD, ret);
929 idealreg2regmask[Op_VecX] = regmask_for_ideal_register(Op_VecX, ret);
930 idealreg2regmask[Op_VecY] = regmask_for_ideal_register(Op_VecY, ret);
931 idealreg2regmask[Op_VecZ] = regmask_for_ideal_register(Op_VecZ, ret);
932 idealreg2regmask[Op_RegVectMask] = regmask_for_ideal_register(Op_RegVectMask, ret);
933 }
934
935 #ifdef ASSERT
936 static void match_alias_type(Compile* C, Node* n, Node* m) {
937 if (!VerifyAliases) return; // do not go looking for trouble by default
938 const TypePtr* nat = n->adr_type();
939 const TypePtr* mat = m->adr_type();
940 int nidx = C->get_alias_index(nat);
941 int midx = C->get_alias_index(mat);
942 // Detune the assert for cases like (AndI 0xFF (LoadB p)).
943 if (nidx == Compile::AliasIdxTop && midx >= Compile::AliasIdxRaw) {
944 for (uint i = 1; i < n->req(); i++) {
945 Node* n1 = n->in(i);
946 const TypePtr* n1at = n1->adr_type();
947 if (n1at != nullptr) {
948 nat = n1at;
949 nidx = C->get_alias_index(n1at);
950 }
951 }
952 }
953 // %%% Kludgery. Instead, fix ideal adr_type methods for all these cases:
954 if (nidx == Compile::AliasIdxTop && midx == Compile::AliasIdxRaw) {
955 switch (n->Opcode()) {
956 case Op_PrefetchAllocation:
957 nidx = Compile::AliasIdxRaw;
958 nat = TypeRawPtr::BOTTOM;
959 break;
960 }
961 }
962 if (nidx == Compile::AliasIdxRaw && midx == Compile::AliasIdxTop) {
963 switch (n->Opcode()) {
964 case Op_ClearArray:
965 midx = Compile::AliasIdxRaw;
966 mat = TypeRawPtr::BOTTOM;
967 break;
968 }
969 }
970 if (nidx == Compile::AliasIdxTop && midx == Compile::AliasIdxBot) {
971 switch (n->Opcode()) {
972 case Op_Return:
973 case Op_Rethrow:
974 case Op_Halt:
975 case Op_TailCall:
976 case Op_TailJump:
977 case Op_ForwardException:
978 nidx = Compile::AliasIdxBot;
979 nat = TypePtr::BOTTOM;
980 break;
981 }
982 }
983 if (nidx == Compile::AliasIdxBot && midx == Compile::AliasIdxTop) {
984 switch (n->Opcode()) {
985 case Op_StrComp:
986 case Op_StrEquals:
987 case Op_StrIndexOf:
988 case Op_StrIndexOfChar:
989 case Op_AryEq:
990 case Op_VectorizedHashCode:
991 case Op_CountPositives:
992 case Op_MemBarVolatile:
993 case Op_MemBarCPUOrder: // %%% these ideals should have narrower adr_type?
994 case Op_StrInflatedCopy:
995 case Op_StrCompressedCopy:
996 case Op_OnSpinWait:
997 case Op_EncodeISOArray:
998 nidx = Compile::AliasIdxTop;
999 nat = nullptr;
1000 break;
1001 }
1002 }
1003 if (nidx != midx) {
1004 if (PrintOpto || (PrintMiscellaneous && (WizardMode || Verbose))) {
1005 tty->print_cr("==== Matcher alias shift %d => %d", nidx, midx);
1006 n->dump();
1007 m->dump();
1008 }
1009 assert(C->subsume_loads() && C->must_alias(nat, midx),
1010 "must not lose alias info when matching");
1011 }
1012 }
1013 #endif
1014
1015 //------------------------------xform------------------------------------------
1016 // Given a Node in old-space, Match him (Label/Reduce) to produce a machine
1017 // Node in new-space. Given a new-space Node, recursively walk his children.
1018 Node *Matcher::transform( Node *n ) { ShouldNotCallThis(); return n; }
1019 Node *Matcher::xform( Node *n, int max_stack ) {
1020 // Use one stack to keep both: child's node/state and parent's node/index
1021 MStack mstack(max_stack * 2 * 2); // usually: C->live_nodes() * 2 * 2
1022 mstack.push(n, Visit, nullptr, -1); // set null as parent to indicate root
1023 while (mstack.is_nonempty()) {
1024 C->check_node_count(NodeLimitFudgeFactor, "too many nodes matching instructions");
1025 if (C->failing()) return nullptr;
1026 n = mstack.node(); // Leave node on stack
1027 Node_State nstate = mstack.state();
1028 if (nstate == Visit) {
1029 mstack.set_state(Post_Visit);
1030 Node *oldn = n;
1031 // Old-space or new-space check
1032 if (!C->node_arena()->contains(n)) {
1033 // Old space!
1034 Node* m = nullptr;
1035 if (has_new_node(n)) { // Not yet Label/Reduced
1036 m = new_node(n);
1037 } else {
1038 if (!is_dontcare(n)) { // Matcher can match this guy
1039 // Calls match special. They match alone with no children.
1040 // Their children, the incoming arguments, match normally.
1041 m = n->is_SafePoint() ? match_sfpt(n->as_SafePoint()):match_tree(n);
1042 if (C->failing()) return nullptr;
1043 if (m == nullptr) { Matcher::soft_match_failure(); return nullptr; }
1044 if (n->is_MemBar()) {
1045 m->as_MachMemBar()->set_adr_type(n->adr_type());
1046 }
1047 } else { // Nothing the matcher cares about
1048 if (n->is_Proj() && n->in(0) != nullptr && n->in(0)->is_Multi()) { // Projections?
1049 if (n->in(0)->is_Initialize() && n->as_Proj()->_con == TypeFunc::Memory) {
1050 // Initialize may have multiple NarrowMem projections. They would all match to identical raw mem MachProjs.
1051 // We don't need multiple MachProjs. Create one if none already exist, otherwise use existing one.
1052 m = n->in(0)->as_Initialize()->mem_mach_proj();
1053 if (m == nullptr && has_new_node(n->in(0))) {
1054 InitializeNode* new_init = new_node(n->in(0))->as_Initialize();
1055 m = new_init->mem_mach_proj();
1056 }
1057 assert(m == nullptr || m->is_MachProj(), "no mem projection yet or a MachProj created during matching");
1058 }
1059 if (m == nullptr) {
1060 // Convert to machine-dependent projection
1061 m = n->in(0)->as_Multi()->match( n->as_Proj(), this );
1062 NOT_PRODUCT(record_new2old(m, n);)
1063 }
1064 if (m->in(0) != nullptr) // m might be top
1065 collect_null_checks(m, n);
1066 } else { // Else just a regular 'ol guy
1067 m = n->clone(); // So just clone into new-space
1068 NOT_PRODUCT(record_new2old(m, n);)
1069 // Def-Use edges will be added incrementally as Uses
1070 // of this node are matched.
1071 assert(m->outcnt() == 0, "no Uses of this clone yet");
1072 }
1073 }
1074
1075 set_new_node(n, m); // Map old to new
1076 if (_old_node_note_array != nullptr) {
1077 Node_Notes* nn = C->locate_node_notes(_old_node_note_array,
1078 n->_idx);
1079 C->set_node_notes_at(m->_idx, nn);
1080 }
1081 DEBUG_ONLY(match_alias_type(C, n, m));
1082 }
1083 n = m; // n is now a new-space node
1084 mstack.set_node(n);
1085 }
1086
1087 // New space!
1088 if (_visited.test_set(n->_idx)) continue; // while(mstack.is_nonempty())
1089
1090 int i;
1091 // Put precedence edges on stack first (match them last).
1092 for (i = oldn->req(); (uint)i < oldn->len(); i++) {
1093 Node *m = oldn->in(i);
1094 if (m == nullptr) break;
1095 // set -1 to call add_prec() instead of set_req() during Step1
1096 mstack.push(m, Visit, n, -1);
1097 }
1098
1099 // Handle precedence edges for interior nodes
1100 for (i = n->len()-1; (uint)i >= n->req(); i--) {
1101 Node *m = n->in(i);
1102 if (m == nullptr || C->node_arena()->contains(m)) continue;
1103 n->rm_prec(i);
1104 // set -1 to call add_prec() instead of set_req() during Step1
1105 mstack.push(m, Visit, n, -1);
1106 }
1107
1108 // For constant debug info, I'd rather have unmatched constants.
1109 int cnt = n->req();
1110 JVMState* jvms = n->jvms();
1111 int debug_cnt = jvms ? jvms->debug_start() : cnt;
1112
1113 // Now do only debug info. Clone constants rather than matching.
1114 // Constants are represented directly in the debug info without
1115 // the need for executable machine instructions.
1116 // Monitor boxes are also represented directly.
1117 for (i = cnt - 1; i >= debug_cnt; --i) { // For all debug inputs do
1118 Node *m = n->in(i); // Get input
1119 int op = m->Opcode();
1120 assert((op == Op_BoxLock) == jvms->is_monitor_use(i), "boxes only at monitor sites");
1121 if( op == Op_ConI || op == Op_ConP || op == Op_ConN || op == Op_ConNKlass ||
1122 op == Op_ConF || op == Op_ConD || op == Op_ConL
1123 // || op == Op_BoxLock // %%%% enable this and remove (+++) in chaitin.cpp
1124 ) {
1125 m = m->clone();
1126 NOT_PRODUCT(record_new2old(m, n));
1127 mstack.push(m, Post_Visit, n, i); // Don't need to visit
1128 mstack.push(m->in(0), Visit, m, 0);
1129 } else {
1130 mstack.push(m, Visit, n, i);
1131 }
1132 }
1133
1134 // And now walk his children, and convert his inputs to new-space.
1135 for( ; i >= 0; --i ) { // For all normal inputs do
1136 Node *m = n->in(i); // Get input
1137 if(m != nullptr)
1138 mstack.push(m, Visit, n, i);
1139 }
1140
1141 }
1142 else if (nstate == Post_Visit) {
1143 // Set xformed input
1144 Node *p = mstack.parent();
1145 if (p != nullptr) { // root doesn't have parent
1146 int i = (int)mstack.index();
1147 if (i >= 0)
1148 p->set_req(i, n); // required input
1149 else if (i == -1)
1150 p->add_prec(n); // precedence input
1151 else
1152 ShouldNotReachHere();
1153 }
1154 mstack.pop(); // remove processed node from stack
1155 }
1156 else {
1157 ShouldNotReachHere();
1158 }
1159 } // while (mstack.is_nonempty())
1160 return n; // Return new-space Node
1161 }
1162
1163 //------------------------------warp_outgoing_stk_arg------------------------
1164 OptoReg::Name Matcher::warp_outgoing_stk_arg( VMReg reg, OptoReg::Name begin_out_arg_area, OptoReg::Name &out_arg_limit_per_call ) {
1165 // Convert outgoing argument location to a pre-biased stack offset
1166 if (reg->is_stack()) {
1167 OptoReg::Name warped = reg->reg2stack();
1168 // Adjust the stack slot offset to be the register number used
1169 // by the allocator.
1170 warped = OptoReg::add(begin_out_arg_area, warped);
1171 // Keep track of the largest numbered stack slot used for an arg.
1172 // Largest used slot per call-site indicates the amount of stack
1173 // that is killed by the call.
1174 if (warped >= out_arg_limit_per_call) {
1175 out_arg_limit_per_call = OptoReg::add(warped, 1);
1176 }
1177 return warped;
1178 }
1179 return OptoReg::as_OptoReg(reg);
1180 }
1181
1182
1183 //------------------------------match_sfpt-------------------------------------
1184 // Helper function to match call instructions. Calls match special.
1185 // They match alone with no children. Their children, the incoming
1186 // arguments, match normally.
1187 MachNode *Matcher::match_sfpt( SafePointNode *sfpt ) {
1188 MachSafePointNode *msfpt = nullptr;
1189 MachCallNode *mcall = nullptr;
1190 uint cnt;
1191 // Split out case for SafePoint vs Call
1192 CallNode *call;
1193 const TypeTuple *domain;
1194 ciMethod* method = nullptr;
1195 if( sfpt->is_Call() ) {
1196 call = sfpt->as_Call();
1197 domain = call->tf()->domain();
1198 cnt = domain->cnt();
1199
1200 // Match just the call, nothing else
1201 MachNode *m = match_tree(call);
1202 if (C->failing()) return nullptr;
1203 if( m == nullptr ) { Matcher::soft_match_failure(); return nullptr; }
1204
1205 // Copy data from the Ideal SafePoint to the machine version
1206 mcall = m->as_MachCall();
1207
1208 mcall->set_tf( call->tf());
1209 mcall->set_entry_point( call->entry_point());
1210 mcall->set_cnt( call->cnt());
1211 mcall->set_guaranteed_safepoint(call->guaranteed_safepoint());
1212
1213 if( mcall->is_MachCallJava() ) {
1214 MachCallJavaNode *mcall_java = mcall->as_MachCallJava();
1215 const CallJavaNode *call_java = call->as_CallJava();
1216 assert(call_java->validate_symbolic_info(), "inconsistent info");
1217 method = call_java->method();
1218 mcall_java->_method = method;
1219 mcall_java->_optimized_virtual = call_java->is_optimized_virtual();
1220 mcall_java->_override_symbolic_info = call_java->override_symbolic_info();
1221 mcall_java->_arg_escape = call_java->arg_escape();
1222 if( mcall_java->is_MachCallStaticJava() )
1223 mcall_java->as_MachCallStaticJava()->_name =
1224 call_java->as_CallStaticJava()->_name;
1225 if( mcall_java->is_MachCallDynamicJava() )
1226 mcall_java->as_MachCallDynamicJava()->_vtable_index =
1227 call_java->as_CallDynamicJava()->_vtable_index;
1228 }
1229 else if( mcall->is_MachCallRuntime() ) {
1230 MachCallRuntimeNode* mach_call_rt = mcall->as_MachCallRuntime();
1231 mach_call_rt->_name = call->as_CallRuntime()->_name;
1232 mach_call_rt->_leaf_no_fp = call->is_CallLeafNoFP();
1233 }
1234 msfpt = mcall;
1235 }
1236 // This is a non-call safepoint
1237 else {
1238 call = nullptr;
1239 domain = nullptr;
1240 MachNode *mn = match_tree(sfpt);
1241 if (C->failing()) return nullptr;
1242 msfpt = mn->as_MachSafePoint();
1243 cnt = TypeFunc::Parms;
1244 }
1245 msfpt->_has_ea_local_in_scope = sfpt->has_ea_local_in_scope();
1246
1247 // Advertise the correct memory effects (for anti-dependence computation).
1248 msfpt->set_adr_type(sfpt->adr_type());
1249
1250 // Allocate a private array of RegMasks. These RegMasks are not shared.
1251 msfpt->_in_rms = NEW_RESOURCE_ARRAY( RegMask, cnt );
1252 // Empty them all.
1253 for (uint i = 0; i < cnt; i++) {
1254 ::new (msfpt->_in_rms + i) RegMask(C->comp_arena());
1255 }
1256
1257 // Do all the pre-defined non-Empty register masks
1258 msfpt->_in_rms[TypeFunc::ReturnAdr].assignFrom(_return_addr_mask);
1259 msfpt->_in_rms[TypeFunc::FramePtr ].assignFrom(c_frame_ptr_mask);
1260
1261 // Place first outgoing argument can possibly be put.
1262 OptoReg::Name begin_out_arg_area = OptoReg::add(_new_SP, C->out_preserve_stack_slots());
1263 assert( is_even(begin_out_arg_area), "" );
1264 // Compute max outgoing register number per call site.
1265 OptoReg::Name out_arg_limit_per_call = begin_out_arg_area;
1266 // Calls to C may hammer extra stack slots above and beyond any arguments.
1267 // These are usually backing store for register arguments for varargs.
1268 if( call != nullptr && call->is_CallRuntime() )
1269 out_arg_limit_per_call = OptoReg::add(out_arg_limit_per_call,C->varargs_C_out_slots_killed());
1270
1271
1272 // Do the normal argument list (parameters) register masks
1273 int argcnt = cnt - TypeFunc::Parms;
1274 if( argcnt > 0 ) { // Skip it all if we have no args
1275 BasicType *sig_bt = NEW_RESOURCE_ARRAY( BasicType, argcnt );
1276 VMRegPair *parm_regs = NEW_RESOURCE_ARRAY( VMRegPair, argcnt );
1277 int i;
1278 for( i = 0; i < argcnt; i++ ) {
1279 sig_bt[i] = domain->field_at(i+TypeFunc::Parms)->basic_type();
1280 }
1281 // V-call to pick proper calling convention
1282 call->calling_convention( sig_bt, parm_regs, argcnt );
1283
1284 #ifdef ASSERT
1285 // Sanity check users' calling convention. Really handy during
1286 // the initial porting effort. Fairly expensive otherwise.
1287 { for (int i = 0; i<argcnt; i++) {
1288 if( !parm_regs[i].first()->is_valid() &&
1289 !parm_regs[i].second()->is_valid() ) continue;
1290 VMReg reg1 = parm_regs[i].first();
1291 VMReg reg2 = parm_regs[i].second();
1292 for (int j = 0; j < i; j++) {
1293 if( !parm_regs[j].first()->is_valid() &&
1294 !parm_regs[j].second()->is_valid() ) continue;
1295 VMReg reg3 = parm_regs[j].first();
1296 VMReg reg4 = parm_regs[j].second();
1297 if( !reg1->is_valid() ) {
1298 assert( !reg2->is_valid(), "valid halvsies" );
1299 } else if( !reg3->is_valid() ) {
1300 assert( !reg4->is_valid(), "valid halvsies" );
1301 } else {
1302 assert( reg1 != reg2, "calling conv. must produce distinct regs");
1303 assert( reg1 != reg3, "calling conv. must produce distinct regs");
1304 assert( reg1 != reg4, "calling conv. must produce distinct regs");
1305 assert( reg2 != reg3, "calling conv. must produce distinct regs");
1306 assert( reg2 != reg4 || !reg2->is_valid(), "calling conv. must produce distinct regs");
1307 assert( reg3 != reg4, "calling conv. must produce distinct regs");
1308 }
1309 }
1310 }
1311 }
1312 #endif
1313
1314 // Visit each argument. Compute its outgoing register mask.
1315 // Return results now can have 2 bits returned.
1316 // Compute max over all outgoing arguments both per call-site
1317 // and over the entire method.
1318 for( i = 0; i < argcnt; i++ ) {
1319 // Address of incoming argument mask to fill in
1320 RegMask *rm = &mcall->_in_rms[i+TypeFunc::Parms];
1321 VMReg first = parm_regs[i].first();
1322 VMReg second = parm_regs[i].second();
1323 if(!first->is_valid() &&
1324 !second->is_valid()) {
1325 continue; // Avoid Halves
1326 }
1327 // Handle case where arguments are in vector registers.
1328 if(call->in(TypeFunc::Parms + i)->bottom_type()->isa_vect()) {
1329 OptoReg::Name reg_fst = OptoReg::as_OptoReg(first);
1330 OptoReg::Name reg_snd = OptoReg::as_OptoReg(second);
1331 assert (reg_fst <= reg_snd, "fst=%d snd=%d", reg_fst, reg_snd);
1332 for (OptoReg::Name r = reg_fst; r <= reg_snd; r++) {
1333 rm->insert(r);
1334 }
1335 }
1336 // Grab first register, adjust stack slots and insert in mask.
1337 OptoReg::Name reg1 = warp_outgoing_stk_arg(first, begin_out_arg_area, out_arg_limit_per_call );
1338 if (OptoReg::is_valid(reg1))
1339 rm->insert(reg1);
1340 // Grab second register (if any), adjust stack slots and insert in mask.
1341 OptoReg::Name reg2 = warp_outgoing_stk_arg(second, begin_out_arg_area, out_arg_limit_per_call );
1342 if (OptoReg::is_valid(reg2))
1343 rm->insert(reg2);
1344 } // End of for all arguments
1345 }
1346
1347 // Compute the max stack slot killed by any call. These will not be
1348 // available for debug info, and will be used to adjust FIRST_STACK_mask
1349 // after all call sites have been visited.
1350 if( _out_arg_limit < out_arg_limit_per_call)
1351 _out_arg_limit = out_arg_limit_per_call;
1352
1353 if (mcall) {
1354 // Kill the outgoing argument area, including any non-argument holes and
1355 // any legacy C-killed slots. Use Fat-Projections to do the killing.
1356 // Since the max-per-method covers the max-per-call-site and debug info
1357 // is excluded on the max-per-method basis, debug info cannot land in
1358 // this killed area.
1359 uint r_cnt = mcall->tf()->range()->cnt();
1360 MachProjNode* proj = new MachProjNode(mcall, r_cnt + 10000, RegMask::EMPTY, MachProjNode::fat_proj);
1361 for (int i = begin_out_arg_area; i < out_arg_limit_per_call; i++) {
1362 proj->_rout.insert(OptoReg::Name(i));
1363 }
1364 if (!proj->_rout.is_empty()) {
1365 push_projection(proj);
1366 }
1367 }
1368 // Transfer the safepoint information from the call to the mcall
1369 // Move the JVMState list
1370 msfpt->set_jvms(sfpt->jvms());
1371 for (JVMState* jvms = msfpt->jvms(); jvms; jvms = jvms->caller()) {
1372 jvms->set_map(sfpt);
1373 }
1374
1375 // Debug inputs begin just after the last incoming parameter
1376 assert((mcall == nullptr) || (mcall->jvms() == nullptr) ||
1377 (mcall->jvms()->debug_start() + mcall->_jvmadj == mcall->tf()->domain()->cnt()), "");
1378
1379 // Add additional edges.
1380 if (msfpt->mach_constant_base_node_input() != (uint)-1 && !msfpt->is_MachCallLeaf()) {
1381 // For these calls we cannot add MachConstantBase in expand(), as the
1382 // ins are not complete then.
1383 msfpt->ins_req(msfpt->mach_constant_base_node_input(), C->mach_constant_base_node());
1384 if (msfpt->jvms() &&
1385 msfpt->mach_constant_base_node_input() <= msfpt->jvms()->debug_start() + msfpt->_jvmadj) {
1386 // We added an edge before jvms, so we must adapt the position of the ins.
1387 msfpt->jvms()->adapt_position(+1);
1388 }
1389 }
1390
1391 // Registers killed by the call are set in the local scheduling pass
1392 // of Global Code Motion.
1393 return msfpt;
1394 }
1395
1396 //---------------------------match_tree----------------------------------------
1397 // Match an Ideal Node DAG - turn it into a tree; Label & Reduce. Used as part
1398 // of the wholesale conversion from Ideal to Mach Nodes. Also used for
1399 // making GotoNodes while building the CFG and in init_spill_mask() to identify
1400 // a Load's result RegMask for memoization in idealreg2regmask[].
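// For example (illustrative only): an Ideal (AddI (LoadI mem adr) con) subtree
// may reduce to a single load-and-add MachNode if the AD file provides such a
// rule, or to separate load and add MachNodes otherwise.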
1401 MachNode *Matcher::match_tree( const Node *n ) {
1402 assert( n->Opcode() != Op_Phi, "cannot match" );
1403 assert( !n->is_block_start(), "cannot match" );
1404 // Set the mark for all locally allocated State objects.
1405 // When this call returns, the _states_arena will be reset,
1406 // freeing all State objects.
1407 ResourceMark rm( &_states_arena );
1408
1409 LabelRootDepth = 0;
1410
1411 // StoreNodes require their Memory input to match any LoadNodes
1412 Node *mem = n->is_Store() ? n->in(MemNode::Memory) : (Node*)1 ;
1413 #ifdef ASSERT
1414 Node* save_mem_node = _mem_node;
1415 _mem_node = n->is_Store() ? (Node*)n : nullptr;
1416 #endif
1417 // State object for root node of match tree
1418 // Allocate it on _states_arena - stack allocation can cause stack overflow.
1419 State *s = new (&_states_arena) State;
1420 s->_kids[0] = nullptr;
1421 s->_kids[1] = nullptr;
1422 s->_leaf = (Node*)n;
1423 // Label the input tree, allocating labels from top-level arena
1424 Node* root_mem = mem;
1425 Label_Root(n, s, n->in(0), root_mem);
1426 if (C->failing()) return nullptr;
1427
1428 // The minimum cost match for the whole tree is found at the root State
1429 uint mincost = max_juint;
1430 uint cost = max_juint;
1431 uint i;
1432 for (i = 0; i < NUM_OPERANDS; i++) {
1433 if (s->valid(i) && // valid entry and
1434 s->cost(i) < cost && // low cost and
1435 s->rule(i) >= NUM_OPERANDS) {// not an operand
1436 mincost = i;
1437 cost = s->cost(i);
1438 }
1439 }
1440 if (mincost == max_juint) {
1441 #ifndef PRODUCT
1442 tty->print("No matching rule for:");
1443 s->dump();
1444 #endif
1445 Matcher::soft_match_failure();
1446 return nullptr;
1447 }
1448 // Reduce input tree based upon the state labels to machine Nodes
1449 MachNode *m = ReduceInst(s, s->rule(mincost), mem);
1450 // New-to-old mapping is done in ReduceInst, to cover complex instructions.
1451 NOT_PRODUCT(_old2new_map.map(n->_idx, m);)
1452
1453 // Add any Matcher-ignored edges
1454 uint cnt = n->req();
1455 uint start = 1;
1456 if( mem != (Node*)1 ) start = MemNode::Memory+1;
1457 if( n->is_AddP() ) {
1458 assert( mem == (Node*)1, "" );
1459 start = AddPNode::Base+1;
1460 }
1461 for( i = start; i < cnt; i++ ) {
1462 if( !n->match_edge(i) ) {
1463 if( i < m->req() )
1464 m->ins_req( i, n->in(i) );
1465 else
1466 m->add_req( n->in(i) );
1467 }
1468 }
1469
1470 DEBUG_ONLY( _mem_node = save_mem_node; )
1471 return m;
1472 }
1473
1474
1475 //------------------------------match_into_reg---------------------------------
1476 // Choose to either match this Node in a register or part of the current
1477 // match tree. Return true for requiring a register and false for matching
1478 // as part of the current match tree.
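// Rough illustration: if 'm' is an AddI feeding two unrelated users it is
// shared and must be matched into its own register, while a constant, or an
// unshared node with compatible control, can be swallowed into the match tree
// of its user 'n'.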
1479 static bool match_into_reg( const Node *n, Node *m, Node *control, int i, bool shared ) {
1480
1481 const Type *t = m->bottom_type();
1482
1483 if (t->singleton()) {
1484 // Never force constants into registers. Allow them to match as
1485 // constants or registers. Copies of the same value will share
1486 // the same register. See find_shared_node.
1487 return false;
1488 } else { // Not a constant
1489 if (!shared && Matcher::is_encode_and_store_pattern(n, m)) {
1490 // Make it possible to match "encode and store" patterns with non-shared
1491 // encode operations that are pinned to a control node (e.g. by CastPP
1492 // node removal in final graph reshaping). The matched instruction cannot
1493 // float above the encode's control node because it is pinned to the
1494 // store's control node.
1495 return false;
1496 }
1497 // Stop recursion if they have different Controls.
1498 Node* m_control = m->in(0);
1499 // The control of a load's memory can post-dominate the load's control.
1500 // So use it, since the load can't float above its memory.
1501 Node* mem_control = (m->is_Load()) ? m->in(MemNode::Memory)->in(0) : nullptr;
1502 if (control && m_control && control != m_control && control != mem_control) {
1503
1504 // Actually, we can live with the most conservative control we
1505 // find, if it post-dominates the others. This allows us to
1506 // pick up load/op/store trees where the load can float a little
1507 // above the store.
1508 Node *x = control;
1509 const uint max_scan = 6; // Arbitrary scan cutoff
1510 uint j;
1511 for (j=0; j<max_scan; j++) {
1512 if (x->is_Region()) // Bail out at merge points
1513 return true;
1514 x = x->in(0);
1515 if (x == m_control) // Does 'control' post-dominate
1516 break; // m->in(0)? If so, we can use it
1517 if (x == mem_control) // Does 'control' post-dominate
1518 break; // mem_control? If so, we can use it
1519 }
1520 if (j == max_scan) // No post-domination before scan end?
1521 return true; // Then break the match tree up
1522 }
1523 if ((m->is_DecodeN() && Matcher::narrow_oop_use_complex_address()) ||
1524 (m->is_DecodeNKlass() && Matcher::narrow_klass_use_complex_address())) {
1525 // These are commonly used in address expressions and can
1526 // efficiently fold into them on X64 in some cases.
1527 return false;
1528 }
1529 }
1530
1531 // Not forceable cloning. If shared, put it into a register.
1532 return shared;
1533 }
1534
1535
1536 //------------------------------Instruction Selection--------------------------
1537 // Label method walks a "tree" of nodes, using the ADLC generated DFA to match
1538 // ideal nodes to machine instructions. Trees are delimited by shared Nodes,
1539 // things the Matcher does not match (e.g., Memory), and things with different
1540 // Controls (hence forced into different blocks). We pass in the Control
1541 // selected for this entire State tree.
1542
1543 // The Matcher works on Trees, but an Intel add-to-memory requires a DAG: the
1544 // Store and the Load must have identical Memories (as well as identical
1545 // pointers). Since the Matcher does not have anything for Memory (and
1546 // does not handle DAGs), I have to match the Memory input myself. If the
1547 // Tree root is a Store or if there are multiple Loads in the tree, I require
1548 // all Loads to have the identical memory.
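// For example, (StoreI mem adr (AddI (LoadI mem adr) v)) can be matched as one
// add-to-memory tree only because the Load uses the same Memory as the Store;
// a Load with a different memory state is forced into its own match tree (see
// the input_mem/NodeSentinel checks below).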
1549 Node* Matcher::Label_Root(const Node* n, State* svec, Node* control, Node*& mem) {
1550 // Since Label_Root is a recursive function, it's possible that we might run
1551 // out of stack space. See bugs 6272980 & 6227033 for more info.
1552 LabelRootDepth++;
1553 if (LabelRootDepth > MaxLabelRootDepth) {
1554 // Bailout. Can for example be hit with a deep chain of operations.
1555 C->record_method_not_compilable("Out of stack space, increase MaxLabelRootDepth");
1556 return nullptr;
1557 }
1558 uint care = 0; // Edges matcher cares about
1559 uint cnt = n->req();
1560 uint i = 0;
1561
1562 // Examine children for memory state
1563 // Can only subsume a child into your match-tree if that child's memory state
1564 // is not modified along the path to another input.
1565 // It is unsafe even if the other inputs are separate roots.
1566 Node *input_mem = nullptr;
1567 for( i = 1; i < cnt; i++ ) {
1568 if( !n->match_edge(i) ) continue;
1569 Node *m = n->in(i); // Get ith input
1570 assert( m, "expect non-null children" );
1571 if( m->is_Load() ) {
1572 if( input_mem == nullptr ) {
1573 input_mem = m->in(MemNode::Memory);
1574 if (mem == (Node*)1) {
1575 // Save this memory to bail out if there's another memory access
1576 // to a different memory location in the same tree.
1577 mem = input_mem;
1578 }
1579 } else if( input_mem != m->in(MemNode::Memory) ) {
1580 input_mem = NodeSentinel;
1581 }
1582 }
1583 }
1584
1585 for( i = 1; i < cnt; i++ ){// For my children
1586 if( !n->match_edge(i) ) continue;
1587 Node *m = n->in(i); // Get ith input
1588 // Allocate states out of a private arena
1589 State *s = new (&_states_arena) State;
1590 svec->_kids[care++] = s;
1591 assert( care <= 2, "binary only for now" );
1592
1593 // Recursively label the State tree.
1594 s->_kids[0] = nullptr;
1595 s->_kids[1] = nullptr;
1596 s->_leaf = m;
1597
1598 // Check for leaves of the State Tree; things that cannot be a part of
1599 // the current tree. If it finds any, that value is matched as a
1600 // register operand. If not, then the normal matching is used.
1601 if( match_into_reg(n, m, control, i, is_shared(m)) ||
1602 // Stop recursion if this is a LoadNode and there is another memory access
1603 // to a different memory location in the same tree (for example, a StoreNode
1604 // at the root of this tree or another LoadNode in one of the children).
1605 ((mem!=(Node*)1) && m->is_Load() && m->in(MemNode::Memory) != mem) ||
1606 // Can NOT include the match of a subtree when its memory state
1607 // is used by any of the other subtrees
1608 (input_mem == NodeSentinel) ) {
1609 // Print when we exclude matching due to different memory states at input-loads
1610 if (PrintOpto && (Verbose && WizardMode) && (input_mem == NodeSentinel)
1611 && !((mem!=(Node*)1) && m->is_Load() && m->in(MemNode::Memory) != mem)) {
1612 tty->print_cr("invalid input_mem");
1613 }
1614 // Switch to a register-only opcode; this value must be in a register
1615 // and cannot be subsumed as part of a larger instruction.
1616 s->DFA( m->ideal_reg(), m );
1617
1618 } else {
1619 // If match tree has no control and we do, adopt it for entire tree
1620 if( control == nullptr && m->in(0) != nullptr && m->req() > 1 )
1621 control = m->in(0); // Pick up control
1622 // Else match as a normal part of the match tree.
1623 control = Label_Root(m, s, control, mem);
1624 if (C->failing()) return nullptr;
1625 }
1626 }
1627
1628 // Call DFA to match this node, and return
1629 svec->DFA( n->Opcode(), n );
1630
1631 uint x;
1632 for( x = 0; x < _LAST_MACH_OPER; x++ )
1633 if( svec->valid(x) )
1634 break;
1635
1636 if (x >= _LAST_MACH_OPER) {
1637 #ifdef ASSERT
1638 n->dump();
1639 svec->dump();
1640 #endif
1641 assert( false, "bad AD file" );
1642 C->record_failure("bad AD file");
1643 }
1644 return control;
1645 }
1646
1647
1648 // Con nodes reduced using the same rule can share their MachNode
1649 // which reduces the number of copies of a constant in the final
1650 // program. The register allocator is free to split uses later to
1651 // split live ranges.
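// For instance, several uses of the same ConP that reduce via the same rule
// will all point at one MachNode; the allocator can still split that live
// range later if register pressure demands it.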
1652 MachNode* Matcher::find_shared_node(Node* leaf, uint rule) {
1653 if (!leaf->is_Con() && !leaf->is_DecodeNarrowPtr()) return nullptr;
1654
1655 // See if this Con has already been reduced using this rule.
1656 if (_shared_nodes.max() <= leaf->_idx) return nullptr;
1657 MachNode* last = (MachNode*)_shared_nodes.at(leaf->_idx);
1658 if (last != nullptr && rule == last->rule()) {
1659 // Don't expect control change for DecodeN
1660 if (leaf->is_DecodeNarrowPtr())
1661 return last;
1662 // Get the new space root.
1663 Node* xroot = new_node(C->root());
1664 if (xroot == nullptr) {
1665 // This shouldn't happen given the order of matching.
1666 return nullptr;
1667 }
1668
1669 // Shared constants need to have their control be root so they
1670 // can be scheduled properly.
1671 Node* control = last->in(0);
1672 if (control != xroot) {
1673 if (control == nullptr || control == C->root()) {
1674 last->set_req(0, xroot);
1675 } else {
1676 assert(false, "unexpected control");
1677 return nullptr;
1678 }
1679 }
1680 return last;
1681 }
1682 return nullptr;
1683 }
1684
1685
1686 //------------------------------ReduceInst-------------------------------------
1687 // Reduce a State tree (with given Control) into a tree of MachNodes.
1688 // This routine (and its cohort ReduceOper) converts Ideal Nodes into
1689 // complicated machine Nodes. Each MachNode covers some tree of Ideal Nodes.
1690 // Each MachNode has a number of complicated MachOper operands; each
1691 // MachOper also covers a further tree of Ideal Nodes.
1692
1693 // The root of the Ideal match tree is always an instruction, so we enter
1694 // the recursion here. After building the MachNode, we need to recurse
1695 // the tree checking for these cases:
1696 // (1) Child is an instruction -
1697 // Build the instruction (recursively), add it as an edge.
1698 // Build a simple operand (register) to hold the result of the instruction.
1699 // (2) Child is an interior part of an instruction -
1700 // Skip over it (do nothing)
1701 // (3) Child is the start of an operand -
1702 // Build the operand, place it inside the instruction
1703 // Call ReduceOper.
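// Sketch (assuming separate load and add rules): for (AddI (LoadI mem adr) con),
// ReduceInst builds the add MachNode, case (1) applies to the LoadI child
// (ReduceInst again, with a register operand to catch its result), and case (3)
// applies to the constant, which becomes an immediate operand via ReduceOper.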
1704 MachNode *Matcher::ReduceInst( State *s, int rule, Node *&mem ) {
1705 assert( rule >= NUM_OPERANDS, "called with operand rule" );
1706
1707 MachNode* shared_node = find_shared_node(s->_leaf, rule);
1708 if (shared_node != nullptr) {
1709 return shared_node;
1710 }
1711
1712 // Build the object to represent this state & prepare for recursive calls
1713 MachNode *mach = s->MachNodeGenerator(rule);
1714 guarantee(mach != nullptr, "Missing MachNode");
1715 mach->_opnds[0] = s->MachOperGenerator(_reduceOp[rule]);
1716 assert( mach->_opnds[0] != nullptr, "Missing result operand" );
1717 Node *leaf = s->_leaf;
1718 NOT_PRODUCT(record_new2old(mach, leaf);)
1719 // Check for instruction or instruction chain rule
1720 if( rule >= _END_INST_CHAIN_RULE || rule < _BEGIN_INST_CHAIN_RULE ) {
1721 assert(C->node_arena()->contains(s->_leaf) || !has_new_node(s->_leaf),
1722 "duplicating node that's already been matched");
1723 // Instruction
1724 mach->add_req( leaf->in(0) ); // Set initial control
1725 // Reduce interior of complex instruction
1726 ReduceInst_Interior( s, rule, mem, mach, 1 );
1727 } else {
1728 // Instruction chain rules are data-dependent on their inputs
1729 mach->add_req(nullptr); // Set initial control to none
1730 ReduceInst_Chain_Rule( s, rule, mem, mach );
1731 }
1732
1733 // If a Memory was used, insert a Memory edge
1734 if( mem != (Node*)1 ) {
1735 mach->ins_req(MemNode::Memory,mem);
1736 #ifdef ASSERT
1737 // Verify adr type after matching memory operation
1738 const MachOper* oper = mach->memory_operand();
1739 if (oper != nullptr && oper != (MachOper*)-1) {
1740 // It has a unique memory operand. Find corresponding ideal mem node.
1741 Node* m = nullptr;
1742 if (leaf->is_Mem()) {
1743 m = leaf;
1744 } else {
1745 m = _mem_node;
1746 assert(m != nullptr && m->is_Mem(), "expecting memory node");
1747 }
1748 const Type* mach_at = mach->adr_type();
1749 // A DecodeN node consumed by an address may have a different type
1750 // than its input. Don't compare types in such a case.
1751 if (m->adr_type() != mach_at &&
1752 (m->in(MemNode::Address)->is_DecodeNarrowPtr() ||
1753 (m->in(MemNode::Address)->is_AddP() &&
1754 m->in(MemNode::Address)->in(AddPNode::Address)->is_DecodeNarrowPtr()) ||
1755 (m->in(MemNode::Address)->is_AddP() &&
1756 m->in(MemNode::Address)->in(AddPNode::Address)->is_AddP() &&
1757 m->in(MemNode::Address)->in(AddPNode::Address)->in(AddPNode::Address)->is_DecodeNarrowPtr()))) {
1758 mach_at = m->adr_type();
1759 }
1760 if (m->adr_type() != mach_at) {
1761 m->dump();
1762 tty->print_cr("mach:");
1763 mach->dump(1);
1764 }
1765 assert(m->adr_type() == mach_at, "matcher should not change adr type");
1766 }
1767 #endif
1768 }
1769
1770 // If the _leaf is an AddP, insert the base edge
1771 if (leaf->is_AddP()) {
1772 mach->ins_req(AddPNode::Base,leaf->in(AddPNode::Base));
1773 }
1774
1775 uint number_of_projections_prior = number_of_projections();
1776
1777 // Perform any 1-to-many expansions required
1778 MachNode *ex = mach->Expand(s, _projection_list, mem);
1779 if (ex != mach) {
1780 assert(ex->ideal_reg() == mach->ideal_reg(), "ideal types should match");
1781 if( ex->in(1)->is_Con() )
1782 ex->in(1)->set_req(0, C->root());
1783 // Remove old node from the graph
1784 for( uint i=0; i<mach->req(); i++ ) {
1785 mach->set_req(i,nullptr);
1786 }
1787 NOT_PRODUCT(record_new2old(ex, s->_leaf);)
1788 }
1789
1790 // PhaseChaitin::fixup_spills will sometimes generate spill code
1791 // via the matcher. By that time, nodes have been wired into the CFG,
1792 // and any further nodes generated by expand rules will be left hanging
1793 // in space, and will not get emitted as output code. Catch this.
1794 // Also, catch any new register allocation constraints ("projections")
1795 // generated belatedly during spill code generation.
1796 if (_allocation_started) {
1797 guarantee(ex == mach, "no expand rules during spill generation");
1798 guarantee(number_of_projections_prior == number_of_projections(), "no allocation during spill generation");
1799 }
1800
1801 if (leaf->is_Con() || leaf->is_DecodeNarrowPtr()) {
1802 // Record the con for sharing
1803 _shared_nodes.map(leaf->_idx, ex);
1804 }
1805
1806 // Have mach nodes inherit GC barrier data
1807 mach->set_barrier_data(MemNode::barrier_data(leaf));
1808
1809 return ex;
1810 }
1811
1812 void Matcher::handle_precedence_edges(Node* n, MachNode *mach) {
1813 for (uint i = n->req(); i < n->len(); i++) {
1814 if (n->in(i) != nullptr) {
1815 mach->add_prec(n->in(i));
1816 }
1817 }
1818 }
1819
1820 void Matcher::ReduceInst_Chain_Rule(State* s, int rule, Node* &mem, MachNode* mach) {
1821 // 'op' is what I am expecting to receive
1822 int op = _leftOp[rule];
1823 // Operand type to catch the child's result
1824 // This is what my child will give me.
1825 unsigned int opnd_class_instance = s->rule(op);
1826 // Choose between operand class or not.
1827 // This is what I will receive.
1828 int catch_op = (FIRST_OPERAND_CLASS <= op && op < NUM_OPERANDS) ? opnd_class_instance : op;
1829 // New rule for child. Chase operand classes to get the actual rule.
1830 unsigned int newrule = s->rule(catch_op);
1831
1832 if (newrule < NUM_OPERANDS) {
1833 // Chain from operand or operand class, may be output of shared node
1834 assert(opnd_class_instance < NUM_OPERANDS, "Bad AD file: Instruction chain rule must chain from operand");
1835 // Insert operand into array of operands for this instruction
1836 mach->_opnds[1] = s->MachOperGenerator(opnd_class_instance);
1837
1838 ReduceOper(s, newrule, mem, mach);
1839 } else {
1840 // Chain from the result of an instruction
1841 assert(newrule >= _LAST_MACH_OPER, "Do NOT chain from internal operand");
1842 mach->_opnds[1] = s->MachOperGenerator(_reduceOp[catch_op]);
1843 Node *mem1 = (Node*)1;
1844 DEBUG_ONLY(Node *save_mem_node = _mem_node;)
1845 mach->add_req( ReduceInst(s, newrule, mem1) );
1846 DEBUG_ONLY(_mem_node = save_mem_node;)
1847 }
1848 return;
1849 }
1850
1851
1852 uint Matcher::ReduceInst_Interior( State *s, int rule, Node *&mem, MachNode *mach, uint num_opnds ) {
1853 handle_precedence_edges(s->_leaf, mach);
1854
1855 if( s->_leaf->is_Load() ) {
1856 Node *mem2 = s->_leaf->in(MemNode::Memory);
1857 assert( mem == (Node*)1 || mem == mem2, "multiple Memories being matched at once?" );
1858 DEBUG_ONLY( if( mem == (Node*)1 ) _mem_node = s->_leaf;)
1859 mem = mem2;
1860 }
1861 if( s->_leaf->in(0) != nullptr && s->_leaf->req() > 1) {
1862 if( mach->in(0) == nullptr )
1863 mach->set_req(0, s->_leaf->in(0));
1864 }
1865
1866 // Now recursively walk the state tree & add operand list.
1867 for( uint i=0; i<2; i++ ) { // binary tree
1868 State *newstate = s->_kids[i];
1869 if( newstate == nullptr ) break; // Might only have 1 child
1870 // 'op' is what I am expecting to receive
1871 int op;
1872 if( i == 0 ) {
1873 op = _leftOp[rule];
1874 } else {
1875 op = _rightOp[rule];
1876 }
1877 // Operand type to catch the child's result
1878 // This is what my child will give me.
1879 int opnd_class_instance = newstate->rule(op);
1880 // Choose between operand class or not.
1881 // This is what I will receive.
1882 int catch_op = (op >= FIRST_OPERAND_CLASS && op < NUM_OPERANDS) ? opnd_class_instance : op;
1883 // New rule for child. Chase operand classes to get the actual rule.
1884 int newrule = newstate->rule(catch_op);
1885
1886 if (newrule < NUM_OPERANDS) { // Operand/operandClass or internalOp/instruction?
1887 // Operand/operandClass
1888 // Insert operand into array of operands for this instruction
1889 mach->_opnds[num_opnds++] = newstate->MachOperGenerator(opnd_class_instance);
1890 ReduceOper(newstate, newrule, mem, mach);
1891
1892 } else { // Child is internal operand or new instruction
1893 if (newrule < _LAST_MACH_OPER) { // internal operand or instruction?
1894 // internal operand --> call ReduceInst_Interior
1895 // Interior of complex instruction. Do nothing but recurse.
1896 num_opnds = ReduceInst_Interior(newstate, newrule, mem, mach, num_opnds);
1897 } else {
1898 // instruction --> call build operand( ) to catch result
1899 // --> ReduceInst( newrule )
1900 mach->_opnds[num_opnds++] = s->MachOperGenerator(_reduceOp[catch_op]);
1901 Node *mem1 = (Node*)1;
1902 DEBUG_ONLY(Node *save_mem_node = _mem_node;)
1903 mach->add_req( ReduceInst( newstate, newrule, mem1 ) );
1904 DEBUG_ONLY(_mem_node = save_mem_node;)
1905 }
1906 }
1907 assert( mach->_opnds[num_opnds-1], "" );
1908 }
1909 return num_opnds;
1910 }
1911
1912 // This routine walks the interior of possible complex operands.
1913 // At each point we check our children in the match tree:
1914 // (1) No children -
1915 // We are a leaf; add _leaf field as an input to the MachNode
1916 // (2) Child is an internal operand -
1917 // Skip over it ( do nothing )
1918 // (3) Child is an instruction -
1919 // Call ReduceInst recursively and add the
1920 // instruction as an input to the MachNode
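// For example, when reducing a complex memory operand built from (AddP base
// offset), the AddP itself is an internal operand (case 2), while 'base' and
// 'offset' are leaves (case 1) that become direct inputs of the MachNode.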
1921 void Matcher::ReduceOper( State *s, int rule, Node *&mem, MachNode *mach ) {
1922 assert( rule < _LAST_MACH_OPER, "called with operand rule" );
1923 State *kid = s->_kids[0];
1924 assert( kid == nullptr || s->_leaf->in(0) == nullptr, "internal operands have no control" );
1925
1926 // Leaf? And not subsumed?
1927 if( kid == nullptr && !_swallowed[rule] ) {
1928 mach->add_req( s->_leaf ); // Add leaf pointer
1929 return; // Bail out
1930 }
1931
1932 if( s->_leaf->is_Load() ) {
1933 assert( mem == (Node*)1, "multiple Memories being matched at once?" );
1934 mem = s->_leaf->in(MemNode::Memory);
1935 DEBUG_ONLY(_mem_node = s->_leaf;)
1936 }
1937
1938 handle_precedence_edges(s->_leaf, mach);
1939
1940 if( s->_leaf->in(0) && s->_leaf->req() > 1) {
1941 if( !mach->in(0) )
1942 mach->set_req(0,s->_leaf->in(0));
1943 else {
1944 assert( s->_leaf->in(0) == mach->in(0), "same instruction, differing controls?" );
1945 }
1946 }
1947
1948 for (uint i = 0; kid != nullptr && i < 2; kid = s->_kids[1], i++) { // binary tree
1949 int newrule;
1950 if( i == 0) {
1951 newrule = kid->rule(_leftOp[rule]);
1952 } else {
1953 newrule = kid->rule(_rightOp[rule]);
1954 }
1955
1956 if (newrule < _LAST_MACH_OPER) { // Operand or instruction?
1957 // Internal operand; recurse but do nothing else
1958 ReduceOper(kid, newrule, mem, mach);
1959
1960 } else { // Child is a new instruction
1961 // Reduce the instruction, and add a direct pointer from this
1962 // machine instruction to the newly reduced one.
1963 Node *mem1 = (Node*)1;
1964 DEBUG_ONLY(Node *save_mem_node = _mem_node;)
1965 mach->add_req( ReduceInst( kid, newrule, mem1 ) );
1966 DEBUG_ONLY(_mem_node = save_mem_node;)
1967 }
1968 }
1969 }
1970
1971
1972 // -------------------------------------------------------------------------
1973 // Java-Java calling convention
1974 // (what you use when Java calls Java)
1975
1976 //------------------------------find_receiver----------------------------------
1977 // For a given signature, return the OptoReg for parameter 0.
1978 OptoReg::Name Matcher::find_receiver() {
1979 VMRegPair regs;
1980 BasicType sig_bt = T_OBJECT;
1981 SharedRuntime::java_calling_convention(&sig_bt, ®s, 1);
1982 // Return argument 0 register. In the LP64 build pointers
1983 // take 2 registers, but the VM wants only the 'main' name.
1984 return OptoReg::as_OptoReg(regs.first());
1985 }
1986
1987 bool Matcher::is_vshift_con_pattern(Node* n, Node* m) {
1988 if (n != nullptr && m != nullptr) {
1989 return VectorNode::is_vector_shift(n) &&
1990 VectorNode::is_vector_shift_count(m) && m->in(1)->is_Con();
1991 }
1992 return false;
1993 }
1994
1995 bool Matcher::clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
1996 // Must clone all producers of flags, or we will not match correctly.
1997 // Suppose a compare setting int-flags is shared (e.g., by a switch-tree);
1998 // then it will match into an ideal Op_RegFlags. Alas, the fp-flags
1999 // are also there, so we may match a float-branch to int-flags and
2000 // expect the allocator to haul the flags from the int-side to the
2001 // fp-side. No can do.
2002 if (_must_clone[m->Opcode()]) {
2003 mstack.push(m, Visit);
2004 return true;
2005 }
2006 return pd_clone_node(n, m, mstack);
2007 }
2008
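// If the AddP has a constant offset, flag it as part of an address expression
// and push its Address, Offset and Base inputs so the whole base+offset
// computation can be cloned into each memory access rather than being forced
// into a register.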
2009 bool Matcher::clone_base_plus_offset_address(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
2010 Node *off = m->in(AddPNode::Offset);
2011 if (off->is_Con()) {
2012 address_visited.test_set(m->_idx); // Flag as address_visited
2013 mstack.push(m->in(AddPNode::Address), Pre_Visit);
2014 // Clone X+offset as it also folds into most addressing expressions
2015 mstack.push(off, Visit);
2016 mstack.push(m->in(AddPNode::Base), Pre_Visit);
2017 return true;
2018 }
2019 return false;
2020 }
2021
2022 // A method-klass-holder may be passed in the inline_cache_reg
2023 // and then expanded into the inline_cache_reg and a method_ptr register
2024 // defined in ad_<arch>.cpp
2025
2026 //------------------------------find_shared------------------------------------
2027 // Set bits if Node is shared or otherwise a root
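// This is an iterative DFS over the Ideal graph using an explicit MStack.
// Each node moves through Pre_Visit -> Visit -> Post_Visit (If and
// CountedLoopEnd use Alt_Post_Visit); a node reached a second time in
// Pre_Visit is marked shared so it will match into a register of its own.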
2028 void Matcher::find_shared(Node* n) {
2029 // Allocate stack of size C->live_nodes() * 2 to avoid frequent realloc
2030 MStack mstack(C->live_nodes() * 2);
2031 // Mark nodes as address_visited if they are inputs to an address expression
2032 VectorSet address_visited;
2033 mstack.push(n, Visit); // Don't need to pre-visit root node
2034 while (mstack.is_nonempty()) {
2035 n = mstack.node(); // Leave node on stack
2036 Node_State nstate = mstack.state();
2037 uint nop = n->Opcode();
2038 if (nstate == Pre_Visit) {
2039 if (address_visited.test(n->_idx)) { // Visited in address already?
2040 // Flag as visited and shared now.
2041 set_visited(n);
2042 }
2043 if (is_visited(n)) { // Visited already?
2044 // Node is shared and has no reason to clone. Flag it as shared.
2045 // This causes it to match into a register for the sharing.
2046 set_shared(n); // Flag as shared and
2047 if (n->is_DecodeNarrowPtr()) {
2048 // Oop field/array element loads must be shared, but since
2049 // they are shared through a DecodeN they may appear to have
2050 // a single use, so force sharing here.
2051 set_shared(n->in(1));
2052 }
2053 mstack.pop(); // remove node from stack
2054 continue;
2055 }
2056 nstate = Visit; // Not already visited; so visit now
2057 }
2058 if (nstate == Visit) {
2059 mstack.set_state(Post_Visit);
2060 set_visited(n); // Flag as visited now
2061 bool mem_op = false;
2062 int mem_addr_idx = MemNode::Address;
2063 if (find_shared_visit(mstack, n, nop, mem_op, mem_addr_idx)) {
2064 continue;
2065 }
2066 for (int i = n->req() - 1; i >= 0; --i) { // For my children
2067 Node* m = n->in(i); // Get ith input
2068 if (m == nullptr) {
2069 continue; // Ignore nulls
2070 }
2071 if (clone_node(n, m, mstack)) {
2072 continue;
2073 }
2074
2075 // Clone addressing expressions as they are "free" in memory access instructions
2076 if (mem_op && i == mem_addr_idx && m->is_AddP() &&
2077 // When there are other uses besides address expressions
2078 // put it on stack and mark as shared.
2079 !is_visited(m)) {
2080 // Some inputs for address expression are not put on stack
2081 // to avoid marking them as shared and forcing them into register
2082 // if they are used only in address expressions.
2083 // But they should be marked as shared if there are other uses
2084 // besides address expressions.
2085
2086 if (pd_clone_address_expressions(m->as_AddP(), mstack, address_visited)) {
2087 continue;
2088 }
2089 } // if( mem_op &&
2090 mstack.push(m, Pre_Visit);
2091 } // for(int i = ...)
2092 }
2093 else if (nstate == Alt_Post_Visit) {
2094 mstack.pop(); // Remove node from stack
2095 // We cannot remove the Cmp input from the Bool here, as the Bool may be
2096 // shared and all users of the Bool need to move the Cmp in parallel.
2097 // This leaves both the Bool and the If pointing at the Cmp. To
2098 // prevent the Matcher from trying to Match the Cmp along both paths,
2099 // BoolNode::match_edge always returns zero.
2100
2101 // We reorder the Op_If in a pre-order manner, so we can visit without
2102 // accidentally sharing the Cmp (the Bool and the If make 2 users).
2103 n->add_req( n->in(1)->in(1) ); // Add the Cmp next to the Bool
2104 }
2105 else if (nstate == Post_Visit) {
2106 mstack.pop(); // Remove node from stack
2107
2108 // Now hack a few special opcodes
2109 uint opcode = n->Opcode();
2110 bool gc_handled = BarrierSet::barrier_set()->barrier_set_c2()->matcher_find_shared_post_visit(this, n, opcode);
2111 if (!gc_handled) {
2112 find_shared_post_visit(n, opcode);
2113 }
2114 }
2115 else {
2116 ShouldNotReachHere();
2117 }
2118 } // end of while (mstack.is_nonempty())
2119 }
2120
2121 bool Matcher::find_shared_visit(MStack& mstack, Node* n, uint opcode, bool& mem_op, int& mem_addr_idx) {
2122 switch(opcode) { // Handle some opcodes special
2123 case Op_Phi: // Treat Phis as shared roots
2124 case Op_Parm:
2125 case Op_Proj: // All handled specially during matching
2126 case Op_SafePointScalarObject:
2127 set_shared(n);
2128 set_dontcare(n);
2129 break;
2130 case Op_If:
2131 case Op_CountedLoopEnd:
2132 mstack.set_state(Alt_Post_Visit); // Alternative way
2133 // Convert (If (Bool (CmpX A B))) into (If (Bool) (CmpX A B)). Helps
2134 // with matching cmp/branch in 1 instruction. The Matcher needs the
2135 // Bool and CmpX side-by-side, because it can only get at constants
2136 // that are at the leaves of Match trees, and the Bool's condition acts
2137 // as a constant here.
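// For example, (If (Bool (CmpI a b))) gains an extra edge to the CmpI in the
// Alt_Post_Visit state (see find_shared), so the If sees both the Bool and
// the CmpI as direct inputs when it is matched.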
2138 mstack.push(n->in(1), Visit); // Clone the Bool
2139 mstack.push(n->in(0), Pre_Visit); // Visit control input
2140 return true; // while (mstack.is_nonempty())
2141 case Op_ConvI2D: // These forms efficiently match with a prior
2142 case Op_ConvI2F: // Load but not a following Store
2143 if( n->in(1)->is_Load() && // Prior load
2144 n->outcnt() == 1 && // Not already shared
2145 n->unique_out()->is_Store() ) // Following store
2146 set_shared(n); // Force it to be a root
2147 break;
2148 case Op_ReverseBytesI:
2149 case Op_ReverseBytesL:
2150 if( n->in(1)->is_Load() && // Prior load
2151 n->outcnt() == 1 ) // Not already shared
2152 set_shared(n); // Force it to be a root
2153 break;
2154 case Op_BoxLock: // Can't match until we get stack-regs in ADLC
2155 case Op_IfFalse:
2156 case Op_IfTrue:
2157 case Op_MachProj:
2158 case Op_MergeMem:
2159 case Op_Catch:
2160 case Op_CatchProj:
2161 case Op_CProj:
2162 case Op_JumpProj:
2163 case Op_JProj:
2164 case Op_NeverBranch:
2165 set_dontcare(n);
2166 break;
2167 case Op_Jump:
2168 mstack.push(n->in(1), Pre_Visit); // Switch Value (could be shared)
2169 mstack.push(n->in(0), Pre_Visit); // Visit Control input
2170 return true; // while (mstack.is_nonempty())
2171 case Op_StrComp:
2172 case Op_StrEquals:
2173 case Op_StrIndexOf:
2174 case Op_StrIndexOfChar:
2175 case Op_AryEq:
2176 case Op_VectorizedHashCode:
2177 case Op_CountPositives:
2178 case Op_StrInflatedCopy:
2179 case Op_StrCompressedCopy:
2180 case Op_EncodeISOArray:
2181 case Op_FmaD:
2182 case Op_FmaF:
2183 case Op_FmaHF:
2184 case Op_FmaVD:
2185 case Op_FmaVF:
2186 case Op_FmaVHF:
2187 case Op_MacroLogicV:
2188 case Op_VectorCmpMasked:
2189 case Op_CompressV:
2190 case Op_CompressM:
2191 case Op_ExpandV:
2192 case Op_VectorLoadMask:
2193 set_shared(n); // Force result into register (it will be anyways)
2194 break;
2195 case Op_ConP: { // Convert pointers above the centerline to NULL
2196 TypeNode *tn = n->as_Type(); // Constants derive from type nodes
2197 const TypePtr* tp = tn->type()->is_ptr();
2198 if (tp->_ptr == TypePtr::AnyNull) {
2199 tn->set_type(TypePtr::NULL_PTR);
2200 }
2201 break;
2202 }
2203 case Op_ConN: { // Convert narrow pointers above the centerline to NULL
2204 TypeNode *tn = n->as_Type(); // Constants derive from type nodes
2205 const TypePtr* tp = tn->type()->make_ptr();
2206 if (tp && tp->_ptr == TypePtr::AnyNull) {
2207 tn->set_type(TypeNarrowOop::NULL_PTR);
2208 }
2209 break;
2210 }
2211 case Op_Binary: // These are introduced in the Post_Visit state.
2212 ShouldNotReachHere();
2213 break;
2214 case Op_ClearArray:
2215 case Op_SafePoint:
2216 mem_op = true;
2217 break;
2218 default:
2219 if( n->is_Store() ) {
2220 // Do match stores, despite no ideal reg
2221 mem_op = true;
2222 break;
2223 }
2224 if( n->is_Mem() ) { // Loads and LoadStores
2225 mem_op = true;
2226 // Loads must be root of match tree due to prior load conflict
2227 if( C->subsume_loads() == false )
2228 set_shared(n);
2229 }
2230 // Fall into default case
2231 if( !n->ideal_reg() )
2232 set_dontcare(n); // Unmatchable Nodes
2233 } // end_switch
2234 return false;
2235 }
2236
2237 void Matcher::find_shared_post_visit(Node* n, uint opcode) {
2238 if (n->is_predicated_vector()) {
2239 // Restructure into binary trees for Matching.
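// E.g., a 3-input predicated node (n v1 v2 mask) becomes (n (Binary v1 v2) mask);
// longer input lists are nested into Binary pairs the same way below.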
2240 if (n->req() == 4) {
2241 n->set_req(1, new BinaryNode(n->in(1), n->in(2)));
2242 n->set_req(2, n->in(3));
2243 n->del_req(3);
2244 } else if (n->req() == 5) {
2245 n->set_req(1, new BinaryNode(n->in(1), n->in(2)));
2246 n->set_req(2, new BinaryNode(n->in(3), n->in(4)));
2247 n->del_req(4);
2248 n->del_req(3);
2249 } else if (n->req() == 6) {
2250 Node* b3 = new BinaryNode(n->in(4), n->in(5));
2251 Node* b2 = new BinaryNode(n->in(3), b3);
2252 Node* b1 = new BinaryNode(n->in(2), b2);
2253 n->set_req(2, b1);
2254 n->del_req(5);
2255 n->del_req(4);
2256 n->del_req(3);
2257 }
2258 return;
2259 }
2260
2261 switch(opcode) { // Handle some opcodes special
2262 case Op_CompareAndExchangeB:
2263 case Op_CompareAndExchangeS:
2264 case Op_CompareAndExchangeI:
2265 case Op_CompareAndExchangeL:
2266 case Op_CompareAndExchangeP:
2267 case Op_CompareAndExchangeN:
2268 case Op_WeakCompareAndSwapB:
2269 case Op_WeakCompareAndSwapS:
2270 case Op_WeakCompareAndSwapI:
2271 case Op_WeakCompareAndSwapL:
2272 case Op_WeakCompareAndSwapP:
2273 case Op_WeakCompareAndSwapN:
2274 case Op_CompareAndSwapB:
2275 case Op_CompareAndSwapS:
2276 case Op_CompareAndSwapI:
2277 case Op_CompareAndSwapL:
2278 case Op_CompareAndSwapP:
2279 case Op_CompareAndSwapN: { // Convert trinary to binary-tree
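// E.g., a CompareAndSwap with separate oldval/newval inputs is rewritten so
// that ValueIn holds (Binary oldval newval) and the ExpectedIn edge is
// removed, giving the matcher a binary tree to work with.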
2280 Node* newval = n->in(MemNode::ValueIn);
2281 Node* oldval = n->in(LoadStoreConditionalNode::ExpectedIn);
2282 Node* pair = new BinaryNode(oldval, newval);
2283 n->set_req(MemNode::ValueIn, pair);
2284 n->del_req(LoadStoreConditionalNode::ExpectedIn);
2285 break;
2286 }
2287 case Op_CMoveD: // Convert trinary to binary-tree
2288 case Op_CMoveF:
2289 case Op_CMoveI:
2290 case Op_CMoveL:
2291 case Op_CMoveN:
2292 case Op_CMoveP: {
2293 // Restructure into a binary tree for Matching. It's possible that
2294 // we could move this code up next to the graph reshaping for IfNodes
2295 // or vice-versa, but I do not want to debug this for Ladybird.
2296 // 10/2/2000 CNC.
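// E.g., (CMoveI (Bool (CmpI a b)) src1 src2) becomes
// (CMoveI (Binary Bool (CmpI a b)) (Binary src1 src2)).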
2297 Node* pair1 = new BinaryNode(n->in(1), n->in(1)->in(1));
2298 n->set_req(1, pair1);
2299 Node* pair2 = new BinaryNode(n->in(2), n->in(3));
2300 n->set_req(2, pair2);
2301 n->del_req(3);
2302 break;
2303 }
2304 case Op_MacroLogicV: {
2305 Node* pair1 = new BinaryNode(n->in(1), n->in(2));
2306 Node* pair2 = new BinaryNode(n->in(3), n->in(4));
2307 n->set_req(1, pair1);
2308 n->set_req(2, pair2);
2309 n->del_req(4);
2310 n->del_req(3);
2311 break;
2312 }
2313 case Op_StoreVectorMasked: {
2314 Node* pair = new BinaryNode(n->in(3), n->in(4));
2315 n->set_req(3, pair);
2316 n->del_req(4);
2317 break;
2318 }
2319 case Op_SelectFromTwoVector:
2320 case Op_LoopLimit: {
2321 Node* pair1 = new BinaryNode(n->in(1), n->in(2));
2322 n->set_req(1, pair1);
2323 n->set_req(2, n->in(3));
2324 n->del_req(3);
2325 break;
2326 }
2327 case Op_StrEquals:
2328 case Op_StrIndexOfChar: {
2329 Node* pair1 = new BinaryNode(n->in(2), n->in(3));
2330 n->set_req(2, pair1);
2331 n->set_req(3, n->in(4));
2332 n->del_req(4);
2333 break;
2334 }
2335 case Op_StrComp:
2336 case Op_StrIndexOf:
2337 case Op_VectorizedHashCode: {
2338 Node* pair1 = new BinaryNode(n->in(2), n->in(3));
2339 n->set_req(2, pair1);
2340 Node* pair2 = new BinaryNode(n->in(4),n->in(5));
2341 n->set_req(3, pair2);
2342 n->del_req(5);
2343 n->del_req(4);
2344 break;
2345 }
2346 case Op_EncodeISOArray:
2347 case Op_StrCompressedCopy:
2348 case Op_StrInflatedCopy: {
2349 // Restructure into a binary tree for Matching.
2350 Node* pair = new BinaryNode(n->in(3), n->in(4));
2351 n->set_req(3, pair);
2352 n->del_req(4);
2353 break;
2354 }
2355 case Op_FmaD:
2356 case Op_FmaF:
2357 case Op_FmaHF:
2358 case Op_FmaVD:
2359 case Op_FmaVF:
2360 case Op_FmaVHF: {
2361 // Restructure into a binary tree for Matching.
2362 Node* pair = new BinaryNode(n->in(1), n->in(2));
2363 n->set_req(2, pair);
2364 n->set_req(1, n->in(3));
2365 n->del_req(3);
2366 break;
2367 }
2368 case Op_MulAddS2I: {
2369 Node* pair1 = new BinaryNode(n->in(1), n->in(2));
2370 Node* pair2 = new BinaryNode(n->in(3), n->in(4));
2371 n->set_req(1, pair1);
2372 n->set_req(2, pair2);
2373 n->del_req(4);
2374 n->del_req(3);
2375 break;
2376 }
2377 case Op_VectorCmpMasked:
2378 case Op_CopySignD:
2379 case Op_SignumVF:
2380 case Op_SignumVD:
2381 case Op_SignumF:
2382 case Op_SignumD: {
2383 Node* pair = new BinaryNode(n->in(2), n->in(3));
2384 n->set_req(2, pair);
2385 n->del_req(3);
2386 break;
2387 }
2388 case Op_VectorBlend:
2389 case Op_VectorInsert: {
2390 Node* pair = new BinaryNode(n->in(1), n->in(2));
2391 n->set_req(1, pair);
2392 n->set_req(2, n->in(3));
2393 n->del_req(3);
2394 break;
2395 }
2396 case Op_LoadVectorGatherMasked: // fall-through
2397 case Op_StoreVectorScatter: {
2398 Node* pair = new BinaryNode(n->in(MemNode::ValueIn), n->in(MemNode::ValueIn+1));
2399 n->set_req(MemNode::ValueIn, pair);
2400 n->del_req(MemNode::ValueIn+1);
2401 break;
2402 }
2403 case Op_StoreVectorScatterMasked: {
2404 Node* pair = new BinaryNode(n->in(MemNode::ValueIn+1), n->in(MemNode::ValueIn+2));
2405 n->set_req(MemNode::ValueIn+1, pair);
2406 n->del_req(MemNode::ValueIn+2);
2407 pair = new BinaryNode(n->in(MemNode::ValueIn), n->in(MemNode::ValueIn+1));
2408 n->set_req(MemNode::ValueIn, pair);
2409 n->del_req(MemNode::ValueIn+1);
2410 break;
2411 }
2412 case Op_VectorMaskCmp: {
2413 n->set_req(1, new BinaryNode(n->in(1), n->in(2)));
2414 n->set_req(2, n->in(3));
2415 n->del_req(3);
2416 break;
2417 }
2418 case Op_PartialSubtypeCheck: {
2419 if (UseSecondarySupersTable && n->in(2)->is_Con()) {
2420 // PartialSubtypeCheck uses both constant and register operands for superclass input.
2421 n->set_req(2, new BinaryNode(n->in(2), n->in(2)));
2422 break;
2423 }
2424 break;
2425 }
2426 default:
2427 break;
2428 }
2429 }
2430
2431 #ifndef PRODUCT
2432 void Matcher::record_new2old(Node* newn, Node* old) {
2433 _new2old_map.map(newn->_idx, old);
2434 if (!_reused.test_set(old->_igv_idx)) {
2435 // Reuse the Ideal-level IGV identifier so that the node can be tracked
2436 // across matching. If there are multiple machine nodes expanded from the
2437 // same Ideal node, only one will reuse its IGV identifier.
2438 newn->_igv_idx = old->_igv_idx;
2439 }
2440 }
2441
2442 // Dump the mapping from machine-independent (old) nodes to machine-dependent (new) nodes.
2443 void Matcher::dump_old2new_map() {
2444 _old2new_map.dump();
2445 }
2446 #endif // !PRODUCT
2447
2448 //---------------------------collect_null_checks-------------------------------
2449 // Find null checks in the ideal graph; write a machine-specific node for
2450 // it. Used by later implicit-null-check handling. Actually collects
2451 // either an IfTrue or IfFalse for the common NOT-null path, AND the ideal
2452 // value being tested.
2453 void Matcher::collect_null_checks( Node *proj, Node *orig_proj ) {
2454 Node *iff = proj->in(0);
2455 if( iff->Opcode() == Op_If ) {
2456 // During matching If's have Bool & Cmp side-by-side
2457 BoolNode *b = iff->in(1)->as_Bool();
2458 Node *cmp = iff->in(2);
2459 int opc = cmp->Opcode();
2460 if (opc != Op_CmpP && opc != Op_CmpN) return;
2461
2462 const Type* ct = cmp->in(2)->bottom_type();
2463 if (ct == TypePtr::NULL_PTR ||
2464 (opc == Op_CmpN && ct == TypeNarrowOop::NULL_PTR)) {
2465
2466 bool push_it = false;
2467 if( proj->Opcode() == Op_IfTrue ) {
2468 #ifndef PRODUCT
2469 extern uint all_null_checks_found;
2470 all_null_checks_found++;
2471 #endif
2472 if( b->_test._test == BoolTest::ne ) {
2473 push_it = true;
2474 }
2475 } else {
2476 assert( proj->Opcode() == Op_IfFalse, "" );
2477 if( b->_test._test == BoolTest::eq ) {
2478 push_it = true;
2479 }
2480 }
2481 if( push_it ) {
2482 _null_check_tests.push(proj);
2483 Node* val = cmp->in(1);
2484 #ifdef _LP64
2485 if (val->bottom_type()->isa_narrowoop() &&
2486 !Matcher::narrow_oop_use_complex_address()) {
2487 //
2488 // Look for DecodeN node which should be pinned to orig_proj.
2489 // On platforms (Sparc) which cannot handle 2 adds
2490 // in the addressing mode we have to keep a DecodeN node and
2491 // use it to do the implicit null check in the address.
2492 //
2493 // DecodeN node was pinned to non-null path (orig_proj) during
2494 // CastPP transformation in final_graph_reshaping_impl().
2495 //
2496 uint cnt = orig_proj->outcnt();
2497 for (uint i = 0; i < orig_proj->outcnt(); i++) {
2498 Node* d = orig_proj->raw_out(i);
2499 if (d->is_DecodeN() && d->in(1) == val) {
2500 val = d;
2501 val->set_req(0, nullptr); // Unpin now.
2502 // Mark this as special case to distinguish from
2503 // a regular case: CmpP(DecodeN, null).
2504 val = (Node*)(((intptr_t)val) | 1);
2505 break;
2506 }
2507 }
2508 }
2509 #endif
2510 _null_check_tests.push(val);
2511 }
2512 }
2513 }
2514 }
2515
2516 //---------------------------validate_null_checks------------------------------
2517 // It's possible that the value being null checked is not the root of a match
2518 // tree. If so, I cannot use the value in an implicit null check.
2519 void Matcher::validate_null_checks( ) {
2520 uint cnt = _null_check_tests.size();
2521 for( uint i=0; i < cnt; i+=2 ) {
2522 Node *test = _null_check_tests[i];
2523 Node *val = _null_check_tests[i+1];
2524 bool is_decoden = ((intptr_t)val) & 1;
2525 val = (Node*)(((intptr_t)val) & ~1);
2526 if (has_new_node(val)) {
2527 Node* new_val = new_node(val);
2528 if (is_decoden) {
2529 assert(val->is_DecodeNarrowPtr() && val->in(0) == nullptr, "sanity");
2530 // Note: new_val may have a control edge if
2531 // the original ideal node DecodeN was matched before
2532 // it was unpinned in Matcher::collect_null_checks().
2533 // Unpin the mach node and mark it.
2534 new_val->set_req(0, nullptr);
2535 new_val = (Node*)(((intptr_t)new_val) | 1);
2536 }
2537 // Is a match-tree root, so replace with the matched value
2538 _null_check_tests.map(i+1, new_val);
2539 } else {
2540 // Yank from candidate list
2541 _null_check_tests.map(i+1,_null_check_tests[--cnt]);
2542 _null_check_tests.map(i,_null_check_tests[--cnt]);
2543 _null_check_tests.pop();
2544 _null_check_tests.pop();
2545 i-=2;
2546 }
2547 }
2548 }
2549
2550 bool Matcher::gen_narrow_oop_implicit_null_checks() {
2551 // Advise the matcher to perform null checks on the narrow oop side.
2552 // Implicit checks are not possible on the uncompressed oop side anyway
2553 // (at least not for read accesses).
2554 // Performs significantly better (especially on Power 6).
2555 if (!os::zero_page_read_protected()) {
2556 return true;
2557 }
2558 return CompressedOops::use_implicit_null_checks() &&
2559 (narrow_oop_use_complex_address() ||
2560 CompressedOops::base() != nullptr);
2561 }
2562
2563 // Compute RegMask for an ideal register.
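// This works by synthesizing a dummy spill load of the matching type from the
// frame pointer, running it through match_tree(), and returning the output
// register mask of the resulting MachNode.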
2564 const RegMask* Matcher::regmask_for_ideal_register(uint ideal_reg, Node* ret) {
2565 assert(!C->failing_internal() || C->failure_is_artificial(), "already failing.");
2566 if (C->failing()) {
2567 return nullptr;
2568 }
2569 const Type* t = Type::mreg2type[ideal_reg];
2570 if (t == nullptr) {
2571 assert(ideal_reg >= Op_VecA && ideal_reg <= Op_VecZ, "not a vector: %d", ideal_reg);
2572 return nullptr; // not supported
2573 }
2574 Node* fp = ret->in(TypeFunc::FramePtr);
2575 Node* mem = ret->in(TypeFunc::Memory);
2576 const TypePtr* atp = TypePtr::BOTTOM;
2577 MemNode::MemOrd mo = MemNode::unordered;
2578
2579 Node* spill;
2580 switch (ideal_reg) {
2581 case Op_RegN: spill = new LoadNNode(nullptr, mem, fp, atp, t->is_narrowoop(), mo); break;
2582 case Op_RegI: spill = new LoadINode(nullptr, mem, fp, atp, t->is_int(), mo); break;
2583 case Op_RegP: spill = new LoadPNode(nullptr, mem, fp, atp, t->is_ptr(), mo); break;
2584 case Op_RegF: spill = new LoadFNode(nullptr, mem, fp, atp, t, mo); break;
2585 case Op_RegD: spill = new LoadDNode(nullptr, mem, fp, atp, t, mo); break;
2586 case Op_RegL: spill = new LoadLNode(nullptr, mem, fp, atp, t->is_long(), mo); break;
2587
2588 case Op_VecA: // fall-through
2589 case Op_VecS: // fall-through
2590 case Op_VecD: // fall-through
2591 case Op_VecX: // fall-through
2592 case Op_VecY: // fall-through
2593 case Op_VecZ: spill = new LoadVectorNode(nullptr, mem, fp, atp, t->is_vect()); break;
2594 case Op_RegVectMask: return Matcher::predicate_reg_mask();
2595
2596 default: ShouldNotReachHere();
2597 }
2598 MachNode* mspill = match_tree(spill);
2599 assert(mspill != nullptr || C->failure_is_artificial(), "matching failed: %d", ideal_reg);
2600 if (C->failing()) {
2601 return nullptr;
2602 }
2603 // Handle generic vector operand case
2604 if (Matcher::supports_generic_vector_operands && t->isa_vect()) {
2605 specialize_mach_node(mspill);
2606 }
2607 return &mspill->out_RegMask();
2608 }
2609
2610 // Process Mach IR right after selection phase is over.
2611 void Matcher::do_postselect_cleanup() {
2612 if (supports_generic_vector_operands) {
2613 specialize_generic_vector_operands();
2614 if (C->failing()) return;
2615 }
2616 }
2617
2618 //----------------------------------------------------------------------
2619 // Generic machine operand elision.
2620 //----------------------------------------------------------------------
2621
2622 // Compute concrete vector operand for a generic TEMP vector mach node based on its user info.
2623 void Matcher::specialize_temp_node(MachTempNode* tmp, MachNode* use, uint idx) {
2624 assert(use->in(idx) == tmp, "not a user");
2625 assert(!Matcher::is_generic_vector(use->_opnds[0]), "use not processed yet");
2626
2627 if ((uint)idx == use->two_adr()) { // DEF_TEMP case
2628 tmp->_opnds[0] = use->_opnds[0]->clone();
2629 } else {
2630 uint ideal_vreg = vector_ideal_reg(C->max_vector_size());
2631 tmp->_opnds[0] = Matcher::pd_specialize_generic_vector_operand(tmp->_opnds[0], ideal_vreg, true /*is_temp*/);
2632 }
2633 }
2634
2635 // Compute concrete vector operand for a generic DEF/USE vector operand (of mach node m at index idx).
2636 MachOper* Matcher::specialize_vector_operand(MachNode* m, uint opnd_idx) {
2637 assert(Matcher::is_generic_vector(m->_opnds[opnd_idx]), "repeated updates");
2638 Node* def = nullptr;
2639 if (opnd_idx == 0) { // DEF
2640 def = m; // use mach node itself to compute vector operand type
2641 } else {
2642 int base_idx = m->operand_index(opnd_idx);
2643 def = m->in(base_idx);
2644 if (def->is_Mach()) {
2645 if (def->is_MachTemp() && Matcher::is_generic_vector(def->as_Mach()->_opnds[0])) {
2646 specialize_temp_node(def->as_MachTemp(), m, base_idx); // MachTemp node use site
2647 } else if (is_reg2reg_move(def->as_Mach())) {
2648 def = def->in(1); // skip over generic reg-to-reg moves
2649 }
2650 }
2651 }
2652 assert(def->bottom_type()->isa_vect(), "not a vector");
2653 uint ideal_vreg = def->bottom_type()->ideal_reg();
2654 return Matcher::pd_specialize_generic_vector_operand(m->_opnds[opnd_idx], ideal_vreg, false /*is_temp*/);
2655 }
2656
2657 void Matcher::specialize_mach_node(MachNode* m) {
2658 assert(!m->is_MachTemp(), "processed along with its user");
2659 // For generic use operands, pull specific register class operands from
2660 // their def instruction's output operand (def operand).
2661 for (uint i = 0; i < m->num_opnds(); i++) {
2662 if (Matcher::is_generic_vector(m->_opnds[i])) {
2663 m->_opnds[i] = specialize_vector_operand(m, i);
2664 }
2665 }
2666 }
2667
2668 // Replace generic vector operands with concrete vector operands and eliminate generic reg-to-reg moves from the graph.
2669 void Matcher::specialize_generic_vector_operands() {
2670 assert(supports_generic_vector_operands, "sanity");
2671 ResourceMark rm;
2672
2673 // Replace generic vector operands (vec/legVec) with concrete ones (vec[SDXYZ]/legVec[SDXYZ])
2674 // and remove reg-to-reg vector moves (MoveVec2Leg and MoveLeg2Vec).
2675 Unique_Node_List live_nodes;
2676 C->identify_useful_nodes(live_nodes);
2677
2678 while (live_nodes.size() > 0) {
2679 MachNode* m = live_nodes.pop()->isa_Mach();
2680 if (m != nullptr) {
2681 if (Matcher::is_reg2reg_move(m)) {
2682 // Register allocator properly handles vec <=> leg moves using register masks.
2683 int opnd_idx = m->operand_index(1);
2684 Node* def = m->in(opnd_idx);
2685 m->subsume_by(def, C);
2686 } else if (m->is_MachTemp()) {
2687 // process MachTemp nodes at use site (see Matcher::specialize_vector_operand)
2688 } else {
2689 specialize_mach_node(m);
2690 }
2691 }
2692 }
2693 }
2694
2695 uint Matcher::vector_length(const Node* n) {
2696 const TypeVect* vt = n->bottom_type()->is_vect();
2697 return vt->length();
2698 }
2699
2700 uint Matcher::vector_length(const MachNode* use, const MachOper* opnd) {
2701 int def_idx = use->operand_index(opnd);
2702 Node* def = use->in(def_idx);
2703 return def->bottom_type()->is_vect()->length();
2704 }
2705
2706 uint Matcher::vector_length_in_bytes(const Node* n) {
2707 const TypeVect* vt = n->bottom_type()->is_vect();
2708 return vt->length_in_bytes();
2709 }
2710
2711 uint Matcher::vector_length_in_bytes(const MachNode* use, const MachOper* opnd) {
2712 uint def_idx = use->operand_index(opnd);
2713 Node* def = use->in(def_idx);
2714 return def->bottom_type()->is_vect()->length_in_bytes();
2715 }
2716
2717 BasicType Matcher::vector_element_basic_type(const Node* n) {
2718 const TypeVect* vt = n->bottom_type()->is_vect();
2719 return vt->element_basic_type();
2720 }
2721
2722 BasicType Matcher::vector_element_basic_type(const MachNode* use, const MachOper* opnd) {
2723 int def_idx = use->operand_index(opnd);
2724 Node* def = use->in(def_idx);
2725 return def->bottom_type()->is_vect()->element_basic_type();
2726 }
2727
2728 bool Matcher::is_non_long_integral_vector(const Node* n) {
2729 BasicType bt = vector_element_basic_type(n);
2730 assert(bt != T_CHAR, "char is not allowed in vector");
2731 return is_subword_type(bt) || bt == T_INT;
2732 }
2733
2734 bool Matcher::is_encode_and_store_pattern(const Node* n, const Node* m) {
2735 if (n == nullptr ||
2736 m == nullptr ||
2737 n->Opcode() != Op_StoreN ||
2738 !m->is_EncodeP() ||
2739 n->as_Store()->barrier_data() == 0) {
2740 return false;
2741 }
2742 assert(m == n->in(MemNode::ValueIn), "m should be input to n");
2743 return true;
2744 }
2745
2746 #ifdef ASSERT
2747 bool Matcher::verify_after_postselect_cleanup() {
2748 assert(!C->failing_internal() || C->failure_is_artificial(), "sanity");
2749 if (supports_generic_vector_operands) {
2750 Unique_Node_List useful;
2751 C->identify_useful_nodes(useful);
2752 for (uint i = 0; i < useful.size(); i++) {
2753 MachNode* m = useful.at(i)->isa_Mach();
2754 if (m != nullptr) {
2755 assert(!Matcher::is_reg2reg_move(m), "no MoveVec nodes allowed");
2756 for (uint j = 0; j < m->num_opnds(); j++) {
2757 assert(!Matcher::is_generic_vector(m->_opnds[j]), "no generic vector operands allowed");
2758 }
2759 }
2760 }
2761 }
2762 return true;
2763 }
2764 #endif // ASSERT
2765
2766 // Used by the DFA in dfa_xxx.cpp. Check for a following barrier or
2767 // atomic instruction acting as a store_load barrier without any
2768 // intervening volatile load, in which case we don't need a barrier here.
2769 // We retain the Node to act as a compiler ordering barrier.
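// For example, a MemBarVolatile whose control projection is immediately
// followed by a CompareAndSwap does not need to emit a fence, since the
// atomic instruction already provides the required store-load ordering.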
2770 bool Matcher::post_store_load_barrier(const Node* vmb) {
2771 Compile* C = Compile::current();
2772 assert(vmb->is_MemBar(), "");
2773 assert(vmb->Opcode() != Op_MemBarAcquire && vmb->Opcode() != Op_LoadFence, "");
2774 const MemBarNode* membar = vmb->as_MemBar();
2775
2776 // Get the Ideal Proj node, ctrl, that can be used to iterate forward
2777 Node* ctrl = nullptr;
2778 for (DUIterator_Fast imax, i = membar->fast_outs(imax); i < imax; i++) {
2779 Node* p = membar->fast_out(i);
2780 assert(p->is_Proj(), "only projections here");
2781 if ((p->as_Proj()->_con == TypeFunc::Control) &&
2782 !C->node_arena()->contains(p)) { // Unmatched old-space only
2783 ctrl = p;
2784 break;
2785 }
2786 }
2787 assert((ctrl != nullptr), "missing control projection");
2788
2789 for (DUIterator_Fast jmax, j = ctrl->fast_outs(jmax); j < jmax; j++) {
2790 Node *x = ctrl->fast_out(j);
2791 int xop = x->Opcode();
2792
2793 // We don't need current barrier if we see another or a lock
2794 // before seeing volatile load.
2795 //
2796 // Op_Fastunlock previously appeared in the Op_* list below.
2797 // With the advent of 1-0 lock operations we're no longer guaranteed
2798 // that a monitor exit operation contains a serializing instruction.
2799
2800 if (xop == Op_MemBarVolatile ||
2801 xop == Op_CompareAndExchangeB ||
2802 xop == Op_CompareAndExchangeS ||
2803 xop == Op_CompareAndExchangeI ||
2804 xop == Op_CompareAndExchangeL ||
2805 xop == Op_CompareAndExchangeP ||
2806 xop == Op_CompareAndExchangeN ||
2807 xop == Op_WeakCompareAndSwapB ||
2808 xop == Op_WeakCompareAndSwapS ||
2809 xop == Op_WeakCompareAndSwapL ||
2810 xop == Op_WeakCompareAndSwapP ||
2811 xop == Op_WeakCompareAndSwapN ||
2812 xop == Op_WeakCompareAndSwapI ||
2813 xop == Op_CompareAndSwapB ||
2814 xop == Op_CompareAndSwapS ||
2815 xop == Op_CompareAndSwapL ||
2816 xop == Op_CompareAndSwapP ||
2817 xop == Op_CompareAndSwapN ||
2818 xop == Op_CompareAndSwapI ||
2819 BarrierSet::barrier_set()->barrier_set_c2()->matcher_is_store_load_barrier(x, xop)) {
2820 return true;
2821 }
2822
2823 // Op_FastLock previously appeared in the Op_* list above.
2824 if (xop == Op_FastLock) {
2825 return true;
2826 }
2827
2828 if (x->is_MemBar()) {
2829 // We must retain this membar if there is an upcoming volatile
2830 // load, which will be followed by an acquire membar.
2831 if (xop == Op_MemBarAcquire || xop == Op_LoadFence) {
2832 return false;
2833 } else {
2834 // For other kinds of barriers, check by pretending we
2835 // are them, and seeing if we can be removed.
2836 return post_store_load_barrier(x->as_MemBar());
2837 }
2838 }
2839
2840 // probably not necessary to check for these
2841 if (x->is_Call() || x->is_SafePoint() || x->is_block_proj()) {
2842 return false;
2843 }
2844 }
2845 return false;
2846 }
2847
// Check whether node n is a branch to an uncommon trap that we could
// optimize as a test with a very high branch cost for the path that
// leads to the uncommon trap.  The code must be able to be recompiled
// to use a cheaper test.
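//
// As an illustration only (the exact shape can vary), the walk below
// expects a subgraph roughly like:
//
//   If
//    |
//   IfFalse
//    |
//   (Region)*        <-- at most a few hops; see 'cnt' below
//    |
//   Call              <-- entry point == OptoRuntime::uncommon_trap_blob()
//
// where the trap request constant passed to the call encodes a
// DeoptReason/DeoptAction pair.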
bool Matcher::branches_to_uncommon_trap(const Node *n) {
  // Don't do it for natives, adapters, or runtime stubs
  Compile *C = Compile::current();
  if (!C->is_method_compilation()) return false;

  assert(n->is_If(), "You should only call this on If nodes.");
  IfNode *ifn = n->as_If();

  Node *ifFalse = nullptr;
  for (DUIterator_Fast imax, i = ifn->fast_outs(imax); i < imax; i++) {
    if (ifn->fast_out(i)->is_IfFalse()) {
      ifFalse = ifn->fast_out(i);
      break;
    }
  }
  assert(ifFalse, "An If should have an ifFalse. Graph is broken.");

  Node *reg = ifFalse;
  int cnt = 4; // We must protect against cycles.  Limit to 4 iterations.
               // Alternatively, use a visited set?  Seems too expensive.
  while (reg != nullptr && cnt > 0) {
    CallNode *call = nullptr;
    RegionNode *nxt_reg = nullptr;
    for (DUIterator_Fast imax, i = reg->fast_outs(imax); i < imax; i++) {
      Node *o = reg->fast_out(i);
      if (o->is_Call()) {
        call = o->as_Call();
      }
      if (o->is_Region()) {
        nxt_reg = o->as_Region();
      }
    }

    if (call &&
        call->entry_point() == OptoRuntime::uncommon_trap_blob()->entry_point()) {
      const Type* trtype = call->in(TypeFunc::Parms)->bottom_type();
      if (trtype->isa_int() && trtype->is_int()->is_con()) {
        jint tr_con = trtype->is_int()->get_con();
        Deoptimization::DeoptReason reason = Deoptimization::trap_request_reason(tr_con);
        Deoptimization::DeoptAction action = Deoptimization::trap_request_action(tr_con);
        assert((int)reason < (int)BitsPerInt, "recode bit map");

        if (is_set_nth_bit(C->allowed_deopt_reasons(), (int)reason)
            && action != Deoptimization::Action_none) {
          // This uncommon trap is sure to recompile, eventually.
          // When that happens, C->too_many_traps will prevent
          // this transformation from happening again.
          return true;
        }
      }
    }

    reg = nxt_reg;
    cnt--;
  }

  return false;
}

//=============================================================================
//---------------------------State---------------------------------------------
State::State(void) : _rule() {
#ifdef ASSERT
  // Debug-only poison values to catch use of an uninitialized State.
  _id = 0;
  _kids[0] = _kids[1] = (State*)(intptr_t) CONST64(0xcafebabecafebabe);
  _leaf = (Node*)(intptr_t) CONST64(0xbaadf00dbaadf00d);
#endif
}

#ifdef ASSERT
State::~State() {
  // Poison the State on destruction to help catch use of stale entries.
  _id = 99;
  _kids[0] = _kids[1] = (State*)(intptr_t) CONST64(0xcafebabecafebabe);
  _leaf = (Node*)(intptr_t) CONST64(0xbaadf00dbaadf00d);
  memset(_cost, -3, sizeof(_cost));
  memset(_rule, -3, sizeof(_rule));
}
#endif

#ifndef PRODUCT
//---------------------------dump----------------------------------------------
void State::dump() {
  tty->print("\n");
  dump(0);
}

void State::dump(int depth) {
  for (int j = 0; j < depth; j++) {
    tty->print(" ");
  }
  tty->print("--N: ");
  _leaf->dump();
  uint i;
  for (i = 0; i < _LAST_MACH_OPER; i++) {
    // Check for valid entry
    if (valid(i)) {
      for (int j = 0; j < depth; j++) {
        tty->print(" ");
      }
      assert(cost(i) != max_juint, "cost must be a valid value");
      assert(rule(i) < _last_Mach_Node, "rule[i] must be valid rule");
      tty->print_cr("%s %d %s",
                    ruleName[i], cost(i), ruleName[rule(i)]);
    }
  }
  tty->cr();

  for (i = 0; i < 2; i++) {
    if (_kids[i]) {
      _kids[i]->dump(depth + 1);
    }
  }
}
#endif