
src/hotspot/share/opto/matcher.cpp

*** 183,10 ***
--- 183,54 ---
      }
    }
  }
  #endif
  
+ // Array of RegMask, one per returned value (inline type instances can
+ // be returned as multiple return values, one per field)
+ RegMask* Matcher::return_values_mask(const TypeFunc* tf) {
+   const TypeTuple* range = tf->range_cc();
+   uint cnt = range->cnt() - TypeFunc::Parms;
+   if (cnt == 0) {
+     return nullptr;
+   }
+   RegMask* mask = NEW_RESOURCE_ARRAY(RegMask, cnt);
+   BasicType* sig_bt = NEW_RESOURCE_ARRAY(BasicType, cnt);
+   VMRegPair* vm_parm_regs = NEW_RESOURCE_ARRAY(VMRegPair, cnt);
+   for (uint i = 0; i < cnt; i++) {
+     sig_bt[i] = range->field_at(i+TypeFunc::Parms)->basic_type();
+   }
+ 
+   int regs = SharedRuntime::java_return_convention(sig_bt, vm_parm_regs, cnt);
+   if (regs <= 0) {
+     // We ran out of registers to store the IsInit information for a nullable inline type return.
+     // Since it is only set in the 'call_epilog', we can simply put it on the stack.
+     assert(tf->returns_inline_type_as_fields(), "should have been tested during graph construction");
+     // TODO 8284443 Can we teach the register allocator to reserve a stack slot instead?
+     // mask[--cnt] = STACK_ONLY_mask does not work (test with -XX:+StressGCM)
+     int slot = C->fixed_slots() - 2;
+     if (C->needs_stack_repair()) {
+       slot -= 2; // Account for stack increment value
+     }
+     mask[--cnt].Clear();
+     mask[cnt].Insert(OptoReg::stack2reg(slot));
+   }
+   for (uint i = 0; i < cnt; i++) {
+     mask[i].Clear();
+ 
+     OptoReg::Name reg1 = OptoReg::as_OptoReg(vm_parm_regs[i].first());
+     if (OptoReg::is_valid(reg1)) {
+       mask[i].Insert(reg1);
+     }
+     OptoReg::Name reg2 = OptoReg::as_OptoReg(vm_parm_regs[i].second());
+     if (OptoReg::is_valid(reg2)) {
+       mask[i].Insert(reg2);
+     }
+   }
+ 
+   return mask;
+ }
  
  //---------------------------match---------------------------------------------
  void Matcher::match( ) {
    if( MaxLabelRootDepth < 100 ) { // Too small?
      assert(false, "invalid MaxLabelRootDepth, increase it to 100 minimum");
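
A note on the new return_values_mask() helper above: with inline types returned as fields, there is no longer a single return-value mask; each returned value (one per field) gets its own RegMask built from the register pair chosen by java_return_convention(). The following standalone sketch is illustrative only, assuming toy Mask/RegPair types invented here (HotSpot's RegMask/VMRegPair are far richer); it just models the "one mask per returned value, up to two registers per mask" shape.

  // Illustrative stand-ins only; not HotSpot code.
  #include <cstdint>
  #include <iostream>
  #include <vector>

  struct Mask {                       // toy register mask
    uint64_t bits = 0;
    void insert(int reg) { if (reg >= 0) bits |= (1ULL << reg); }
  };

  struct RegPair { int first; int second; };  // -1 means "no register"

  // One mask per returned value, mirroring the loop in return_values_mask():
  // a value occupying two registers (second() valid) gets both bits set.
  std::vector<Mask> masks_for_return(const std::vector<RegPair>& regs) {
    std::vector<Mask> masks(regs.size());
    for (size_t i = 0; i < regs.size(); i++) {
      masks[i].insert(regs[i].first);
      masks[i].insert(regs[i].second);
    }
    return masks;
  }

  int main() {
    // Hypothetical convention: an inline type returned as two fields,
    // field 0 in register 0, field 1 in registers 3 and 4.
    std::vector<RegPair> conv = { {0, -1}, {3, 4} };
    for (const Mask& m : masks_for_return(conv)) {
      std::cout << std::hex << m.bits << '\n';   // prints 1, then 18
    }
    return 0;
  }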

*** 201,33 ***
  #ifdef _LP64
    // Pointers take 2 slots in 64-bit land
    _return_addr_mask.Insert(OptoReg::add(return_addr(),1));
  #endif
  
!   // Map a Java-signature return type into return register-value
!   // machine registers for 0, 1 and 2 returned values.
!   const TypeTuple *range = C->tf()->range();
-   if( range->cnt() > TypeFunc::Parms ) { // If not a void function
-     // Get ideal-register return type
-     uint ireg = range->field_at(TypeFunc::Parms)->ideal_reg();
-     // Get machine return register
-     uint sop = C->start()->Opcode();
-     OptoRegPair regs = return_value(ireg);
- 
-     // And mask for same
-     _return_value_mask = RegMask(regs.first());
-     if( OptoReg::is_valid(regs.second()) )
-       _return_value_mask.Insert(regs.second());
-   }
  
    // ---------------
    // Frame Layout
  
    // Need the method signature to determine the incoming argument types,
    // because the types determine which registers the incoming arguments are
    // in, and this affects the matched code.
!   const TypeTuple *domain = C->tf()->domain();
    uint             argcnt = domain->cnt() - TypeFunc::Parms;
    BasicType *sig_bt        = NEW_RESOURCE_ARRAY( BasicType, argcnt );
    VMRegPair *vm_parm_regs  = NEW_RESOURCE_ARRAY( VMRegPair, argcnt );
    _parm_regs               = NEW_RESOURCE_ARRAY( OptoRegPair, argcnt );
    _calling_convention_mask = NEW_RESOURCE_ARRAY( RegMask, argcnt );
--- 245,21 ---
  #ifdef _LP64
    // Pointers take 2 slots in 64-bit land
    _return_addr_mask.Insert(OptoReg::add(return_addr(),1));
  #endif
  
!   // Map Java-signature return types into return register-value
!   // machine registers.
!   _return_values_mask = return_values_mask(C->tf());
  
    // ---------------
    // Frame Layout
  
    // Need the method signature to determine the incoming argument types,
    // because the types determine which registers the incoming arguments are
    // in, and this affects the matched code.
!   const TypeTuple *domain = C->tf()->domain_cc();
    uint             argcnt = domain->cnt() - TypeFunc::Parms;
    BasicType *sig_bt        = NEW_RESOURCE_ARRAY( BasicType, argcnt );
    VMRegPair *vm_parm_regs  = NEW_RESOURCE_ARRAY( VMRegPair, argcnt );
    _parm_regs               = NEW_RESOURCE_ARRAY( OptoRegPair, argcnt );
    _calling_convention_mask = NEW_RESOURCE_ARRAY( RegMask, argcnt );
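
The switch from domain() to domain_cc() above suggests the argument setup now walks a per-field (scalarized) calling convention, in line with the return_values_mask() comment that an inline type can be passed back as one value per field; the analogous thing happens on the incoming side. A rough standalone sketch of that expansion, assuming invented BT/Param types and a made-up field layout (the real Valhalla representation also covers details like the IsInit marker mentioned above):

  #include <iostream>
  #include <vector>

  // Toy "basic types" standing in for HotSpot's BasicType.
  enum class BT { Int, Long, Double, Oop };

  struct Param {
    BT bt;
    std::vector<BT> fields;  // non-empty => scalarizable inline type
  };

  // domain_sig-like view: one slot per declared parameter.
  // domain_cc-like view: inline types expanded to one slot per field.
  std::vector<BT> expand_cc(const std::vector<Param>& sig) {
    std::vector<BT> cc;
    for (const Param& p : sig) {
      if (p.fields.empty()) {
        cc.push_back(p.bt);
      } else {
        for (BT f : p.fields) {
          cc.push_back(f);
        }
      }
    }
    return cc;
  }

  int main() {
    // Hypothetical signature: (int, MyValue{long, double})
    std::vector<Param> sig = {
      { BT::Int, {} },
      { BT::Oop, { BT::Long, BT::Double } },
    };
    std::cout << "sig slots: " << sig.size()               // 2
              << ", cc slots: " << expand_cc(sig).size()   // 3
              << '\n';
    return 0;
  }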

*** 546,10 ***
--- 578,11 ---
    // Add in the incoming argument area
    OptoReg::Name init_in = OptoReg::add(_old_SP, C->out_preserve_stack_slots());
    for (i = init_in; i < _in_arg_limit; i = OptoReg::add(i,1)) {
      C->FIRST_STACK_mask().Insert(i);
    }
+ 
    // Add in all bits past the outgoing argument area
    guarantee(RegMask::can_represent_arg(OptoReg::add(_out_arg_limit,-1)),
              "must be able to represent all call arguments in reg mask");
    OptoReg::Name init = _out_arg_limit;
    for (i = init; RegMask::can_represent(i); i = OptoReg::add(i,1)) {

*** 804,16 ***
    assert( start, "Expect a start node" );
  
    // Input RegMask array shared by all Returns.
    // The type for doubles and longs has a count of 2, but
    // there is only 1 returned value
!   uint ret_edge_cnt = TypeFunc::Parms + ((C->tf()->range()->cnt() == TypeFunc::Parms) ? 0 : 1);
    RegMask *ret_rms  = init_input_masks( ret_edge_cnt + soe_cnt, _return_addr_mask, c_frame_ptr_mask );
!   // Returns have 0 or 1 returned values depending on call signature.
!   // Return register is specified by return_value in the AD file.
!   if (ret_edge_cnt > TypeFunc::Parms)
-     ret_rms[TypeFunc::Parms+0] = _return_value_mask;
  
    // Input RegMask array shared by all ForwardExceptions
    uint forw_exc_edge_cnt = TypeFunc::Parms;
    RegMask* forw_exc_rms  = init_input_masks( forw_exc_edge_cnt + soe_cnt, _return_addr_mask, c_frame_ptr_mask );
  
--- 837,15 ---
    assert( start, "Expect a start node" );
  
    // Input RegMask array shared by all Returns.
    // The type for doubles and longs has a count of 2, but
    // there is only 1 returned value
!   uint ret_edge_cnt = C->tf()->range_cc()->cnt();
    RegMask *ret_rms  = init_input_masks( ret_edge_cnt + soe_cnt, _return_addr_mask, c_frame_ptr_mask );
!   for (i = TypeFunc::Parms; i < ret_edge_cnt; i++) {
!     ret_rms[i] = _return_values_mask[i-TypeFunc::Parms];
!   }
  
    // Input RegMask array shared by all ForwardExceptions
    uint forw_exc_edge_cnt = TypeFunc::Parms;
    RegMask* forw_exc_rms  = init_input_masks( forw_exc_edge_cnt + soe_cnt, _return_addr_mask, c_frame_ptr_mask );
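
The per-value copy loop above replaces the old single _return_value_mask assignment: the shared Return input array still starts with the fixed TypeFunc::Parms edges, and one mask per returned value follows. A minimal standalone sketch of just that indexing, assuming kParms stands in for TypeFunc::Parms (here taken to be 5, the fixed control/I_O/memory/frame-pointer/return-address edges) and plain ints stand in for RegMask:

  #include <cassert>
  #include <vector>

  constexpr size_t kParms = 5;   // stand-in for TypeFunc::Parms

  // Mirrors: ret_rms[i] = _return_values_mask[i - TypeFunc::Parms]
  std::vector<int> build_ret_masks(const std::vector<int>& return_value_masks) {
    std::vector<int> ret_rms(kParms + return_value_masks.size(), 0 /* empty mask */);
    for (size_t i = kParms; i < ret_rms.size(); i++) {
      ret_rms[i] = return_value_masks[i - kParms];
    }
    return ret_rms;
  }

  int main() {
    std::vector<int> ret = build_ret_masks({0x1, 0x18});   // two returned values
    assert(ret.size() == kParms + 2);
    assert(ret[kParms] == 0x1 && ret[kParms + 1] == 0x18);
    return 0;
  }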
  

*** 881,11 ***
        default          : ShouldNotReachHere();
      }
    }
  
    // Next unused projection number from Start.
!   int proj_cnt = C->tf()->domain()->cnt();
  
    // Do all the save-on-entry registers.  Make projections from Start for
    // them, and give them a use at the exit points.  To the allocator, they
    // look like incoming register arguments.
    for( i = 0; i < _last_Mach_Reg; i++ ) {
--- 913,11 ---
        default          : ShouldNotReachHere();
      }
    }
  
    // Next unused projection number from Start.
!   int proj_cnt = C->tf()->domain_cc()->cnt();
  
    // Do all the save-on-entry registers.  Make projections from Start for
    // them, and give them a use at the exit points.  To the allocator, they
    // look like incoming register arguments.
    for( i = 0; i < _last_Mach_Reg; i++ ) {

*** 1162,11 ***
                m->as_MachMemBar()->set_adr_type(n->adr_type());
              }
            } else {                  // Nothing the matcher cares about
              if (n->is_Proj() && n->in(0) != nullptr && n->in(0)->is_Multi()) {       // Projections?
                // Convert to machine-dependent projection
!               m = n->in(0)->as_Multi()->match( n->as_Proj(), this );
                NOT_PRODUCT(record_new2old(m, n);)
                if (m->in(0) != nullptr) // m might be top
                  collect_null_checks(m, n);
              } else {                // Else just a regular 'ol guy
                m = n->clone();       // So just clone into new-space
--- 1194,15 ---
                m->as_MachMemBar()->set_adr_type(n->adr_type());
              }
            } else {                  // Nothing the matcher cares about
              if (n->is_Proj() && n->in(0) != nullptr && n->in(0)->is_Multi()) {       // Projections?
                // Convert to machine-dependent projection
!               RegMask* mask = nullptr;
+               if (n->in(0)->is_Call() && n->in(0)->as_Call()->tf()->returns_inline_type_as_fields()) {
+                 mask = return_values_mask(n->in(0)->as_Call()->tf());
+               }
+               m = n->in(0)->as_Multi()->match(n->as_Proj(), this, mask);
                NOT_PRODUCT(record_new2old(m, n);)
                if (m->in(0) != nullptr) // m might be top
                  collect_null_checks(m, n);
              } else {                // Else just a regular 'ol guy
                m = n->clone();       // So just clone into new-space

*** 1302,11 ***
    const TypeTuple *domain;
    ciMethod*        method = nullptr;
    bool             is_method_handle_invoke = false;  // for special kill effects
    if( sfpt->is_Call() ) {
      call = sfpt->as_Call();
!     domain = call->tf()->domain();
      cnt = domain->cnt();
  
      // Match just the call, nothing else
      MachNode *m = match_tree(call);
      if (C->failing())  return nullptr;
--- 1338,11 ---
    const TypeTuple *domain;
    ciMethod*        method = nullptr;
    bool             is_method_handle_invoke = false;  // for special kill effects
    if( sfpt->is_Call() ) {
      call = sfpt->as_Call();
!     domain = call->tf()->domain_cc();
      cnt = domain->cnt();
  
      // Match just the call, nothing else
      MachNode *m = match_tree(call);
      if (C->failing())  return nullptr;

*** 1381,17 ***
    if( call != nullptr && call->is_CallRuntime() )
      out_arg_limit_per_call = OptoReg::add(out_arg_limit_per_call,C->varargs_C_out_slots_killed());
  
  
    // Do the normal argument list (parameters) register masks
!   int argcnt = cnt - TypeFunc::Parms;
    if( argcnt > 0 ) {          // Skip it all if we have no args
      BasicType *sig_bt  = NEW_RESOURCE_ARRAY( BasicType, argcnt );
      VMRegPair *parm_regs = NEW_RESOURCE_ARRAY( VMRegPair, argcnt );
      int i;
      for( i = 0; i < argcnt; i++ ) {
!       sig_bt[i] = domain->field_at(i+TypeFunc::Parms)->basic_type();
      }
      // V-call to pick proper calling convention
      call->calling_convention( sig_bt, parm_regs, argcnt );
  
  #ifdef ASSERT
--- 1417,20 ---
    if( call != nullptr && call->is_CallRuntime() )
      out_arg_limit_per_call = OptoReg::add(out_arg_limit_per_call,C->varargs_C_out_slots_killed());
  
  
    // Do the normal argument list (parameters) register masks
!   // Null entry point is a special case where the target of the call
+   // is in a register.
+   int adj = (call != nullptr && call->entry_point() == nullptr) ? 1 : 0;
+   int argcnt = cnt - TypeFunc::Parms - adj;
    if( argcnt > 0 ) {          // Skip it all if we have no args
      BasicType *sig_bt  = NEW_RESOURCE_ARRAY( BasicType, argcnt );
      VMRegPair *parm_regs = NEW_RESOURCE_ARRAY( VMRegPair, argcnt );
      int i;
      for( i = 0; i < argcnt; i++ ) {
!       sig_bt[i] = domain->field_at(i+TypeFunc::Parms+adj)->basic_type();
      }
      // V-call to pick proper calling convention
      call->calling_convention( sig_bt, parm_regs, argcnt );
  
  #ifdef ASSERT
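
About the new adj offset: when a call has no static entry point (entry_point() == nullptr), the target itself travels in a register, so the first entry past TypeFunc::Parms is skipped and, further down, the _in_rms indexing shifts by the same amount. A small standalone sketch of that shift, with invented ToyCall/argument_tags names:

  #include <cassert>
  #include <vector>

  struct ToyCall {
    bool has_static_entry;      // false models entry_point() == nullptr
    std::vector<char> domain;   // one tag per tuple entry past the fixed Parms
  };

  // Returns the tags describing real arguments, skipping the register-held
  // call target when there is no static entry point (mirrors 'adj').
  std::vector<char> argument_tags(const ToyCall& call) {
    int adj = call.has_static_entry ? 0 : 1;
    return std::vector<char>(call.domain.begin() + adj, call.domain.end());
  }

  int main() {
    ToyCall direct   { true,  { 'I', 'J' } };       // two real args
    ToyCall indirect { false, { 'T', 'I', 'J' } };  // target + two real args
    assert(argument_tags(direct).size() == 2);
    assert(argument_tags(indirect).size() == 2);
    return 0;
  }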

*** 1428,11 ***
      // Return results now can have 2 bits returned.
      // Compute max over all outgoing arguments both per call-site
      // and over the entire method.
      for( i = 0; i < argcnt; i++ ) {
        // Address of incoming argument mask to fill in
!       RegMask *rm = &mcall->_in_rms[i+TypeFunc::Parms];
        VMReg first = parm_regs[i].first();
        VMReg second = parm_regs[i].second();
        if(!first->is_valid() &&
           !second->is_valid()) {
          continue;               // Avoid Halves
--- 1467,11 ---
      // Return results now can have 2 bits returned.
      // Compute max over all outgoing arguments both per call-site
      // and over the entire method.
      for( i = 0; i < argcnt; i++ ) {
        // Address of incoming argument mask to fill in
!       RegMask *rm = &mcall->_in_rms[i+TypeFunc::Parms+adj];
        VMReg first = parm_regs[i].first();
        VMReg second = parm_regs[i].second();
        if(!first->is_valid() &&
           !second->is_valid()) {
          continue;               // Avoid Halves

*** 1449,19 ***
        // Grab first register, adjust stack slots and insert in mask.
        OptoReg::Name reg1 = warp_outgoing_stk_arg(first, begin_out_arg_area, out_arg_limit_per_call );
        if (C->failing()) {
          return nullptr;
        }
!       if (OptoReg::is_valid(reg1))
          rm->Insert( reg1 );
        // Grab second register (if any), adjust stack slots and insert in mask.
        OptoReg::Name reg2 = warp_outgoing_stk_arg(second, begin_out_arg_area, out_arg_limit_per_call );
        if (C->failing()) {
          return nullptr;
        }
!       if (OptoReg::is_valid(reg2))
          rm->Insert( reg2 );
      } // End of for all arguments
    }
  
    // Compute the max stack slot killed by any call.  These will not be
    // available for debug info, and will be used to adjust FIRST_STACK_mask
--- 1488,21 ---
        // Grab first register, adjust stack slots and insert in mask.
        OptoReg::Name reg1 = warp_outgoing_stk_arg(first, begin_out_arg_area, out_arg_limit_per_call );
        if (C->failing()) {
          return nullptr;
        }
!       if (OptoReg::is_valid(reg1)) {
          rm->Insert( reg1 );
+       }
        // Grab second register (if any), adjust stack slots and insert in mask.
        OptoReg::Name reg2 = warp_outgoing_stk_arg(second, begin_out_arg_area, out_arg_limit_per_call );
        if (C->failing()) {
          return nullptr;
        }
!       if (OptoReg::is_valid(reg2)) {
          rm->Insert( reg2 );
+       }
      } // End of for all arguments
    }
  
    // Compute the max stack slot killed by any call.  These will not be
    // available for debug info, and will be used to adjust FIRST_STACK_mask

*** 1473,11 ***
      // Kill the outgoing argument area, including any non-argument holes and
      // any legacy C-killed slots.  Use Fat-Projections to do the killing.
      // Since the max-per-method covers the max-per-call-site and debug info
      // is excluded on the max-per-method basis, debug info cannot land in
      // this killed area.
!     uint r_cnt = mcall->tf()->range()->cnt();
      MachProjNode *proj = new MachProjNode( mcall, r_cnt+10000, RegMask::Empty, MachProjNode::fat_proj );
      if (!RegMask::can_represent_arg(OptoReg::Name(out_arg_limit_per_call-1))) {
        // Bailout. We do not have space to represent all arguments.
        C->record_method_not_compilable("unsupported outgoing calling sequence");
      } else {
--- 1514,11 ---
      // Kill the outgoing argument area, including any non-argument holes and
      // any legacy C-killed slots.  Use Fat-Projections to do the killing.
      // Since the max-per-method covers the max-per-call-site and debug info
      // is excluded on the max-per-method basis, debug info cannot land in
      // this killed area.
!     uint r_cnt = mcall->tf()->range_sig()->cnt();
      MachProjNode *proj = new MachProjNode( mcall, r_cnt+10000, RegMask::Empty, MachProjNode::fat_proj );
      if (!RegMask::can_represent_arg(OptoReg::Name(out_arg_limit_per_call-1))) {
        // Bailout. We do not have space to represent all arguments.
        C->record_method_not_compilable("unsupported outgoing calling sequence");
      } else {

*** 1495,11 ***
      jvms->set_map(sfpt);
    }
  
    // Debug inputs begin just after the last incoming parameter
    assert((mcall == nullptr) || (mcall->jvms() == nullptr) ||
!          (mcall->jvms()->debug_start() + mcall->_jvmadj == mcall->tf()->domain()->cnt()), "");
  
    // Add additional edges.
    if (msfpt->mach_constant_base_node_input() != (uint)-1 && !msfpt->is_MachCallLeaf()) {
      // For these calls we can not add MachConstantBase in expand(), as the
      // ins are not complete then.
--- 1536,11 ---
      jvms->set_map(sfpt);
    }
  
    // Debug inputs begin just after the last incoming parameter
    assert((mcall == nullptr) || (mcall->jvms() == nullptr) ||
!          (mcall->jvms()->debug_start() + mcall->_jvmadj == mcall->tf()->domain_cc()->cnt()), "");
  
    // Add additional edges.
    if (msfpt->mach_constant_base_node_input() != (uint)-1 && !msfpt->is_MachCallLeaf()) {
      // For these calls we can not add MachConstantBase in expand(), as the
      // ins are not complete then.

*** 2183,11 ***
        bool mem_op = false;
        int mem_addr_idx = MemNode::Address;
        if (find_shared_visit(mstack, n, nop, mem_op, mem_addr_idx)) {
          continue;
        }
!       for (int i = n->req() - 1; i >= 0; --i) { // For my children
          Node* m = n->in(i); // Get ith input
          if (m == nullptr) {
            continue;  // Ignore nulls
          }
          if (clone_node(n, m, mstack)) {
--- 2224,11 ---
        bool mem_op = false;
        int mem_addr_idx = MemNode::Address;
        if (find_shared_visit(mstack, n, nop, mem_op, mem_addr_idx)) {
          continue;
        }
!       for (int i = n->len() - 1; i >= 0; --i) { // For my children
          Node* m = n->in(i); // Get ith input
          if (m == nullptr) {
            continue;  // Ignore nulls
          }
          if (clone_node(n, m, mstack)) {

*** 2489,10 ***
--- 2530,17 ---
        n->set_req(1, pair1);
        n->set_req(2, pair2);
        n->del_req(4);
        n->del_req(3);
        break;
+     }
+     case Op_ClearArray: {
+       Node* pair = new BinaryNode(n->in(2), n->in(3));
+       n->set_req(2, pair);
+       n->set_req(3, n->in(4));
+       n->del_req(4);
+       break;
      }
      case Op_VectorCmpMasked:
      case Op_CopySignD:
      case Op_SignumVF:
      case Op_SignumVD:
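
The new Op_ClearArray case follows the matcher's usual pattern of packing extra data inputs into a BinaryNode so the node keeps at most two data operands for matching. A standalone sketch of that reshaping, assuming toy ToyNode/reshape_to_binary names invented here (HotSpot's Node/BinaryNode carry much more state):

  #include <cassert>
  #include <vector>

  struct ToyNode {
    std::vector<const ToyNode*> in;   // data inputs only
  };

  // Mirrors the Op_ClearArray case: pack the first two data inputs into a
  // helper "Binary" node, keep the third as the second operand.
  ToyNode reshape_to_binary(const ToyNode& n, ToyNode& binary_out) {
    assert(n.in.size() == 3);
    binary_out.in = { n.in[0], n.in[1] };     // Binary(in(2), in(3))
    ToyNode reshaped;
    reshaped.in = { &binary_out, n.in[2] };   // set_req(2, pair); set_req(3, in(4)); del_req(4)
    return reshaped;
  }

  int main() {
    ToyNode a, b, c;
    ToyNode n{ { &a, &b, &c } };
    ToyNode pair;
    ToyNode r = reshape_to_binary(n, pair);
    assert(r.in.size() == 2 && r.in[0] == &pair && pair.in[1] == &b);
    return 0;
  }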