src/hotspot/share/opto/macroArrayCopy.cpp

@@ -21,10 +21,11 @@
   * questions.
   *
   */
  
  #include "precompiled.hpp"
+ #include "ci/ciFlatArrayKlass.hpp"
  #include "gc/shared/barrierSet.hpp"
  #include "gc/shared/tlab_globals.hpp"
  #include "opto/arraycopynode.hpp"
  #include "oops/objArrayKlass.hpp"
  #include "opto/convertnode.hpp"

@@ -137,14 +138,18 @@
    *ctrl = if_fast;
  
    return if_slow;
  }
  
- inline Node* PhaseMacroExpand::generate_slow_guard(Node** ctrl, Node* test, RegionNode* region) {
+ Node* PhaseMacroExpand::generate_slow_guard(Node** ctrl, Node* test, RegionNode* region) {
    return generate_guard(ctrl, test, region, PROB_UNLIKELY_MAG(3));
  }
  
+ inline Node* PhaseMacroExpand::generate_fair_guard(Node** ctrl, Node* test, RegionNode* region) {
+   return generate_guard(ctrl, test, region, PROB_FAIR);
+ }
+ 
  void PhaseMacroExpand::generate_negative_guard(Node** ctrl, Node* index, RegionNode* region) {
    if ((*ctrl)->is_top())
      return;                // already stopped
    if (_igvn.type(index)->higher_equal(TypeInt::POS)) // [0,maxint]
      return;                // index is already adequately typed

@@ -281,10 +286,30 @@
    Node* is_notp = generate_guard(ctrl, bol_le, NULL, PROB_MIN);
  
    return is_notp;
  }
  
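+ // Load the given array's Klass::_layout_helper and produce a Bool that is true
+ // if any of the bits in 'mask' are set. Used by the flat and null-free array
+ // guards below.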
+ Node* PhaseMacroExpand::array_lh_test(Node* array, jint mask) {
+   Node* klass_adr = basic_plus_adr(array, oopDesc::klass_offset_in_bytes());
+   Node* klass = transform_later(LoadKlassNode::make(_igvn, NULL, C->immutable_memory(), klass_adr, TypeInstPtr::KLASS, TypeInstKlassPtr::OBJECT));
+   Node* lh_addr = basic_plus_adr(klass, in_bytes(Klass::layout_helper_offset()));
+   Node* lh_val = _igvn.transform(LoadNode::make(_igvn, NULL, C->immutable_memory(), lh_addr, lh_addr->bottom_type()->is_ptr(), TypeInt::INT, T_INT, MemNode::unordered));
+   Node* masked = transform_later(new AndINode(lh_val, intcon(mask)));
+   Node* cmp = transform_later(new CmpINode(masked, intcon(0)));
+   return transform_later(new BoolNode(cmp, BoolTest::ne));
+ }
+ 
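+ // Branch to 'region' if 'array' is a flat (inline type) array, based on the
+ // flat-array tag bit of its layout helper.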
+ Node* PhaseMacroExpand::generate_flat_array_guard(Node** ctrl, Node* array, RegionNode* region) {
+   assert(UseFlatArray, "can never be flattened");
+   return generate_fair_guard(ctrl, array_lh_test(array, Klass::_lh_array_tag_vt_value_bit_inplace), region);
+ }
+ 
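+ // Branch to 'region' if 'array' is a null-free array, based on the null-free
+ // bit of its layout helper.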
+ Node* PhaseMacroExpand::generate_null_free_array_guard(Node** ctrl, Node* array, RegionNode* region) {
+   assert(EnableValhalla, "can never be null free");
+   return generate_fair_guard(ctrl, array_lh_test(array, Klass::_lh_null_free_bit_inplace), region);
+ }
+ 
  void PhaseMacroExpand::finish_arraycopy_call(Node* call, Node** ctrl, MergeMemNode** mem, const TypePtr* adr_type) {
    transform_later(call);
  
    *ctrl = new ProjNode(call,TypeFunc::Control);
    transform_later(*ctrl);

@@ -333,10 +358,29 @@
    }
  
    return StubRoutines::select_arraycopy_function(t, aligned, disjoint, name, dest_uninitialized);
  }
  
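+ // Returns true if zeroing of the newly allocated 'dest' array can be elided
+ // because the arraycopy takes responsibility for initializing it (see
+ // ReduceBulkZeroing): requires a tightly coupled allocation whose length is
+ // not known to be non-positive, and a source different from the destination.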
+ bool PhaseMacroExpand::can_try_zeroing_elimination(AllocateArrayNode* alloc,
+                                                    Node* src,
+                                                    Node* dest) const {
+   const TypeAryPtr* top_dest = _igvn.type(dest)->isa_aryptr();
+ 
+   if (top_dest != NULL) {
+     if (top_dest->klass() == NULL) {
+       return false;
+     }
+   }
+ 
+   return ReduceBulkZeroing
+     && !(UseTLAB && ZeroTLAB) // pointless if already zeroed
+     && !src->eqv_uncast(dest)
+     && alloc != NULL
+     && _igvn.find_int_con(alloc->in(AllocateNode::ALength), 1) > 0
+     && alloc->maybe_set_complete(&_igvn);
+ }
+ 
  #define XTOP LP64_ONLY(COMMA top())
  
  // Generate an optimized call to arraycopy.
  // Caller must guard against non-arrays.
  // Caller must determine a common array basic-type for both arrays.

@@ -375,10 +419,11 @@
                                             const TypePtr* adr_type,
                                             BasicType basic_elem_type,
                                             Node* src,  Node* src_offset,
                                             Node* dest, Node* dest_offset,
                                             Node* copy_length,
+                                            Node* dest_length,
                                             bool disjoint_bases,
                                             bool length_never_negative,
                                             RegionNode* slow_region) {
    if (slow_region == NULL) {
      slow_region = new RegionNode(1);

@@ -386,10 +431,12 @@
    }
  
    Node* original_dest = dest;
    bool  dest_needs_zeroing   = false;
    bool  acopy_to_uninitialized = false;
+   Node* default_value = NULL;
+   Node* raw_default_value = NULL;
  
    // See if this is the initialization of a newly-allocated array.
    // If so, we will take responsibility here for initializing it to zero.
    // (Note:  Because tightly_coupled_allocation performs checks on the
    // out-edges of the dest, we need to avoid making derived pointers

@@ -416,10 +463,12 @@
        // From this point on, every exit path is responsible for
        // initializing any non-copied parts of the object to zero.
        // Also, if this flag is set we make sure that arraycopy interacts properly
        // with G1, eliding pre-barriers. See CR 6627983.
        dest_needs_zeroing = true;
+       default_value = alloc->in(AllocateNode::DefaultValue);
+       raw_default_value = alloc->in(AllocateNode::RawDefaultValue);
      } else {
        // dest_needs_zeroing = false;
      }
    } else {
      // No zeroing elimination needed here.

@@ -485,18 +534,19 @@
      }
  
      // copy_length is 0.
      if (dest_needs_zeroing) {
        assert(!local_ctrl->is_top(), "no ctrl?");
-       Node* dest_length = alloc->in(AllocateNode::ALength);
        if (copy_length->eqv_uncast(dest_length)
            || _igvn.find_int_con(dest_length, 1) <= 0) {
          // There is no zeroing to do. No need for a secondary raw memory barrier.
        } else {
          // Clear the whole thing since there are no source elements to copy.
          generate_clear_array(local_ctrl, local_mem,
-                              adr_type, dest, basic_elem_type,
+                              adr_type, dest,
+                              default_value, raw_default_value,
+                              basic_elem_type,
                               intcon(0), NULL,
                               alloc->in(AllocateNode::AllocSize));
          // Use a secondary InitializeNode as raw memory barrier.
          // Currently it is needed only on this path since other
          // paths have stub or runtime calls as raw memory barriers.

@@ -523,17 +573,18 @@
    if (!(*ctrl)->is_top() && dest_needs_zeroing) {
      // We have to initialize the *uncopied* part of the array to zero.
      // The copy destination is the slice dest[off..off+len].  The other slices
      // are dest_head = dest[0..off] and dest_tail = dest[off+len..dest.length].
      Node* dest_size   = alloc->in(AllocateNode::AllocSize);
-     Node* dest_length = alloc->in(AllocateNode::ALength);
      Node* dest_tail   = transform_later( new AddINode(dest_offset, copy_length));
  
      // If there is a head section that needs zeroing, do it now.
      if (_igvn.find_int_con(dest_offset, -1) != 0) {
        generate_clear_array(*ctrl, mem,
-                            adr_type, dest, basic_elem_type,
+                            adr_type, dest,
+                            default_value, raw_default_value,
+                            basic_elem_type,
                             intcon(0), dest_offset,
                             NULL);
      }
  
      // Next, perform a dynamic check on the tail length.

@@ -578,21 +629,25 @@
      if (tail_ctl != NULL) {
        Node* notail_ctl = (*ctrl)->is_top() ? NULL : *ctrl;
        *ctrl = tail_ctl;
        if (notail_ctl == NULL) {
          generate_clear_array(*ctrl, mem,
-                              adr_type, dest, basic_elem_type,
+                              adr_type, dest,
+                              default_value, raw_default_value,
+                              basic_elem_type,
                               dest_tail, NULL,
                               dest_size);
        } else {
          // Make a local merge.
          Node* done_ctl = transform_later(new RegionNode(3));
          Node* done_mem = transform_later(new PhiNode(done_ctl, Type::MEMORY, adr_type));
          done_ctl->init_req(1, notail_ctl);
          done_mem->init_req(1, mem->memory_at(alias_idx));
          generate_clear_array(*ctrl, mem,
-                              adr_type, dest, basic_elem_type,
+                              adr_type, dest,
+                              default_value, raw_default_value,
+                              basic_elem_type,
                               dest_tail, NULL,
                               dest_size);
          done_ctl->init_req(2, *ctrl);
          done_mem->init_req(2, mem->memory_at(alias_idx));
          *ctrl = done_ctl;

@@ -766,11 +821,13 @@
      // Generate the slow path, if needed.
      local_mem->set_memory_at(alias_idx, slow_mem);
  
      if (dest_needs_zeroing) {
        generate_clear_array(local_ctrl, local_mem,
-                            adr_type, dest, basic_elem_type,
+                            adr_type, dest,
+                            default_value, raw_default_value,
+                            basic_elem_type,
                             intcon(0), NULL,
                             alloc->in(AllocateNode::AllocSize));
      }
  
      local_mem = generate_slow_arraycopy(ac,

@@ -819,24 +876,28 @@
      // Do not let stores that initialize this object be reordered with
      // a subsequent store that would make this object accessible by
      // other threads.
      insert_mem_bar(ctrl, &out_mem, Op_MemBarStoreStore);
    } else {
+     // Do not let reads from the destination float above the arraycopy.
+     // Since we cannot type the arrays, we don't know which slices
+     // might be affected.  We could restrict this barrier only to those
+     // memory slices which pertain to array elements--but don't bother.
      insert_mem_bar(ctrl, &out_mem, Op_MemBarCPUOrder);
    }
  
    if (is_partial_array_copy) {
      assert((*ctrl)->is_Proj(), "MemBar control projection");
      assert((*ctrl)->in(0)->isa_MemBar(), "MemBar node");
      (*ctrl)->in(0)->isa_MemBar()->set_trailing_partial_array_copy();
    }
  
-   _igvn.replace_node(_callprojs.fallthrough_memproj, out_mem);
-   if (_callprojs.fallthrough_ioproj != NULL) {
-     _igvn.replace_node(_callprojs.fallthrough_ioproj, *io);
+   _igvn.replace_node(_callprojs->fallthrough_memproj, out_mem);
+   if (_callprojs->fallthrough_ioproj != NULL) {
+     _igvn.replace_node(_callprojs->fallthrough_ioproj, *io);
    }
-   _igvn.replace_node(_callprojs.fallthrough_catchproj, *ctrl);
+   _igvn.replace_node(_callprojs->fallthrough_catchproj, *ctrl);
  
  #ifdef ASSERT
    const TypeOopPtr* dest_t = _igvn.type(dest)->is_oopptr();
    if (dest_t->is_known_instance() && !is_partial_array_copy) {
      ArrayCopyNode* ac = NULL;

@@ -872,10 +933,12 @@
  // If dest_size is non-NULL, zeroing extends to the end of the object.
  // If slice_len is non-NULL, the slice_idx value must be a constant.
  void PhaseMacroExpand::generate_clear_array(Node* ctrl, MergeMemNode* merge_mem,
                                              const TypePtr* adr_type,
                                              Node* dest,
+                                             Node* val,
+                                             Node* raw_val,
                                              BasicType basic_elem_type,
                                              Node* slice_idx,
                                              Node* slice_len,
                                              Node* dest_size) {
    // one or the other but not both of slice_len and dest_size:

@@ -887,10 +950,11 @@
  
    // operate on this memory slice:
    Node* mem = merge_mem->memory_at(alias_idx); // memory slice to operate on
  
    // scaling and rounding of indexes:
+   assert(basic_elem_type != T_INLINE_TYPE, "should have been converted to a basic type copy");
    int scale = exact_log2(type2aelembytes(basic_elem_type));
    int abase = arrayOopDesc::base_offset_in_bytes(basic_elem_type);
    int clear_low = (-1 << scale) & (BytesPerInt  - 1);
    int bump_bit  = (-1 << scale) & BytesPerInt;
  

@@ -910,16 +974,16 @@
                         BytesPerLong);
    }
  
    if (start_con >= 0 && end_con >= 0) {
      // Constant start and end.  Simple.
-     mem = ClearArrayNode::clear_memory(ctrl, mem, dest,
+     mem = ClearArrayNode::clear_memory(ctrl, mem, dest, val, raw_val,
                                         start_con, end_con, &_igvn);
    } else if (start_con >= 0 && dest_size != top()) {
      // Constant start, pre-rounded end after the tail of the array.
      Node* end = dest_size;
-     mem = ClearArrayNode::clear_memory(ctrl, mem, dest,
+     mem = ClearArrayNode::clear_memory(ctrl, mem, dest, val, raw_val,
                                         start_con, end, &_igvn);
    } else if (start_con >= 0 && slice_len != top()) {
      // Constant start, non-constant end.  End needs rounding up.
      // End offset = round_up(abase + ((slice_idx_con + slice_len) << scale), 8)
      intptr_t end_base  = abase + (slice_idx_con << scale);

@@ -928,11 +992,11 @@
      if (scale != 0)
        end = transform_later(new LShiftXNode(end, intcon(scale) ));
      end_base += end_round;
      end = transform_later(new AddXNode(end, MakeConX(end_base)) );
      end = transform_later(new AndXNode(end, MakeConX(~end_round)) );
-     mem = ClearArrayNode::clear_memory(ctrl, mem, dest,
+     mem = ClearArrayNode::clear_memory(ctrl, mem, dest, val, raw_val,
                                         start_con, end, &_igvn);
    } else if (start_con < 0 && dest_size != top()) {
      // Non-constant start, pre-rounded end after the tail of the array.
      // This is almost certainly a "round-to-end" operation.
      Node* start = slice_idx;

@@ -957,16 +1021,22 @@
        start = transform_later(new AndXNode(start, MakeConX(~to_clear)) );
        if (bump_bit != 0) {
          // Store a zero to the immediately preceding jint:
          Node* x1 = transform_later(new AddXNode(start, MakeConX(-bump_bit)) );
          Node* p1 = basic_plus_adr(dest, x1);
-         mem = StoreNode::make(_igvn, ctrl, mem, p1, adr_type, intcon(0), T_INT, MemNode::unordered);
+         if (val == NULL) {
+           assert(raw_val == NULL, "raw_val must be NULL when val is NULL");
+           mem = StoreNode::make(_igvn, ctrl, mem, p1, adr_type, intcon(0), T_INT, MemNode::unordered);
+         } else {
+           assert(_igvn.type(val)->isa_narrowoop(), "should be narrow oop");
+           mem = new StoreNNode(ctrl, mem, p1, adr_type, val, MemNode::unordered);
+         }
          mem = transform_later(mem);
        }
      }
      Node* end = dest_size; // pre-rounded
-     mem = ClearArrayNode::clear_memory(ctrl, mem, dest,
+     mem = ClearArrayNode::clear_memory(ctrl, mem, dest, raw_val,
                                         start, end, &_igvn);
    } else {
      // Non-constant start, unrounded non-constant end.
      // (Nobody zeroes a random midsection of an array using this routine.)
      ShouldNotReachHere();       // fix caller

@@ -1078,15 +1148,15 @@
  
    call->set_cnt(PROB_UNLIKELY_MAG(4));  // Same effect as RC_UNCOMMON.
    _igvn.replace_node(ac, call);
    transform_later(call);
  
-   call->extract_projections(&_callprojs, false /*separate_io_proj*/, false /*do_asserts*/);
-   *ctrl = _callprojs.fallthrough_catchproj->clone();
+   _callprojs = call->extract_projections(false /*separate_io_proj*/, false /*do_asserts*/);
+   *ctrl = _callprojs->fallthrough_catchproj->clone();
    transform_later(*ctrl);
  
-   Node* m = _callprojs.fallthrough_memproj->clone();
+   Node* m = _callprojs->fallthrough_memproj->clone();
    transform_later(m);
  
    uint alias_idx = C->get_alias_index(adr_type);
    MergeMemNode* out_mem;
    if (alias_idx != Compile::AliasIdxBot) {

@@ -1097,12 +1167,12 @@
    }
    transform_later(out_mem);
  
    // When src is negative and arraycopy is before an infinite loop,_callprojs.fallthrough_ioproj
    // could be NULL. Skip clone and update NULL fallthrough_ioproj.
-   if (_callprojs.fallthrough_ioproj != NULL) {
-     *io = _callprojs.fallthrough_ioproj->clone();
+   if (_callprojs->fallthrough_ioproj != NULL) {
+     *io = _callprojs->fallthrough_ioproj->clone();
      transform_later(*io);
    } else {
      *io = NULL;
    }
  

@@ -1230,10 +1300,46 @@
      return true;
    }
    return false;
  }
  
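+ // A flat array copy is expanded as a copy of primitive elements: pick the
+ // widest basic type matching the flat element size (scaling length and offsets
+ // when elements are larger than 8 bytes) and copy over the raw memory slice.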
+ const TypePtr* PhaseMacroExpand::adjust_for_flat_array(const TypeAryPtr* top_dest, Node*& src_offset,
+                                                        Node*& dest_offset, Node*& length, BasicType& dest_elem,
+                                                        Node*& dest_length) {
+ #ifdef ASSERT
+   BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
+   bool needs_barriers = top_dest->elem()->inline_klass()->contains_oops() &&
+     bs->array_copy_requires_gc_barriers(dest_length != NULL, T_OBJECT, false, false, BarrierSetC2::Optimization);
+   assert(!needs_barriers || StressReflectiveCode, "Flat arraycopy would require GC barriers");
+ #endif
+   int elem_size = top_dest->klass()->as_flat_array_klass()->element_byte_size();
+   if (elem_size >= 8) {
+     if (elem_size > 8) {
+       // treat as array of long but scale length, src offset and dest offset
+       assert((elem_size % 8) == 0, "not a power of 2?");
+       int factor = elem_size / 8;
+       length = transform_later(new MulINode(length, intcon(factor)));
+       src_offset = transform_later(new MulINode(src_offset, intcon(factor)));
+       dest_offset = transform_later(new MulINode(dest_offset, intcon(factor)));
+       if (dest_length != NULL) {
+         dest_length = transform_later(new MulINode(dest_length, intcon(factor)));
+       }
+       elem_size = 8;
+     }
+     dest_elem = T_LONG;
+   } else if (elem_size == 4) {
+     dest_elem = T_INT;
+   } else if (elem_size == 2) {
+     dest_elem = T_CHAR;
+   } else if (elem_size == 1) {
+     dest_elem = T_BYTE;
+   } else {
+     ShouldNotReachHere();
+   }
+   return TypeRawPtr::BOTTOM;
+ }
+ 
  #undef XTOP
  
  void PhaseMacroExpand::expand_arraycopy_node(ArrayCopyNode *ac) {
    Node* ctrl = ac->in(TypeFunc::Control);
    Node* io = ac->in(TypeFunc::I_O);

@@ -1247,29 +1353,60 @@
    if (ac->is_clonebasic()) {
      BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
      bs->clone_at_expansion(this, ac);
      return;
    } else if (ac->is_copyof() || ac->is_copyofrange() || ac->is_clone_oop_array()) {
+     const Type* src_type = _igvn.type(src);
+     const Type* dest_type = _igvn.type(dest);
+     const TypeAryPtr* top_src = src_type->isa_aryptr();
+     const TypeAryPtr* top_dest = dest_type->isa_aryptr();
+     BasicType dest_elem = T_OBJECT;
+     if (top_dest != NULL && top_dest->klass() != NULL) {
+       dest_elem = top_dest->klass()->as_array_klass()->element_type()->basic_type();
+     }
+     if (dest_elem == T_ARRAY || (dest_elem == T_INLINE_TYPE && top_dest->klass()->is_obj_array_klass())) {
+       dest_elem = T_OBJECT;
+     }
+     if (top_src != NULL && top_src->is_flat()) {
+       // If src is flat, dest is guaranteed to be flat as well
+       dest_elem = T_INLINE_TYPE;
+       top_dest = top_src;
+     }
+ 
      Node* mem = ac->in(TypeFunc::Memory);
      merge_mem = MergeMemNode::make(mem);
      transform_later(merge_mem);
  
      AllocateArrayNode* alloc = NULL;
+     Node* dest_length = NULL;
      if (ac->is_alloc_tightly_coupled()) {
        alloc = AllocateArrayNode::Ideal_array_allocation(dest, &_igvn);
        assert(alloc != NULL, "expect alloc");
+       dest_length = alloc->in(AllocateNode::ALength);
      }
  
-     const TypePtr* adr_type = _igvn.type(dest)->is_oopptr()->add_offset(Type::OffsetBot);
-     if (ac->_dest_type != TypeOopPtr::BOTTOM) {
-       adr_type = ac->_dest_type->add_offset(Type::OffsetBot)->is_ptr();
+     const TypePtr* adr_type = NULL;
+     if (dest_elem == T_INLINE_TYPE) {
+       assert(dest_length != NULL || StressReflectiveCode, "must be tightly coupled");
+       // Copy to a flat array modifies multiple memory slices. Conservatively insert a barrier
+       // on all slices to prevent writes into the source from floating below the arraycopy.
+       insert_mem_bar(&ctrl, &mem, Op_MemBarCPUOrder);
+       adr_type = adjust_for_flat_array(top_dest, src_offset, dest_offset, length, dest_elem, dest_length);
+     } else {
+       adr_type = dest_type->is_oopptr()->add_offset(Type::OffsetBot);
+       if (ac->_dest_type != TypeOopPtr::BOTTOM) {
+         adr_type = ac->_dest_type->add_offset(Type::OffsetBot)->is_ptr();
+       }
+       if (ac->_src_type != ac->_dest_type) {
+         adr_type = TypeRawPtr::BOTTOM;
+       }
      }
      generate_arraycopy(ac, alloc, &ctrl, merge_mem, &io,
-                        adr_type, T_OBJECT,
+                        adr_type, dest_elem,
                         src, src_offset, dest, dest_offset, length,
+                        dest_length,
                         true, !ac->is_copyofrange());
- 
      return;
    }
  
    AllocateArrayNode* alloc = NULL;
    if (ac->is_alloc_tightly_coupled()) {

@@ -1296,16 +1433,18 @@
      dest_elem = top_dest->klass()->as_array_klass()->element_type()->basic_type();
    }
    if (top_src != NULL && top_src->klass() != NULL) {
      src_elem = top_src->klass()->as_array_klass()->element_type()->basic_type();
    }
-   if (is_reference_type(src_elem))  src_elem  = T_OBJECT;
-   if (is_reference_type(dest_elem)) dest_elem = T_OBJECT;
+   if (src_elem == T_ARRAY || (src_elem == T_INLINE_TYPE && top_src->klass()->is_obj_array_klass())) {
+     src_elem = T_OBJECT;
+   }
+   if (dest_elem == T_ARRAY || (dest_elem == T_INLINE_TYPE && top_dest->klass()->is_obj_array_klass())) {
+     dest_elem = T_OBJECT;
+   }
  
-   if (ac->is_arraycopy_validated() &&
-       dest_elem != T_CONFLICT &&
-       src_elem == T_CONFLICT) {
+   if (ac->is_arraycopy_validated() && dest_elem != T_CONFLICT && src_elem == T_CONFLICT) {
      src_elem = dest_elem;
    }
  
    if (src_elem == T_CONFLICT || dest_elem == T_CONFLICT) {
      // Conservatively insert a memory barrier on all memory slices.

@@ -1320,33 +1459,42 @@
  
      // Call StubRoutines::generic_arraycopy stub.
      Node* mem = generate_arraycopy(ac, NULL, &ctrl, merge_mem, &io,
                                     TypeRawPtr::BOTTOM, T_CONFLICT,
                                     src, src_offset, dest, dest_offset, length,
+                                    NULL,
                                     // If a  negative length guard was generated for the ArrayCopyNode,
                                     // the length of the array can never be negative.
                                     false, ac->has_negative_length_guard());
      return;
    }
  
-   assert(!ac->is_arraycopy_validated() || (src_elem == dest_elem && dest_elem != T_VOID), "validated but different basic types");
+   assert(!ac->is_arraycopy_validated() || (src_elem == dest_elem && dest_elem != T_VOID) ||
+          (src_elem == T_INLINE_TYPE && StressReflectiveCode), "validated but different basic types");
  
    // (2) src and dest arrays must have elements of the same BasicType
    // Figure out the size and type of the elements we will be copying.
-   if (src_elem != dest_elem || dest_elem == T_VOID) {
+   //
+   // We have no stub to copy flattened inline type arrays with oop
+   // fields if we need to emit write barriers.
+   //
+   BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
+   if (src_elem != dest_elem || dest_elem == T_VOID ||
+       (dest_elem == T_INLINE_TYPE && top_dest->elem()->inline_klass()->contains_oops() &&
+        bs->array_copy_requires_gc_barriers(alloc != NULL, T_OBJECT, false, false, BarrierSetC2::Optimization))) {
      // The component types are not the same or are not recognized.  Punt.
      // (But, avoid the native method wrapper to JVM_ArrayCopy.)
      {
        Node* mem = ac->in(TypeFunc::Memory);
        merge_mem = generate_slow_arraycopy(ac, &ctrl, mem, &io, TypePtr::BOTTOM, src, src_offset, dest, dest_offset, length, false);
      }
  
-     _igvn.replace_node(_callprojs.fallthrough_memproj, merge_mem);
-     if (_callprojs.fallthrough_ioproj != NULL) {
-       _igvn.replace_node(_callprojs.fallthrough_ioproj, io);
+     _igvn.replace_node(_callprojs->fallthrough_memproj, merge_mem);
+     if (_callprojs->fallthrough_ioproj != NULL) {
+       _igvn.replace_node(_callprojs->fallthrough_ioproj, io);
      }
-     _igvn.replace_node(_callprojs.fallthrough_catchproj, ctrl);
+     _igvn.replace_node(_callprojs->fallthrough_catchproj, ctrl);
      return;
    }
  
    //---------------------------------------------------------------------------
    // We will make a fast path for this call to arraycopy.

@@ -1359,15 +1507,13 @@
    // (6) length must not be negative.
    // (7) src_offset + length must not exceed length of src.
    // (8) dest_offset + length must not exceed length of dest.
    // (9) each element of an oop array must be assignable
  
-   {
-     Node* mem = ac->in(TypeFunc::Memory);
-     merge_mem = MergeMemNode::make(mem);
-     transform_later(merge_mem);
-   }
+   Node* mem = ac->in(TypeFunc::Memory);
+   merge_mem = MergeMemNode::make(mem);
+   transform_later(merge_mem);
  
    RegionNode* slow_region = new RegionNode(1);
    transform_later(slow_region);
  
    if (!ac->is_arraycopy_validated()) {

@@ -1404,21 +1550,46 @@
                           alen,
                           slow_region);
  
      // (9) each element of an oop array must be assignable
      // The generate_arraycopy subroutine checks this.
+ 
+     // Handle inline type arrays
+     if (!top_src->is_flat()) {
+       if (UseFlatArray && !top_src->is_not_flat()) {
+         // Src might be flat and dest might not be flat. Go to the slow path if src is flat.
+         generate_flat_array_guard(&ctrl, src, slow_region);
+       }
+       if (EnableValhalla) {
+         // No validation. The subtype check emitted at macro expansion time will not go to the slow
+         // path but call checkcast_arraycopy which can not handle flat/null-free inline type arrays.
+         generate_null_free_array_guard(&ctrl, dest, slow_region);
+       }
+     } else {
+       assert(top_dest->is_flat(), "dest array must be flat");
+     }
    }
+ 
    // This is where the memory effects are placed:
    const TypePtr* adr_type = NULL;
-   if (ac->_dest_type != TypeOopPtr::BOTTOM) {
+   Node* dest_length = (alloc != NULL) ? alloc->in(AllocateNode::ALength) : NULL;
+ 
+   if (dest_elem == T_INLINE_TYPE) {
+     // Copy to a flat array modifies multiple memory slices. Conservatively insert a barrier
+     // on all slices to prevent writes into the source from floating below the arraycopy.
+     insert_mem_bar(&ctrl, &mem, Op_MemBarCPUOrder);
+     adr_type = adjust_for_flat_array(top_dest, src_offset, dest_offset, length, dest_elem, dest_length);
+   } else if (ac->_dest_type != TypeOopPtr::BOTTOM) {
      adr_type = ac->_dest_type->add_offset(Type::OffsetBot)->is_ptr();
    } else {
      adr_type = TypeAryPtr::get_array_body_type(dest_elem);
    }
  
    generate_arraycopy(ac, alloc, &ctrl, merge_mem, &io,
                       adr_type, dest_elem,
                       src, src_offset, dest, dest_offset, length,
+                      dest_length,
                       // If a  negative length guard was generated for the ArrayCopyNode,
                       // the length of the array can never be negative.
-                      false, ac->has_negative_length_guard(), slow_region);
+                      false, ac->has_negative_length_guard(),
+                      slow_region);
  }