< prev index next >

src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp

Print this page
@@ -99,11 +99,14 @@
    // values on its own
  
    int reg_offset_in_bytes(Register r);
    int r0_offset_in_bytes()    { return reg_offset_in_bytes(r0); }
    int rscratch1_offset_in_bytes()    { return reg_offset_in_bytes(rscratch1); }
-   int v0_offset_in_bytes(void)   { return 0; }
+   int v0_offset_in_bytes();
+ 
+   // Total stack size in bytes for saving SVE predicate registers.
+   int total_sve_predicate_in_bytes();
  
    // Capture info about frame layout
    // Note this is only correct when not saving full vectors.
    enum layout {
                  fpu_state_off = 0,

@@ -137,38 +140,64 @@
      }
  #endif
    }
  #endif
  
-   int r0_offset = (slots_per_vect * FloatRegisterImpl::number_of_registers) * BytesPerInt;
+   int r0_offset = v0_offset_in_bytes() + (slots_per_vect * FloatRegisterImpl::number_of_registers) * BytesPerInt;
    return r0_offset + r->encoding() * wordSize;
  }
  
+ int RegisterSaver::v0_offset_in_bytes() {
+   // Byte offset of v0 in the frame pushed by save_live_registers(). When SVE
+   // predicate registers are saved they sit below the floating point registers,
+   // so v0 starts after the predicate area (size converted to slots, then bytes).
+   return (total_sve_predicate_in_bytes() / VMRegImpl::stack_slot_size) * BytesPerInt;
+ }
+ 
+ int RegisterSaver::total_sve_predicate_in_bytes() {
+ #ifdef COMPILER2
+   if (_save_vectors && Matcher::supports_scalable_vector()) {
+     // Slots per predicate register * bytes per slot * saved predicate registers.
+     // That raw total is unlikely to be a multiple of 16, so round it up to 16 bytes.
+     return align_up(Matcher::scalable_predicate_reg_slots() *
+                     VMRegImpl::stack_slot_size *
+                     PRegisterImpl::number_of_saved_registers, 16);
+   }
+ #endif
+   return 0;  // no predicate save area: not COMPILER2, or the check above failed
+ }
+ 
  OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) {
    bool use_sve = false;
    int sve_vector_size_in_bytes = 0;
    int sve_vector_size_in_slots = 0;
+   int sve_predicate_size_in_slots = 0;
+   int total_predicate_in_bytes = total_sve_predicate_in_bytes();
+   int total_predicate_in_slots = total_predicate_in_bytes / VMRegImpl::stack_slot_size;
  
  #ifdef COMPILER2
    use_sve = Matcher::supports_scalable_vector();
-   sve_vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE);
-   sve_vector_size_in_slots = Matcher::scalable_vector_reg_size(T_FLOAT);
+   if (use_sve) {
+     sve_vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE);
+     sve_vector_size_in_slots = Matcher::scalable_vector_reg_size(T_FLOAT);
+     sve_predicate_size_in_slots = Matcher::scalable_predicate_reg_slots();
+   }
  #endif
  
  #if COMPILER2_OR_JVMCI
    if (_save_vectors) {
-     int vect_words = 0;
      int extra_save_slots_per_register = 0;
      // Save upper half of vector registers
      if (use_sve) {
        extra_save_slots_per_register = sve_vector_size_in_slots - FloatRegisterImpl::save_slots_per_register;
      } else {
        extra_save_slots_per_register = FloatRegisterImpl::extra_save_slots_per_neon_register;
      }
-     vect_words = FloatRegisterImpl::number_of_registers * extra_save_slots_per_register /
-                  VMRegImpl::slots_per_word;
-     additional_frame_words += vect_words;
+     int extra_vector_bytes = extra_save_slots_per_register *
+                              VMRegImpl::stack_slot_size *
+                              FloatRegisterImpl::number_of_registers;
+     additional_frame_words += ((extra_vector_bytes + total_predicate_in_bytes) / wordSize);
    }
  #else
    assert(!_save_vectors, "vectors are generated only by C2 and JVMCI");
  #endif
  

@@ -182,11 +211,11 @@
    int frame_size_in_words = frame_size_in_bytes / wordSize;
    *total_frame_words = frame_size_in_words;
  
    // Save Integer and Float registers.
    __ enter();
-   __ push_CPU_state(_save_vectors, use_sve, sve_vector_size_in_bytes);
+   __ push_CPU_state(_save_vectors, use_sve, sve_vector_size_in_bytes, total_predicate_in_bytes);
  
    // Set an oopmap for the call site.  This oopmap will map all
    // oop-registers and debug-info registers as callee-saved.  This
    // will allow deoptimization at this safepoint to find all possible
    // debug-info recordings, as well as let GC find all oops.

@@ -199,35 +228,41 @@
      if (r <= rfp && r != rscratch1 && r != rscratch2) {
        // SP offsets are in 4-byte words.
        // Register slots are 8 bytes wide, 32 floating-point registers.
        int sp_offset = RegisterImpl::max_slots_per_register * i +
                        FloatRegisterImpl::save_slots_per_register * FloatRegisterImpl::number_of_registers;
-       oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset + additional_frame_slots),
-                                 r->as_VMReg());
+       oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset + additional_frame_slots), r->as_VMReg());
      }
    }
  
    for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++) {
      FloatRegister r = as_FloatRegister(i);
      int sp_offset = 0;
      if (_save_vectors) {
-       sp_offset = use_sve ? (sve_vector_size_in_slots * i) :
+       sp_offset = use_sve ? (total_predicate_in_slots + sve_vector_size_in_slots * i) :
                              (FloatRegisterImpl::slots_per_neon_register * i);
      } else {
        sp_offset = FloatRegisterImpl::save_slots_per_register * i;
      }
-     oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset),
-                               r->as_VMReg());
+     oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset), r->as_VMReg());
+   }
+ 
+   if (_save_vectors && use_sve) {
+     for (int i = 0; i < PRegisterImpl::number_of_saved_registers; i++) {
+       PRegister r = as_PRegister(i);
+       int sp_offset = sve_predicate_size_in_slots * i;
+       oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset), r->as_VMReg());
+     }
    }
  
    return oop_map;
  }
  
  void RegisterSaver::restore_live_registers(MacroAssembler* masm) {
  #ifdef COMPILER2
    __ pop_CPU_state(_save_vectors, Matcher::supports_scalable_vector(),
-                    Matcher::scalable_vector_reg_size(T_BYTE));
+                    Matcher::scalable_vector_reg_size(T_BYTE), total_sve_predicate_in_bytes());
  #else
  #if !INCLUDE_JVMCI
    assert(!_save_vectors, "vectors are generated only by C2 and JVMCI");
  #endif
    __ pop_CPU_state(_save_vectors);

@@ -236,12 +271,14 @@
  
  }
  
  // Is vector's size (in bytes) bigger than a size saved by default?
  // 8 bytes vector registers are saved by default on AArch64.
+ // With SVE the minimum supported vector size is 8 bytes, and predicate registers
+ // must be saved for 8-byte vectors too, so they count as wide when UseSVE > 0.
  bool SharedRuntime::is_wide_vector(int size) {
-   return size > 8;
+   return size > 8 || (UseSVE > 0 && size >= 8);
  }
  
  // The java_calling_convention describes stack locations as ideal slots on
  // a frame with no abi restrictions. Since we must observe abi restrictions
  // (like the placement of the register window) the slots must be biased by
< prev index next >