1 /*
  2  * Copyright (c) 2006, 2025, Oracle and/or its affiliates. All rights reserved.
  3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4  *
  5  * This code is free software; you can redistribute it and/or modify it
  6  * under the terms of the GNU General Public License version 2 only, as
  7  * published by the Free Software Foundation.
  8  *
  9  * This code is distributed in the hope that it will be useful, but WITHOUT
 10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 12  * version 2 for more details (a copy is included in the LICENSE file that
 13  * accompanied this code).
 14  *
 15  * You should have received a copy of the GNU General Public License version
 16  * 2 along with this work; if not, write to the Free Software Foundation,
 17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 18  *
 19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 20  * or visit www.oracle.com if you need additional information or have any
 21  * questions.
 22  *
 23  */
 24 
 25 #include "runtime/sharedRuntime.hpp"
 26 #include "utilities/align.hpp"
 27 #include "utilities/byteswap.hpp"
 28 #include "utilities/copy.hpp"
 29 #include "utilities/debug.hpp"
 30 
 31 
 32 // Copy bytes; larger units are filled atomically if everything is aligned.
 33 void Copy::conjoint_memory_atomic(const void* from, void* to, size_t size) {
 34   uintptr_t bits = (uintptr_t) from | (uintptr_t) to | (uintptr_t) size;
 35 
 36   // (Note:  We could improve performance by ignoring the low bits of size,
 37   // and putting a short cleanup loop after each bulk copy loop.
 38   // There are plenty of other ways to make this faster also,
 39   // and it's a slippery slope.  For now, let's keep this code simple
 40   // since the simplicity helps clarify the atomicity semantics of
 41   // this operation.  There are also CPU-specific assembly versions
 42   // which may or may not want to include such optimizations.)
 43 
 44   if (bits % sizeof(jlong) == 0) {
 45     Copy::conjoint_jlongs_atomic((const jlong*) from, (jlong*) to, size / sizeof(jlong));
 46   } else if (bits % sizeof(jint) == 0) {
 47     Copy::conjoint_jints_atomic((const jint*) from, (jint*) to, size / sizeof(jint));
 48   } else if (bits % sizeof(jshort) == 0) {
 49     Copy::conjoint_jshorts_atomic((const jshort*) from, (jshort*) to, size / sizeof(jshort));
 50   } else {
 51     // Not aligned, so no need to be atomic.
 52     Copy::conjoint_jbytes((const void*) from, (void*) to, size);
 53   }
 54 }
 55 
// Expands to code that copies the largest prefix of the remaining region
// whose length is a multiple of sizeof(t), provided 'bits' shows that both
// cursors are t-aligned. Advances 'cursor_from'/'cursor_to' and shrinks
// 'remaining_bytes' accordingly. Relies on the enclosing scope declaring
// 'bits', 'cursor_from', 'cursor_to' and 'remaining_bytes'; the token paste
// forms a call to Copy::conjoint_<t>s_atomic.
#define COPY_ALIGNED_SEGMENT(t)                                                \
  if (bits % sizeof(t) == 0) {                                                 \
    size_t segment = remaining_bytes / sizeof(t);                              \
    if (segment > 0) {                                                         \
      Copy::conjoint_##t##s_atomic((const t*)cursor_from, (t*)cursor_to,       \
                                   segment);                                   \
      remaining_bytes -= segment * sizeof(t);                                  \
      cursor_from = (void*)(((char*)cursor_from) + segment * sizeof(t));       \
      cursor_to = (void*)(((char*)cursor_to) + segment * sizeof(t));           \
    }                                                                          \
  }

// Copies 'size' bytes from 'from' to 'to', using for each segment the widest
// unit that the (shared) alignment of both addresses permits, so each unit is
// written with one atomic store.
void Copy::copy_value_content(const void* from, void* to, size_t size) {
  // The alignment of the containing object must satisfy the alignment of all
  // its fields, as such we can safely copy atomically starting at the largest
  // atomic size which the payloads are aligned to. Any trailing payload smaller
  // than this atomic size must have a lower alignment requirement. This
  // property holds recursively down to the smallest atomic size.
  const uintptr_t bits = (uintptr_t)from | (uintptr_t)to;
  const void* cursor_from = from;
  void* cursor_to = to;
  size_t remaining_bytes = size;
  // Widest unit first; each expansion consumes as much of the remainder as
  // its unit size divides, leaving a strictly smaller tail for the next one.
  COPY_ALIGNED_SEGMENT(jlong)
  COPY_ALIGNED_SEGMENT(jint)
  COPY_ALIGNED_SEGMENT(jshort)
  // Anything left is smaller than a jshort; a plain byte copy suffices.
  if (remaining_bytes > 0) {
    Copy::conjoint_jbytes(cursor_from, cursor_to, remaining_bytes);
  }
}

#undef COPY_ALIGNED_SEGMENT
 87 
 88 template <typename T>
 89 static void clear_value_content_helper(uintptr_t* to_cursor_addr, size_t* remaining_bytes_addr) {
 90   uintptr_t& to_cursor = *to_cursor_addr;
 91   size_t& remaining_bytes = *remaining_bytes_addr;
 92 
 93   if (to_cursor % sizeof(T) == 0 && remaining_bytes >= sizeof(T)) {
 94     const size_t copy_bytes = align_down(remaining_bytes, sizeof(T));
 95     Copy::fill_to_memory_atomic((void*)to_cursor, copy_bytes);
 96     to_cursor += copy_bytes;
 97     remaining_bytes -= copy_bytes;
 98   }
 99 }
100 
101 void Copy::clear_value_content(void* to, size_t size) {
102   // The alignment of the containing object must satisfy the alignment of all
103   // its fields, as such we can safely copy atomically starting at the largest
104   // atomic size which the payloads are aligned to. Any trailing payload smaller
105   // than this atomic size must have a lower alignment requirement. This
106   // property holds recursively down to the smallest atomic size.
107   uintptr_t to_cursor = uintptr_t(to);
108   size_t remaining_bytes = size;
109 
110   // Clear jlong alignend segments
111   clear_value_content_helper<jlong>(&to_cursor, &remaining_bytes);
112 
113   // Clear jint alignend segments
114   clear_value_content_helper<jint>(&to_cursor, &remaining_bytes);
115 
116   // Clear jshort alignend segments
117   clear_value_content_helper<jshort>(&to_cursor, &remaining_bytes);
118 
119   // Clear remaining bytes
120   clear_value_content_helper<jbyte>(&to_cursor, &remaining_bytes);
121 
122   postcond(remaining_bytes == 0);
123   postcond(to_cursor - size == uintptr_t(to));
124 }
125 
// Helper for Copy::conjoint_copy/conjoint_swap: copies sequences of 2-, 4- or
// 8-byte elements, optionally byte-swapping each element, choosing a copy
// direction that is safe when the source and destination ranges overlap.
class CopySwap : AllStatic {
public:
  /**
   * Copy and optionally byte swap elements
   *
   * <swap> - true if elements should be byte swapped
   *
   * @param src address of source
   * @param dst address of destination
   * @param byte_count number of bytes to copy
   * @param elem_size size of the elements to copy-swap
   */
  template<bool swap>
  static void conjoint_swap_if_needed(const void* src, void* dst, size_t byte_count, size_t elem_size) {
    assert(src != nullptr, "address must not be null");
    assert(dst != nullptr, "address must not be null");
    assert(elem_size == 2 || elem_size == 4 || elem_size == 8,
           "incorrect element size: %zu", elem_size);
    assert(is_aligned(byte_count, elem_size),
           "byte_count %zu must be multiple of element size %zu", byte_count, elem_size);

    address src_end = (address)src + byte_count;

    // If dst lies outside [src, src_end), a forward (RIGHT) copy cannot
    // overwrite source elements that have not yet been read; otherwise the
    // ranges overlap with dst above src, so copy backwards (LEFT).
    if (dst <= src || dst >= src_end) {
      do_conjoint_swap<RIGHT,swap>(src, dst, byte_count, elem_size);
    } else {
      do_conjoint_swap<LEFT,swap>(src, dst, byte_count, elem_size);
    }
  }

private:
  enum CopyDirection {
    RIGHT, // lower -> higher address
    LEFT   // higher -> lower address
  };

  /**
   * Copy and byte swap elements
   *
   * <T> - type of element to copy
   * <D> - copy direction
   * <is_src_aligned> - true if src argument is aligned to element size
   * <is_dst_aligned> - true if dst argument is aligned to element size
   *
   * @param src address of source
   * @param dst address of destination
   * @param byte_count number of bytes to copy
   */
  template <typename T, CopyDirection D, bool swap, bool is_src_aligned, bool is_dst_aligned>
  static void do_conjoint_swap(const void* src, void* dst, size_t byte_count) {
    const char* cur_src;
    char* cur_dst;

    // For a LEFT copy, start the cursors at the last element of each range.
    switch (D) {
    case RIGHT:
      cur_src = (const char*)src;
      cur_dst = (char*)dst;
      break;
    case LEFT:
      cur_src = (const char*)src + byte_count - sizeof(T);
      cur_dst = (char*)dst + byte_count - sizeof(T);
      break;
    }

    for (size_t i = 0; i < byte_count / sizeof(T); i++) {
      T tmp;

      // Aligned accesses go through a direct load/store; unaligned ones use
      // memcpy, which is safe on any alignment.
      if (is_src_aligned) {
        tmp = *(T*)cur_src;
      } else {
        memcpy(&tmp, cur_src, sizeof(T));
      }

      if (swap) {
        tmp = byteswap(tmp);
      }

      if (is_dst_aligned) {
        *(T*)cur_dst = tmp;
      } else {
        memcpy(cur_dst, &tmp, sizeof(T));
      }

      // Advance both cursors by one element in the chosen direction. All the
      // switches on D and branches on the bool parameters are resolved at
      // compile time, since these are template arguments.
      switch (D) {
      case RIGHT:
        cur_src += sizeof(T);
        cur_dst += sizeof(T);
        break;
      case LEFT:
        cur_src -= sizeof(T);
        cur_dst -= sizeof(T);
        break;
      }
    }
  }

  /**
   * Copy and byte swap elements
   *
   * <T>         - type of element to copy
   * <direction> - copy direction
   * <swap>      - true if elements should be byte swapped
   *
   * Dispatches to the specialization matching the runtime alignment of src
   * and dst, so the inner copy loop needs no per-element alignment checks.
   *
   * @param src address of source
   * @param dst address of destination
   * @param byte_count number of bytes to copy
   */
  template <typename T, CopyDirection direction, bool swap>
  static void do_conjoint_swap(const void* src, void* dst, size_t byte_count) {
    if (is_aligned(src, sizeof(T))) {
      if (is_aligned(dst, sizeof(T))) {
        do_conjoint_swap<T,direction,swap,true,true>(src, dst, byte_count);
      } else {
        do_conjoint_swap<T,direction,swap,true,false>(src, dst, byte_count);
      }
    } else {
      if (is_aligned(dst, sizeof(T))) {
        do_conjoint_swap<T,direction,swap,false,true>(src, dst, byte_count);
      } else {
        do_conjoint_swap<T,direction,swap,false,false>(src, dst, byte_count);
      }
    }
  }


  /**
   * Copy and byte swap elements
   *
   * <D>    - copy direction
   * <swap> - true if elements should be byte swapped
   *
   * Dispatches on the runtime element size to the matching fixed-width
   * unsigned integer type.
   *
   * @param src address of source
   * @param dst address of destination
   * @param byte_count number of bytes to copy
   * @param elem_size size of the elements to copy-swap
   */
  template <CopyDirection D, bool swap>
  static void do_conjoint_swap(const void* src, void* dst, size_t byte_count, size_t elem_size) {
    switch (elem_size) {
    case 2: do_conjoint_swap<uint16_t,D,swap>(src, dst, byte_count); break;
    case 4: do_conjoint_swap<uint32_t,D,swap>(src, dst, byte_count); break;
    case 8: do_conjoint_swap<uint64_t,D,swap>(src, dst, byte_count); break;
    default: guarantee(false, "do_conjoint_swap: Invalid elem_size %zu\n", elem_size);
    }
  }
};
272 
// Copy elements of 'elem_size' bytes (2, 4 or 8) without byte swapping;
// overlapping src/dst ranges are handled by choosing the copy direction.
void Copy::conjoint_copy(const void* src, void* dst, size_t byte_count, size_t elem_size) {
  CopySwap::conjoint_swap_if_needed<false>(src, dst, byte_count, elem_size);
}
276 
// Copy elements of 'elem_size' bytes (2, 4 or 8), byte-swapping each element;
// overlapping src/dst ranges are handled by choosing the copy direction.
void Copy::conjoint_swap(const void* src, void* dst, size_t byte_count, size_t elem_size) {
  CopySwap::conjoint_swap_if_needed<true>(src, dst, byte_count, elem_size);
}
280 
// Fill bytes; larger units are filled atomically if everything is aligned.
// Replicates 'value' into every byte of [to, to + size). When 'to' and 'size'
// share a word-size alignment, each unit is written with a single store so a
// concurrent reader never observes a partially written unit.
void Copy::fill_to_memory_atomic(void* to, size_t size, jubyte value) {
  address dst = (address)to;
  // OR-ing address and size: the widest unit whose size divides both is the
  // widest unit this fill can use throughout.
  uintptr_t bits = (uintptr_t)to | (uintptr_t)size;
  if (bits % sizeof(jlong) == 0) {
    jlong fill = (julong)((jubyte)value);  // zero-extend
    if (fill != 0) {
      // Replicate the byte into all 8 lanes: v -> v * 0x0101010101010101.
      fill += fill << 8;
      fill += fill << 16;
      fill += fill << 32;
    }
    // Copy::fill_to_jlongs_atomic((jlong*) dst, size / sizeof(jlong));
    for (uintptr_t off = 0; off < size; off += sizeof(jlong)) {
      *(jlong*)(dst + off) = fill;
    }
  } else if (bits % sizeof(jint) == 0) {
    jint fill = (juint)((jubyte)value);  // zero-extend
    if (fill != 0) {
      // Replicate the byte into all 4 lanes: v -> v * 0x01010101.
      fill += fill << 8;
      fill += fill << 16;
    }
    // Copy::fill_to_jints_atomic((jint*) dst, size / sizeof(jint));
    for (uintptr_t off = 0; off < size; off += sizeof(jint)) {
      *(jint*)(dst + off) = fill;
    }
  } else if (bits % sizeof(jshort) == 0) {
    jshort fill = (jushort)((jubyte)value);  // zero-extend
    fill += (jshort)(fill << 8);  // replicate the byte into both lanes
    // Copy::fill_to_jshorts_atomic((jshort*) dst, size / sizeof(jshort));
    for (uintptr_t off = 0; off < size; off += sizeof(jshort)) {
      *(jshort*)(dst + off) = fill;
    }
  } else {
    // Not aligned, so no need to be atomic.
#ifdef MUSL_LIBC
    // This code is used by Unsafe and may hit the next page after truncation
    // of mapped memory. Therefore, we use volatile to prevent compilers from
    // replacing the loop by memset which may not trigger SIGBUS as needed
    // (observed on Alpine Linux x86_64)
    jbyte fill = value;
    for (uintptr_t off = 0; off < size; off += sizeof(jbyte)) {
      *(volatile jbyte*)(dst + off) = fill;
    }
#else
    Copy::fill_to_bytes(dst, size, value);
#endif
  }
}