1 /*
  2  * Copyright (c) 2006, 2025, Oracle and/or its affiliates. All rights reserved.
  3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4  *
  5  * This code is free software; you can redistribute it and/or modify it
  6  * under the terms of the GNU General Public License version 2 only, as
  7  * published by the Free Software Foundation.
  8  *
  9  * This code is distributed in the hope that it will be useful, but WITHOUT
 10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 12  * version 2 for more details (a copy is included in the LICENSE file that
 13  * accompanied this code).
 14  *
 15  * You should have received a copy of the GNU General Public License version
 16  * 2 along with this work; if not, write to the Free Software Foundation,
 17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 18  *
 19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 20  * or visit www.oracle.com if you need additional information or have any
 21  * questions.
 22  *
 23  */
 24 
 25 #include "runtime/sharedRuntime.hpp"
 26 #include "utilities/align.hpp"
 27 #include "utilities/byteswap.hpp"
 28 #include "utilities/copy.hpp"
 29 #include "utilities/debug.hpp"
 30 
 31 
 32 // Copy bytes; larger units are filled atomically if everything is aligned.
 33 void Copy::conjoint_memory_atomic(const void* from, void* to, size_t size) {
 34   uintptr_t bits = (uintptr_t) from | (uintptr_t) to | (uintptr_t) size;
 35 
 36   // (Note:  We could improve performance by ignoring the low bits of size,
 37   // and putting a short cleanup loop after each bulk copy loop.
 38   // There are plenty of other ways to make this faster also,
 39   // and it's a slippery slope.  For now, let's keep this code simple
 40   // since the simplicity helps clarify the atomicity semantics of
 41   // this operation.  There are also CPU-specific assembly versions
 42   // which may or may not want to include such optimizations.)
 43 
 44   if (bits % sizeof(jlong) == 0) {
 45     Copy::conjoint_jlongs_atomic((const jlong*) from, (jlong*) to, size / sizeof(jlong));
 46   } else if (bits % sizeof(jint) == 0) {
 47     Copy::conjoint_jints_atomic((const jint*) from, (jint*) to, size / sizeof(jint));
 48   } else if (bits % sizeof(jshort) == 0) {
 49     Copy::conjoint_jshorts_atomic((const jshort*) from, (jshort*) to, size / sizeof(jshort));
 50   } else {
 51     // Not aligned, so no need to be atomic.
 52     Copy::conjoint_jbytes((const void*) from, (void*) to, size);
 53   }
 54 }
 55 
// Expands to code that copies the largest prefix of the remaining region
// whose length is a multiple of sizeof(t), provided 'bits' shows that both
// cursors are t-aligned. Advances 'cursor_from'/'cursor_to' and shrinks
// 'remaining_bytes' accordingly. Relies on the enclosing scope declaring
// 'bits', 'cursor_from', 'cursor_to' and 'remaining_bytes'; the token paste
// forms a call to Copy::conjoint_<t>s_atomic.
#define COPY_ALIGNED_SEGMENT(t)                                                \
  if (bits % sizeof(t) == 0) {                                                 \
    size_t segment = remaining_bytes / sizeof(t);                              \
    if (segment > 0) {                                                         \
      Copy::conjoint_##t##s_atomic((const t*)cursor_from, (t*)cursor_to,       \
                                   segment);                                   \
      remaining_bytes -= segment * sizeof(t);                                  \
      cursor_from = (void*)(((char*)cursor_from) + segment * sizeof(t));       \
      cursor_to = (void*)(((char*)cursor_to) + segment * sizeof(t));           \
    }                                                                          \
  }

// Copies 'size' bytes from 'from' to 'to', using for each segment the widest
// unit that the (shared) alignment of both addresses permits, so each unit is
// written with one atomic store.
void Copy::copy_value_content(const void* from, void* to, size_t size) {
  // The alignment of the containing object must satisfy the alignment of all
  // its fields, as such we can safely copy atomically starting at the largest
  // atomic size which the payloads are aligned to. Any trailing payload smaller
  // than this atomic size must have a lower alignment requirement. This
  // property holds recursively down to the smallest atomic size.
  const uintptr_t bits = (uintptr_t)from | (uintptr_t)to;
  const void* cursor_from = from;
  void* cursor_to = to;
  size_t remaining_bytes = size;
  // Widest unit first; each expansion consumes as much of the remainder as
  // its unit size divides, leaving a strictly smaller tail for the next one.
  COPY_ALIGNED_SEGMENT(jlong)
  COPY_ALIGNED_SEGMENT(jint)
  COPY_ALIGNED_SEGMENT(jshort)
  // Anything left is smaller than a jshort; a plain byte copy suffices.
  if (remaining_bytes > 0) {
    Copy::conjoint_jbytes(cursor_from, cursor_to, remaining_bytes);
  }
}

#undef COPY_ALIGNED_SEGMENT
 87 
 88 template <typename T>
 89 static void clear_value_content_helper(uintptr_t* to_cursor_addr, size_t* remaining_bytes_addr) {
 90   uintptr_t& to_cursor = *to_cursor_addr;
 91   size_t& remaining_bytes = *remaining_bytes_addr;
 92 
 93   if (to_cursor % sizeof(T) == 0 && remaining_bytes >= sizeof(T)) {
 94     const size_t copy_bytes = align_down(remaining_bytes, sizeof(T));
 95     Copy::fill_to_memory_atomic((void*)to_cursor, copy_bytes);
 96     to_cursor += copy_bytes;
 97     remaining_bytes -= copy_bytes;
 98   }
 99 }
100 
101 void Copy::clear_value_content(void* to, size_t size) {
102   // The alignment of the containing object must satisfy the alignment of all
103   // its fields, as such we can safely copy atomically starting at the largest
104   // atomic size which the payloads are aligned to. Any trailing payload smaller
105   // than this atomic size must have a lower alignment requirement. This
106   // property holds recursively down to the smallest atomic size.
107   uintptr_t to_cursor = uintptr_t(to);
108   size_t remaining_bytes = size;
109 
110   // Clear jlong alignend segments
111   clear_value_content_helper<jlong>(&to_cursor, &remaining_bytes);
112 
113   // Clear jint alignend segments
114   clear_value_content_helper<jint>(&to_cursor, &remaining_bytes);
115 
116   // Clear jshort alignend segments
117   clear_value_content_helper<jshort>(&to_cursor, &remaining_bytes);
118 
119   // Clear remaining bytes
120   clear_value_content_helper<jbyte>(&to_cursor, &remaining_bytes);
121 
122   postcond(remaining_bytes == 0);
123   postcond(to_cursor - size == uintptr_t(to));
124 }
125 
// Helper for Copy::conjoint_copy/conjoint_swap: copies sequences of 2-, 4- or
// 8-byte elements, optionally byte-swapping each element, choosing a copy
// direction that is safe when the source and destination ranges overlap.
class CopySwap : AllStatic {
public:
  /**
   * Copy and optionally byte swap elements
   *
   * <swap> - true if elements should be byte swapped
   *
   * @param src address of source
   * @param dst address of destination
   * @param byte_count number of bytes to copy
   * @param elem_size size of the elements to copy-swap
   */
  template<bool swap>
  static void conjoint_swap_if_needed(const void* src, void* dst, size_t byte_count, size_t elem_size) {
    assert(src != nullptr, "address must not be null");
    assert(dst != nullptr, "address must not be null");
    assert(elem_size == 2 || elem_size == 4 || elem_size == 8,
           "incorrect element size: %zu", elem_size);
    assert(is_aligned(byte_count, elem_size),
           "byte_count %zu must be multiple of element size %zu", byte_count, elem_size);

    address src_end = (address)src + byte_count;

    // If dst lies outside [src, src_end), a forward (RIGHT) copy cannot
    // overwrite source elements that have not yet been read; otherwise the
    // ranges overlap with dst above src, so copy backwards (LEFT).
    if (dst <= src || dst >= src_end) {
      do_conjoint_swap<RIGHT,swap>(src, dst, byte_count, elem_size);
    } else {
      do_conjoint_swap<LEFT,swap>(src, dst, byte_count, elem_size);
    }
  }

private:
  enum CopyDirection {
    RIGHT, // lower -> higher address
    LEFT   // higher -> lower address
  };

  /**
   * Copy and byte swap elements
   *
   * <T> - type of element to copy
   * <D> - copy direction
   * <is_src_aligned> - true if src argument is aligned to element size
   * <is_dst_aligned> - true if dst argument is aligned to element size
   *
   * @param src address of source
   * @param dst address of destination
   * @param byte_count number of bytes to copy
   */
  template <typename T, CopyDirection D, bool swap, bool is_src_aligned, bool is_dst_aligned>
  static void do_conjoint_swap(const void* src, void* dst, size_t byte_count) {
    const char* cur_src;
    char* cur_dst;

    // For a LEFT copy, start the cursors at the last element of each range.
    switch (D) {
    case RIGHT:
      cur_src = (const char*)src;
      cur_dst = (char*)dst;
      break;
    case LEFT:
      cur_src = (const char*)src + byte_count - sizeof(T);
      cur_dst = (char*)dst + byte_count - sizeof(T);
      break;
    }

    for (size_t i = 0; i < byte_count / sizeof(T); i++) {
      T tmp;

      // Aligned accesses go through a direct load/store; unaligned ones use
      // memcpy, which is safe on any alignment.
      if (is_src_aligned) {
        tmp = *(T*)cur_src;
      } else {
        memcpy(&tmp, cur_src, sizeof(T));
      }

      if (swap) {
        tmp = byteswap(tmp);
      }

      if (is_dst_aligned) {
        *(T*)cur_dst = tmp;
      } else {
        memcpy(cur_dst, &tmp, sizeof(T));
      }

      // Advance both cursors by one element in the chosen direction. All the
      // switches on D and branches on the bool parameters are resolved at
      // compile time, since these are template arguments.
      switch (D) {
      case RIGHT:
        cur_src += sizeof(T);
        cur_dst += sizeof(T);
        break;
      case LEFT:
        cur_src -= sizeof(T);
        cur_dst -= sizeof(T);
        break;
      }
    }
  }

  /**
   * Copy and byte swap elements
   *
   * <T>         - type of element to copy
   * <direction> - copy direction
   * <swap>      - true if elements should be byte swapped
   *
   * Dispatches to the specialization matching the runtime alignment of src
   * and dst, so the inner copy loop needs no per-element alignment checks.
   *
   * @param src address of source
   * @param dst address of destination
   * @param byte_count number of bytes to copy
   */
  template <typename T, CopyDirection direction, bool swap>
  static void do_conjoint_swap(const void* src, void* dst, size_t byte_count) {
    if (is_aligned(src, sizeof(T))) {
      if (is_aligned(dst, sizeof(T))) {
        do_conjoint_swap<T,direction,swap,true,true>(src, dst, byte_count);
      } else {
        do_conjoint_swap<T,direction,swap,true,false>(src, dst, byte_count);
      }
    } else {
      if (is_aligned(dst, sizeof(T))) {
        do_conjoint_swap<T,direction,swap,false,true>(src, dst, byte_count);
      } else {
        do_conjoint_swap<T,direction,swap,false,false>(src, dst, byte_count);
      }
    }
  }


  /**
   * Copy and byte swap elements
   *
   * <D>    - copy direction
   * <swap> - true if elements should be byte swapped
   *
   * Dispatches on the runtime element size to the matching fixed-width
   * unsigned integer type.
   *
   * @param src address of source
   * @param dst address of destination
   * @param byte_count number of bytes to copy
   * @param elem_size size of the elements to copy-swap
   */
  template <CopyDirection D, bool swap>
  static void do_conjoint_swap(const void* src, void* dst, size_t byte_count, size_t elem_size) {
    switch (elem_size) {
    case 2: do_conjoint_swap<uint16_t,D,swap>(src, dst, byte_count); break;
    case 4: do_conjoint_swap<uint32_t,D,swap>(src, dst, byte_count); break;
    case 8: do_conjoint_swap<uint64_t,D,swap>(src, dst, byte_count); break;
    default: guarantee(false, "do_conjoint_swap: Invalid elem_size %zu\n", elem_size);
    }
  }
};
272 
// Copy elements of 'elem_size' bytes (2, 4 or 8) without byte swapping;
// overlapping src/dst ranges are handled by choosing the copy direction.
void Copy::conjoint_copy(const void* src, void* dst, size_t byte_count, size_t elem_size) {
  CopySwap::conjoint_swap_if_needed<false>(src, dst, byte_count, elem_size);
}
276 
// Copy elements of 'elem_size' bytes (2, 4 or 8), byte-swapping each element;
// overlapping src/dst ranges are handled by choosing the copy direction.
void Copy::conjoint_swap(const void* src, void* dst, size_t byte_count, size_t elem_size) {
  CopySwap::conjoint_swap_if_needed<true>(src, dst, byte_count, elem_size);
}
280 
// Fill bytes; larger units are filled atomically if everything is aligned.
// Replicates 'value' into every byte of [to, to + size). When 'to' and 'size'
// share a word-size alignment, each unit is written with a single store so a
// concurrent reader never observes a partially written unit.
void Copy::fill_to_memory_atomic(void* to, size_t size, jubyte value) {
  address dst = (address)to;
  // OR-ing address and size: the widest unit whose size divides both is the
  // widest unit this fill can use throughout.
  uintptr_t bits = (uintptr_t)to | (uintptr_t)size;
  if (bits % sizeof(jlong) == 0) {
    jlong fill = (julong)((jubyte)value);  // zero-extend
    if (fill != 0) {
      // Replicate the byte into all 8 lanes: v -> v * 0x0101010101010101.
      fill += fill << 8;
      fill += fill << 16;
      fill += fill << 32;
    }
    // Copy::fill_to_jlongs_atomic((jlong*) dst, size / sizeof(jlong));
    for (uintptr_t off = 0; off < size; off += sizeof(jlong)) {
      *(jlong*)(dst + off) = fill;
    }
  } else if (bits % sizeof(jint) == 0) {
    jint fill = (juint)((jubyte)value);  // zero-extend
    if (fill != 0) {
      // Replicate the byte into all 4 lanes: v -> v * 0x01010101.
      fill += fill << 8;
      fill += fill << 16;
    }
    // Copy::fill_to_jints_atomic((jint*) dst, size / sizeof(jint));
    for (uintptr_t off = 0; off < size; off += sizeof(jint)) {
      *(jint*)(dst + off) = fill;
    }
  } else if (bits % sizeof(jshort) == 0) {
    jshort fill = (jushort)((jubyte)value);  // zero-extend
    fill += (jshort)(fill << 8);  // replicate the byte into both lanes
    // Copy::fill_to_jshorts_atomic((jshort*) dst, size / sizeof(jshort));
    for (uintptr_t off = 0; off < size; off += sizeof(jshort)) {
      *(jshort*)(dst + off) = fill;
    }
  } else {
    // Not aligned, so no need to be atomic.
#ifdef MUSL_LIBC
    // This code is used by Unsafe and may hit the next page after truncation
    // of mapped memory. Therefore, we use volatile to prevent compilers from
    // replacing the loop by memset which may not trigger SIGBUS as needed
    // (observed on Alpine Linux x86_64)
    jbyte fill = value;
    for (uintptr_t off = 0; off < size; off += sizeof(jbyte)) {
      *(volatile jbyte*)(dst + off) = fill;
    }
#else
    Copy::fill_to_bytes(dst, size, value);
#endif
  }
}