1 /*
  2  * Copyright (c) 1999, 2023, Oracle and/or its affiliates. All rights reserved.
  3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4  *
  5  * This code is free software; you can redistribute it and/or modify it
  6  * under the terms of the GNU General Public License version 2 only, as
  7  * published by the Free Software Foundation.
  8  *
  9  * This code is distributed in the hope that it will be useful, but WITHOUT
 10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 12  * version 2 for more details (a copy is included in the LICENSE file that
 13  * accompanied this code).
 14  *
 15  * You should have received a copy of the GNU General Public License version
 16  * 2 along with this work; if not, write to the Free Software Foundation,
 17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 18  *
 19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 20  * or visit www.oracle.com if you need additional information or have any
 21  * questions.
 22  *
 23  */
 24 
 25 #ifndef OS_LINUX_OS_LINUX_HPP
 26 #define OS_LINUX_OS_LINUX_HPP
 27 
 28 #include "runtime/os.hpp"
 29 
 30 // os::Linux defines the interface to Linux operating systems
 31 
class os::Linux {
  friend class CgroupSubsystem;
  friend class os;
  friend class OSContainer;
  friend class TestReserveMemorySpecial;

  // Function pointers resolved dynamically at runtime; may stay null if the
  // symbol is not available in the loaded libc/libpthread (callers null-check).
  static int (*_pthread_getcpuclockid)(pthread_t, clockid_t *);
  static int (*_pthread_setname_np)(pthread_t, const char*);

  // Stack bounds of the initial (primordial) thread, captured via
  // capture_initial_stack().
  static address   _initial_thread_stack_bottom;
  static uintptr_t _initial_thread_stack_size;

  // Version strings reported by the C library and the pthread library.
  static const char *_libc_version;
  static const char *_libpthread_version;

  static bool _supports_fast_thread_cpu_time;

  // NUMA topology maps, built by rebuild_cpu_to_node_map() and
  // rebuild_nindex_to_node_map().
  static GrowableArray<int>* _cpu_to_node;
  static GrowableArray<int>* _nindex_to_node;

  static julong available_memory_in_container();

 protected:

  static julong _physical_memory;
  static pthread_t _main_thread;

  static julong available_memory();
  static julong free_memory();

  static int active_processor_count();

  static void initialize_system_info();

  // Commit memory with optional executable permission; the second overload
  // additionally takes a large-page alignment hint.
  static int commit_memory_impl(char* addr, size_t bytes, bool exec);
  static int commit_memory_impl(char* addr, size_t bytes,
                                size_t alignment_hint, bool exec);

  static void set_libc_version(const char *s)       { _libc_version = s; }
  static void set_libpthread_version(const char *s) { _libpthread_version = s; }

  static void rebuild_cpu_to_node_map();
  static void rebuild_nindex_to_node_map();
  static GrowableArray<int>* cpu_to_node()    { return _cpu_to_node; }
  static GrowableArray<int>* nindex_to_node()  { return _nindex_to_node; }

  // Large-page support: type selection and sanity checks for THP,
  // hugetlbfs, and SysV-shm-backed huge pages.
  static bool setup_large_page_type(size_t page_size);
  static bool transparent_huge_pages_sanity_check(bool warn, size_t pages_size);
  static bool hugetlbfs_sanity_check(bool warn, size_t page_size);
  static bool shm_hugetlbfs_sanity_check(bool warn, size_t page_size);

  static int hugetlbfs_page_size_flag(size_t page_size);

  // Reserve/commit/release of "special" (large-page) memory, via either
  // SysV shared memory or hugetlbfs.
  static char* reserve_memory_special_shm(size_t bytes, size_t alignment, char* req_addr, bool exec);
  static char* reserve_memory_special_huge_tlbfs(size_t bytes, size_t alignment, size_t page_size, char* req_addr, bool exec);
  static bool commit_memory_special(size_t bytes, size_t page_size, char* req_addr, bool exec);

  static bool release_memory_special_impl(char* base, size_t bytes);
  static bool release_memory_special_shm(char* base, size_t bytes);
  static bool release_memory_special_huge_tlbfs(char* base, size_t bytes);

  // Diagnostic printers (used for error reports and VM info output).
  // The bool-returning variants indicate whether anything was printed.
  static void print_process_memory_info(outputStream* st);
  static void print_system_memory_info(outputStream* st);
  static bool print_container_info(outputStream* st);
  static void print_steal_info(outputStream* st);
  static void print_distro_info(outputStream* st);
  static void print_libversion_info(outputStream* st);
  static void print_proc_sys_info(outputStream* st);
  static bool print_ld_preload_file(outputStream* st);
  static void print_uptime_info(outputStream* st);

 public:
  // CPU time counters filled in by get_tick_information().
  struct CPUPerfTicks {
    uint64_t used;        // user-mode ticks
    uint64_t usedKernel;  // kernel-mode ticks
    uint64_t total;       // total ticks
    uint64_t steal;       // steal ticks (only meaningful if has_steal_ticks)
    bool     has_steal_ticks;
  };

  // Retrieve the running kernel's version as major/minor numbers.
  static void kernel_version(long* major, long* minor);

  // which_logical_cpu=-1 returns accumulated ticks for all cpus.
  static bool get_tick_information(CPUPerfTicks* pticks, int which_logical_cpu);
  static bool _stack_is_executable;
  static void *dlopen_helper(const char *name, char *ebuf, int ebuflen);
  static void *dll_load_in_vmthread(const char *name, char *ebuf, int ebuflen);
  static const char *dll_path(void* lib);

  static void init_thread_fpu_state();
  static int  get_fpu_control_word();
  static void set_fpu_control_word(int fpu_control);
  static pthread_t main_thread(void)                                { return _main_thread; }
  // returns kernel thread id (similar to LWP id on Solaris), which can be
  // used to access /proc
  static pid_t gettid();

  static address   initial_thread_stack_bottom(void)                { return _initial_thread_stack_bottom; }
  static uintptr_t initial_thread_stack_size(void)                  { return _initial_thread_stack_size; }

  static julong physical_memory() { return _physical_memory; }
  static julong host_swap();

  // Extract stack and frame pointers from a signal-handler ucontext.
  static intptr_t* ucontext_get_sp(const ucontext_t* uc);
  static intptr_t* ucontext_get_fp(const ucontext_t* uc);

  // GNU libc and libpthread version strings
  static const char *libc_version()           { return _libc_version; }
  static const char *libpthread_version()     { return _libpthread_version; }

  static void libpthread_init();
  static void sched_getcpu_init();
  static bool libnuma_init();
  static void* libnuma_dlsym(void* handle, const char* name);
  // libnuma v2 (libnuma_1.2) symbols
  static void* libnuma_v2_dlsym(void* handle, const char* name);

  // Return default guard size for the specified thread type
  static size_t default_guard_size(os::ThreadType thr_type);

  static bool adjustStackSizeForGuardPages(); // See comments in os_linux.cpp

  static void capture_initial_stack(size_t max_size);

  // Stack overflow handling
  static bool manually_expand_stack(JavaThread * t, address addr);
  static void expand_stack_to(address bottom);

  // fast POSIX clocks support
  static void fast_thread_clock_init(void);

  // Returns -1 if pthread_getcpuclockid was not resolved at startup.
  static int pthread_getcpuclockid(pthread_t tid, clockid_t *clock_id) {
    return _pthread_getcpuclockid ? _pthread_getcpuclockid(tid, clock_id) : -1;
  }

  static bool supports_fast_thread_cpu_time() {
    return _supports_fast_thread_cpu_time;
  }

  static jlong fast_thread_cpu_time(clockid_t clockid);

  // Determine if the vmid is the parent pid for a child in a PID namespace.
  // Return the namespace pid if so, otherwise -1.
  static int get_namespace_pid(int vmid);

  // Output structure for query_process_memory_info() (all values in KB)
  struct meminfo_t {
    ssize_t vmsize;     // current virtual size
    ssize_t vmpeak;     // peak virtual size
    ssize_t vmrss;      // current resident set size
    ssize_t vmhwm;      // peak resident set size
    ssize_t vmswap;     // swapped out
    ssize_t rssanon;    // resident set size (anonymous mappings, needs 4.5)
    ssize_t rssfile;    // resident set size (file mappings, needs 4.5)
    ssize_t rssshmem;   // resident set size (shared mappings, needs 4.5)
  };

  // Attempts to query memory information about the current process and return it in the output structure.
  // May fail (returns false) or succeed (returns true) but not all output fields are available; unavailable
  // fields will contain -1.
  static bool query_process_memory_info(meminfo_t* info);

  // Stack repair handling

  // none present

 private:
  static void numa_init();

  // Signatures of the libnuma / sched_getcpu entry points resolved via dlsym;
  // the *_v2 variants belong to the libnuma v2 (libnuma_1.2) API.
  typedef int (*sched_getcpu_func_t)(void);
  typedef int (*numa_node_to_cpus_func_t)(int node, unsigned long *buffer, int bufferlen);
  typedef int (*numa_node_to_cpus_v2_func_t)(int node, void *mask);
  typedef int (*numa_max_node_func_t)(void);
  typedef int (*numa_num_configured_nodes_func_t)(void);
  typedef int (*numa_available_func_t)(void);
  typedef int (*numa_tonode_memory_func_t)(void *start, size_t size, int node);
  typedef void (*numa_interleave_memory_func_t)(void *start, size_t size, unsigned long *nodemask);
  typedef void (*numa_interleave_memory_v2_func_t)(void *start, size_t size, struct bitmask* mask);
  typedef struct bitmask* (*numa_get_membind_func_t)(void);
  typedef struct bitmask* (*numa_get_interleave_mask_func_t)(void);
  typedef long (*numa_move_pages_func_t)(int pid, unsigned long count, void **pages, const int *nodes, int *status, int flags);
  typedef void (*numa_set_preferred_func_t)(int node);
  typedef void (*numa_set_bind_policy_func_t)(int policy);
  typedef int (*numa_bitmask_isbitset_func_t)(struct bitmask *bmp, unsigned int n);
  typedef int (*numa_distance_func_t)(int node1, int node2);

  // Resolved libnuma function pointers; null when the symbol is unavailable,
  // so every public wrapper below null-checks before calling.
  static sched_getcpu_func_t _sched_getcpu;
  static numa_node_to_cpus_func_t _numa_node_to_cpus;
  static numa_node_to_cpus_v2_func_t _numa_node_to_cpus_v2;
  static numa_max_node_func_t _numa_max_node;
  static numa_num_configured_nodes_func_t _numa_num_configured_nodes;
  static numa_available_func_t _numa_available;
  static numa_tonode_memory_func_t _numa_tonode_memory;
  static numa_interleave_memory_func_t _numa_interleave_memory;
  static numa_interleave_memory_v2_func_t _numa_interleave_memory_v2;
  static numa_set_bind_policy_func_t _numa_set_bind_policy;
  static numa_bitmask_isbitset_func_t _numa_bitmask_isbitset;
  static numa_distance_func_t _numa_distance;
  static numa_get_membind_func_t _numa_get_membind;
  static numa_get_interleave_mask_func_t _numa_get_interleave_mask;
  static numa_move_pages_func_t _numa_move_pages;
  static numa_set_preferred_func_t _numa_set_preferred;
  static unsigned long* _numa_all_nodes;
  static struct bitmask* _numa_all_nodes_ptr;
  static struct bitmask* _numa_nodes_ptr;
  static struct bitmask* _numa_interleave_bitmask;
  static struct bitmask* _numa_membind_bitmask;

  static void set_sched_getcpu(sched_getcpu_func_t func) { _sched_getcpu = func; }
  static void set_numa_node_to_cpus(numa_node_to_cpus_func_t func) { _numa_node_to_cpus = func; }
  static void set_numa_node_to_cpus_v2(numa_node_to_cpus_v2_func_t func) { _numa_node_to_cpus_v2 = func; }
  static void set_numa_max_node(numa_max_node_func_t func) { _numa_max_node = func; }
  static void set_numa_num_configured_nodes(numa_num_configured_nodes_func_t func) { _numa_num_configured_nodes = func; }
  static void set_numa_available(numa_available_func_t func) { _numa_available = func; }
  static void set_numa_tonode_memory(numa_tonode_memory_func_t func) { _numa_tonode_memory = func; }
  static void set_numa_interleave_memory(numa_interleave_memory_func_t func) { _numa_interleave_memory = func; }
  static void set_numa_interleave_memory_v2(numa_interleave_memory_v2_func_t func) { _numa_interleave_memory_v2 = func; }
  static void set_numa_set_bind_policy(numa_set_bind_policy_func_t func) { _numa_set_bind_policy = func; }
  static void set_numa_bitmask_isbitset(numa_bitmask_isbitset_func_t func) { _numa_bitmask_isbitset = func; }
  static void set_numa_distance(numa_distance_func_t func) { _numa_distance = func; }
  static void set_numa_get_membind(numa_get_membind_func_t func) { _numa_get_membind = func; }
  static void set_numa_get_interleave_mask(numa_get_interleave_mask_func_t func) { _numa_get_interleave_mask = func; }
  static void set_numa_move_pages(numa_move_pages_func_t func) { _numa_move_pages = func; }
  static void set_numa_set_preferred(numa_set_preferred_func_t func) { _numa_set_preferred = func; }
  static void set_numa_all_nodes(unsigned long* ptr) { _numa_all_nodes = ptr; }
  // The *_ptr setters dereference once: libnuma exports these as pointers to
  // pointers, so we cache the pointee (or null if the symbol was absent).
  static void set_numa_all_nodes_ptr(struct bitmask **ptr) { _numa_all_nodes_ptr = (ptr == nullptr ? nullptr : *ptr); }
  static void set_numa_nodes_ptr(struct bitmask **ptr) { _numa_nodes_ptr = (ptr == nullptr ? nullptr : *ptr); }
  static void set_numa_interleave_bitmask(struct bitmask* ptr)     { _numa_interleave_bitmask = ptr ;   }
  static void set_numa_membind_bitmask(struct bitmask* ptr)        { _numa_membind_bitmask = ptr ;      }
  static int sched_getcpu_syscall(void);

  // NUMA memory placement policy the process is running under.
  enum NumaAllocationPolicy{
    NotInitialized,
    Membind,
    Interleave
  };
  static NumaAllocationPolicy _current_numa_policy;

 public:
  // Thin wrappers over the resolved libnuma symbols; each returns -1 (or is
  // a no-op for void functions) when the underlying symbol is unavailable.
  static int sched_getcpu()  { return _sched_getcpu != nullptr ? _sched_getcpu() : -1; }
  static int numa_node_to_cpus(int node, unsigned long *buffer, int bufferlen);
  static int numa_max_node() { return _numa_max_node != nullptr ? _numa_max_node() : -1; }
  static int numa_num_configured_nodes() {
    return _numa_num_configured_nodes != nullptr ? _numa_num_configured_nodes() : -1;
  }
  static int numa_available() { return _numa_available != nullptr ? _numa_available() : -1; }
  static int numa_tonode_memory(void *start, size_t size, int node) {
    return _numa_tonode_memory != nullptr ? _numa_tonode_memory(start, size, node) : -1;
  }

  static bool is_running_in_interleave_mode() {
    return _current_numa_policy == Interleave;
  }

  static void set_configured_numa_policy(NumaAllocationPolicy numa_policy) {
    _current_numa_policy = numa_policy;
  }

  // Returns Interleave if any node is set in the interleave bitmask,
  // otherwise Membind.
  static NumaAllocationPolicy identify_numa_policy() {
    for (int node = 0; node <= Linux::numa_max_node(); node++) {
      if (Linux::_numa_bitmask_isbitset(Linux::_numa_interleave_bitmask, node)) {
        return Interleave;
      }
    }
    return Membind;
  }

  static void numa_interleave_memory(void *start, size_t size) {
    // Prefer v2 API
    if (_numa_interleave_memory_v2 != nullptr) {
      if (is_running_in_interleave_mode()) {
        _numa_interleave_memory_v2(start, size, _numa_interleave_bitmask);
      } else if (_numa_membind_bitmask != nullptr) {
        _numa_interleave_memory_v2(start, size, _numa_membind_bitmask);
      }
    } else if (_numa_interleave_memory != nullptr) {
      _numa_interleave_memory(start, size, _numa_all_nodes);
    }
  }
  static void numa_set_preferred(int node) {
    if (_numa_set_preferred != nullptr) {
      _numa_set_preferred(node);
    }
  }
  static void numa_set_bind_policy(int policy) {
    if (_numa_set_bind_policy != nullptr) {
      _numa_set_bind_policy(policy);
    }
  }
  static int numa_distance(int node1, int node2) {
    return _numa_distance != nullptr ? _numa_distance(node1, node2) : -1;
  }
  static long numa_move_pages(int pid, unsigned long count, void **pages, const int *nodes, int *status, int flags) {
    return _numa_move_pages != nullptr ? _numa_move_pages(pid, count, pages, nodes, status, flags) : -1;
  }
  static int get_node_by_cpu(int cpu_id);
  static int get_existing_num_nodes();
  // Check if numa node is configured (non-zero memory node).
  static bool is_node_in_configured_nodes(unsigned int n) {
    if (_numa_bitmask_isbitset != nullptr && _numa_all_nodes_ptr != nullptr) {
      return _numa_bitmask_isbitset(_numa_all_nodes_ptr, n);
    } else
      return false;
  }
  // Check if numa node exists in the system (including zero memory nodes).
  static bool is_node_in_existing_nodes(unsigned int n) {
    if (_numa_bitmask_isbitset != nullptr && _numa_nodes_ptr != nullptr) {
      return _numa_bitmask_isbitset(_numa_nodes_ptr, n);
    } else if (_numa_bitmask_isbitset != nullptr && _numa_all_nodes_ptr != nullptr) {
      // Not all libnuma API v2 implement numa_nodes_ptr, so it's not possible
      // to trust the API version for checking its absence. On the other hand,
      // numa_nodes_ptr found in libnuma 2.0.9 and above is the only way to get
      // a complete view of all numa nodes in the system, hence numa_nodes_ptr
      // is used to handle CPU and nodes on architectures (like PowerPC) where
      // there can exist nodes with CPUs but no memory or vice-versa and the
      // nodes may be non-contiguous. For most of the architectures, like
      // x86_64, numa_node_ptr presents the same node set as found in
      // numa_all_nodes_ptr so it's possible to use numa_all_nodes_ptr as a
      // substitute.
      return _numa_bitmask_isbitset(_numa_all_nodes_ptr, n);
    } else
      return false;
  }
  // Check if node is in bound node set.
  static bool is_node_in_bound_nodes(int node) {
    if (_numa_bitmask_isbitset != nullptr) {
      if (is_running_in_interleave_mode()) {
        return _numa_bitmask_isbitset(_numa_interleave_bitmask, node);
      } else {
        return _numa_membind_bitmask != nullptr ? _numa_bitmask_isbitset(_numa_membind_bitmask, node) : false;
      }
    }
    return false;
  }
  // Check if bound to only one numa node.
  // Returns true if bound to a single numa node, otherwise returns false.
  static bool is_bound_to_single_node() {
    int nodes = 0;
    unsigned int node = 0;
    unsigned int highest_node_number = 0;

    if (_numa_membind_bitmask != nullptr && _numa_max_node != nullptr && _numa_bitmask_isbitset != nullptr) {
      highest_node_number = _numa_max_node();
    } else {
      return false;
    }

    // Count the nodes set in the membind mask; exactly one means single-node.
    for (node = 0; node <= highest_node_number; node++) {
      if (_numa_bitmask_isbitset(_numa_membind_bitmask, node)) {
        nodes++;
      }
    }

    if (nodes == 1) {
      return true;
    } else {
      return false;
    }
  }

  static const GrowableArray<int>* numa_nindex_to_node() {
    return _nindex_to_node;
  }

  static void* resolve_function_descriptor(void* p);

#ifdef __GLIBC__
  // os::Linux::get_mallinfo() hides the complexity of dealing with mallinfo() or
  // mallinfo2() from the user. Use this function instead of raw mallinfo/mallinfo2()
  // to keep the JVM runtime-compatible with different glibc versions.
  //
  // mallinfo2() was added with glibc (>2.32). Legacy mallinfo() was deprecated with
  // 2.33 and may vanish in future glibcs. So we may have both or either one of
  // them.
  //
  // mallinfo2() is functionally equivalent to legacy mallinfo but returns sizes as
  // 64-bit on 64-bit platforms. Legacy mallinfo uses 32-bit fields. However, legacy
  // mallinfo is still perfectly fine to use if we know the sizes cannot have wrapped.
  // For example, if the process virtual size does not exceed 4G, we cannot have
  // malloc'ed more than 4G, so the results from legacy mallinfo() can still be used.
  //
  // os::Linux::get_mallinfo() will always prefer mallinfo2() if found, but will fall back
  // to legacy mallinfo() if only that is available. In that case, it will return true
  // in *might_have_wrapped.
  struct glibc_mallinfo {
    size_t arena;
    size_t ordblks;
    size_t smblks;
    size_t hblks;
    size_t hblkhd;
    size_t usmblks;
    size_t fsmblks;
    size_t uordblks;
    size_t fordblks;
    size_t keepcost;
  };
  static void get_mallinfo(glibc_mallinfo* out, bool* might_have_wrapped);

  // Calls out to GNU extension malloc_info if available
  // otherwise does nothing and returns -2.
  static int malloc_info(FILE* stream);
#endif // __GLIBC__
};
435 
436 #endif // OS_LINUX_OS_LINUX_HPP