Thu, 22 Mar 2018 21:47:01 -0400
8198794: Hotspot crash on Cassandra 3.11.1 startup with libnuma 2.0.3
Reviewed-by: dholmes, phh
1 /*
2 * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
25 #ifndef OS_LINUX_VM_OS_LINUX_HPP
26 #define OS_LINUX_VM_OS_LINUX_HPP
28 // Linux_OS defines the interface to Linux operating systems
30 /* pthread_getattr_np comes with LinuxThreads-0.9-7 on RedHat 7.1 */
// Function-pointer type for an entry point taking (pthread_t, pthread_attr_t*)
// and returning int, i.e. the shape of the pthread_getattr_np mentioned above.
31 typedef int (*pthread_getattr_func_type) (pthread_t, pthread_attr_t *);
33 // Information about the protection of the page at address '0' on this os.
// This implementation unconditionally answers true.
34 static bool zero_page_read_protected() { return true; }
// Linux-specific state and entry points. Every data member and function
// declared here is static; `os` and `TestReserveMemorySpecial` are friends
// and can therefore reach the private and protected sections.
36 class Linux {
37 friend class os;
38 friend class TestReserveMemorySpecial;
40 // For signal-chaining
41 #define MAXSIGNUM 32
42 static struct sigaction sigact[MAXSIGNUM]; // saved preinstalled sigactions
43 static unsigned int sigs; // mask of signals that have
44 // preinstalled signal handlers
45 static bool libjsig_is_loaded; // libjsig that interposes sigaction(),
46 // __sigaction(), signal() is loaded
47 static struct sigaction *(*get_signal_action)(int);
48 static struct sigaction *get_preinstalled_handler(int);
49 static void save_preinstalled_handler(int, struct sigaction&);
51 static void check_signal_handler(int sig);
53 // For signal flags diagnostics
54 static int sigflags[MAXSIGNUM];
// Optional clock entry points held as function pointers. The public
// clock_gettime() / pthread_getcpuclockid() wrappers below return -1
// whenever the corresponding pointer is NULL.
56 static int (*_clock_gettime)(clockid_t, struct timespec *);
57 static int (*_pthread_getcpuclockid)(pthread_t, clockid_t *);
// Bounds of the initial thread's stack, returned by
// initial_thread_stack_bottom() / initial_thread_stack_size().
59 static address _initial_thread_stack_bottom;
60 static uintptr_t _initial_thread_stack_size;
// Version strings returned by glibc_version() / libpthread_version().
62 static const char *_glibc_version;
63 static const char *_libpthread_version;
// Thread-library flags, read through is_floating_stack(), is_NPTL(),
// is_LinuxThreads() and supports_fast_thread_cpu_time().
65 static bool _is_floating_stack;
66 static bool _is_NPTL;
67 static bool _supports_fast_thread_cpu_time;
// Lookup tables returned by cpu_to_node() / nindex_to_node().
69 static GrowableArray<int>* _cpu_to_node;
70 static GrowableArray<int>* _nindex_to_node;
72 protected:
74 static julong _physical_memory;
75 static pthread_t _main_thread;
76 static Mutex* _createThread_lock;
77 static int _page_size;
78 static const int _vm_default_page_size;
80 static julong available_memory();
// Returns the stored _physical_memory value.
81 static julong physical_memory() { return _physical_memory; }
82 static void initialize_system_info();
// Two overloads; the second additionally takes an alignment hint.
84 static int commit_memory_impl(char* addr, size_t bytes, bool exec);
85 static int commit_memory_impl(char* addr, size_t bytes,
86 size_t alignment_hint, bool exec);
// These setters store the pointer itself; the string is not copied.
88 static void set_glibc_version(const char *s) { _glibc_version = s; }
89 static void set_libpthread_version(const char *s) { _libpthread_version = s; }
91 static bool supports_variable_stack_size();
// set_is_NPTL() and set_is_LinuxThreads() write opposite values to the same
// _is_NPTL flag; set_is_floating_stack() only ever sets its flag to true.
93 static void set_is_NPTL() { _is_NPTL = true; }
94 static void set_is_LinuxThreads() { _is_NPTL = false; }
95 static void set_is_floating_stack() { _is_floating_stack = true; }
97 static void rebuild_cpu_to_node_map();
98 static void rebuild_nindex_to_node_map();
// Return the table pointers as stored (may be whatever the definitions
// elsewhere initialise them to).
99 static GrowableArray<int>* cpu_to_node() { return _cpu_to_node; }
100 static GrowableArray<int>* nindex_to_node() { return _nindex_to_node; }
// Large-page discovery and setup.
102 static size_t find_large_page_size();
103 static size_t setup_large_page_size();
105 static bool setup_large_page_type(size_t page_size);
106 static bool transparent_huge_pages_sanity_check(bool warn, size_t pages_size);
107 static bool hugetlbfs_sanity_check(bool warn, size_t page_size);
// Special-memory reservation, one entry point per backing mechanism named
// in the function suffix (shm, huge_tlbfs and its _only/_mixed variants).
109 static char* reserve_memory_special_shm(size_t bytes, size_t alignment, char* req_addr, bool exec);
110 static char* reserve_memory_special_huge_tlbfs(size_t bytes, size_t alignment, char* req_addr, bool exec);
111 static char* reserve_memory_special_huge_tlbfs_only(size_t bytes, char* req_addr, bool exec);
112 static char* reserve_memory_special_huge_tlbfs_mixed(size_t bytes, size_t alignment, char* req_addr, bool exec);
// Matching release entry points.
114 static bool release_memory_special_impl(char* base, size_t bytes);
115 static bool release_memory_special_shm(char* base, size_t bytes);
116 static bool release_memory_special_huge_tlbfs(char* base, size_t bytes);
// Diagnostic printers writing to the supplied outputStream.
118 static void print_full_memory_info(outputStream* st);
119 static void print_distro_info(outputStream* st);
120 static void print_libversion_info(outputStream* st);
122 public:
123 static bool _stack_is_executable;
// Library loading helpers; `ebuf`/`ebuflen` describe a caller-supplied
// buffer (presumably for an error message -- confirm against the
// definitions, which are not in this header).
124 static void *dlopen_helper(const char *name, char *ebuf, int ebuflen);
125 static void *dll_load_in_vmthread(const char *name, char *ebuf, int ebuflen);
127 static void init_thread_fpu_state();
128 static int get_fpu_control_word();
129 static void set_fpu_control_word(int fpu_control);
130 static pthread_t main_thread(void) { return _main_thread; }
131 // returns kernel thread id (similar to LWP id on Solaris), which can be
132 // used to access /proc
133 static pid_t gettid();
134 static void set_createThread_lock(Mutex* lk) { _createThread_lock = lk; }
135 static Mutex* createThread_lock(void) { return _createThread_lock; }
136 static void hotspot_sigmask(Thread* thread);
// Plain accessors for the initial-thread stack bounds stored above.
138 static address initial_thread_stack_bottom(void) { return _initial_thread_stack_bottom; }
139 static uintptr_t initial_thread_stack_size(void) { return _initial_thread_stack_size; }
// Page-size accessors; set_page_size() overwrites _page_size unconditionally.
141 static int page_size(void) { return _page_size; }
142 static void set_page_size(int val) { _page_size = val; }
144 static int vm_default_page_size(void) { return _vm_default_page_size; }
// Extract the program counter, stack pointer and frame pointer from a
// signal ucontext.
146 static address ucontext_get_pc(ucontext_t* uc);
147 static intptr_t* ucontext_get_sp(ucontext_t* uc);
148 static intptr_t* ucontext_get_fp(ucontext_t* uc);
150 // For Analyzer Forte AsyncGetCallTrace profiling support:
151 //
152 // This interface should be declared in os_linux_i486.hpp, but
153 // that file provides extensions to the os class and not the
154 // Linux class.
155 static ExtendedPC fetch_frame_from_ucontext(Thread* thread, ucontext_t* uc,
156 intptr_t** ret_sp, intptr_t** ret_fp);
158 // This boolean allows users to forward their own non-matching signals
159 // to JVM_handle_linux_signal, harmlessly.
160 static bool signal_handlers_are_installed;
162 static int get_our_sigflags(int);
163 static void set_our_sigflags(int, int);
164 static void signal_sets_init();
165 static void install_signal_handlers();
166 static void set_signal_handler(int, bool);
167 static bool is_sig_ignored(int sig);
// Accessors for signal sets; each returns a pointer to a sigset_t.
169 static sigset_t* unblocked_signals();
170 static sigset_t* vm_signals();
171 static sigset_t* allowdebug_blocked_signals();
173 // For signal-chaining
174 static struct sigaction *get_chained_signal_action(int sig);
175 static bool chained_handler(int sig, siginfo_t* siginfo, void* context);
177 // GNU libc and libpthread version strings
178 static const char *glibc_version() { return _glibc_version; }
179 static const char *libpthread_version() { return _libpthread_version; }
181 // NPTL or LinuxThreads?
182 static bool is_LinuxThreads() { return !_is_NPTL; }
183 static bool is_NPTL() { return _is_NPTL; }
185 // NPTL is always floating stack. LinuxThreads could be using floating
186 // stack or fixed stack.
187 static bool is_floating_stack() { return _is_floating_stack; }
189 static void libpthread_init();
// libnuma_init() reports its outcome as a bool; the two dlsym helpers take
// a library handle and a symbol name and return the looked-up address.
190 static bool libnuma_init();
191 static void* libnuma_dlsym(void* handle, const char* name);
192 // libnuma v2 (libnuma_1.2) symbols
193 static void* libnuma_v2_dlsym(void* handle, const char* name);
194 // Minimum stack size a thread can be created with (allowing
195 // the VM to completely create the thread and enter user code)
196 static size_t min_stack_allowed;
198 // Return default stack size or guard size for the specified thread type
199 static size_t default_stack_size(os::ThreadType thr_type);
200 static size_t default_guard_size(os::ThreadType thr_type);
202 static void capture_initial_stack(size_t max_size);
204 // Stack overflow handling
205 static bool manually_expand_stack(JavaThread * t, address addr);
206 static int max_register_window_saves_before_flushing();
208 // Real-time clock functions
209 static void clock_init(void);
211 // fast POSIX clocks support
212 static void fast_thread_clock_init(void);
// A monotonic clock is reported as supported exactly when the
// _clock_gettime pointer is non-NULL.
214 static inline bool supports_monotonic_clock() {
215 return _clock_gettime != NULL;
216 }
// Forwards to the stored entry point; returns -1 if none is installed.
218 static int clock_gettime(clockid_t clock_id, struct timespec *tp) {
219 return _clock_gettime ? _clock_gettime(clock_id, tp) : -1;
220 }
// Forwards to the stored entry point; returns -1 if none is installed.
222 static int pthread_getcpuclockid(pthread_t tid, clockid_t *clock_id) {
223 return _pthread_getcpuclockid ? _pthread_getcpuclockid(tid, clock_id) : -1;
224 }
226 static bool supports_fast_thread_cpu_time() {
227 return _supports_fast_thread_cpu_time;
228 }
230 static jlong fast_thread_cpu_time(clockid_t clockid);
232 // pthread_cond clock support
233 private:
// Declared as a one-element array so condAttr() can return it directly
// as a pointer.
234 static pthread_condattr_t _condattr[1];
236 public:
237 static pthread_condattr_t* condAttr() { return _condattr; }
239 // Stack repair handling
241 // none present
243 // LinuxThreads work-around for 6292965
244 static int safe_cond_timedwait(pthread_cond_t *_cond, pthread_mutex_t *_mutex, const struct timespec *_abstime);
246 private:
// Signatures of the scheduler / NUMA entry points held in the function
// pointers declared below. Two interleave signatures exist: the first takes
// an unsigned long node mask, the v2 form takes a struct bitmask*.
247 typedef int (*sched_getcpu_func_t)(void);
248 typedef int (*numa_node_to_cpus_func_t)(int node, unsigned long *buffer, int bufferlen);
249 typedef int (*numa_max_node_func_t)(void);
250 typedef int (*numa_num_configured_nodes_func_t)(void);
251 typedef int (*numa_available_func_t)(void);
252 typedef int (*numa_tonode_memory_func_t)(void *start, size_t size, int node);
253 typedef void (*numa_interleave_memory_func_t)(void *start, size_t size, unsigned long *nodemask);
254 typedef void (*numa_interleave_memory_v2_func_t)(void *start, size_t size, struct bitmask* mask);
256 typedef void (*numa_set_bind_policy_func_t)(int policy);
257 typedef int (*numa_bitmask_isbitset_func_t)(struct bitmask *bmp, unsigned int n);
258 typedef int (*numa_distance_func_t)(int node1, int node2);
// Stored entry points and node masks. The public wrappers further down
// compare each of these against NULL before using it.
260 static sched_getcpu_func_t _sched_getcpu;
261 static numa_node_to_cpus_func_t _numa_node_to_cpus;
262 static numa_max_node_func_t _numa_max_node;
263 static numa_num_configured_nodes_func_t _numa_num_configured_nodes;
264 static numa_available_func_t _numa_available;
265 static numa_tonode_memory_func_t _numa_tonode_memory;
266 static numa_interleave_memory_func_t _numa_interleave_memory;
267 static numa_interleave_memory_v2_func_t _numa_interleave_memory_v2;
268 static numa_set_bind_policy_func_t _numa_set_bind_policy;
269 static numa_bitmask_isbitset_func_t _numa_bitmask_isbitset;
270 static numa_distance_func_t _numa_distance;
271 static unsigned long* _numa_all_nodes;
272 static struct bitmask* _numa_all_nodes_ptr;
273 static struct bitmask* _numa_nodes_ptr;
// Setters for the pointers above; each stores its argument as given.
275 static void set_sched_getcpu(sched_getcpu_func_t func) { _sched_getcpu = func; }
276 static void set_numa_node_to_cpus(numa_node_to_cpus_func_t func) { _numa_node_to_cpus = func; }
277 static void set_numa_max_node(numa_max_node_func_t func) { _numa_max_node = func; }
278 static void set_numa_num_configured_nodes(numa_num_configured_nodes_func_t func) { _numa_num_configured_nodes = func; }
279 static void set_numa_available(numa_available_func_t func) { _numa_available = func; }
280 static void set_numa_tonode_memory(numa_tonode_memory_func_t func) { _numa_tonode_memory = func; }
281 static void set_numa_interleave_memory(numa_interleave_memory_func_t func) { _numa_interleave_memory = func; }
282 static void set_numa_interleave_memory_v2(numa_interleave_memory_v2_func_t func) { _numa_interleave_memory_v2 = func; }
283 static void set_numa_set_bind_policy(numa_set_bind_policy_func_t func) { _numa_set_bind_policy = func; }
284 static void set_numa_bitmask_isbitset(numa_bitmask_isbitset_func_t func) { _numa_bitmask_isbitset = func; }
285 static void set_numa_distance(numa_distance_func_t func) { _numa_distance = func; }
286 static void set_numa_all_nodes(unsigned long* ptr) { _numa_all_nodes = ptr; }
// Unlike the setters above, these two receive the ADDRESS of a bitmask
// pointer and store the pointee; a NULL argument stores NULL instead of
// being dereferenced.
287 static void set_numa_all_nodes_ptr(struct bitmask **ptr) { _numa_all_nodes_ptr = (ptr == NULL ? NULL : *ptr); }
288 static void set_numa_nodes_ptr(struct bitmask **ptr) { _numa_nodes_ptr = (ptr == NULL ? NULL : *ptr); }
289 static int sched_getcpu_syscall(void);
290 public:
// NULL-safe wrappers around the stored entry points. The int-returning
// wrappers yield -1 when the underlying pointer is NULL; the void ones
// silently do nothing.
291 static int sched_getcpu() { return _sched_getcpu != NULL ? _sched_getcpu() : -1; }
292 static int numa_node_to_cpus(int node, unsigned long *buffer, int bufferlen) {
293 return _numa_node_to_cpus != NULL ? _numa_node_to_cpus(node, buffer, bufferlen) : -1;
294 }
295 static int numa_max_node() { return _numa_max_node != NULL ? _numa_max_node() : -1; }
296 static int numa_num_configured_nodes() {
297 return _numa_num_configured_nodes != NULL ? _numa_num_configured_nodes() : -1;
298 }
299 static int numa_available() { return _numa_available != NULL ? _numa_available() : -1; }
300 static int numa_tonode_memory(void *start, size_t size, int node) {
301 return _numa_tonode_memory != NULL ? _numa_tonode_memory(start, size, node) : -1;
302 }
// Calls an interleave entry point with the matching all-nodes mask. Each
// branch requires BOTH the function pointer and its mask to be non-NULL;
// if neither pair is complete the call is a no-op.
303 static void numa_interleave_memory(void *start, size_t size) {
304 // Use v2 api if available
305 if (_numa_interleave_memory_v2 != NULL && _numa_all_nodes_ptr != NULL) {
306 _numa_interleave_memory_v2(start, size, _numa_all_nodes_ptr);
307 } else if (_numa_interleave_memory != NULL && _numa_all_nodes != NULL) {
308 _numa_interleave_memory(start, size, _numa_all_nodes);
309 }
310 }
// No-op when the entry point is NULL.
311 static void numa_set_bind_policy(int policy) {
312 if (_numa_set_bind_policy != NULL) {
313 _numa_set_bind_policy(policy);
314 }
315 }
// Returns -1 when the entry point is NULL.
316 static int numa_distance(int node1, int node2) {
317 return _numa_distance != NULL ? _numa_distance(node1, node2) : -1;
318 }
319 static int get_node_by_cpu(int cpu_id);
320 static int get_existing_num_nodes();
321 // Check if numa node is configured (non-zero memory node).
// Tests bit n of _numa_all_nodes_ptr; answers false when either the bit-test
// entry point or that mask is NULL.
322 static bool isnode_in_configured_nodes(unsigned int n) {
323 if (_numa_bitmask_isbitset != NULL && _numa_all_nodes_ptr != NULL) {
324 return _numa_bitmask_isbitset(_numa_all_nodes_ptr, n);
325 } else
326 return 0;
327 }
328 // Check if numa node exists in the system (including zero memory nodes).
// Prefers _numa_nodes_ptr, falls back to _numa_all_nodes_ptr, and answers
// false when the bit-test entry point or both masks are NULL.
329 static bool isnode_in_existing_nodes(unsigned int n) {
330 if (_numa_bitmask_isbitset != NULL && _numa_nodes_ptr != NULL) {
331 return _numa_bitmask_isbitset(_numa_nodes_ptr, n);
332 } else if (_numa_bitmask_isbitset != NULL && _numa_all_nodes_ptr != NULL) {
333 // Not all libnuma API v2 implement numa_nodes_ptr, so it's not possible
334 // to trust the API version for checking its absence. On the other hand,
335 // numa_nodes_ptr found in libnuma 2.0.9 and above is the only way to get
336 // a complete view of all numa nodes in the system, hence numa_nodes_ptr
337 // is used to handle CPU and nodes on architectures (like PowerPC) where
338 // there can exist nodes with CPUs but no memory or vice-versa and the
339 // nodes may be non-contiguous. For most of the architectures, like
340 // x86_64, numa_nodes_ptr presents the same node set as found in
341 // numa_all_nodes_ptr so it's possible to use numa_all_nodes_ptr as a
342 // substitute.
343 return _numa_bitmask_isbitset(_numa_all_nodes_ptr, n);
344 } else
345 return 0;
346 }
347 };
// Event object built from one pthread mutex, one pthread condition variable
// and an integer state word (_Event). park/unpark/TryPark are only declared
// here; their bodies live outside this header.
350 class PlatformEvent : public CHeapObj<mtInternal> {
351 private:
352 double CachePad [4] ; // increase odds that _mutex is sole occupant of cache line
353 volatile int _Event ;
354 volatile int _nParked ;
355 pthread_mutex_t _mutex [1] ;
356 pthread_cond_t _cond [1] ;
357 double PostPad [2] ;
358 Thread * _Assoc ;
360 public: // TODO-FIXME: make dtor private
// The guarantee condition is the constant 0, i.e. it can never hold.
// NOTE(review): presumably this makes destroying a PlatformEvent a fatal
// error -- confirm against the guarantee() macro definition.
361 ~PlatformEvent() { guarantee (0, "invariant") ; }
363 public:
// Initialises the condition variable with the shared attributes from
// os::Linux::condAttr(), the mutex with default (NULL) attributes, and
// zeroes/clears _Event, _nParked and _Assoc.
364 PlatformEvent() {
365 int status;
366 status = pthread_cond_init (_cond, os::Linux::condAttr());
367 assert_status(status == 0, status, "cond_init");
368 status = pthread_mutex_init (_mutex, NULL);
369 assert_status(status == 0, status, "mutex_init");
370 _Event = 0 ;
371 _nParked = 0 ;
372 _Assoc = NULL ;
373 }
375 // Use caution with reset() and fired() -- they may require MEMBARs
// reset() stores 0 into _Event; fired() returns the current _Event value.
376 void reset() { _Event = 0 ; }
377 int fired() { return _Event; }
378 void park () ;
379 void unpark () ;
380 int TryPark () ;
381 int park (jlong millis) ; // relative timed-wait only
// Records the Thread associated with this event.
382 void SetAssociation (Thread * a) { _Assoc = a ; }
383 } ;
// Holds the pthread primitives for a parker: one mutex and two condition
// variables, selected by index (REL_INDEX or ABS_INDEX).
385 class PlatformParker : public CHeapObj<mtInternal> {
386 protected:
// Indices into _cond[].
387 enum {
388 REL_INDEX = 0,
389 ABS_INDEX = 1
390 };
391 int _cur_index; // which cond is in use: -1, 0, 1
392 pthread_mutex_t _mutex [1] ;
393 pthread_cond_t _cond [2] ; // one for relative times and one for abs.
395 public: // TODO-FIXME: make dtor private
// The guarantee condition is the constant 0, i.e. it can never hold.
// NOTE(review): presumably this makes destroying a PlatformParker a fatal
// error -- confirm against the guarantee() macro definition.
396 ~PlatformParker() { guarantee (0, "invariant") ; }
398 public:
// The two condition variables are initialised differently on purpose:
// the relative-time one uses the attributes from os::Linux::condAttr(),
// while the absolute-time one is created with default (NULL) attributes.
// The mutex also uses default attributes.
399 PlatformParker() {
400 int status;
401 status = pthread_cond_init (&_cond[REL_INDEX], os::Linux::condAttr());
402 assert_status(status == 0, status, "cond_init rel");
403 status = pthread_cond_init (&_cond[ABS_INDEX], NULL);
404 assert_status(status == 0, status, "cond_init abs");
405 status = pthread_mutex_init (_mutex, NULL);
406 assert_status(status == 0, status, "mutex_init");
407 _cur_index = -1; // mark as unused
408 }
409 };
411 #endif // OS_LINUX_VM_OS_LINUX_HPP