Wed, 29 May 2019 09:09:43 +0100
8202884: SA: Attach/detach might fail on Linux if the debuggee application creates/destroys threads during attaching
Summary: While doing a ptrace attach, do not attach to threads which are in the process of exiting or are zombies -- skip these threads.
Reviewed-by: jcbeyler, ysuenaga
1.1 --- a/agent/src/os/linux/libproc_impl.c Thu May 23 04:05:08 2019 +0100 1.2 +++ b/agent/src/os/linux/libproc_impl.c Wed May 29 09:09:43 2019 +0100 1.3 @@ -1,5 +1,5 @@ 1.4 /* 1.5 - * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. 1.6 + * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. 1.7 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 1.8 * 1.9 * This code is free software; you can redistribute it and/or modify it 1.10 @@ -256,6 +256,26 @@ 1.11 return newthr; 1.12 } 1.13 1.14 +void delete_thread_info(struct ps_prochandle* ph, thread_info* thr_to_be_removed) { 1.15 + thread_info* current_thr = ph->threads; 1.16 + 1.17 + if (thr_to_be_removed == ph->threads) { 1.18 + ph->threads = ph->threads->next; 1.19 + } else { 1.20 + thread_info* previous_thr; 1.21 + while (current_thr && current_thr != thr_to_be_removed) { 1.22 + previous_thr = current_thr; 1.23 + current_thr = current_thr->next; 1.24 + } 1.25 + if (current_thr == NULL) { 1.26 + print_error("Could not find the thread to be removed"); 1.27 + return; 1.28 + } 1.29 + previous_thr->next = current_thr->next; 1.30 + } 1.31 + ph->num_threads--; 1.32 + free(current_thr); 1.33 +} 1.34 1.35 // struct used for client data from thread_db callback 1.36 struct thread_db_client_data { 1.37 @@ -278,6 +298,11 @@ 1.38 1.39 print_debug("thread_db : pthread %d (lwp %d)\n", ti.ti_tid, ti.ti_lid); 1.40 1.41 + if (ti.ti_state == TD_THR_UNKNOWN || ti.ti_state == TD_THR_ZOMBIE) { 1.42 + print_debug("Skipping pthread %d (lwp %d)\n", ti.ti_tid, ti.ti_lid); 1.43 + return TD_OK; 1.44 + } 1.45 + 1.46 if (ptr->callback(ptr->ph, ti.ti_tid, ti.ti_lid) != true) 1.47 return TD_ERR; 1.48
2.1 --- a/agent/src/os/linux/libproc_impl.h Thu May 23 04:05:08 2019 +0100 2.2 +++ b/agent/src/os/linux/libproc_impl.h Wed May 29 09:09:43 2019 +0100 2.3 @@ -1,5 +1,5 @@ 2.4 /* 2.5 - * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. 2.6 + * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. 2.7 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 2.8 * 2.9 * This code is free software; you can redistribute it and/or modify it 2.10 @@ -113,6 +113,9 @@ 2.11 // reads thread info using libthread_db and calls above callback for each thread 2.12 bool read_thread_info(struct ps_prochandle* ph, thread_info_callback cb); 2.13 2.14 +// deletes a thread from the thread list 2.15 +void delete_thread_info(struct ps_prochandle* ph, thread_info* thr); 2.16 + 2.17 // adds a new shared object to lib list, returns NULL on failure 2.18 lib_info* add_lib_info(struct ps_prochandle* ph, const char* libname, uintptr_t base); 2.19
3.1 --- a/agent/src/os/linux/ps_proc.c Thu May 23 04:05:08 2019 +0100 3.2 +++ b/agent/src/os/linux/ps_proc.c Wed May 29 09:09:43 2019 +0100 3.3 @@ -28,6 +28,7 @@ 3.4 #include <signal.h> 3.5 #include <errno.h> 3.6 #include <elf.h> 3.7 +#include <ctype.h> 3.8 #include <sys/types.h> 3.9 #include <sys/wait.h> 3.10 #include <sys/ptrace.h> 3.11 @@ -45,6 +46,12 @@ 3.12 // This file has the libproc implementation specific to live process 3.13 // For core files, refer to ps_core.c 3.14 3.15 +typedef enum { 3.16 + ATTACH_SUCCESS, 3.17 + ATTACH_FAIL, 3.18 + ATTACH_THREAD_DEAD 3.19 +} attach_state_t; 3.20 + 3.21 static inline uintptr_t align(uintptr_t ptr, size_t size) { 3.22 return (ptr & ~(size - 1)); 3.23 } 3.24 @@ -167,9 +174,10 @@ 3.25 3.26 // waits until the ATTACH has stopped the process 3.27 // by signal SIGSTOP 3.28 -static bool ptrace_waitpid(pid_t pid) { 3.29 +static attach_state_t ptrace_waitpid(pid_t pid) { 3.30 int ret; 3.31 int status; 3.32 + errno = 0; 3.33 while (true) { 3.34 // Wait for debuggee to stop. 3.35 ret = waitpid(pid, &status, 0); 3.36 @@ -184,15 +192,15 @@ 3.37 // will go to sleep. 3.38 if (WSTOPSIG(status) == SIGSTOP) { 3.39 // Debuggee stopped by SIGSTOP. 3.40 - return true; 3.41 + return ATTACH_SUCCESS; 3.42 } 3.43 if (!ptrace_continue(pid, WSTOPSIG(status))) { 3.44 print_error("Failed to correctly attach to VM. VM might HANG! [PTRACE_CONT failed, stopped by %d]\n", WSTOPSIG(status)); 3.45 - return false; 3.46 + return ATTACH_FAIL; 3.47 } 3.48 } else { 3.49 - print_debug("waitpid(): Child process exited/terminated (status = 0x%x)\n", status); 3.50 - return false; 3.51 + print_debug("waitpid(): Child process %d exited/terminated (status = 0x%x)\n", pid, status); 3.52 + return ATTACH_THREAD_DEAD; 3.53 } 3.54 } else { 3.55 switch (errno) { 3.56 @@ -201,29 +209,89 @@ 3.57 break; 3.58 case ECHILD: 3.59 print_debug("waitpid() failed. 
Child process pid (%d) does not exist \n", pid); 3.60 - break; 3.61 + return ATTACH_THREAD_DEAD; 3.62 case EINVAL: 3.63 - print_debug("waitpid() failed. Invalid options argument.\n"); 3.64 - break; 3.65 + print_error("waitpid() failed. Invalid options argument.\n"); 3.66 + return ATTACH_FAIL; 3.67 default: 3.68 - print_debug("waitpid() failed. Unexpected error %d\n",errno); 3.69 - break; 3.70 + print_error("waitpid() failed. Unexpected error %d\n",errno); 3.71 + return ATTACH_FAIL; 3.72 } 3.73 - return false; 3.74 + } // else 3.75 + } // while 3.76 +} 3.77 + 3.78 +// checks the state of the thread/process specified by "pid", by reading 3.79 +// in the 'State:' value from the /proc/<pid>/status file. From the proc 3.80 +// man page, "Current state of the process. One of "R (running)", 3.81 +// "S (sleeping)", "D (disk sleep)", "T (stopped)", "T (tracing stop)", 3.82 +// "Z (zombie)", or "X (dead)"." Assumes that the thread is dead if we 3.83 +// don't find the status file or if the status is 'X' or 'Z'. 3.84 +static bool process_doesnt_exist(pid_t pid) { 3.85 + char fname[32]; 3.86 + char buf[30]; 3.87 + FILE *fp = NULL; 3.88 + const char state_string[] = "State:"; 3.89 + 3.90 + sprintf(fname, "/proc/%d/status", pid); 3.91 + fp = fopen(fname, "r"); 3.92 + if (fp == NULL) { 3.93 + print_debug("can't open /proc/%d/status file\n", pid); 3.94 + // Assume the thread does not exist anymore. 3.95 + return true; 3.96 + } 3.97 + bool found_state = false; 3.98 + size_t state_len = strlen(state_string); 3.99 + while (fgets(buf, sizeof(buf), fp) != NULL) { 3.100 + char *state = NULL; 3.101 + if (strncmp (buf, state_string, state_len) == 0) { 3.102 + found_state = true; 3.103 + state = buf + state_len; 3.104 + // Skip the spaces 3.105 + while (isspace(*state)) { 3.106 + state++; 3.107 + } 3.108 + // A state value of 'X' indicates that the thread is dead. 'Z' 3.109 + // indicates that the thread is a zombie. 
3.110 + if (*state == 'X' || *state == 'Z') { 3.111 + fclose (fp); 3.112 + return true; 3.113 + } 3.114 + break; 3.115 } 3.116 } 3.117 + // If the state value is not 'X' or 'Z', the thread exists. 3.118 + if (!found_state) { 3.119 + // We haven't found the line beginning with 'State:'. 3.120 + // Assuming the thread exists. 3.121 + print_error("Could not find the 'State:' string in the /proc/%d/status file\n", pid); 3.122 + } 3.123 + fclose (fp); 3.124 + return false; 3.125 } 3.126 3.127 // attach to a process/thread specified by "pid" 3.128 -static bool ptrace_attach(pid_t pid, char* err_buf, size_t err_buf_len) { 3.129 +static attach_state_t ptrace_attach(pid_t pid, char* err_buf, size_t err_buf_len) { 3.130 + errno = 0; 3.131 if (ptrace(PTRACE_ATTACH, pid, NULL, NULL) < 0) { 3.132 + if (errno == EPERM || errno == ESRCH) { 3.133 + // Check if the process/thread is exiting or is a zombie 3.134 + if (process_doesnt_exist(pid)) { 3.135 + print_debug("Thread with pid %d does not exist\n", pid); 3.136 + return ATTACH_THREAD_DEAD; 3.137 + } 3.138 + } 3.139 char buf[200]; 3.140 char* msg = strerror_r(errno, buf, sizeof(buf)); 3.141 snprintf(err_buf, err_buf_len, "ptrace(PTRACE_ATTACH, ..) 
failed for %d: %s", pid, msg); 3.142 - print_debug("%s\n", err_buf); 3.143 - return false; 3.144 + print_error("%s\n", err_buf); 3.145 + return ATTACH_FAIL; 3.146 } else { 3.147 - return ptrace_waitpid(pid); 3.148 + attach_state_t wait_ret = ptrace_waitpid(pid); 3.149 + if (wait_ret == ATTACH_THREAD_DEAD) { 3.150 + print_debug("Thread with pid %d does not exist\n", pid); 3.151 + } 3.152 + return wait_ret; 3.153 } 3.154 } 3.155 3.156 @@ -345,16 +413,20 @@ 3.157 struct ps_prochandle* Pgrab(pid_t pid, char* err_buf, size_t err_buf_len) { 3.158 struct ps_prochandle* ph = NULL; 3.159 thread_info* thr = NULL; 3.160 + attach_state_t attach_status = ATTACH_SUCCESS; 3.161 3.162 if ( (ph = (struct ps_prochandle*) calloc(1, sizeof(struct ps_prochandle))) == NULL) { 3.163 - snprintf(err_buf, err_buf_len, "can't allocate memory for ps_prochandle"); 3.164 - print_debug("%s\n", err_buf); 3.165 - return NULL; 3.166 + snprintf(err_buf, err_buf_len, "can't allocate memory for ps_prochandle"); 3.167 + print_debug("%s\n", err_buf); 3.168 + return NULL; 3.169 } 3.170 3.171 - if (ptrace_attach(pid, err_buf, err_buf_len) != true) { 3.172 - free(ph); 3.173 - return NULL; 3.174 + if ((attach_status = ptrace_attach(pid, err_buf, err_buf_len)) != ATTACH_SUCCESS) { 3.175 + if (attach_status == ATTACH_THREAD_DEAD) { 3.176 + print_error("The process with pid %d does not exist.\n", pid); 3.177 + } 3.178 + free(ph); 3.179 + return NULL; 3.180 } 3.181 3.182 // initialize ps_prochandle 3.183 @@ -373,14 +445,23 @@ 3.184 3.185 // attach to the threads 3.186 thr = ph->threads; 3.187 + 3.188 while (thr) { 3.189 - // don't attach to the main thread again 3.190 - if (ph->pid != thr->lwp_id && ptrace_attach(thr->lwp_id, err_buf, err_buf_len) != true) { 3.191 - // even if one attach fails, we get return NULL 3.192 - Prelease(ph); 3.193 - return NULL; 3.194 - } 3.195 - thr = thr->next; 3.196 + thread_info* current_thr = thr; 3.197 + thr = thr->next; 3.198 + // don't attach to the main thread again 3.199 + 
if (ph->pid != current_thr->lwp_id) { 3.200 + if ((attach_status = ptrace_attach(current_thr->lwp_id, err_buf, err_buf_len)) != ATTACH_SUCCESS) { 3.201 + if (attach_status == ATTACH_THREAD_DEAD) { 3.202 + // Remove this thread from the threads list 3.203 + delete_thread_info(ph, current_thr); 3.204 + } 3.205 + else { 3.206 + Prelease(ph); 3.207 + return NULL; 3.208 + } // ATTACH_THREAD_DEAD 3.209 + } // !ATTACH_SUCCESS 3.210 + } 3.211 } 3.212 return ph; 3.213 }