8202884: SA: Attach/detach might fail on Linux if debuggee application creates/destroys threads during attaching

Wed, 29 May 2019 09:09:43 +0100

author
adinn
date
Wed, 29 May 2019 09:09:43 +0100
changeset 9683
fba8dbd018a6
parent 9682
9905a72841d7
child 9695
17778f8991c8

8202884: SA: Attach/detach might fail on Linux if debuggee application creates/destroys threads during attaching
Summary: While doing a ptrace attach, do not attach to threads which are in the process of exiting or are zombies -- skip these threads.
Reviewed-by: jcbeyler, ysuenaga

agent/src/os/linux/libproc_impl.c file | annotate | diff | comparison | revisions
agent/src/os/linux/libproc_impl.h file | annotate | diff | comparison | revisions
agent/src/os/linux/ps_proc.c file | annotate | diff | comparison | revisions
     1.1 --- a/agent/src/os/linux/libproc_impl.c	Thu May 23 04:05:08 2019 +0100
     1.2 +++ b/agent/src/os/linux/libproc_impl.c	Wed May 29 09:09:43 2019 +0100
     1.3 @@ -1,5 +1,5 @@
     1.4  /*
     1.5 - * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
     1.6 + * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
     1.7   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     1.8   *
     1.9   * This code is free software; you can redistribute it and/or modify it
    1.10 @@ -256,6 +256,26 @@
    1.11     return newthr;
    1.12  }
    1.13  
    1.14 +void delete_thread_info(struct ps_prochandle* ph, thread_info* thr_to_be_removed) {
    1.15 +    thread_info* current_thr = ph->threads;
    1.16 +
    1.17 +    if (thr_to_be_removed == ph->threads) {
    1.18 +      ph->threads = ph->threads->next;
    1.19 +    } else {
    1.20 +      thread_info* previous_thr;
    1.21 +      while (current_thr && current_thr != thr_to_be_removed) {
    1.22 +        previous_thr = current_thr;
    1.23 +        current_thr = current_thr->next;
    1.24 +      }
    1.25 +      if (current_thr == NULL) {
    1.26 +        print_error("Could not find the thread to be removed");
    1.27 +        return;
    1.28 +      }
    1.29 +      previous_thr->next = current_thr->next;
    1.30 +    }
    1.31 +    ph->num_threads--;
    1.32 +    free(current_thr);
    1.33 +}
    1.34  
    1.35  // struct used for client data from thread_db callback
    1.36  struct thread_db_client_data {
    1.37 @@ -278,6 +298,11 @@
    1.38  
    1.39    print_debug("thread_db : pthread %d (lwp %d)\n", ti.ti_tid, ti.ti_lid);
    1.40  
    1.41 +  if (ti.ti_state == TD_THR_UNKNOWN || ti.ti_state == TD_THR_ZOMBIE) {
    1.42 +    print_debug("Skipping pthread %d (lwp %d)\n", ti.ti_tid, ti.ti_lid);
    1.43 +    return TD_OK;
    1.44 +  }
    1.45 +
    1.46    if (ptr->callback(ptr->ph, ti.ti_tid, ti.ti_lid) != true)
    1.47      return TD_ERR;
    1.48  
     2.1 --- a/agent/src/os/linux/libproc_impl.h	Thu May 23 04:05:08 2019 +0100
     2.2 +++ b/agent/src/os/linux/libproc_impl.h	Wed May 29 09:09:43 2019 +0100
     2.3 @@ -1,5 +1,5 @@
     2.4  /*
     2.5 - * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
     2.6 + * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
     2.7   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     2.8   *
     2.9   * This code is free software; you can redistribute it and/or modify it
    2.10 @@ -113,6 +113,9 @@
    2.11  // reads thread info using libthread_db and calls above callback for each thread
    2.12  bool read_thread_info(struct ps_prochandle* ph, thread_info_callback cb);
    2.13  
    2.14 +// deletes a thread from the thread list
    2.15 +void delete_thread_info(struct ps_prochandle* ph, thread_info* thr);
    2.16 +
    2.17  // adds a new shared object to lib list, returns NULL on failure
    2.18  lib_info* add_lib_info(struct ps_prochandle* ph, const char* libname, uintptr_t base);
    2.19  
     3.1 --- a/agent/src/os/linux/ps_proc.c	Thu May 23 04:05:08 2019 +0100
     3.2 +++ b/agent/src/os/linux/ps_proc.c	Wed May 29 09:09:43 2019 +0100
     3.3 @@ -28,6 +28,7 @@
     3.4  #include <signal.h>
     3.5  #include <errno.h>
     3.6  #include <elf.h>
     3.7 +#include <ctype.h>
     3.8  #include <sys/types.h>
     3.9  #include <sys/wait.h>
    3.10  #include <sys/ptrace.h>
    3.11 @@ -45,6 +46,12 @@
    3.12  // This file has the libproc implementation specific to live process
    3.13  // For core files, refer to ps_core.c
    3.14  
    3.15 +typedef enum {
    3.16 +  ATTACH_SUCCESS,
    3.17 +  ATTACH_FAIL,
    3.18 +  ATTACH_THREAD_DEAD
    3.19 +} attach_state_t;
    3.20 +
    3.21  static inline uintptr_t align(uintptr_t ptr, size_t size) {
    3.22    return (ptr & ~(size - 1));
    3.23  }
    3.24 @@ -167,9 +174,10 @@
    3.25  
    3.26  // waits until the ATTACH has stopped the process
    3.27  // by signal SIGSTOP
    3.28 -static bool ptrace_waitpid(pid_t pid) {
    3.29 +static attach_state_t ptrace_waitpid(pid_t pid) {
    3.30    int ret;
    3.31    int status;
    3.32 +  errno = 0;
    3.33    while (true) {
    3.34      // Wait for debuggee to stop.
    3.35      ret = waitpid(pid, &status, 0);
    3.36 @@ -184,15 +192,15 @@
    3.37          // will go to sleep.
    3.38          if (WSTOPSIG(status) == SIGSTOP) {
    3.39            // Debuggee stopped by SIGSTOP.
    3.40 -          return true;
    3.41 +          return ATTACH_SUCCESS;
    3.42          }
    3.43          if (!ptrace_continue(pid, WSTOPSIG(status))) {
    3.44            print_error("Failed to correctly attach to VM. VM might HANG! [PTRACE_CONT failed, stopped by %d]\n", WSTOPSIG(status));
    3.45 -          return false;
    3.46 +          return ATTACH_FAIL;
    3.47          }
    3.48        } else {
    3.49 -        print_debug("waitpid(): Child process exited/terminated (status = 0x%x)\n", status);
    3.50 -        return false;
    3.51 +        print_debug("waitpid(): Child process %d exited/terminated (status = 0x%x)\n", pid, status);
    3.52 +        return ATTACH_THREAD_DEAD;
    3.53        }
    3.54      } else {
    3.55        switch (errno) {
    3.56 @@ -201,29 +209,89 @@
    3.57            break;
    3.58          case ECHILD:
    3.59            print_debug("waitpid() failed. Child process pid (%d) does not exist \n", pid);
    3.60 -          break;
    3.61 +          return ATTACH_THREAD_DEAD;
    3.62          case EINVAL:
    3.63 -          print_debug("waitpid() failed. Invalid options argument.\n");
    3.64 -          break;
    3.65 +          print_error("waitpid() failed. Invalid options argument.\n");
    3.66 +          return ATTACH_FAIL;
    3.67          default:
    3.68 -          print_debug("waitpid() failed. Unexpected error %d\n",errno);
    3.69 -          break;
    3.70 +          print_error("waitpid() failed. Unexpected error %d\n",errno);
    3.71 +          return ATTACH_FAIL;
    3.72        }
    3.73 -      return false;
    3.74 +    } // else
    3.75 +  } // while
    3.76 +}
    3.77 +
    3.78 +// checks the state of the thread/process specified by "pid", by reading
    3.79 +// in the 'State:' value from the /proc/<pid>/status file. From the proc
    3.80 +// man page, "Current state of the process. One of "R (running)",
    3.81 +// "S (sleeping)", "D (disk sleep)", "T (stopped)", "T (tracing stop)",
    3.82 +// "Z (zombie)", or "X (dead)"." Assumes that the thread is dead if we
    3.83 +// don't find the status file or if the status is 'X' or 'Z'.
    3.84 +static bool process_doesnt_exist(pid_t pid) {
    3.85 +  char fname[32];
    3.86 +  char buf[30];
    3.87 +  FILE *fp = NULL;
    3.88 +  const char state_string[] = "State:";
    3.89 +
    3.90 +  sprintf(fname, "/proc/%d/status", pid);
    3.91 +  fp = fopen(fname, "r");
    3.92 +  if (fp == NULL) {
    3.93 +    print_debug("can't open /proc/%d/status file\n", pid);
    3.94 +    // Assume the thread does not exist anymore.
    3.95 +    return true;
    3.96 +  }
    3.97 +  bool found_state = false;
    3.98 +  size_t state_len = strlen(state_string);
    3.99 +  while (fgets(buf, sizeof(buf), fp) != NULL) {
   3.100 +    char *state = NULL;
   3.101 +    if (strncmp (buf, state_string, state_len) == 0) {
   3.102 +      found_state = true;
   3.103 +      state = buf + state_len;
   3.104 +      // Skip the spaces
   3.105 +      while (isspace(*state)) {
   3.106 +        state++;
   3.107 +      }
   3.108 +      // A state value of 'X' indicates that the thread is dead. 'Z'
   3.109 +      // indicates that the thread is a zombie.
   3.110 +      if (*state == 'X' || *state == 'Z') {
   3.111 +        fclose (fp);
   3.112 +        return true;
   3.113 +      }
   3.114 +      break;
   3.115      }
   3.116    }
   3.117 +  // If the state value is not 'X' or 'Z', the thread exists.
   3.118 +  if (!found_state) {
   3.119 +    // We haven't found the line beginning with 'State:'.
   3.120 +    // Assuming the thread exists.
   3.121 +    print_error("Could not find the 'State:' string in the /proc/%d/status file\n", pid);
   3.122 +  }
   3.123 +  fclose (fp);
   3.124 +  return false;
   3.125  }
   3.126  
   3.127  // attach to a process/thread specified by "pid"
   3.128 -static bool ptrace_attach(pid_t pid, char* err_buf, size_t err_buf_len) {
   3.129 +static attach_state_t ptrace_attach(pid_t pid, char* err_buf, size_t err_buf_len) {
   3.130 +  errno = 0;
   3.131    if (ptrace(PTRACE_ATTACH, pid, NULL, NULL) < 0) {
   3.132 +    if (errno == EPERM || errno == ESRCH) {
   3.133 +      // Check if the process/thread is exiting or is a zombie
   3.134 +      if (process_doesnt_exist(pid)) {
   3.135 +        print_debug("Thread with pid %d does not exist\n", pid);
   3.136 +        return ATTACH_THREAD_DEAD;
   3.137 +      }
   3.138 +    }
   3.139      char buf[200];
   3.140      char* msg = strerror_r(errno, buf, sizeof(buf));
   3.141      snprintf(err_buf, err_buf_len, "ptrace(PTRACE_ATTACH, ..) failed for %d: %s", pid, msg);
   3.142 -    print_debug("%s\n", err_buf);
   3.143 -    return false;
   3.144 +    print_error("%s\n", err_buf);
   3.145 +    return ATTACH_FAIL;
   3.146    } else {
   3.147 -    return ptrace_waitpid(pid);
   3.148 +    attach_state_t wait_ret = ptrace_waitpid(pid);
   3.149 +    if (wait_ret == ATTACH_THREAD_DEAD) {
   3.150 +      print_debug("Thread with pid %d does not exist\n", pid);
   3.151 +    }
   3.152 +    return wait_ret;
   3.153    }
   3.154  }
   3.155  
   3.156 @@ -345,16 +413,20 @@
   3.157  struct ps_prochandle* Pgrab(pid_t pid, char* err_buf, size_t err_buf_len) {
   3.158    struct ps_prochandle* ph = NULL;
   3.159    thread_info* thr = NULL;
   3.160 +  attach_state_t attach_status = ATTACH_SUCCESS;
   3.161  
   3.162    if ( (ph = (struct ps_prochandle*) calloc(1, sizeof(struct ps_prochandle))) == NULL) {
   3.163 -     snprintf(err_buf, err_buf_len, "can't allocate memory for ps_prochandle");
   3.164 -     print_debug("%s\n", err_buf);
   3.165 -     return NULL;
   3.166 +    snprintf(err_buf, err_buf_len, "can't allocate memory for ps_prochandle");
   3.167 +    print_debug("%s\n", err_buf);
   3.168 +    return NULL;
   3.169    }
   3.170  
   3.171 -  if (ptrace_attach(pid, err_buf, err_buf_len) != true) {
   3.172 -     free(ph);
   3.173 -     return NULL;
   3.174 +  if ((attach_status = ptrace_attach(pid, err_buf, err_buf_len)) != ATTACH_SUCCESS) {
   3.175 +    if (attach_status == ATTACH_THREAD_DEAD) {
   3.176 +       print_error("The process with pid %d does not exist.\n", pid);
   3.177 +    }
   3.178 +    free(ph);
   3.179 +    return NULL;
   3.180    }
   3.181  
   3.182    // initialize ps_prochandle
   3.183 @@ -373,14 +445,23 @@
   3.184  
   3.185    // attach to the threads
   3.186    thr = ph->threads;
   3.187 +
   3.188    while (thr) {
   3.189 -     // don't attach to the main thread again
   3.190 -    if (ph->pid != thr->lwp_id && ptrace_attach(thr->lwp_id, err_buf, err_buf_len) != true) {
   3.191 -        // even if one attach fails, we get return NULL
   3.192 -        Prelease(ph);
   3.193 -        return NULL;
   3.194 -     }
   3.195 -     thr = thr->next;
   3.196 +    thread_info* current_thr = thr;
   3.197 +    thr = thr->next;
   3.198 +    // don't attach to the main thread again
   3.199 +    if (ph->pid != current_thr->lwp_id) {
   3.200 +      if ((attach_status = ptrace_attach(current_thr->lwp_id, err_buf, err_buf_len)) != ATTACH_SUCCESS) {
   3.201 +        if (attach_status == ATTACH_THREAD_DEAD) {
   3.202 +          // Remove this thread from the threads list
   3.203 +          delete_thread_info(ph, current_thr);
   3.204 +        }
   3.205 +        else {
   3.206 +          Prelease(ph);
   3.207 +          return NULL;
   3.208 +        } // ATTACH_THREAD_DEAD
   3.209 +      } // !ATTACH_SUCCESS
   3.210 +    }
   3.211    }
   3.212    return ph;
   3.213  }

mercurial