# HG changeset patch
# User Timothy J Fontaine <tj.fontaine@oracle.com>
# Date 1493366149 25200
#      Fri Apr 28 00:55:49 2017 -0700
# Node ID aa16ec46dccd412d3e886dfeabfe58198cc20769
# Parent  5534221c23fcf5bc92ea48bc26c656f77a25da43
attach in linux should be relative to /proc/pid/root and namespace aware

Diagnostic commands (i.e. jcmd, jstack, etc) fail to attach to a target JVM
that is inside a container (e.g. Docker).

A Linux container often isolates a process in a PID and Mount namespace that is
separate from the "root container" (analogous to the hypervisor/dom0 in
hardware virtualization environments, or the global zone on Solaris). A target
JVM that is isolated in either a PID namespace, or a Mount namespace will fail
the attach sequence.

When the target JVM is in its own PID namespace, the pid the process sees for
itself is distinct from the real pid of the process as seen from the root
container. For example, in the root container you can observe a JVM with a pid
of 17734; however, if that JVM is running inside a Docker container, the pid
inside its PID namespace is likely 1. So when the target JVM receives the
SIGQUIT it looks in /proc/self/cwd/ for .attach_pid1, but the external
attaching JVM has created the file /proc/17734/cwd/.attach_pid17734. Given this
discrepancy, and since /proc/self/cwd/.attach_pid1 doesn't exist, the target
JVM will output thread status to stderr and won't continue with the attach
sequence.

The solution is to parse /proc/pid/status for the field NSpid (available since
Linux 4.1) which contains a list of pids, where the last entry is the "inner
most" PID namespace value. (Namespaces can be stacked, unlike Solaris Zones
which have a virtualization depth of 1)

The rest of the Linux attach sequence assumes a shared mount namespace by
waiting for /tmp/.java_pid17734 to appear. But if the attaching process is in a
separate namespace because the target JVM is in a mount namespace (or in a
chroot as well) the unix domain socket for attaching won't appear.

Instead the attach sequence should resolve file names relative to
/proc/17734/root which has a materialized view of the rootfs for the target.

diff -r 5534221c23fc -r aa16ec46dccd src/solaris/classes/sun/tools/attach/LinuxVirtualMachine.java
--- a/src/solaris/classes/sun/tools/attach/LinuxVirtualMachine.java	Tue Apr 18 15:45:27 2017 -0700
+++ b/src/solaris/classes/sun/tools/attach/LinuxVirtualMachine.java	Fri Apr 28 00:55:49 2017 -0700
@@ -32,6 +32,10 @@
 import java.io.InputStream;
 import java.io.IOException;
 import java.io.File;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.nio.file.Files;
 
 /*
  * Linux implementation of HotSpotVirtualMachine
@@ -66,12 +70,20 @@
             throw new AttachNotSupportedException("Invalid process identifier");
         }
 
+        // Try and resolve to the "inner most" pid namespace
+        int ns_pid;
+        try {
+            ns_pid = getNamespacePid(pid);
+        } catch (NumberFormatException x) {
+            throw new AttachNotSupportedException("Unable to parse namespace");
+        }
+
         // Find the socket file. If not found then we attempt to start the
         // attach mechanism in the target VM by sending it a QUIT signal.
         // Then we attempt to find the socket file again.
-        path = findSocketFile(pid);
+        path = findSocketFile(pid, ns_pid);
         if (path == null) {
-            File f = createAttachFile(pid);
+            File f = createAttachFile(pid, ns_pid);
             try {
                 // On LinuxThreads each thread is a process and we don't have the
                 // pid of the VMThread which has SIGQUIT unblocked. To workaround
@@ -99,7 +111,7 @@
                     try {
                         Thread.sleep(delay);
                     } catch (InterruptedException x) { }
-                    path = findSocketFile(pid);
+                    path = findSocketFile(pid, ns_pid);
                     i++;
                 } while (i <= retries && path == null);
                 if (path == null) {
@@ -271,8 +283,12 @@
     }
 
     // Return the socket file for the given process.
-    private String findSocketFile(int pid) {
-        File f = new File(tmpdir, ".java_pid" + pid);
+    private String findSocketFile(int pid, int ns_pid) {
+        // A process may not exist in the same mount namespace as the caller.
+        // Instead, attach relative to the target root filesystem as exposed by
+        // procfs regardless of namespaces.
+        String root = "/proc/" + pid + "/root/" + tmpdir;
+        File f = new File(root, ".java_pid" + ns_pid);
         if (!f.exists()) {
             return null;
         }
@@ -283,8 +299,8 @@
     // if not already started. The client creates a .attach_pid<pid> file in the
     // target VM's working directory (or temp directory), and the SIGQUIT handler
     // checks for the file.
-    private File createAttachFile(int pid) throws IOException {
-        String fn = ".attach_pid" + pid;
+    private File createAttachFile(int pid, int ns_pid) throws IOException {
+        String fn = ".attach_pid" + ns_pid;
         String path = "/proc/" + pid + "/cwd/" + fn;
         File f = new File(path);
         try {
@@ -316,6 +332,32 @@
     }
 
 
+    // Return the pid of the target as seen from its inner most PID namespace
+    // (last NSpid value in /proc/<pid>/status), or pid itself if NSpid is absent.
+    private int getNamespacePid(int pid) throws IOException, NumberFormatException {
+        // Assuming a real procfs sits beneath, reading this doesn't block
+        // nor will it consume a lot of memory
+        String statusFile = "/proc/" + pid + "/status";
+        Path statusPath = Paths.get(statusFile);
+
+        for (String line : Files.readAllLines(statusPath, StandardCharsets.UTF_8)) {
+            String[] parts = line.split(":");
+            if (parts.length == 2 && parts[0].trim().equals("NSpid")) {
+                parts = parts[1].trim().split("\\s+");
+                // The last entry represents the PID the JVM "thinks" it is.
+                // Even for pids that are not namespaced these entries should
+                // be valid. You could refer to it as the inner most pid.
+                int ns_pid = Integer.parseInt(parts[parts.length - 1]);
+                return ns_pid;
+            }
+        }
+
+        // Older kernels (e.g. 3.10) may not have the NSpid field; fall back
+        // to the original pid in the event we cannot deduce a namespaced one.
+        return pid;
+    }
+
+
     //-- native methods
 
     static native boolean isLinuxThreads();
