This is an automated email from the ASF dual-hosted git repository.

tballison pushed a commit to branch TIKA-4740-plugin-roots-cwd-fix in repository https://gitbox.apache.org/repos/asf/tika.git
commit 27264c6e212d6d23299f7996b6af562d691afcf5 Author: tallison <[email protected]> AuthorDate: Tue May 26 21:35:55 2026 -0400 TIKA-4740 -- tika-server-core fix --- .../tika/pipes/core/PerClientServerManager.java | 30 +++++++++++++++++----- .../tika/pipes/core/SharedServerManager.java | 30 +++++++++++++++++----- 2 files changed, 46 insertions(+), 14 deletions(-) diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/PerClientServerManager.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/PerClientServerManager.java index 3b53cecc41..12e3e19c50 100644 --- a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/PerClientServerManager.java +++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/PerClientServerManager.java @@ -270,13 +270,15 @@ public class PerClientServerManager implements ServerManager { tmpDir = Files.createTempDirectory("pipes-server-" + clientId + "-"); ProcessBuilder pb = new ProcessBuilder(getCommandline()); - // Run the child in tmpDir so any hs_err_pid<N>.log JVM crash log lands - // where surfaceCrashDiagnostics() looks for it. Redirect stdio to per- - // server files instead of inheriting the parent JVM's handles -- on - // Windows inheritIO() duplicates surefire's stderr handle into the - // child, blocking the controller's pipe reader past parent exit and - // hanging CI. - pb.directory(tmpDir.toFile()); + // Redirect stdio to per-server files instead of inheriting the parent + // JVM's handles -- on Windows inheritIO() duplicates surefire's stderr + // handle into the child, blocking the controller's pipe reader past + // parent exit and hanging CI. We deliberately do NOT call + // pb.directory(): the child must inherit the parent JVM's CWD so + // relative paths in tika configs (e.g. "plugin-roots":"target/plugins") + // resolve the same way they did when this manager was loaded. 
+ // hs_err crash logs are pointed at tmpDir via -XX:ErrorFile in + // getCommandline() instead. pb.redirectOutput(ServerProcessIO.stdoutLog(tmpDir)); pb.redirectError(ServerProcessIO.stderrLog(tmpDir)); @@ -383,6 +385,7 @@ public class PerClientServerManager implements ServerManager { boolean hasExitOnOOM = false; boolean hasLog4j = false; boolean hasActiveProcessorCount = false; + boolean hasErrorFile = false; String origGCString = null; String newGCLogString = null; @@ -402,12 +405,25 @@ public class PerClientServerManager implements ServerManager { if (arg.startsWith("-XX:ActiveProcessorCount=")) { hasActiveProcessorCount = true; } + if (arg.startsWith("-XX:ErrorFile=")) { + hasErrorFile = true; + } if (arg.startsWith("-Xloggc:")) { origGCString = arg; newGCLogString = arg.replace("${pipesClientId}", "id-" + clientId); } } + // Direct native-crash dumps (hs_err_pid<N>.log) into tmpDir so + // ServerProcessIO.surfaceCrashDiagnostics() can find and emit them on + // abnormal exit. The child JVM inherits the parent's CWD (we do NOT + // call pb.directory()), so without this the JVM would write hs_err + // wherever the parent was launched -- typically lost. + if (!hasErrorFile) { + configArgs.add("-XX:ErrorFile=" + tmpDir.resolve("hs_err_pid%p.log") + .toAbsolutePath()); + } + // If the user hasn't explicitly set -XX:ActiveProcessorCount, size each // forked JVM's view of CPUs to a fair slice of the host. 
Otherwise each // JVM defaults its GC, JIT, and common ForkJoinPool to "all cores", which diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/SharedServerManager.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/SharedServerManager.java index 3778b082cf..32b9cf8388 100644 --- a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/SharedServerManager.java +++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/SharedServerManager.java @@ -283,13 +283,15 @@ public class SharedServerManager implements ServerManager { // eliminating the TOCTOU race between probing a free port and binding it. pb.environment().put("TIKA_PIPES_PORT", "0"); pb.environment().put("TIKA_PIPES_AUTH_TOKEN", HexFormat.of().formatHex(token)); - // Run the child in tmpDir so any hs_err_pid<N>.log JVM crash log - // lands where surfaceCrashDiagnostics() looks for it. Keep stdout on - // a parent-owned pipe so we can read the READY:port signal. Redirect - // stderr to a file rather than INHERIT -- on Windows, inheriting - // stderr duplicates surefire's stderr handle into the child, blocking - // the controller's pipe reader past parent exit and hanging CI. - pb.directory(tmpDir.toFile()); + // Keep stdout on a parent-owned pipe so we can read the READY:port + // signal. Redirect stderr to a file rather than INHERIT -- on + // Windows, inheriting stderr duplicates surefire's stderr handle + // into the child, blocking the controller's pipe reader past parent + // exit and hanging CI. We deliberately do NOT call pb.directory(): + // the child must inherit the parent JVM's CWD so relative paths in + // tika configs (e.g. "plugin-roots":"target/plugins") still resolve. + // hs_err crash logs are pointed at tmpDir via -XX:ErrorFile in + // getCommandline() instead. 
pb.redirectErrorStream(false); pb.redirectError(ServerProcessIO.stderrLog(tmpDir)); @@ -417,6 +419,7 @@ public class SharedServerManager implements ServerManager { boolean hasHeadless = false; boolean hasExitOnOOM = false; boolean hasLog4j = false; + boolean hasErrorFile = false; for (String arg : configArgs) { if (arg.startsWith("-Djava.awt.headless")) { @@ -431,6 +434,19 @@ public class SharedServerManager implements ServerManager { if (arg.startsWith("-Dlog4j.configuration") || arg.startsWith("-Dlog4j2.configuration")) { hasLog4j = true; } + if (arg.startsWith("-XX:ErrorFile=")) { + hasErrorFile = true; + } + } + + // Direct native-crash dumps (hs_err_pid<N>.log) into tmpDir so + // ServerProcessIO.surfaceCrashDiagnostics() can find and emit them on + // abnormal exit. The child JVM inherits the parent's CWD (we do NOT + // call pb.directory()), so without this the JVM would write hs_err + // wherever the parent was launched. + if (!hasErrorFile) { + configArgs.add("-XX:ErrorFile=" + tmpDir.resolve("hs_err_pid%p.log") + .toAbsolutePath()); } List<String> commandLine = new ArrayList<>();
