On Fri, 4 Aug 2023 13:04:28 GMT, Yi Yang <yy...@openjdk.org> wrote: >> ### Motivation and proposal >> Hi, a heap dump pauses the application's execution (STW); this is >> a well-known pain. JDK-8252842 has added parallel support to heap dump in an >> attempt to alleviate this issue. However, all concurrent threads >> competitively write heap data to the same file, and more memory is required >> to maintain the concurrent buffer queue. In experiments, we did not observe a >> significant performance improvement from that. >> >> The minor-pause solution, which is presented in this PR, is a two-phase >> segmented heap dump: >> >> - Phase 1 (STW): Concurrent threads directly write data to multiple heap >> files. >> - Phase 2 (Non-STW): Merge multiple heap files into one complete heap dump >> file. This process can happen outside a safepoint. >> >> Now concurrent worker threads are not required to maintain a buffer queue, >> which would result in more memory overhead, nor do they need to compete for >> locks. The changes in the overall design are as follows: >> >> <img src="s3://crabby-images/9b12c/9b12c4be2be0ae0281e54167541f1cdac96e6378" alt="image"> >> <p align="center">Fig1. Before</p> >> >> <img src="s3://crabby-images/976b1/976b1b72e52a2a05601de4e63476ccaabf2ab083" alt="image"> >> <p align="center">Fig2.
After this patch</p> >> >> ### Performance evaluation >> | memory | numOfThread | CompressionMode | STW | Total | >> | -------| ----------- | --------------- | --- | ---- | >> | 8g | 1 T | N | 15.612 | 15.612 | >> | 8g | 32 T | N | 2.561725 | 14.498 | >> | 8g | 32 T | C1 | 2.3084878 | 14.198 | >> | 8g | 32 T | C2 | 10.9355128 | 21.882 | >> | 8g | 96 T | N | 2.6790452 | 14.012 | >> | 8g | 96 T | C1 | 2.3044796 | 3.589 | >> | 8g | 96 T | C2 | 9.7585151 | 20.219 | >> | 16g | 1 T | N | 26.278 | 26.278 | >> | 16g | 32 T | N | 5.231374 | 26.417 | >> | 16g | 32 T | C1 | 5.6946983 | 6.538 | >> | 16g | 32 T | C2 | 21.8211105 | 41.133 | >> | 16g | 96 T | N | 6.2445556 | 27.141 | >> | 16g | 96 T | C1 | 4.6007096 | 6.259 | >> | 16g | 96 T | C2 | 19.2965783 | 39.007 | >> | 32g | 1 T | N | 48.149 | 48.149 | >> | 32g | 32 T | N | 10.7734677 | 61.643 | >> | 32g | 32 T | C1 | 10.1642097 | 10.903 | >> | 32g | 32 T | C2 | 43.8407607 | 88.152 | >> | 32g | 96 T | N | 13.1522042 | 61.432 | >> | 32g | 96 T | C1 | 9.0954641 | 9.885 | >> | 32g | 96 T | C2 | 38.9900931 | 80.574 | >> | 64g | 1 T | N | 100.583 | 100.583 | >> | 64g | 32 T | N | 20.9233744 | 134.701 | >> | 64g | 32 T | C1 | 18.5023784 | 19.358 | >> | 64g | 32 T | C2 | 86.4748377 | 172.707 | >> | 64g | 96 T | N | 26.7374116 | 126.08 | >> | 64g | ... > > Yi Yang has updated the pull request incrementally with one additional commit > since the last revision: > > new can_parallel_dump
import java.io.File; import java.io.IOException; import java.nio.file.Files; import java.util.List; import jdk.test.lib.Asserts; import jdk.test.lib.JDKToolLauncher; import jdk.test.lib.apps.LingeredApp; import jdk.test.lib.dcmd.PidJcmdExecutor; import jdk.test.lib.process.OutputAnalyzer; import jdk.test.lib.process.ProcessTools; import jdk.test.lib.hprof.HprofParser; /** * @test * @bug 8306441 * @summary Verify the integrity of generated heap dump and capability of parallel dump * @library /test/lib * @run driver HeapDumpParallelTest */ public class HeapDumpParallelTest { private static void checkAndVerify(OutputAnalyzer dcmdOut, LingeredApp app, File heapDumpFile, boolean expectSerial) throws IOException { dcmdOut.shouldHaveExitValue(0); dcmdOut.shouldContain("Heap dump file created"); OutputAnalyzer appOut = new OutputAnalyzer(app.getProcessStdout()); appOut.shouldContain("[heapdump]"); if (!expectSerial && Runtime.getRuntime().availableProcessors() > 1) { appOut.shouldContain("Dump heap objects in parallel"); appOut.shouldContain("Merge heap files complete"); } else { appOut.shouldNotContain("Dump heap objects in parallel"); appOut.shouldNotContain("Merge heap files complete"); } verifyHeapDump(heapDumpFile); if (heapDumpFile.exists()) { heapDumpFile.delete(); } } private static LingeredApp launchApp() throws IOException { LingeredApp theApp = new LingeredApp(); LingeredApp.startApp(theApp, "-Xlog:heapdump", "-Xmx512m", "-XX:-UseDynamicNumberOfGCThreads", "-XX:ParallelGCThreads=2"); return theApp; } public static void main(String[] args) throws Exception { String heapDumpFileName = "parallelHeapDump.bin"; File heapDumpFile = new File(heapDumpFileName); if (heapDumpFile.exists()) { heapDumpFile.delete(); } LingeredApp theApp = launchApp(); try { // Expect error message OutputAnalyzer out = attachJcmdHeapDump(heapDumpFile, theApp.getPid(), "-parallel=" + -1); out.shouldContain("Invalid number of parallel dump threads."); // Expect serial dump because 0 implies to 
disable parallel dump test(heapDumpFile, "-parallel=" + 0, true); // Expect serial dump test(heapDumpFile, "-parallel=" + 1, true); // Expect parallel dump test(heapDumpFile, "-parallel=" + Integer.MAX_VALUE, false); // Expect parallel dump test(heapDumpFile, "-gz=9 -overwrite -parallel=" + Runtime.getRuntime().availableProcessors(), false); } finally { theApp.stopApp(); } } private static void test(File heapDumpFile, String arg, boolean expectSerial) throws Exception { LingeredApp theApp = launchApp(); try { OutputAnalyzer dcmdOut = attachJcmdHeapDump(heapDumpFile, theApp.getPid(), arg); theApp.stopApp(); checkAndVerify(dcmdOut, theApp, heapDumpFile, expectSerial); } finally { theApp.stopApp(); } } private static OutputAnalyzer attachJcmdHeapDump(File heapDumpFile, long lingeredAppPid, String arg) throws Exception { // e.g. jcmd <pid> GC.heap_dump -parallel=cpucount <file_path> System.out.println("Testing pid " + lingeredAppPid); PidJcmdExecutor executor = new PidJcmdExecutor("" + lingeredAppPid); return executor.execute("GC.heap_dump " + arg + " " + heapDumpFile.getAbsolutePath()); } private static void verifyHeapDump(File dump) { ...as before... ------------- PR Comment: https://git.openjdk.org/jdk/pull/13667#issuecomment-1669094080