PERF_RECORD_COMPRESSED records are decompressed from the trace file into
a linked list of mmaped memory regions using the streaming Zstandard API.
After that, the regions are walked to fetch the uncompressed events. When
dumping the raw trace (e.g., perf report -D --header), the file offsets of
events that come from compressed records are reported as zero.

Signed-off-by: Alexey Budankov <alexey.budan...@linux.intel.com>
---
Changes in v2:
- moved compression/decompression code to session layer
---
 tools/perf/builtin-report.c |   5 +-
 tools/perf/util/session.c   | 165 +++++++++++++++++++++++++++++++++++-
 tools/perf/util/session.h   |  11 +++
 tools/perf/util/tool.h      |   2 +
 4 files changed, 181 insertions(+), 2 deletions(-)

diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index c9ceaf88759c..c8b5686d1f6a 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -1197,6 +1197,9 @@ int cmd_report(int argc, const char **argv)
        if (session == NULL)
                return -1;
 
+       if (perf_session__zstd_init(session, 0) < 0)
+               pr_warning("Decompression initialization failed. Reported data 
may be incomplete.\n");
+
        if (report.queue_size) {
                ordered_events__set_alloc_size(&session->ordered_events,
                                               report.queue_size);
@@ -1409,7 +1412,7 @@ int cmd_report(int argc, const char **argv)
 
 error:
        zfree(&report.ptime_range);
-
+       perf_session__zstd_fini(session);
        perf_session__delete(session);
        return ret;
 }
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index b2bace785d9a..e35a5cc4d9a5 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -32,6 +32,21 @@ int perf_session__zstd_init(struct perf_session *session, 
int level)
 {
        size_t ret;
 
+       session->zstd_dstream = ZSTD_createDStream();
+       if (session->zstd_dstream == NULL) {
+               pr_err("Couldn't create decompression stream.\n");
+               return -1;
+       }
+
+       ret = ZSTD_initDStream(session->zstd_dstream);
+       if (ZSTD_isError(ret)) {
+               pr_err("Failed to initialize decompression stream: %s\n", 
ZSTD_getErrorName(ret));
+               return -1;
+       }
+
+       if (level == 0)
+               return 0;
+
        session->header.env.comp_type  = PERF_COMP_NONE;
        session->header.env.comp_level = 0;
 
@@ -55,6 +70,22 @@ int perf_session__zstd_init(struct perf_session *session, 
int level)
 
 int perf_session__zstd_fini(struct perf_session *session)
 {
+       struct decomp *next = session->decomp, *decomp;
+       size_t decomp_len = session->header.env.comp_mmap_len;
+
+       if (session->zstd_dstream) {
+               ZSTD_freeDStream(session->zstd_dstream);
+               session->zstd_dstream = NULL;
+       }
+
+       do {
+               decomp = next;
+               if (decomp == NULL)
+                       break;
+               next = decomp->next;
+               munmap(decomp, decomp_len + sizeof(struct decomp));
+       } while (1);
+
        if (session->zstd_cstream) {
                ZSTD_freeCStream(session->zstd_cstream);
                session->zstd_cstream = NULL;
@@ -106,6 +137,80 @@ size_t perf_session__zstd_compress(void *to,  void *dst, 
size_t dst_size,
 
        return compressed;
 }
+
+/*
+ * Decompress src_size bytes at src into the dst buffer of dst_size bytes
+ * using the session's streaming Zstandard decompression context.
+ *
+ * Returns the number of bytes written to dst. On a decompression error, or
+ * when dst fills up before all input is consumed, the problem is logged and
+ * the number of bytes produced so far is returned.
+ */
+static size_t perf_session__zstd_decompress(struct perf_session *session,
+                                       void *src, size_t src_size,
+                                       void *dst, size_t dst_size)
+{
+       size_t ret;
+       ZSTD_inBuffer input = { src, src_size, 0 };
+       ZSTD_outBuffer output = { dst, dst_size, 0 };
+
+       /*
+        * ZSTD_decompressStream() writes at output.dst + output.pos and
+        * advances output.pos itself, so the output descriptor must not be
+        * rebased between calls: doing so would skip output.pos bytes.
+        */
+       while (input.pos < input.size) {
+               ret = ZSTD_decompressStream(session->zstd_dstream, &output, &input);
+               if (ZSTD_isError(ret)) {
+                       pr_err("failed to decompress (B): %zu -> %zu : %s\n",
+                               src_size, output.size, ZSTD_getErrorName(ret));
+                       break;
+               }
+               /* Output is full but input remains: stop instead of spinning. */
+               if (output.pos == output.size && input.pos < input.size) {
+                       pr_err("decompressed data does not fit into %zu bytes\n",
+                               dst_size);
+                       break;
+               }
+       }
+
+       return output.pos;
+}
+
+/*
+ * Handle one PERF_RECORD_COMPRESSED record: decompress its payload into a
+ * freshly mmaped struct decomp region and append that region to the
+ * session's list. Bytes of the previous region that were not consumed yet
+ * (between its head and size) are copied to the front of the new region so
+ * that they are contiguous with the newly decompressed data.
+ *
+ * Returns 0 on success, -1 on a malformed record, allocation failure,
+ * insufficient buffer space or decompression failure.
+ */
+static int perf_session__process_compressed_event(struct perf_session *session,
+                                       union perf_event *event, u64 file_offset)
+{
+       void *src;
+       size_t decomp_size, src_size;
+       u64 decomp_last_rem = 0;
+       size_t decomp_len = session->header.env.comp_mmap_len;
+       struct decomp *decomp, *decomp_last = session->decomp_last;
+
+       /* A record shorter than its own header would make src_size wrap. */
+       if (event->pack.header.size < sizeof(struct compressed_event)) {
+               pr_err("Invalid compressed record size\n");
+               return -1;
+       }
+
+       /* Anonymous mappings are zero-filled, so next/size start as NULL/0. */
+       decomp = mmap(NULL, sizeof(struct decomp) + decomp_len, PROT_READ|PROT_WRITE,
+                     MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+       if (decomp == MAP_FAILED) {
+               pr_err("Couldn't allocate memory for decompression\n");
+               return -1;
+       }
+
+       decomp->file_pos = file_offset;
+       decomp->head = 0;
+
+       if (decomp_last) {
+               decomp_last_rem = decomp_last->size - decomp_last->head;
+               /* The leftover must leave room for new data in data[]. */
+               if (decomp_last_rem >= decomp_len) {
+                       munmap(decomp, sizeof(struct decomp) + decomp_len);
+                       pr_err("Decompression buffer too small for leftover data\n");
+                       return -1;
+               }
+               memcpy(decomp->data, &(decomp_last->data[decomp_last->head]), decomp_last_rem);
+               decomp->size = decomp_last_rem;
+       }
+
+       /* The compressed payload follows the record header. */
+       src = (void *)event + sizeof(struct compressed_event);
+       src_size = event->pack.header.size - sizeof(struct compressed_event);
+
+       decomp_size = perf_session__zstd_decompress(session, src, src_size,
+                               &(decomp->data[decomp_last_rem]), decomp_len - decomp_last_rem);
+       if (!decomp_size) {
+               munmap(decomp, sizeof(struct decomp) + decomp_len);
+               pr_err("Couldn't decompress data\n");
+               return -1;
+       }
+
+       decomp->size += decomp_size;
+
+       /* Append to the session's list; the first region also becomes the head. */
+       if (session->decomp == NULL) {
+               session->decomp = decomp;
+               session->decomp_last = decomp;
+       } else {
+               session->decomp_last->next = decomp;
+               session->decomp_last = decomp;
+       }
+
+       pr_debug("decomp (B): %zu to %zu\n", src_size, decomp_size);
+
+       return 0;
+}
+
 #else /* !HAVE_ZSTD_SUPPORT */
 int perf_session__zstd_init(struct perf_session *session __maybe_unused, int 
level __maybe_unused)
 {
@@ -123,6 +228,14 @@ size_t perf_session__zstd_compress(void *to __maybe_unused,
 {
        return 0;
 }
+
+/*
+ * Variant used when Zstandard support is not compiled in: the record is not
+ * decompressed, a note is emitted through dump_printf() and 0 is returned.
+ */
+static int perf_session__process_compressed_event(
+                               struct perf_session *session __maybe_unused,
+                               union perf_event *event __maybe_unused,
+                               u64 file_offset __maybe_unused)
+{
+       dump_printf(": unhandled!\n");
+
+       return 0;
+}
 #endif
 
 size_t perf_session__zstd_copy(void *to __maybe_unused,
@@ -531,6 +644,8 @@ void perf_tool__fill_defaults(struct perf_tool *tool)
                tool->time_conv = process_event_op2_stub;
        if (tool->feature == NULL)
                tool->feature = process_event_op2_stub;
+       if (tool->compressed == NULL)
+               tool->compressed = perf_session__process_compressed_event;
 }
 
 static void swap_sample_id_all(union perf_event *event, void *data)
@@ -1464,7 +1579,8 @@ static s64 perf_session__process_user_event(struct 
perf_session *session,
        int fd = perf_data__fd(session->data);
        int err;
 
-       dump_event(session->evlist, event, file_offset, &sample);
+       if (event->header.type != PERF_RECORD_COMPRESSED)
+               dump_event(session->evlist, event, file_offset, &sample);
 
        /* These events are processed right away */
        switch (event->header.type) {
@@ -1517,6 +1633,11 @@ static s64 perf_session__process_user_event(struct 
perf_session *session,
                return tool->time_conv(session, event);
        case PERF_RECORD_HEADER_FEATURE:
                return tool->feature(session, event);
+       case PERF_RECORD_COMPRESSED:
+               err = tool->compressed(session, event, file_offset);
+               if (err)
+                       dump_event(session->evlist, event, file_offset, 
&sample);
+               return 0;
        default:
                return -EINVAL;
        }
@@ -1799,6 +1920,8 @@ static int perf_session__flush_thread_stacks(struct 
perf_session *session)
 
 volatile int session_done;
 
+static int __perf_session__process_decomp_events(struct perf_session *session);
+
 static int __perf_session__process_pipe_events(struct perf_session *session)
 {
        struct ordered_events *oe = &session->ordered_events;
@@ -1879,6 +2002,10 @@ static int __perf_session__process_pipe_events(struct 
perf_session *session)
        if (skip > 0)
                head += skip;
 
+       err = __perf_session__process_decomp_events(session);
+       if (err)
+               goto out_err;
+
        if (!session_done())
                goto more;
 done:
@@ -1927,6 +2054,38 @@ fetch_mmaped_event(struct perf_session *session,
        return event;
 }
 
+/*
+ * Deliver the events held in the most recently appended decompressed region
+ * (session->decomp_last), starting at its head offset.
+ *
+ * Each event is passed to perf_session__process_event() with a file offset
+ * of zero. The walk stops when the region is exhausted, when session_done()
+ * becomes true, or when fetch_mmaped_event() returns NULL.
+ *
+ * Returns 0 on success (including when there is no region), or -EINVAL when
+ * an event header reports a size smaller than the header itself or when
+ * processing the event returns a negative value.
+ */
+static int __perf_session__process_decomp_events(struct perf_session *session)
+{
+       struct decomp *region = session->decomp_last;
+       u64 dump_pos = 0;
+       union perf_event *ev;
+       u64 ev_size;
+       s64 extra = 0;
+       int failed;
+
+       if (region == NULL)
+               return 0;
+
+       for (;;) {
+               if (region->head >= region->size || session_done())
+                       break;
+
+               ev = fetch_mmaped_event(session, region->head, region->size, region->data);
+               if (ev == NULL)
+                       break;
+
+               ev_size = ev->header.size;
+
+               /* Only hand the event over when its header size is sane. */
+               failed = ev_size < sizeof(struct perf_event_header);
+               if (!failed) {
+                       extra = perf_session__process_event(session, ev, dump_pos);
+                       failed = extra < 0;
+               }
+
+               if (failed) {
+                       pr_err("%#" PRIx64 " [%#x]: failed to process type: %d\n",
+                               region->file_pos + region->head, ev->header.size, ev->header.type);
+                       return -EINVAL;
+               }
+
+               /* A positive return value is extra data to step over. */
+               region->head += ev_size + extra;
+       }
+
+       return 0;
+}
+
 /*
  * On 64bit we can mmap the data file in one go. No need for tiny mmap
  * slices. On 32bit we use 32MB.
@@ -2027,6 +2186,10 @@ reader__process_events(struct reader *rd, struct 
perf_session *session,
        head += size;
        file_pos += size;
 
+       err = __perf_session__process_decomp_events(session);
+       if (err)
+               goto out;
+
        ui_progress__update(prog, size);
 
        if (session_done())
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index d8f3284cd838..06a0536adbe0 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -42,7 +42,18 @@ struct perf_session {
        u64                     bytes_compressed;
 #ifdef HAVE_ZSTD_SUPPORT
        ZSTD_CStream            *zstd_cstream;
+       ZSTD_DStream            *zstd_dstream;
 #endif
+       struct decomp           *decomp;
+       struct decomp           *decomp_last;
+};
+
+struct decomp { /* one mmaped region holding decompressed trace data */
+       struct decomp *next; /* next region in the session's list, NULL for the last */
+       u64 file_pos; /* file offset of the compressed record this region came from */
+       u64 head; /* offset in data[] of the next event to process */
+       size_t size; /* number of bytes of data[] in use */
+       char data[]; /* leftover of the previous region, then decompressed bytes */
 };
 
 struct perf_tool;
diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h
index 250391672f9f..9096a6e3de59 100644
--- a/tools/perf/util/tool.h
+++ b/tools/perf/util/tool.h
@@ -28,6 +28,7 @@ typedef int (*event_attr_op)(struct perf_tool *tool,
 
 typedef int (*event_op2)(struct perf_session *session, union perf_event 
*event);
 typedef s64 (*event_op3)(struct perf_session *session, union perf_event 
*event);
+typedef int (*event_op4)(struct perf_session *session, union perf_event 
*event, u64 data); /* data: extra u64 argument; the PERF_RECORD_COMPRESSED dispatch passes the record's file offset */
 
 typedef int (*event_oe)(struct perf_tool *tool, union perf_event *event,
                        struct ordered_events *oe);
@@ -72,6 +73,7 @@ struct perf_tool {
                        stat,
                        stat_round,
                        feature;
+       event_op4       compressed;
        event_op3       auxtrace;
        bool            ordered_events;
        bool            ordering_requires_timestamps;

Reply via email to