perf data includes a System Memory Configuration dump. This
information helps in understanding the physical to logical real
address mapping for the logical partitions in the system.

To help locate the start of the memory mapping data in the
auxiliary buffer, two PERF_SAMPLE_RAW records are also present in
the ring buffer. The first PERF_SAMPLE_RAW record marks the
beginning of the system memory mapping data in the aux buffer.
The second PERF_SAMPLE_RAW record marks the end of the trace
data in the aux buffer and also contains the total size of the memory
map data. These sample raw records are used during post-processing.

Add support for processing Hardware Trace Macro (HTM) auxiliary trace
data collected via perf AUX buffers. This enables post-processing of
HTM traces, including the system memory configuration and trace data.

HTM trace data includes two types of information:
1. Bus traces captured in the AUX buffer
2. System Memory Configuration that maps physical to logical real
   addresses for logical partitions

The implementation handles the challenge of large HTM trace buffers
(up to 8GB) being collected through perf AUX buffers (typically
16MB) by reading data in chunks during post-processing.

Key features:

- Process PERF_RECORD_SAMPLE events with RAW data that mark boundaries
  between trace data and memory configuration data in the AUX buffer

- Write HTM trace data to htm.bin.nXpXcX files where X represents
  node, chip, and core indices extracted from the event configuration

- Write system memory configuration to translation.nXpXcX files for
  address mapping analysis

- Integrate with external htmdecode tool for trace decoding
  when available (config bit 0 set indicates Bus traces)

- Use fork/exec pattern for secure external command execution with
  proper error handling and exit code checking

The memory configuration data is written in 32-byte entries with the
entry count stored at offset 0x10 in big-endian format. The first
PERF_SAMPLE_RAW record marks the start of memory mapping data, while
the second marks the end and contains the total buffer count.

Error handling includes:
- NULL checks for all file operations
- Verification of write operations
- Graceful degradation if htmdecode is not installed
- Proper resource cleanup (file handles, memory mappings)

Example usage:
  # perf record -C 1 -e htm/nodalchipindex=2,nodeindex=0,htm_type=1/ <workload>  # Collect trace data
  # perf script -D    # Shows HTM trace data
  # ls htm.bin.*      # Binary trace files
  # ls translation.*  # Memory configuration files

Signed-off-by: Athira Rajeev <[email protected]>
---
 tools/perf/util/powerpc-htm.c | 225 +++++++++++++++++++++++++++++++++-
 1 file changed, 224 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/powerpc-htm.c b/tools/perf/util/powerpc-htm.c
index ffddf0e59fc1..487989ca4fc7 100644
--- a/tools/perf/util/powerpc-htm.c
+++ b/tools/perf/util/powerpc-htm.c
@@ -14,6 +14,12 @@
 #include <inttypes.h>
 #include "powerpc-htm.h"
 #include <errno.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include "sample.h"
+#include <sys/types.h>
+#include <sys/wait.h>
 
 struct perf_session;
 
@@ -26,8 +32,140 @@ struct powerpc_htm {
        struct machine                  *machine;
        u32                             pmu_type;
        char                            htmbin_file[64];
+       char                            trans_file[64];
+       int                             htm_mem_entries;
+       int                             mem_maps;
 };
 
+struct htm_mem {       /* one 32-byte record of the memory configuration data */
+       uint64_t phy_real;      /* presumably the physical real address - TODO confirm */
+       uint64_t logical_real;  /* presumably the logical real address - TODO confirm */
+       uint32_t lp_index;      /* presumably the logical partition index - TODO confirm */
+       uint8_t mem_tier;
+       uint8_t mem_type;
+       uint16_t res;           /* presumably reserved - TODO confirm */
+       uint64_t size;          /* presumably the size of the mapped range - TODO confirm units */
+};
+
+/*
+ * Run the external htmdecode tool on @input_file, with the tool's stdout
+ * redirected to @output_file (created or truncated, mode 0644).
+ *
+ * The tool is located through PATH by execlp() and invoked as
+ * "htmdecode -o -j -w 1 -f <input_file>".
+ *
+ * Return: 0 if the tool exits with status 0, -ENOENT if it could not be
+ * found (child exit status 127), -EINVAL for any other non-zero exit
+ * status, -EINTR if the tool was killed by a signal, or a negative errno
+ * value if fork() or waitpid() fails.
+ */
+static int run_htmdecode(const char *input_file, const char *output_file)
+{
+       pid_t pid;
+       int status;
+       int err;
+
+       pid = fork();
+       if (pid == -1) {
+               /* Save errno first: pr_err() may overwrite it */
+               err = errno;
+               pr_err("fork() failed: %s\n", strerror(err));
+               return -err;
+       }
+
+       if (pid == 0) {
+               /*
+                * Child process. Always leave through _exit(): exit() would
+                * run atexit handlers and flush stdio buffers inherited
+                * across fork() a second time.
+                */
+               int fd = open(output_file, O_WRONLY | O_CREAT | O_TRUNC, 0644);
+
+               if (fd == -1) {
+                       pr_err("Failed to open output file: %s\n", strerror(errno));
+                       _exit(1);
+               }
+
+               /*
+                * Redirect stdout to output file. If stdout was closed, open()
+                * may already have returned STDOUT_FILENO; closing fd would
+                * then undo the redirection.
+                */
+               if (fd != STDOUT_FILENO) {
+                       if (dup2(fd, STDOUT_FILENO) == -1) {
+                               pr_err("Failed to redirect stdout: %s\n",
+                                      strerror(errno));
+                               _exit(1);
+                       }
+                       close(fd);
+               }
+
+               /* Execute htmdecode - execlp searches PATH automatically */
+               execlp("htmdecode", "htmdecode", "-o", "-j", "-w", "1",
+                       "-f", input_file, (char *)NULL);
+
+               /* If execlp returns, it failed */
+               err = errno;
+               pr_err("Failed to execute htmdecode: %s\n", strerror(err));
+
+               /*
+                * 127 is the "command not found" exit code, which the parent
+                * maps to -ENOENT. Any other exec failure exits with 126 so
+                * that it is not misreported as a missing tool.
+                */
+               _exit(err == ENOENT ? 127 : 126);
+       }
+
+       /* Parent process - wait for child, retrying if a signal interrupts */
+       while (waitpid(pid, &status, 0) == -1) {
+               if (errno == EINTR)
+                       continue;
+               err = errno;
+               pr_err("waitpid() failed: %s\n", strerror(err));
+               return -err;
+       }
+
+       /* Check exit status */
+       if (WIFEXITED(status)) {
+               int exit_code = WEXITSTATUS(status);
+
+               if (exit_code == 127) {
+                       pr_err("htmdecode not found in PATH\n");
+                       return -ENOENT;
+               } else if (exit_code != 0) {
+                       pr_err("htmdecode failed with exit code %d\n", exit_code);
+                       return -EINVAL;
+               }
+       } else if (WIFSIGNALED(status)) {
+               pr_err("htmdecode killed by signal %d\n", WTERMSIG(status));
+               return -EINTR;
+       }
+
+       return 0;
+}
+
+/*
+ * Decode the collected HTM trace and sanity-check the translation file.
+ *
+ * Runs htmdecode on htm->htmbin_file, sending its output to
+ * "<htmbin_file>.out", then maps htm->trans_file read-only to confirm that
+ * data follows the first 0x20 bytes of the file. The mapped entries are not
+ * consumed here; the mapping is released again before returning.
+ *
+ * Return: 0 on success, the run_htmdecode() error if decoding failed, or -1
+ * if the translation file cannot be opened, stat'ed or mapped, or holds no
+ * memory mapping entries.
+ */
+static int create_mem_maps(struct powerpc_htm *htm)
+{
+       /* Entries are looked up past the first 0x20 bytes of the file */
+       const off_t entries_offset = 0x20;
+       struct stat file_info;
+       char tracefile[128];
+       void *mapped_data;
+       int ret;
+       int fd;
+
+       snprintf(tracefile, sizeof(tracefile), "%s.out", htm->htmbin_file);
+
+       ret = run_htmdecode(htm->htmbin_file, tracefile);
+       if (ret) {
+               if (ret == -ENOENT)
+                       pr_info("htmdecode not found. Install htmdecode to decode traces.\n");
+               else
+                       pr_info("htmdecode failed with error %d\n", ret);
+               return ret;
+       }
+
+       /* Nothing was written to the translation file: no point mapping it */
+       if (!htm->htm_mem_entries) {
+               pr_info("No memory mapping entries captured in HTM translation\n");
+               return -1;
+       }
+
+       fd = open(htm->trans_file, O_RDONLY);
+       if (fd == -1) {
+               pr_err("Failed to open %s: %s\n", htm->trans_file, strerror(errno));
+               return -1;
+       }
+
+       ret = -1;
+
+       /* Errors are reported before close(), which may overwrite errno */
+       if (fstat(fd, &file_info) == -1) {
+               pr_err("fstat failed on %s: %s\n", htm->trans_file, strerror(errno));
+               goto out_close;
+       }
+
+       /*
+        * mmap() rejects a zero length, and a file that ends within the
+        * first 0x20 bytes has no entry to point at.
+        */
+       if (file_info.st_size <= entries_offset) {
+               pr_info("No memory mapping entries captured in HTM translation\n");
+               goto out_close;
+       }
+
+       mapped_data = mmap(NULL, file_info.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
+       if (mapped_data == MAP_FAILED) {
+               pr_err("mmap failed on %s: %s\n", htm->trans_file, strerror(errno));
+               goto out_close;
+       }
+
+       munmap(mapped_data, file_info.st_size);
+       ret = 0;
+
+out_close:
+       close(fd);
+       return ret;
+}
+
 /*
  * Check if HTM events have more data to collect.
  *
@@ -95,9 +233,55 @@ static void powerpc_htm_dump_event(size_t len)
 {
        const char *color = PERF_COLOR_BLUE;
 
-       color_fprintf(stdout, color,
+       if (dump_trace) {
+               color_fprintf(stdout, color,
                        ". ... HTM PMU data: size %zu bytes\n",
                        len);
+       }
+}
+
+/*
+ * Append one AUX buffer to the output file selected by htm->mem_maps.
+ *
+ * @data: start of the buffer contents
+ * @size: number of bytes available at @data
+ * @htm:  supplies the output file names and the mem_maps flag;
+ *        htm_mem_entries is increased by the number of records written
+ *
+ * With htm->mem_maps set, the buffer is written to htm->trans_file as
+ * 32-byte records: the big-endian 64-bit value at offset 0x10 gives the
+ * entry count, and one more record than that count is written. Otherwise
+ * all @size bytes are appended to htm->htmbin_file.
+ *
+ * Return: 0 on success, -1 on failure.
+ */
+static int write_htm(void *data, size_t size, struct powerpc_htm *htm)
+{
+       const size_t record_size = 32;
+       const size_t count_offset = 0x10;
+       const char *path;
+       size_t nmemb, unit, written;
+       FILE *fp;
+
+       if (!data) {
+               pr_err("No HTM data to write\n");
+               return -1;
+       }
+
+       if (htm->mem_maps) {
+               u64 count;
+
+               /* The entry count must lie inside the buffer before it is read */
+               if (size < count_offset + sizeof(count)) {
+                       pr_err("HTM memory configuration buffer too small: %zu bytes\n",
+                              size);
+                       return -1;
+               }
+               count = be64_to_cpu(*(u64 *)((char *)data + count_offset));
+
+               /*
+                * count + 1 records are written, so all of them must fit in
+                * the buffer. Comparing against size / record_size also keeps
+                * count + 1 from overflowing.
+                */
+               if (count >= size / record_size) {
+                       pr_err("HTM memory configuration entry count %" PRIu64
+                              " does not fit in %zu bytes\n", count, size);
+                       return -1;
+               }
+
+               path = htm->trans_file;
+               unit = record_size;
+               nmemb = count + 1;
+       } else {
+               /*
+                * Nothing to append; fwrite() returns 0 for a zero size,
+                * which must not be mistaken for a failed write.
+                */
+               if (!size)
+                       return 0;
+
+               path = htm->htmbin_file;
+               unit = size;
+               nmemb = 1;
+       }
+
+       fp = fopen(path, "ab");
+       if (!fp) {
+               pr_err("Failed to open %s: %s\n", path, strerror(errno));
+               return -1;
+       }
+
+       written = fwrite(data, unit, nmemb, fp);
+       if (written != nmemb) {
+               if (htm->mem_maps)
+                       pr_err("Failed to write data: expected %zu, wrote %zu\n",
+                              nmemb, written);
+               else
+                       pr_err("Failed to write htm trace data\n");
+               fclose(fp);
+               return -1;
+       }
+
+       /* fclose() flushes buffered data, so a failure here means data loss */
+       if (fclose(fp)) {
+               pr_err("Failed to write %s: %s\n", path, strerror(errno));
+               return -1;
+       }
+
+       if (htm->mem_maps)
+               htm->htm_mem_entries += nmemb;
+
+       return 0;
 }
 
 static int powerpc_htm_process_event(struct perf_session *session 
__maybe_unused,
@@ -105,6 +289,37 @@ static int powerpc_htm_process_event(struct perf_session 
*session __maybe_unused
                                 struct perf_sample *sample __maybe_unused,
                                 const struct perf_tool *tool __maybe_unused)
 {
+       struct powerpc_htm *htm = container_of(session->auxtrace, struct 
powerpc_htm,
+                       auxtrace);
+
+       if ((event->header.type == PERF_RECORD_SAMPLE) && sample->raw_data) {
+               int *content = (int *)sample->raw_data;
+               struct evsel *evsel = evlist__event2evsel(session->evlist, 
event);
+               int config = (evsel->core.attr.config) & 0xF;
+               struct auxtrace_buffer *buffer = NULL;
+               struct auxtrace_queues *queues = &htm->queues;
+               unsigned int i = 0;
+               int j = 0;
+
+               if (strstr(evsel->name, "htm") == NULL)
+                       return 0;
+
+               for (i = 0; i < queues->nr_queues; i++) {
+                       buffer = auxtrace_buffer__next(&queues->queue_array[i], 
buffer);
+                       for (; buffer;) {
+                               if (j >= *content)
+                                       htm->mem_maps = 1;
+                               if (write_htm(buffer->data, buffer->size, htm))
+                                       return -1;
+                               j++;
+                               buffer = 
auxtrace_buffer__next(&queues->queue_array[i], buffer);
+                       }
+               }
+               /* Only for power bus traces, we decode traces */
+               if (config == 1)
+                       create_mem_maps(htm);
+       }
+
        return 0;
 }
 
@@ -205,6 +420,14 @@ int powerpc_htm_process_auxtrace_info(union perf_event 
*event,
        }
        fclose(fp);
 
+       snprintf(htm->trans_file, sizeof(htm->trans_file), 
"translation.n%d.p%d.c%d", nodeindex, nodalchipindex, coreindexonchip);
+       fp = fopen(htm->trans_file, "w");
+       if (!fp) {
+               pr_err("Failed to create %s: %s\n", htm->trans_file, 
strerror(errno));
+               return -errno;
+       }
+       fclose(fp);
+
        powerpc_htm_print_info(&auxtrace_info->priv[0]);
 
        err = auxtrace_queues__process_index(&htm->queues, session);
-- 
2.52.0


Reply via email to