On Mon, Mar 01, 2021 at 05:43:43PM +0200, James Clark wrote:

[...]

> > I'd like to propose to add a new field "cs_etm_queue::buf_rec_len", it
> > stands for the record length based on the RECORD_AUX event.  In
> > theory, this value should always be less than "cs_etm_queue::buf_len".
> > 
> > When every time the "PERF_RECORD_AUX" event is coming, we find out the
> > corresponding queue (so this can be applied for "1:1" or "N:1" models
> > for source and sink), and accumulate "perf_record_aux::aux_size" into
> > "cs_etm_queue::buf_rec_len".
> > 
> > At the decoder side, it decreases "etmq->buf_rec_len" until it reaches
> > zero for the current round of decoding (see cs_etm__decode_data_block()).
> > Since all the "PERF_RECORD_AUX" events will be processed before the
> > "PERF_RECORD_EXIT" event, we don't need to worry that the tail trace
> > data will be ignored.
> > 
> > The main reason for this suggestion is that it doesn't require
> > significant changes to the logic in the current code.  I will try to
> > run an experiment with this idea and share the results back.
> > 
> > James, if you think I miss anything, please correct me as needed.
> > Thanks!
> > 
> 
> This is an interesting idea, I think we could push decoded packets into the
> min heap as the aux records are received, and not do anything with them until
> the end of the data is reached. That way instead of saving aux records, we'd
> save the result of the decode for each aux record.
> 
> Currently each cs_etm_queue has a cs_etm_traceid_queue/cs_etm_packet_queue 
> for each
> stream, but that would have to be changed to have multiple ones because 
> multiple
> packets could be decoded to get through the whole aux record.
> 
> It would be a similarly sized change, and could also have a bigger impact on
> memory. So I'm not sure if it would help to reduce the changes, but it is 
> possible.

The change below is still very coarse and I have only done basic testing
on it, so it doesn't cover all cases; it simply demonstrates the basic
idea.

Before a PERF_RECORD_AUX event arrives, we don't decode any trace
data.  After a PERF_RECORD_AUX event arrives, its aux buffer size is
accumulated into the queue, and the trace data for the queue is decoded
based on the accumulated buffer length.

diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index b9c1d329a7f1..3bd5609b6de4 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -89,7 +89,7 @@ struct cs_etm_queue {
        u8 pending_timestamp;
        u64 offset;
        const unsigned char *buf;
-       size_t buf_len, buf_used;
+       size_t aux_buf_len, buf_len, buf_used;
        /* Conversion between traceID and index in traceid_queues array */
        struct intlist *traceid_queues_list;
        struct cs_etm_traceid_queue **traceid_queues;
@@ -1085,6 +1085,7 @@ cs_etm__get_trace(struct cs_etm_queue *etmq)
                if (old_buffer)
                        auxtrace_buffer__drop_data(old_buffer);
                etmq->buf_len = 0;
+               etmq->aux_buf_len = 0;
                return 0;
        }
 
@@ -2052,6 +2053,7 @@ static int cs_etm__decode_data_block(struct cs_etm_queue 
*etmq)
        etmq->offset += processed;
        etmq->buf_used += processed;
        etmq->buf_len -= processed;
+       etmq->aux_buf_len -= processed;
 
 out:
        return ret;
@@ -2177,7 +2179,7 @@ static int cs_etm__run_decoder(struct cs_etm_queue *etmq)
                         */
                        err = cs_etm__process_traceid_queue(etmq, tidq);
 
-               } while (etmq->buf_len);
+               } while (etmq->aux_buf_len > 0);
 
                if (err == 0)
                        /* Flush any remaining branch stack entries */
@@ -2216,6 +2218,27 @@ static int cs_etm__process_timeless_queues(struct 
cs_etm_auxtrace *etm,
        return 0;
 }
 
+static void cs_etm__update_aux_buf_len(struct cs_etm_auxtrace *etm,
+                                     struct perf_record_aux *aux)
+{
+       unsigned int cs_queue_nr, queue_nr;
+       struct auxtrace_queue *queue;
+       struct cs_etm_queue *etmq;
+
+       if (!etm->heap.heap_cnt)
+               return;
+
+       /* Take the entry at the top of the min heap */
+       cs_queue_nr = etm->heap.heap_array[0].queue_nr;
+       queue_nr = TO_QUEUE_NR(cs_queue_nr);
+       queue = &etm->queues.queue_array[queue_nr];
+       etmq = queue->priv;
+
+       etmq->aux_buf_len += aux->aux_size;
+       fprintf(stderr, "%s: aux_buf_len=%ld\n", __func__, etmq->aux_buf_len);
+       return;
+}
+
 static int cs_etm__process_queues(struct cs_etm_auxtrace *etm)
 {
        int ret = 0;
@@ -2272,6 +2295,9 @@ static int cs_etm__process_queues(struct cs_etm_auxtrace 
*etm)
                if (ret < 0)
                        goto out;
 
+               if (etmq->aux_buf_len <= 0)
+                       goto out;
+
                /*
                 * No more auxtrace_buffers to process in this etmq, simply
                 * move on to another entry in the auxtrace_heap.
@@ -2414,9 +2440,15 @@ static int cs_etm__process_event(struct perf_session 
*session,
        else if (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE)
                return cs_etm__process_switch_cpu_wide(etm, event);
 
+       fprintf(stderr, "%s: event->header.type=%d\n", __func__, 
event->header.type);
+
        if (!etm->timeless_decoding &&
-           event->header.type == PERF_RECORD_AUX)
+           event->header.type == PERF_RECORD_AUX) {
+
+               fprintf(stderr, "%s: aux_size=%lld\n", __func__, 
event->aux.aux_size);
+               cs_etm__update_aux_buf_len(etm, &event->aux);
                return cs_etm__process_queues(etm);
+       }
 
        return 0;
 }

Reply via email to