On Mon, Mar 01, 2021 at 05:43:43PM +0200, James Clark wrote: [...]
> > I'd like to propose to add a new field "cs_etm_queue::buf_rec_len", it > > stands for the record length based on the RECORD_AUX event. In > > theory, this value should be always less than "cs_etm_queue::buf_len". > > > > When every time the "PERF_RECORD_AUX" event is coming, we find out the > > corresponding queue (so this can be applied for "1:1" or "N:1" models > > for source and sink), and accumulate "perf_record_aux::aux_size" into > > "cs_etm_queue::buf_rec_len". > > > > At the decoder side, it decreases "etmq->buf_rec_len" until to zero for > > the current round of decoding (see cs_etm__decode_data_block()). Since > > all the "PERF_RECORD_AUX" event will be processed before > > "PERF_RECORD_EXIT" event, so we don't worry the tail trace data will be > > ignored. > > > > The main reason for this suggestion is it don't need to change the > > significant logic in current code. I will try to do experiment for this > > idea and share back. > > > > James, if you think I miss anything, please correct me as needed. > > Thanks! > > > > This is an interesting idea, I think we could push decoded packets into the > min heap as the aux records are received, and not do anything with them until > the end of the data is reached. That way instead of saving aux records, we'd > save the result of the decode for each aux record. > > Currently each cs_etm_queue has a cs_etm_traceid_queue/cs_etm_packet_queue > for each > stream, but that would have to be changed to have multiple ones because > multiple > packets could be decoded to get through the whole aux record. > > It would be a similarly sized change, and could also have a bigger impact on > memory. So I'm not sure if it would help to reduce the changes, but it is > possible. Below change is still very coarse and I just did very basic testing for it, so didn't cover all cases; so simply use it to demonstrate the basic idea. Before the event PERF_RECORD_AUX arrives, we don't decode any trace data. And after PERF_RECORD_AUX coming, the aux buffer size will be accumulated into the queue, and decode the trace data for the queue based on the accumulated buffer length. diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index b9c1d329a7f1..3bd5609b6de4 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -89,7 +89,7 @@ struct cs_etm_queue { u8 pending_timestamp; u64 offset; const unsigned char *buf; - size_t buf_len, buf_used; + size_t aux_buf_len, buf_len, buf_used; /* Conversion between traceID and index in traceid_queues array */ struct intlist *traceid_queues_list; struct cs_etm_traceid_queue **traceid_queues; @@ -1085,6 +1085,7 @@ cs_etm__get_trace(struct cs_etm_queue *etmq) if (old_buffer) auxtrace_buffer__drop_data(old_buffer); etmq->buf_len = 0; + etmq->aux_buf_len = 0; return 0; } @@ -2052,6 +2053,7 @@ static int cs_etm__decode_data_block(struct cs_etm_queue *etmq) etmq->offset += processed; etmq->buf_used += processed; etmq->buf_len -= processed; + etmq->aux_buf_len -= processed; out: return ret; @@ -2177,7 +2179,7 @@ static int cs_etm__run_decoder(struct cs_etm_queue *etmq) */ err = cs_etm__process_traceid_queue(etmq, tidq); - } while (etmq->buf_len); + } while (etmq->aux_buf_len > 0); if (err == 0) /* Flush any remaining branch stack entries */ @@ -2216,6 +2218,27 @@ static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, return 0; } +static void cs_etm__update_aux_buf_len(struct cs_etm_auxtrace *etm, + struct perf_record_aux *aux) +{ + unsigned int cs_queue_nr, queue_nr; + struct auxtrace_queue *queue; + struct cs_etm_queue *etmq; + + if (!etm->heap.heap_cnt) + return; + + /* Take the entry at the top of the min heap */ + cs_queue_nr = etm->heap.heap_array[0].queue_nr; + queue_nr = TO_QUEUE_NR(cs_queue_nr); + queue = &etm->queues.queue_array[queue_nr]; + etmq = queue->priv; + + etmq->aux_buf_len += aux->aux_size; + fprintf(stderr, "%s: aux_buf_len=%ld\n", __func__, etmq->aux_buf_len); + return; +} + static int cs_etm__process_queues(struct cs_etm_auxtrace *etm) { int ret = 0; @@ -2272,6 +2295,9 @@ static int cs_etm__process_queues(struct cs_etm_auxtrace *etm) if (ret < 0) goto out; + if (etmq->aux_buf_len <= 0) + goto out; + /* * No more auxtrace_buffers to process in this etmq, simply * move on to another entry in the auxtrace_heap. @@ -2414,9 +2440,15 @@ static int cs_etm__process_event(struct perf_session *session, else if (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE) return cs_etm__process_switch_cpu_wide(etm, event); + fprintf(stderr, "%s: event->header.type=%d\n", __func__, event->header.type); + if (!etm->timeless_decoding && - event->header.type == PERF_RECORD_AUX) + event->header.type == PERF_RECORD_AUX) { + + fprintf(stderr, "%s: aux_size=%lld\n", __func__, event->aux.aux_size); + cs_etm__update_aux_buf_len(etm, &event->aux); return cs_etm__process_queues(etm); + } return 0; }