Daniel P Berrange writes: > Instead of having a global dstate array, declare a single > 'uint16 TRACE_${EVENT_NAME}_DSTATE' variable for each > trace event. Record a pointer to this variable in the > TraceEvent struct too.
> By turning trace_event_get_state_dynamic_by_id into a > macro, this still hits the fast path, and cache affinity > is ensured by declaring all the uint16 vars adjacent to > each other. > Signed-off-by: Daniel P. Berrange <berra...@redhat.com> > --- > scripts/tracetool/format/events_c.py | 6 +++++- > scripts/tracetool/format/events_h.py | 3 +++ > stubs/trace-control.c | 9 ++++----- > trace/control-internal.h | 14 ++++---------- > trace/control-target.c | 20 ++++++++------------ > trace/control.c | 11 ++--------- > trace/event-internal.h | 6 ++++++ > 7 files changed, 32 insertions(+), 37 deletions(-) > diff --git a/scripts/tracetool/format/events_c.py > b/scripts/tracetool/format/events_c.py > index 4012063..a2f457f 100644 > --- a/scripts/tracetool/format/events_c.py > +++ b/scripts/tracetool/format/events_c.py > @@ -25,6 +25,9 @@ def generate(events, backend): > '#include "trace/control.h"', > '') > + for e in events: > + out('uint16_t TRACE_%s_DSTATE;' % e.name.upper()) > + > out('TraceEvent trace_events[TRACE_EVENT_COUNT] = {') > for e in events: I would emit an "obviously non-public" variable name, like ___TRACE_%s_dstate. The "TRACE_%s" is only necesary for consistency with the "public name" and macro trickery on the fast path. To make naming consistency easier to track, you can use Event.api(), which needs only a small extension ("scripts/tracetool/__init__.py"): QEMU_TRACE = "trace_%(name)s" QEMU_TRACE_TCG = QEMU_TRACE + "_tcg" QEMU_DSTATE = "___TRACE_%(NAME)s_dstate" def api(self, fmt=None): if fmt is None: fmt = Event.QEMU_TRACE return fmt % {"name": self.name, "NAME": self.name.upper()} Then change all the places where you generate the dstate symbol name to something like: out('uint16_t ' + e.api(e.QEMU_DSTATE)) + ';') > @@ -34,7 +37,8 @@ def generate(events, backend): > vcpu_id = "TRACE_VCPU_EVENT_COUNT" > out(' { .id = %(id)s, .vcpu_id = %(vcpu_id)s,' > ' .name = \"%(name)s\",' > - ' .sstate = %(sstate)s },', > + ' .sstate = %(sstate)s,', > + ' .dstate = &%(id)s_DSTATE, }, ', > id = "TRACE_" + e.name.upper(), > vcpu_id = vcpu_id, > name = e.name, > diff --git a/scripts/tracetool/format/events_h.py > b/scripts/tracetool/format/events_h.py > index a9da60b..193b02c 100644 > --- a/scripts/tracetool/format/events_h.py > +++ b/scripts/tracetool/format/events_h.py > @@ -32,6 +32,9 @@ def generate(events, backend): > out(' TRACE_EVENT_COUNT', > '} TraceEventID;') > + for e in events: > + out('extern uint16_t TRACE_%s_DSTATE;' % e.name.upper()) > + > # per-vCPU event identifiers > out('typedef enum {') Idem on the two above. [...] > diff --git a/trace/control-internal.h b/trace/control-internal.h > index 7f31e39..1446498 100644 > --- a/trace/control-internal.h > +++ b/trace/control-internal.h > @@ -16,7 +16,6 @@ > extern TraceEvent trace_events[]; > -extern uint16_t trace_events_dstate[]; > extern int trace_events_enabled_count; > @@ -54,18 +53,13 @@ static inline bool > trace_event_get_state_static(TraceEvent *ev) > return ev->sstate; > } > -static inline bool trace_event_get_state_dynamic_by_id(TraceEventID id) > -{ > - /* it's on fast path, avoid consistency checks (asserts) */ > - return unlikely(trace_events_enabled_count) && trace_events_dstate[id]; > -} > +/* it's on fast path, avoid consistency checks (asserts) */ > +#define trace_event_get_state_dynamic_by_id(id) \ > + (unlikely(trace_events_enabled_count) && id ## _DSTATE) > static inline bool trace_event_get_state_dynamic(TraceEvent *ev) > { > - TraceEventID id; > - assert(trace_event_get_state_static(ev)); > - id = trace_event_get_id(ev); > - return trace_event_get_state_dynamic_by_id(id); > + return unlikely(trace_events_enabled_count) && *ev->dstate; This one is not on the fast path, so there's no need for the first part of the AND (shouldn't hurt performance to keep it either). > } > static inline bool trace_event_get_vcpu_state_dynamic_by_vcpu_id(CPUState > *vcpu, [...] All the rest (snipped from this mail) looks good. Cheers, Lluis