Hello Adrián, Thanks for taking a look!
On 18/07/2025 03:43, Adrián Larumbe wrote: > Hi Lucas, > > On 16.05.2025 16:49, Lukas Zapolskas wrote: >> This patch extends the DEV_QUERY ioctl to return information about the >> performance counter setup for userspace, and introduces the new >> ioctl DRM_PANTHOR_PERF_CONTROL in order to allow for the sampling of >> performance counters. >> >> The new design is inspired by the perf aux ringbuffer, with the insert >> and extract indices being mapped to userspace, allowing multiple samples >> to be exposed at any given time. To avoid pointer chasing, the sample >> metadata and block metadata are inline with the elements they >> describe. > > Is the perf aux ringbuffer something internal to ARM's DDK? > I'm referring to the in-tree perf tool, which has its ring buffer design documented here [0]. >> Userspace is responsible for passing in resources for samples to be >> exposed, including the event file descriptor for notification of new >> sample availability, the ringbuffer BO to store samples, and the >> control BO along with the offset for mapping the insert and extract >> indices. Though these indices are only a total of 8 bytes, userspace >> can then reuse the same physical page for tracking the state of >> multiple buffers by giving different offsets from the BO start to >> map them. >> >> Co-developed-by: Mihail Atanassov <mihail.atanas...@arm.com> >> Signed-off-by: Mihail Atanassov <mihail.atanas...@arm.com> >> Signed-off-by: Lukas Zapolskas <lukas.zapols...@arm.com> >> --- >> include/uapi/drm/panthor_drm.h | 565 +++++++++++++++++++++++++++++++++ >> 1 file changed, 565 insertions(+) >> >> diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h >> index 97e2c4510e69..a74eabcabbcb 100644 >> --- a/include/uapi/drm/panthor_drm.h >> +++ b/include/uapi/drm/panthor_drm.h >> @@ -127,6 +127,9 @@ enum drm_panthor_ioctl_id { >> >> /** @DRM_PANTHOR_TILER_HEAP_DESTROY: Destroy a tiler heap. 
*/ >> DRM_PANTHOR_TILER_HEAP_DESTROY, >> + >> + /** @DRM_PANTHOR_PERF_CONTROL: Control a performance counter session. */ >> + DRM_PANTHOR_PERF_CONTROL, >> }; >> >> /** >> @@ -226,6 +229,9 @@ enum drm_panthor_dev_query_type { >> * @DRM_PANTHOR_DEV_QUERY_GROUP_PRIORITIES_INFO: Query allowed group >> priorities information. >> */ >> DRM_PANTHOR_DEV_QUERY_GROUP_PRIORITIES_INFO, >> + >> + /** @DRM_PANTHOR_DEV_QUERY_PERF_INFO: Query performance counter >> interface information. */ >> + DRM_PANTHOR_DEV_QUERY_PERF_INFO, >> }; >> >> /** >> @@ -379,6 +385,135 @@ struct drm_panthor_group_priorities_info { >> __u8 pad[3]; >> }; >> >> +/** >> + * enum drm_panthor_perf_feat_flags - Performance counter configuration >> feature flags. >> + */ >> +enum drm_panthor_perf_feat_flags { >> + /** @DRM_PANTHOR_PERF_BLOCK_STATES_SUPPORT: Coarse-grained block states >> are supported. */ >> + DRM_PANTHOR_PERF_BLOCK_STATES_SUPPORT = 1 << 0, >> +}; >> + >> +/** >> + * enum drm_panthor_perf_block_type - Performance counter supported block >> types. >> + */ >> +enum drm_panthor_perf_block_type { >> + /** @DRM_PANTHOR_PERF_BLOCK_METADATA: Internal use only. */ >> + DRM_PANTHOR_PERF_BLOCK_METADATA = 0, >> + >> + /** @DRM_PANTHOR_PERF_BLOCK_FW: The FW counter block. */ >> + DRM_PANTHOR_PERF_BLOCK_FW, >> + >> + /** @DRM_PANTHOR_PERF_BLOCK_CSHW: The CSHW counter block. */ >> + DRM_PANTHOR_PERF_BLOCK_CSHW, >> + >> + /** @DRM_PANTHOR_PERF_BLOCK_TILER: The tiler counter block. */ >> + DRM_PANTHOR_PERF_BLOCK_TILER, >> + >> + /** @DRM_PANTHOR_PERF_BLOCK_MEMSYS: A memsys counter block. */ >> + DRM_PANTHOR_PERF_BLOCK_MEMSYS, >> + >> + /** @DRM_PANTHOR_PERF_BLOCK_SHADER: A shader core counter block. */ >> + DRM_PANTHOR_PERF_BLOCK_SHADER, >> + >> + /** @DRM_PANTHOR_PERF_BLOCK_FIRST: Internal use only. */ >> + DRM_PANTHOR_PERF_BLOCK_FIRST = DRM_PANTHOR_PERF_BLOCK_FW, >> + >> + /** @DRM_PANTHOR_PERF_BLOCK_LAST: Internal use only. 
*/ >> + DRM_PANTHOR_PERF_BLOCK_LAST = DRM_PANTHOR_PERF_BLOCK_SHADER, >> + >> + /** @DRM_PANTHOR_PERF_BLOCK_MAX: Internal use only. */ >> + DRM_PANTHOR_PERF_BLOCK_MAX = DRM_PANTHOR_PERF_BLOCK_LAST + 1, >> +}; >> + >> +/** >> + * enum drm_panthor_perf_clock - Identifier of the clock used to produce >> the cycle count values >> + * in a given block. >> + * >> + * Since the integrator has the choice of using one or more clocks, there >> may be some confusion >> + * as to which blocks are counted by which clock values unless this >> information is explicitly >> + * provided as part of every block sample. Not every single clock here can >> be used: in the simplest >> + * case, all cycle counts will be associated with the top-level clock. >> + */ >> +enum drm_panthor_perf_clock { >> + /** @DRM_PANTHOR_PERF_CLOCK_TOPLEVEL: Top-level CSF clock. */ >> + DRM_PANTHOR_PERF_CLOCK_TOPLEVEL, >> + >> + /** >> + * @DRM_PANTHOR_PERF_CLOCK_COREGROUP: Core group clock, responsible for >> the MMU, L2 >> + * caches and the tiler. >> + */ >> + DRM_PANTHOR_PERF_CLOCK_COREGROUP, >> + >> + /** @DRM_PANTHOR_PERF_CLOCK_SHADER: Clock for the shader cores. */ >> + DRM_PANTHOR_PERF_CLOCK_SHADER, >> +}; >> + >> +/** >> + * struct drm_panthor_perf_info - Performance counter interface information >> + * >> + * Structure grouping all queryable information relating to the performance >> counter >> + * interfaces. >> + */ >> +struct drm_panthor_perf_info { >> + /** >> + * @counters_per_block: The number of 8-byte counters available in a >> block. >> + */ >> + __u32 counters_per_block; >> + >> + /** >> + * @sample_header_size: The size of the header struct available at the >> beginning >> + * of every sample. >> + */ >> + __u32 sample_header_size; >> + >> + /** >> + * @block_header_size: The size of the header struct inline with the >> counters for a >> + * single block. 
>> + */ >> + __u32 block_header_size; >> + >> + /** >> + * @sample_size: The size of a fully annotated sample, starting with a >> sample header >> + * of size @sample_header_size bytes, and all available >> blocks for the current >> + * configuration, each comprised of @counters_per_block >> 64-bit counters and >> + * a block header of @block_header_size bytes. >> + * >> + * The user must use this field to allocate size for the >> ring buffer. In >> + * the case of new blocks being added, an old userspace >> can always use >> + * this field and ignore any blocks it does not know >> about. >> + */ >> + __u32 sample_size; > > I might've asked this question in a previous review, but couldn't user space > easily calculate > the sample size with sample_header_size + block_header_size*(?_blocks) + > (?_blocks)*counters_per_block ? > It can if the versions of the userspace and the kernel are in lockstep. In the case of an old userspace and a newer kernel, we can have a new field added to the end of this struct, and the user would not know how to interpret this. In that case, the user could not successfully create a new session. >> + /** @flags: Combination of drm_panthor_perf_feat_flags flags. */ >> + __u32 flags; >> + >> + /** >> + * @supported_clocks: Bitmask of the clocks supported by the GPU. >> + * >> + * Each bit represents a variant of the enum drm_panthor_perf_clock. >> + * >> + * For the same GPU, different implementers may have different clocks >> for the same hardware >> + * block. At the moment, up to four clocks are supported, and any >> clocks that are present >> + * will be reported here. > > However, there seems to be just three clocks in the drm_panthor_perf_clock > enum definition. > Thanks for pointing that out! Need to clean this up. >> + */ >> + __u32 supported_clocks; >> + >> + /** @fw_blocks: Number of FW blocks available. */ >> + __u32 fw_blocks; >> + >> + /** @cshw_blocks: Number of CSHW blocks available.
*/ >> + __u32 cshw_blocks; >> + >> + /** @tiler_blocks: Number of tiler blocks available. */ >> + __u32 tiler_blocks; >> + >> + /** @memsys_blocks: Number of memsys blocks available. */ >> + __u32 memsys_blocks; >> + >> + /** @shader_blocks: Number of shader core blocks available. */ >> + __u32 shader_blocks; >> +}; >> + >> /** >> * struct drm_panthor_dev_query - Arguments passed to >> DRM_PANTHOR_IOCTL_DEV_QUERY >> */ >> @@ -977,6 +1112,434 @@ struct drm_panthor_tiler_heap_destroy { >> __u32 pad; >> }; >> >> +/** >> + * DOC: Performance counter decoding in userspace. >> + * >> + * Each sample will be exposed to userspace in the following manner: >> + * >> + * >> +--------+--------+------------------------+--------+-------------------------+-----+ >> + * | Sample | Block | Block | Block | Block >> | ... | >> + * | header | header | counters | header | counters >> | | >> + * >> +--------+--------+------------------------+--------+-------------------------+-----+ >> + * >> + * Each sample will start with a sample header of type @struct >> drm_panthor_perf_sample header, >> + * providing sample-wide information like the start and end timestamps, the >> counter set currently >> + * configured, and any errors that may have occurred during sampling. >> + * >> + * After the fixed size header, the sample will consist of blocks of >> + * 64-bit @drm_panthor_dev_query_perf_info::counters_per_block counters, >> each prefaced with a >> + * header of its own, indicating source block type, as well as the cycle >> count needed to normalize >> + * cycle values within that block, and a clock source identifier. >> + */ >> + >> +/** >> + * enum drm_panthor_perf_block_state - Bitmask of the power and execution >> states that an individual >> + * hardware block went through in a sampling period. 
>> + * >> + * Because the sampling period is controlled from userspace, the block may >> undergo multiple >> + * state transitions, so this must be interpreted as one or more such >> transitions occurring. >> + */ >> +enum drm_panthor_perf_block_state { >> + /** >> + * @DRM_PANTHOR_PERF_BLOCK_STATE_UNKNOWN: The state of this block was >> unknown during >> + * the sampling period. >> + */ >> + DRM_PANTHOR_PERF_BLOCK_STATE_UNKNOWN = 0, >> + >> + /** >> + * @DRM_PANTHOR_PERF_BLOCK_STATE_ON: This block was powered on for some >> or all of >> + * the sampling period. >> + */ >> + DRM_PANTHOR_PERF_BLOCK_STATE_ON = 1 << 0, >> + >> + /** >> + * @DRM_PANTHOR_PERF_BLOCK_STATE_OFF: This block was powered off for >> some or all of the >> + * sampling period. >> + */ >> + DRM_PANTHOR_PERF_BLOCK_STATE_OFF = 1 << 1, >> + >> + /** >> + * @DRM_PANTHOR_PERF_BLOCK_STATE_AVAILABLE: This block was available >> for execution for >> + * some or all of the sampling period. >> + */ >> + DRM_PANTHOR_PERF_BLOCK_STATE_AVAILABLE = 1 << 2, >> + /** >> + * @DRM_PANTHOR_PERF_BLOCK_STATE_UNAVAILABLE: This block was >> unavailable for execution for >> + * some or all of the sampling period. >> + */ >> + DRM_PANTHOR_PERF_BLOCK_STATE_UNAVAILABLE = 1 << 3, >> + >> + /** >> + * @DRM_PANTHOR_PERF_BLOCK_STATE_NORMAL: This block was executing in >> normal mode >> + * for some or all of the sampling period. >> + */ >> + DRM_PANTHOR_PERF_BLOCK_STATE_NORMAL = 1 << 4, >> + >> + /** >> + * @DRM_PANTHOR_PERF_BLOCK_STATE_PROTECTED: This block was executing in >> protected mode >> + * for some or all of the sampling period. >> + */ >> + DRM_PANTHOR_PERF_BLOCK_STATE_PROTECTED = 1 << 5, >> +}; >> + >> +/** >> + * struct drm_panthor_perf_block_header - Header present before every block >> in the >> + * sample ringbuffer. >> + */ >> +struct drm_panthor_perf_block_header { >> + /** @block_type: Type of the block. */ >> + __u8 block_type; >> + >> + /** @block_idx: Block index. 
*/ >> + __u8 block_idx; >> + >> + /** >> + * @block_states: Coarse-grained block transitions, bitmask of enum >> + * drm_panthor_perf_block_states. >> + */ >> + __u8 block_states; >> + >> + /** >> + * @clock: Clock used to produce the cycle count for this block, taken >> from >> + * enum drm_panthor_perf_clock. The cycle counts are stored in the >> sample header. >> + */ >> + __u8 clock; >> + >> + /** @pad: MBZ. */ >> + __u8 pad[4]; >> + >> + /** @enable_mask: Bitmask of counters requested during the session >> setup. */ >> + __u64 enable_mask[2]; >> +}; >> + >> +/** >> + * enum drm_panthor_perf_sample_flags - Sample-wide events that occurred >> over the sampling >> + * period. >> + */ >> +enum drm_panthor_perf_sample_flags { >> + /** >> + * @DRM_PANTHOR_PERF_SAMPLE_OVERFLOW: This sample contains overflows >> due to the duration >> + * of the sampling period. >> + */ >> + DRM_PANTHOR_PERF_SAMPLE_OVERFLOW = 1 << 0, >> + >> + /** >> + * @DRM_PANTHOR_PERF_SAMPLE_ERROR: This sample encountered an error >> condition during >> + * the sample duration. >> + */ >> + DRM_PANTHOR_PERF_SAMPLE_ERROR = 1 << 1, >> +}; >> + >> +/** >> + * struct drm_panthor_perf_sample_header - Header present before every >> sample. >> + */ >> +struct drm_panthor_perf_sample_header { >> + /** >> + * @timestamp_start_ns: Earliest timestamp that values in this sample >> represent, in >> + * nanoseconds. Derived from CLOCK_MONOTONIC_RAW. >> + */ >> + __u64 timestamp_start_ns; >> + >> + /** >> + * @timestamp_end_ns: Latest timestamp that values in this sample >> represent, in >> + * nanoseconds. Derived from CLOCK_MONOTONIC_RAW. >> + */ >> + __u64 timestamp_end_ns; >> + >> + /** @block_set: Set of performance counter blocks. */ >> + __u8 block_set; >> + >> + /** @pad: MBZ. */ >> + __u8 pad[3]; >> + >> + /** @flags: Current sample flags, combination of >> drm_panthor_perf_sample_flags. 
*/ >> + __u32 flags; >> + >> + /** >> + * @user_data: User data provided as part of the command that triggered >> this sample. >> + * >> + * - Automatic samples (periodic ones or those around non-counting >> periods or power state >> + * transitions) will be tagged with the user_data provided as part of >> the >> + * DRM_PANTHOR_PERF_COMMAND_START call. >> + * - Manual samples will be tagged with the user_data provided with the >> + * DRM_PANTHOR_PERF_COMMAND_SAMPLE call. >> + * - A session's final automatic sample will be tagged with the >> user_data provided with the >> + * DRM_PANTHOR_PERF_COMMAND_STOP call. >> + */ >> + __u64 user_data; >> + >> + /** >> + * @toplevel_clock_cycles: The number of cycles elapsed between >> + * drm_panthor_perf_sample_header::timestamp_start_ns and >> + * drm_panthor_perf_sample_header::timestamp_end_ns on the top-level >> clock if the >> + * corresponding bit is set in drm_panthor_perf_info::supported_clocks. >> + */ >> + __u64 toplevel_clock_cycles; >> + >> + /** >> + * @coregroup_clock_cycles: The number of cycles elapsed between >> + * drm_panthor_perf_sample_header::timestamp_start_ns and >> + * drm_panthor_perf_sample_header::timestamp_end_ns on the coregroup >> clock if the >> + * corresponding bit is set in drm_panthor_perf_info::supported_clocks. >> + */ >> + __u64 coregroup_clock_cycles; >> + >> + /** >> + * @shader_clock_cycles: The number of cycles elapsed between >> + * drm_panthor_perf_sample_header::timestamp_start_ns and >> + * drm_panthor_perf_sample_header::timestamp_end_ns on the shader core >> clock if the >> + * corresponding bit is set in drm_panthor_perf_info::supported_clocks. >> + */ >> + __u64 shader_clock_cycles; >> +}; >> + >> +/** >> + * enum drm_panthor_perf_command - Command type passed to the >> DRM_PANTHOR_PERF_CONTROL >> + * IOCTL. >> + */ >> +enum drm_panthor_perf_command { >> + /** @DRM_PANTHOR_PERF_COMMAND_SETUP: Create a new performance counter >> sampling context. 
*/ >> + DRM_PANTHOR_PERF_COMMAND_SETUP, >> + >> + /** @DRM_PANTHOR_PERF_COMMAND_TEARDOWN: Teardown a performance counter >> sampling context. */ >> + DRM_PANTHOR_PERF_COMMAND_TEARDOWN, >> + >> + /** @DRM_PANTHOR_PERF_COMMAND_START: Start a sampling session on the >> indicated context. */ >> + DRM_PANTHOR_PERF_COMMAND_START, >> + >> + /** @DRM_PANTHOR_PERF_COMMAND_STOP: Stop the sampling session on the >> indicated context. */ >> + DRM_PANTHOR_PERF_COMMAND_STOP, >> + >> + /** >> + * @DRM_PANTHOR_PERF_COMMAND_SAMPLE: Request a manual sample on the >> indicated context. >> + * >> + * When the sampling session is configured with a non-zero sampling >> frequency, any >> + * DRM_PANTHOR_PERF_CONTROL calls with this command will be ignored and >> return an >> + * -EINVAL. >> + */ >> + DRM_PANTHOR_PERF_COMMAND_SAMPLE, >> +}; >> + >> +/** >> + * struct drm_panthor_perf_control - Arguments passed to >> DRM_PANTHOR_IOCTL_PERF_CONTROL. >> + */ >> +struct drm_panthor_perf_control { >> + /** @cmd: Command from enum drm_panthor_perf_command. */ >> + __u32 cmd; >> + >> + /** >> + * @handle: session handle. >> + * >> + * Returned by the DRM_PANTHOR_PERF_COMMAND_SETUP call. >> + * It must be used in subsequent commands for the same context. >> + */ >> + __u32 handle; >> + >> + /** >> + * @size: size of the command structure. >> + * >> + * If the pointer is NULL, the size is updated by the driver to provide >> the size of the >> + * output structure. If the pointer is not NULL, the driver will only >> copy min(size, >> + * struct_size) to the pointer and update the size accordingly. >> + */ >> + __u64 size; >> + >> + /** >> + * @pointer: user pointer to a command type struct, such as >> + * @struct drm_panthor_perf_cmd_start. >> + */ >> + __u64 pointer; >> +}; >> + >> +/** >> + * enum drm_panthor_perf_counter_set - The counter set to be requested from >> the hardware. 
>> + * >> + * The hardware supports a single performance counter set at a time, so >> requesting any set other >> + * than the primary may fail if another process is sampling at the same >> time. >> + * >> + * If in doubt, the primary counter set has the most commonly used counters >> and requires no >> + * additional permissions to open. >> + */ >> +enum drm_panthor_perf_counter_set { >> + /** >> + * @DRM_PANTHOR_PERF_SET_PRIMARY: The default set configured on the >> hardware. >> + * >> + * This is the only set for which all counters in all blocks are >> defined. >> + */ >> + DRM_PANTHOR_PERF_SET_PRIMARY, >> + >> + /** >> + * @DRM_PANTHOR_PERF_SET_SECONDARY: The secondary performance counter >> set. >> + * >> + * Some blocks may not have any defined counters for this set, and the >> block will >> + * have the UNAVAILABLE block state permanently set in the block header. >> + * >> + * Accessing this set requires the calling process to have the >> CAP_PERFMON capability. >> + */ >> + DRM_PANTHOR_PERF_SET_SECONDARY, >> + >> + /** >> + * @DRM_PANTHOR_PERF_SET_TERTIARY: The tertiary performance counter set. >> + * >> + * Some blocks may not have any defined counters for this set, and the >> block will have >> + * the UNAVAILABLE block state permanently set in the block header. >> Note that the >> + * tertiary set has the fewest defined counter blocks. >> + * >> + * Accessing this set requires the calling process to have the >> CAP_PERFMON capability. >> + */ >> + DRM_PANTHOR_PERF_SET_TERTIARY, >> +}; >> + >> +/** >> + * struct drm_panthor_perf_ringbuf_control - Struct used to map in the ring >> buffer control indices >> + * into memory shared between >> user and kernel. >> + * >> + */ >> +struct drm_panthor_perf_ringbuf_control { >> + /** >> + * @extract_idx: The index of the latest sample that was processed by >> userspace. Only >> + * modifiable by userspace. 
>> + */ >> + __u64 extract_idx; >> + >> + /** >> + * @insert_idx: The index of the latest sample emitted by the kernel. >> Only >> + * modifiable by the kernel. >> + */ >> + __u64 insert_idx; >> +}; >> + >> +/** >> + * struct drm_panthor_perf_cmd_setup - Arguments passed to >> DRM_PANTHOR_IOCTL_PERF_CONTROL >> + * when the DRM_PANTHOR_PERF_COMMAND_SETUP command is specified. >> + */ >> +struct drm_panthor_perf_cmd_setup { >> + /** >> + * @block_set: Set of performance counter blocks, member of >> + * enum drm_panthor_perf_block_set. >> + * >> + * This is a global configuration and only one set can be active at a >> time. If >> + * another client has already requested a counter set, any further >> requests >> + * for a different counter set will fail and return an -EBUSY. >> + * >> + * If the requested set does not exist, the request will fail and >> return an -EINVAL. >> + * >> + * Some sets have additional requirements to be enabled, and the setup >> request will >> + * fail with an -EACCES if these requirements are not satisfied. >> + */ >> + __u8 block_set; >> + >> + /** @pad: MBZ. */ >> + __u8 pad[7]; >> + >> + /** @fd: eventfd for signalling the availability of a new sample. */ >> + __u32 fd; >> + >> + /** @ringbuf_handle: Handle to the BO to write perf counter sample to. >> */ >> + __u32 ringbuf_handle; >> + >> + /** >> + * @control_handle: Handle to the BO containing a contiguous 16 byte >> range, used for the >> + * insert and extract indices for the ringbuffer. >> + */ >> + __u32 control_handle; >> + >> + /** >> + * @sample_slots: The number of slots available in the >> userspace-provided BO. Must be >> + * a power of 2. >> + * >> + * If sample_slots * sample_size does not match the BO size, the setup >> request will fail. >> + */ >> + __u32 sample_slots; >> + >> + /** >> + * @control_offset: Offset into the control BO where the insert and >> extract indices are >> + * located.
>> + */ >> + __u64 control_offset; >> + >> + /** >> + * @sample_freq_ns: Period between automatic counter sample collection >> in nanoseconds. Zero >> + * disables automatic collection and all collection must be done >> through explicit calls >> + * to DRM_PANTHOR_PERF_CONTROL.SAMPLE. Non-zero values will disable >> manual counter sampling >> + * via the DRM_PANTHOR_PERF_COMMAND_SAMPLE command. >> + * >> + * This disables software-triggered periodic sampling, but hardware >> will still trigger >> + * automatic samples on certain events, including shader core power >> transitions, and >> + * entries to and exits from non-counting periods. The final stop >> command will also >> + * trigger a sample to ensure no data is lost. >> + */ >> + __u64 sample_freq_ns; >> + >> + /** >> + * @fw_enable_mask: Bitmask of counters to request from the FW counter >> block. Any bits >> + * past the first drm_panthor_perf_info.counters_per_block bits will be >> ignored. Bit 0 >> + * corresponds to counter 0. >> + */ >> + __u64 fw_enable_mask[2]; >> + >> + /** >> + * @cshw_enable_mask: Bitmask of counters to request from the CSHW >> counter block. Any bits >> + * past the first drm_panthor_perf_info.counters_per_block bits will be >> ignored. Bit 0 >> + * corresponds to counter 0. >> + */ >> + __u64 cshw_enable_mask[2]; >> + >> + /** >> + * @tiler_enable_mask: Bitmask of counters to request from the tiler >> counter block. Any >> + * bits past the first drm_panthor_perf_info.counters_per_block bits >> will be ignored. Bit >> + * 0 corresponds to counter 0. >> + */ >> + __u64 tiler_enable_mask[2]; >> + >> + /** >> + * @memsys_enable_mask: Bitmask of counters to request from the memsys >> counter blocks. Any >> + * bits past the first drm_panthor_perf_info.counters_per_block bits >> will be ignored. Bit 0 >> + * corresponds to counter 0. 
>> + */ >> + __u64 memsys_enable_mask[2]; >> + >> + /** >> + * @shader_enable_mask: Bitmask of counters to request from the shader >> core counter blocks. >> + * Any bits past the first drm_panthor_perf_info.counters_per_block >> bits will be ignored. >> + * Bit 0 corresponds to counter 0. >> + */ >> + __u64 shader_enable_mask[2]; >> +}; >> + >> +/** >> + * struct drm_panthor_perf_cmd_start - Arguments passed to >> DRM_PANTHOR_IOCTL_PERF_CONTROL >> + * when the DRM_PANTHOR_PERF_COMMAND_START command is specified. >> + */ >> +struct drm_panthor_perf_cmd_start { >> + /** >> + * @user_data: User provided data that will be attached to automatic >> samples collected >> + * until the next DRM_PANTHOR_PERF_COMMAND_STOP. >> + */ >> + __u64 user_data; >> +}; >> + >> +/** >> + * struct drm_panthor_perf_cmd_stop - Arguments passed to >> DRM_PANTHOR_IOCTL_PERF_CONTROL >> + * when the DRM_PANTHOR_PERF_COMMAND_STOP command is specified. >> + */ >> +struct drm_panthor_perf_cmd_stop { >> + /** >> + * @user_data: User provided data that will be attached to the >> automatic sample collected >> + * at the end of this sampling session. >> + */ >> + __u64 user_data; >> +}; >> + >> +/** >> + * struct drm_panthor_perf_cmd_sample - Arguments passed to >> DRM_PANTHOR_IOCTL_PERF_CONTROL >> + * when the DRM_PANTHOR_PERF_COMMAND_SAMPLE command is specified. >> + */ >> +struct drm_panthor_perf_cmd_sample { >> + /** @user_data: User provided data that will be attached to the >> sample.*/ >> + __u64 user_data; >> +}; >> + >> /** >> * DRM_IOCTL_PANTHOR() - Build a Panthor IOCTL number >> * @__access: Access type. Must be R, W or RW. 
>> @@ -1019,6 +1582,8 @@ enum { >> DRM_IOCTL_PANTHOR(WR, TILER_HEAP_CREATE, tiler_heap_create), >> DRM_IOCTL_PANTHOR_TILER_HEAP_DESTROY = >> DRM_IOCTL_PANTHOR(WR, TILER_HEAP_DESTROY, tiler_heap_destroy), >> + DRM_IOCTL_PANTHOR_PERF_CONTROL = >> + DRM_IOCTL_PANTHOR(WR, PERF_CONTROL, perf_control) >> }; >> >> #if defined(__cplusplus) >> -- >> 2.33.0.dirty > > > > Adrian Larumbe Kind regards, Lukas Zapolskas [0]: https://docs.kernel.org/next/userspace-api/perf_ring_buffer.html