On Mon, Sep 01, 2025 at 11:03:36AM +0530, Ekansh Gupta wrote: > For any remote call to DSP, after sending an invocation message, > fastRPC driver waits for glink response and during this time the > CPU can go into low power modes. This adds latency to overall fastrpc > call as CPU wakeup and scheduling latencies are included. Adding a
s/Adding/Add/, see Documentation/process/submitting-patches.rst > polling mode support with which fastRPC driver will poll continuously > on a memory after sending a message to remote subsystem which will > eliminate CPU wakeup and scheduling latencies and reduce fastRPC > overhead. Describe your design decisions: when it is enabled, why, etc. > > Signed-off-by: Ekansh Gupta <ekansh.gu...@oss.qualcomm.com> > --- > drivers/misc/fastrpc.c | 121 ++++++++++++++++++++++++++++++++++++++--- > 1 file changed, 114 insertions(+), 7 deletions(-) > > diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c > index 57e118de6e4a..939a3e3d29e2 100644 > --- a/drivers/misc/fastrpc.c > +++ b/drivers/misc/fastrpc.c > @@ -22,6 +22,8 @@ > #include <linux/firmware/qcom/qcom_scm.h> > #include <uapi/misc/fastrpc.h> > #include <linux/of_reserved_mem.h> > +#include <linux/compiler.h> > +#include <linux/iopoll.h> > > #define ADSP_DOMAIN_ID (0) > #define MDSP_DOMAIN_ID (1) > @@ -37,6 +39,7 @@ > #define FASTRPC_CTX_MAX (256) > #define FASTRPC_INIT_HANDLE 1 > #define FASTRPC_DSP_UTILITIES_HANDLE 2 > +#define FASTRPC_MAX_STATIC_HANDLE (20) What is this? 
> #define FASTRPC_CTXID_MASK (0xFF00) > #define INIT_FILELEN_MAX (2 * 1024 * 1024) > #define INIT_FILE_NAMELEN_MAX (128) > @@ -105,6 +108,20 @@ > > #define miscdev_to_fdevice(d) container_of(d, struct fastrpc_device, miscdev) > > +/* Poll response number from remote processor for call completion */ > +#define FASTRPC_POLL_RESPONSE (0xdecaf) > + > +/* Polling mode timeout limit */ > +#define FASTRPC_POLL_MAX_TIMEOUT_US (10000) > + > +/* Response types supported for RPC calls */ > +enum fastrpc_response_flags { > + /* normal job completion glink response */ > + NORMAL_RESPONSE = 0, > + /* process updates poll memory instead of glink response */ > + POLL_MODE = 1, > +}; bool is_polled; OR unsigned long is_polled : 1; > + > struct fastrpc_phy_page { > u64 addr; /* physical address */ > u64 size; /* size of contiguous region */ > @@ -235,8 +252,14 @@ struct fastrpc_invoke_ctx { > u32 sc; > u64 *fdlist; > u32 *crc; > + /* Poll memory that DSP updates */ > + u32 *poll; > u64 ctxid; > u64 msg_sz; > + /* work done status flag */ > + bool is_work_done; > + /* response flags from remote processor */ > + enum fastrpc_response_flags rsp_flags; > struct kref refcount; > struct list_head node; /* list of ctxs */ > struct completion work; > @@ -891,7 +914,8 @@ static int fastrpc_get_meta_size(struct > fastrpc_invoke_ctx *ctx) > sizeof(struct fastrpc_invoke_buf) + > sizeof(struct fastrpc_phy_page)) * ctx->nscalars + > sizeof(u64) * FASTRPC_MAX_FDLIST + > - sizeof(u32) * FASTRPC_MAX_CRCLIST; > + sizeof(u32) * FASTRPC_MAX_CRCLIST + > + sizeof(u32); > > return size; > } > @@ -987,6 +1011,8 @@ static int fastrpc_get_args(u32 kernel, struct > fastrpc_invoke_ctx *ctx) > list = fastrpc_invoke_buf_start(rpra, ctx->nscalars); > pages = fastrpc_phy_page_start(list, ctx->nscalars); > ctx->fdlist = (u64 *)(pages + ctx->nscalars); > + ctx->crc = (u32 *)(ctx->fdlist + FASTRPC_MAX_FDLIST); Why? 
> + ctx->poll = (u32 *)(ctx->crc + FASTRPC_MAX_CRCLIST); > args = (uintptr_t)ctx->buf->virt + metalen; > rlen = pkt_size - metalen; > ctx->rpra = rpra; > @@ -1155,6 +1181,83 @@ static int fastrpc_invoke_send(struct > fastrpc_session_ctx *sctx, > > } > > +static inline u32 fastrpc_poll_op(void *p) > +{ > + struct fastrpc_invoke_ctx *ctx = p; > + > + dma_rmb(); > + return READ_ONCE(*ctx->poll); Is this enough? Is the write by the DSP side going to invalidate the cache for this memory location? Think about older platforms which usually don't have dma-coherent property in the DSP / FastRPC nodes. > +} > + > +static int poll_for_remote_response(struct fastrpc_invoke_ctx *ctx) > +{ > + u32 val; > + int ret; > + > + /* > + * Poll until DSP writes FASTRPC_POLL_RESPONSE into *ctx->poll > + * or until another path marks the work done. > + */ > + ret = read_poll_timeout_atomic(fastrpc_poll_op, val, > + (val == FASTRPC_POLL_RESPONSE) || > + ctx->is_work_done, 1, > + FASTRPC_POLL_MAX_TIMEOUT_US, false, ctx); > + > + if (!ret && val == FASTRPC_POLL_RESPONSE) { > + ctx->is_work_done = true; > + ctx->retval = 0; > + } > + > + if (ret == -ETIMEDOUT) > + ret = -EIO; > + > + return ret; > +} > + > +static inline int fastrpc_wait_for_response(struct fastrpc_invoke_ctx *ctx, > + u32 kernel) > +{ > + int err = 0; > + > + if (kernel) { > + if (!wait_for_completion_timeout(&ctx->work, 10 * HZ)) > + err = -ETIMEDOUT; > + } else { > + err = wait_for_completion_interruptible(&ctx->work); > + } > + > + return err; > +} > + > +static int fastrpc_wait_for_completion(struct fastrpc_invoke_ctx *ctx, > + u32 kernel) > +{ > + int err; > + > + do { > + switch (ctx->rsp_flags) { > + case NORMAL_RESPONSE: > + err = fastrpc_wait_for_response(ctx, kernel); > + if (err || ctx->is_work_done) > + return err; > + break; > + case POLL_MODE: > + err = poll_for_remote_response(ctx); > + /* If polling timed out, move to normal response mode */ > + if (err) > + ctx->rsp_flags = NORMAL_RESPONSE; > + break; > + 
default: What kind of response type can it be? Had you checked for the flag being set, you wouldn't have a false possibility of having another response type. > + err = -EBADR; > + dev_dbg(ctx->fl->sctx->dev, > + "unsupported response type:0x%x\n", > ctx->rsp_flags); > + break; > + } > + } while (!ctx->is_work_done); > + > + return err; > +} > + > static int fastrpc_internal_invoke(struct fastrpc_user *fl, u32 kernel, > u32 handle, u32 sc, > struct fastrpc_invoke_args *args) > @@ -1190,16 +1293,19 @@ static int fastrpc_internal_invoke(struct > fastrpc_user *fl, u32 kernel, > if (err) > goto bail; > > - if (kernel) { > - if (!wait_for_completion_timeout(&ctx->work, 10 * HZ)) > - err = -ETIMEDOUT; > - } else { > - err = wait_for_completion_interruptible(&ctx->work); > - } > + if (handle > FASTRPC_MAX_STATIC_HANDLE && fl->pd == USER_PD) > + ctx->rsp_flags = POLL_MODE; This definitely needs to be explained. > > + err = fastrpc_wait_for_completion(ctx, kernel); > if (err) > goto bail; > > + if (!ctx->is_work_done) { > + err = -ETIMEDOUT; > + dev_dbg(fl->sctx->dev, "Invalid workdone state for handle 0x%x, > sc 0x%x\n", > + handle, sc); > + goto bail; > + } > /* make sure that all memory writes by DSP are seen by CPU */ > dma_rmb(); > /* populate all the output buffers with results */ > @@ -2462,6 +2568,7 @@ static int fastrpc_rpmsg_callback(struct rpmsg_device > *rpdev, void *data, > > ctx->retval = rsp->retval; > complete(&ctx->work); > + ctx->is_work_done = true; > > /* > * The DMA buffer associated with the context cannot be freed in > -- > 2.34.1 > -- With best wishes Dmitry