On Mon, Jan 27, 2025 at 10:12:38AM +0530, Ekansh Gupta wrote:
> For any remote call to DSP, after sending an invocation message,
> fastRPC driver waits for glink response and during this time the
> CPU can go into low power modes. Adding a polling mode support
> with which fastRPC driver will poll continuously on a memory
> after sending a message to remote subsystem which will eliminate
> CPU wakeup and scheduling latencies and reduce fastRPC overhead.
> With this change, DSP always sends a glink response which will
> get ignored if polling mode didn't time out.

Is there a chance to implement actual async I/O protocol with the help
of the poll() call instead of hiding the polling / wait inside the
invoke2?

> 
> Signed-off-by: Ekansh Gupta <quic_ekang...@quicinc.com>
> ---
>  drivers/misc/fastrpc.c      | 122 +++++++++++++++++++++++++++++++++---
>  include/uapi/misc/fastrpc.h |   3 +-
>  2 files changed, 114 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c
> index cfacee0dded5..257a741af115 100644
> --- a/drivers/misc/fastrpc.c
> +++ b/drivers/misc/fastrpc.c
> @@ -19,6 +19,7 @@
>  #include <linux/rpmsg.h>
>  #include <linux/scatterlist.h>
>  #include <linux/slab.h>
> +#include <linux/delay.h>
>  #include <linux/firmware/qcom/qcom_scm.h>
>  #include <uapi/misc/fastrpc.h>
>  #include <linux/of_reserved_mem.h>
> @@ -38,6 +39,7 @@
>  #define FASTRPC_CTX_MAX (256)
>  #define FASTRPC_INIT_HANDLE  1
>  #define FASTRPC_DSP_UTILITIES_HANDLE 2
> +#define FASTRPC_MAX_STATIC_HANDLE (20)
>  #define FASTRPC_CTXID_MASK (0xFF0)
>  #define INIT_FILELEN_MAX (2 * 1024 * 1024)
>  #define INIT_FILE_NAMELEN_MAX (128)
> @@ -106,6 +108,19 @@
>  
>  #define miscdev_to_fdevice(d) container_of(d, struct fastrpc_device, miscdev)
>  
> +/* Poll response number from remote processor for call completion */
> +#define FASTRPC_POLL_RESPONSE (0xdecaf)
> +/* timeout in us for polling until memory barrier */
> +#define FASTRPC_POLL_TIME_MEM_UPDATE (500)
> +
> +/* Response types supported for RPC calls */
> +enum fastrpc_response_flags {
> +     /* normal job completion glink response */
> +     NORMAL_RESPONSE = 0,
> +     /* process updates poll memory instead of glink response */
> +     POLL_MODE = 1,
> +};
> +
>  static const char *domains[FASTRPC_DEV_MAX] = { "adsp", "mdsp",
>                                               "sdsp", "cdsp", "cdsp1" };
>  struct fastrpc_phy_page {
> @@ -238,9 +253,16 @@ struct fastrpc_invoke_ctx {
>       u32 sc;
>       u64 *fdlist;
>       u32 *crclist;
> +     u32 *poll;
>       void __user *crc;
>       u64 ctxid;
>       u64 msg_sz;
> +     /* Threads poll for specified timeout and fall back to glink wait */
> +     u64 poll_timeout;
> +     /* work done status flag */
> +     bool is_work_done;
> +     /* response flags from remote processor */
> +     enum fastrpc_response_flags rsp_flags;
>       struct kref refcount;
>       struct list_head node; /* list of ctxs */
>       struct completion work;
> @@ -258,6 +280,7 @@ struct fastrpc_invoke_ctx {
>  struct fastrpc_ctx_args {
>       struct fastrpc_invoke_args *args;
>       void __user *crc;
> +     u64 poll_timeout;
>  };
>  
>  struct fastrpc_session_ctx {
> @@ -619,11 +642,14 @@ static struct fastrpc_invoke_ctx *fastrpc_context_alloc(
>       fastrpc_channel_ctx_get(cctx);
>  
>       ctx->crc = cargs->crc;
> +     ctx->poll_timeout = cargs->poll_timeout;
>       ctx->sc = sc;
>       ctx->retval = -1;
>       ctx->pid = current->pid;
>       ctx->client_id = user->client_id;
>       ctx->cctx = cctx;
> +     ctx->rsp_flags = NORMAL_RESPONSE;
> +     ctx->is_work_done = false;
>       init_completion(&ctx->work);
>       INIT_WORK(&ctx->put_work, fastrpc_context_put_wq);
>  
> @@ -882,7 +908,8 @@ static int fastrpc_get_meta_size(struct 
> fastrpc_invoke_ctx *ctx)
>               sizeof(struct fastrpc_invoke_buf) +
>               sizeof(struct fastrpc_phy_page)) * ctx->nscalars +
>               sizeof(u64) * FASTRPC_MAX_FDLIST +
> -             sizeof(u32) * FASTRPC_MAX_CRCLIST;
> +             sizeof(u32) * FASTRPC_MAX_CRCLIST +
> +             sizeof(u32);
>  
>       return size;
>  }
> @@ -975,6 +1002,8 @@ static int fastrpc_get_args(u32 kernel, struct 
> fastrpc_invoke_ctx *ctx)
>       pages = fastrpc_phy_page_start(list, ctx->nscalars);
>       ctx->fdlist = (u64 *)(pages + ctx->nscalars);
>       ctx->crclist = (u32 *)(ctx->fdlist + FASTRPC_MAX_FDLIST);
> +     ctx->poll = (u32 *)(ctx->crclist + FASTRPC_MAX_CRCLIST);
> +
>       args = (uintptr_t)ctx->buf->virt + metalen;
>       rlen = pkt_size - metalen;
>       ctx->rpra = rpra;
> @@ -1145,6 +1174,72 @@ static int fastrpc_invoke_send(struct 
> fastrpc_session_ctx *sctx,
>  
>  }
>  
> +static int poll_for_remote_response(struct fastrpc_invoke_ctx *ctx, u64 
> timeout)
> +{
> +     int err = -EIO, i, j;
> +
> +     /* poll on memory for DSP response. Return failure on timeout */
> +     for (i = 0, j = 0; i < timeout; i++, j++) {
> +             if (*ctx->poll == FASTRPC_POLL_RESPONSE) {
> +                     err = 0;
> +                     ctx->is_work_done = true;
> +                     ctx->retval = 0;
> +                     break;
> +             }
> +             if (j == FASTRPC_POLL_TIME_MEM_UPDATE) {
> +                     /* make sure that all poll memory writes by DSP are 
> seen by CPU */
> +                     dma_rmb();
> +                     j = 0;
> +             }
> +             udelay(1);
> +     }
> +     return err;
> +}
> +
> +static inline int fastrpc_wait_for_response(struct fastrpc_invoke_ctx *ctx,
> +                                             u32 kernel)
> +{
> +     int err = 0;
> +
> +     if (kernel) {
> +             if (!wait_for_completion_timeout(&ctx->work, 10 * HZ))
> +                     err = -ETIMEDOUT;
> +     } else {
> +             err = wait_for_completion_interruptible(&ctx->work);
> +     }
> +
> +     return err;
> +}
> +
> +static int fastrpc_wait_for_completion(struct fastrpc_invoke_ctx *ctx,
> +                                     u32 kernel)
> +{
> +     int err;
> +
> +     do {
> +             switch (ctx->rsp_flags) {
> +             case NORMAL_RESPONSE:
> +                     err = fastrpc_wait_for_response(ctx, kernel);
> +                     if (err || ctx->is_work_done)
> +                             return err;
> +                     break;
> +             case POLL_MODE:
> +                     err = poll_for_remote_response(ctx, ctx->poll_timeout);
> +                     /* If polling timed out, move to normal response mode */
> +                     if (err)
> +                             ctx->rsp_flags = NORMAL_RESPONSE;
> +                     break;
> +             default:
> +                     err = -EBADR;
> +                     dev_dbg(ctx->fl->sctx->dev,
> +                             "unsupported response type:0x%x\n", 
> ctx->rsp_flags);
> +                     break;
> +             }
> +     } while (!ctx->is_work_done);
> +
> +     return err;
> +}
> +
>  static int fastrpc_internal_invoke(struct fastrpc_user *fl,  u32 kernel,
>                                  u32 handle, u32 sc,
>                                  struct fastrpc_ctx_args *cargs)
> @@ -1180,16 +1275,20 @@ static int fastrpc_internal_invoke(struct 
> fastrpc_user *fl,  u32 kernel,
>       if (err)
>               goto bail;
>  
> -     if (kernel) {
> -             if (!wait_for_completion_timeout(&ctx->work, 10 * HZ))
> -                     err = -ETIMEDOUT;
> -     } else {
> -             err = wait_for_completion_interruptible(&ctx->work);
> -     }
> +     if (ctx->poll_timeout != 0 && handle > FASTRPC_MAX_STATIC_HANDLE
> +         && fl->pd == USER_PD)
> +             ctx->rsp_flags = POLL_MODE;
>  
> +     err = fastrpc_wait_for_completion(ctx, kernel);
>       if (err)
>               goto bail;
>  
> +     if (!ctx->is_work_done) {
> +             err = -ETIMEDOUT;
> +             dev_dbg(fl->sctx->dev, "Invalid workdone state for handle 0x%x, 
> sc 0x%x\n",
> +                     handle, sc);
> +             goto bail;
> +     }
>       /* make sure that all memory writes by DSP are seen by CPU */
>       dma_rmb();
>       /* populate all the output buffers with results */
> @@ -1769,7 +1868,7 @@ static int fastrpc_invokev2(struct fastrpc_user *fl, 
> char __user *argp)
>               return -EFAULT;
>  
>       /* Check if all reserved fields are zero */
> -     for (i = 0; i < 16; i++) {
> +     for (i = 0; i < 14; i++) {
>               if (inv2.reserved[i] != 0)
>                       return -EINVAL;
>       }
> @@ -1779,6 +1878,7 @@ static int fastrpc_invokev2(struct fastrpc_user *fl, 
> char __user *argp)
>               return -ENOMEM;
>  
>       cargs->crc = (void __user *)(uintptr_t)inv2.crc;
> +     cargs->poll_timeout = inv2.poll_timeout;
>  
>       err = fastrpc_remote_invoke(fl, &inv2.inv, cargs);
>       kfree(cargs);
> @@ -2581,12 +2681,14 @@ static int fastrpc_rpmsg_callback(struct rpmsg_device 
> *rpdev, void *data,
>       ctx = idr_find(&cctx->ctx_idr, ctxid);
>       spin_unlock_irqrestore(&cctx->lock, flags);
>  
> +     /* Ignore this failure as context returned will be NULL for polling 
> mode */
>       if (!ctx) {
> -             dev_err(&rpdev->dev, "No context ID matches response\n");
> -             return -ENOENT;
> +             dev_dbg(&rpdev->dev, "No context ID matches response\n");
> +             return 0;
>       }
>  
>       ctx->retval = rsp->retval;
> +     ctx->is_work_done = true;
>       complete(&ctx->work);
>  
>       /*
> diff --git a/include/uapi/misc/fastrpc.h b/include/uapi/misc/fastrpc.h
> index 406b80555d41..1920c537bbbf 100644
> --- a/include/uapi/misc/fastrpc.h
> +++ b/include/uapi/misc/fastrpc.h
> @@ -84,7 +84,8 @@ struct fastrpc_invoke {
>  struct fastrpc_invoke_v2 {
>       struct fastrpc_invoke inv;
>       __u64 crc;
> -     __u32 reserved[16];
> +     __u64 poll_timeout;
> +     __u32 reserved[14];
>  };
>  
>  struct fastrpc_init_create {
> -- 
> 2.34.1
> 

-- 
With best wishes
Dmitry

Reply via email to