On Sun, Sep 22, 2024 at 06:07:09AM -0700, Zhi Wang wrote:
> The max RPC size is 16 pages (including the RPC header). To send an RPC
> larger than 16 pages, nvkm should split it into multiple RPCs and send
> it accordingly. The first of the split RPCs has the expected function
> number, while the rest of the split RPCs are sent with function number
> as NV_VGPU_MSG_FUNCTION_CONTINUATION_RECORD. GSP will consume the split
> RPCs from the cmdq and always write the result back to the msgq. The
> result is also formed as split RPCs.
> 
> However, NVKM is able to send split RPC when dealing with large RPCs,
> but totally not aware of handling the return of the large RPCs, which
> are the split RPC in the msgq. Thus, it keeps dumping the unknown RPC
> messages from msgq, which is actually CONTINUATION_RECORD message,
> discard them unexpectly. Thus, the caller will not be able to consume
> the result from GSP.
> 
> Introduce the handling of split RPCs on the msgq path. Slightly
> re-factor the low-level part of receiving RPCs from the msgq, RPC
> vehicle handling to merge the split RPCs back into a large RPC before
> handling it to the upper level. Thus, the upper-level of RPC APIs don't
> need to be heavily changed.
> 
> Signed-off-by: Zhi Wang <z...@nvidia.com>
> ---
>  .../gpu/drm/nouveau/nvkm/subdev/gsp/r535.c    | 172 ++++++++++++------
>  1 file changed, 121 insertions(+), 51 deletions(-)
> 
> diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c 
> b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
> index 49721935013b..ec4ab732997a 100644
> --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
> +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
> @@ -72,6 +72,21 @@ struct r535_gsp_msg {
>  
>  #define GSP_MSG_HDR_SIZE offsetof(struct r535_gsp_msg, data)
>  
> +struct nvfw_gsp_rpc {
> +     u32 header_version;
> +     u32 signature;
> +     u32 length;
> +     u32 function;
> +     u32 rpc_result;
> +     u32 rpc_result_private;
> +     u32 sequence;
> +     union {
> +             u32 spare;
> +             u32 cpuRmGfid;
> +     };
> +     u8  data[];
> +};
> +
>  static int
>  r535_rpc_status_to_errno(uint32_t rpc_status)
>  {
> @@ -87,12 +102,12 @@ r535_rpc_status_to_errno(uint32_t rpc_status)
>  }
>  
>  static void *
> -r535_gsp_msgq_wait(struct nvkm_gsp *gsp, u32 repc, u32 *prepc, int *ptime)
> +r535_gsp_msgq_wait(struct nvkm_gsp *gsp, u32 repc, u32 *prepc, int *ptime,
> +                u8 *msg, bool skip_copy_rpc_header)

This function now has very specific expectations on the memory that has to be
allocated by the caller, which is dependent on multiple factors, i.e.
`skip_copy_rpc_header`, `prepc`, etc. and also exposes the burden to free the
memory on failure to the caller.

Besides that, I think the name `r535_gsp_msgq_wait` becomes misleading, it has
quite some more semantics than just that meanwhile.

I think shouldn't open-code all this, instead I think it would be better to wrap
all relevant arguments in a dedicated state structure that explains all the
different cases and conditionals, and build a properly documented state machine
around it.

>  {
>       struct r535_gsp_msg *mqe;
>       u32 size, rptr = *gsp->msgq.rptr;
>       int used;
> -     u8 *msg;
>       u32 len;
>  
>       size = DIV_ROUND_UP(GSP_MSG_HDR_SIZE + repc, GSP_PAGE_SIZE);
> @@ -123,13 +138,13 @@ r535_gsp_msgq_wait(struct nvkm_gsp *gsp, u32 repc, u32 
> *prepc, int *ptime)
>  
>       size = ALIGN(repc + GSP_MSG_HDR_SIZE, GSP_PAGE_SIZE);
>  
> -     msg = kvmalloc(repc, GFP_KERNEL);
> -     if (!msg)
> -             return ERR_PTR(-ENOMEM);
> -
>       len = ((gsp->msgq.cnt - rptr) * GSP_PAGE_SIZE) - sizeof(*mqe);
>       len = min_t(u32, repc, len);
> -     memcpy(msg, mqe->data, len);
> +     if (!skip_copy_rpc_header)
> +             memcpy(msg, mqe->data, len);
> +     else
> +             memcpy(msg, mqe->data + sizeof(struct nvfw_gsp_rpc),
> +                    len - sizeof(struct nvfw_gsp_rpc));
>  
>       repc -= len;
>  
> @@ -145,10 +160,91 @@ r535_gsp_msgq_wait(struct nvkm_gsp *gsp, u32 repc, u32 
> *prepc, int *ptime)
>       return msg;
>  }
>  
> +static void
> +r535_gsp_msg_dump(struct nvkm_gsp *gsp, struct nvfw_gsp_rpc *msg, int lvl)
> +{
> +     if (gsp->subdev.debug >= lvl) {
> +             nvkm_printk__(&gsp->subdev, lvl, info,
> +                           "msg fn:%d len:0x%x/0x%zx res:0x%x resp:0x%x\n",
> +                           msg->function, msg->length, msg->length - 
> sizeof(*msg),
> +                           msg->rpc_result, msg->rpc_result_private);
> +             print_hex_dump(KERN_INFO, "msg: ", DUMP_PREFIX_OFFSET, 16, 1,
> +                            msg->data, msg->length - sizeof(*msg), true);
> +     }
> +}
> +
>  static void *
> -r535_gsp_msgq_recv(struct nvkm_gsp *gsp, u32 repc, int *ptime)
> +r535_gsp_msgq_recv_continuation(struct nvkm_gsp *gsp, u32 *payload_size,
> +                             u8 *buf, int *ptime)
>  {
> -     return r535_gsp_msgq_wait(gsp, repc, NULL, ptime);
> +     struct nvkm_subdev *subdev = &gsp->subdev;
> +     struct nvfw_gsp_rpc *msg;
> +     u32 size;
> +
> +     /* Peek next message */
> +     msg = r535_gsp_msgq_wait(gsp, sizeof(*msg), &size, ptime, NULL,
> +                              false);
> +     if (IS_ERR_OR_NULL(msg))
> +             return msg;
> +
> +     if (msg->function != NV_VGPU_MSG_FUNCTION_CONTINUATION_RECORD) {
> +             nvkm_error(subdev, "Not a continuation of a large RPC\n");
> +             r535_gsp_msg_dump(gsp, msg, NV_DBG_ERROR);
> +             return ERR_PTR(-EIO);
> +     }
> +
> +     *payload_size = msg->length - sizeof(*msg);
> +     return r535_gsp_msgq_wait(gsp, msg->length, NULL, ptime, buf,
> +                               true);
> +}
> +
> +static void *
> +r535_gsp_msgq_recv(struct nvkm_gsp *gsp, u32 msg_repc, u32 total_repc,
> +                int *ptime)
> +{
> +     struct nvfw_gsp_rpc *msg;
> +     const u32 max_msg_size = (16 * 0x1000) - sizeof(struct r535_gsp_msg);
> +     const u32 max_rpc_size = max_msg_size - sizeof(*msg);
> +     u32 repc = total_repc;
> +     u8 *buf, *next;
> +
> +     if (WARN_ON(msg_repc > max_msg_size))
> +             return NULL;
> +
> +     buf = kvmalloc(max_t(u32, msg_repc, total_repc + sizeof(*msg)), 
> GFP_KERNEL);
> +     if (!buf)
> +             return ERR_PTR(-ENOMEM);
> +
> +     msg = r535_gsp_msgq_wait(gsp, msg_repc, NULL, ptime, buf, false);
> +     if (IS_ERR_OR_NULL(msg)) {
> +             kfree(buf);
> +             return msg;
> +     }
> +
> +     if (total_repc <= max_rpc_size)
> +             return buf;
> +
> +     next = buf;
> +
> +     next += msg_repc;
> +     repc -= msg_repc - sizeof(*msg);
> +
> +     while (repc) {
> +             struct nvfw_gsp_rpc *cont_msg;
> +             u32 size;
> +
> +             cont_msg = r535_gsp_msgq_recv_continuation(gsp, &size, next,
> +                                                   ptime);
> +             if (IS_ERR_OR_NULL(cont_msg)) {
> +                     kfree(buf);
> +                     return cont_msg;
> +             }
> +             repc -= size;
> +             next += size;
> +     }
> +
> +     msg->length = total_repc + sizeof(*msg);
> +     return buf;
>  }
>  
>  static int
> @@ -236,40 +332,12 @@ r535_gsp_cmdq_get(struct nvkm_gsp *gsp, u32 argc)
>       return cmd->data;
>  }
>  
> -struct nvfw_gsp_rpc {
> -     u32 header_version;
> -     u32 signature;
> -     u32 length;
> -     u32 function;
> -     u32 rpc_result;
> -     u32 rpc_result_private;
> -     u32 sequence;
> -     union {
> -             u32 spare;
> -             u32 cpuRmGfid;
> -     };
> -     u8  data[];
> -};
> -
>  static void
>  r535_gsp_msg_done(struct nvkm_gsp *gsp, struct nvfw_gsp_rpc *msg)
>  {
>       kvfree(msg);
>  }
>  
> -static void
> -r535_gsp_msg_dump(struct nvkm_gsp *gsp, struct nvfw_gsp_rpc *msg, int lvl)
> -{
> -     if (gsp->subdev.debug >= lvl) {
> -             nvkm_printk__(&gsp->subdev, lvl, info,
> -                           "msg fn:%d len:0x%x/0x%zx res:0x%x resp:0x%x\n",
> -                           msg->function, msg->length, msg->length - 
> sizeof(*msg),
> -                           msg->rpc_result, msg->rpc_result_private);
> -             print_hex_dump(KERN_INFO, "msg: ", DUMP_PREFIX_OFFSET, 16, 1,
> -                            msg->data, msg->length - sizeof(*msg), true);
> -     }
> -}
> -
>  static struct nvfw_gsp_rpc *
>  r535_gsp_msg_recv(struct nvkm_gsp *gsp, int fn, u32 repc)
>  {
> @@ -279,11 +347,11 @@ r535_gsp_msg_recv(struct nvkm_gsp *gsp, int fn, u32 
> repc)
>       u32 size;
>  
>  retry:
> -     msg = r535_gsp_msgq_wait(gsp, sizeof(*msg), &size, &time);
> +     msg = r535_gsp_msgq_wait(gsp, sizeof(*msg), &size, &time, NULL, false);
>       if (IS_ERR_OR_NULL(msg))
>               return msg;
>  
> -     msg = r535_gsp_msgq_recv(gsp, msg->length, &time);
> +     msg = r535_gsp_msgq_recv(gsp, msg->length, repc, &time);
>       if (IS_ERR_OR_NULL(msg))
>               return msg;
>  
> @@ -736,6 +804,7 @@ r535_gsp_rpc_push(struct nvkm_gsp *gsp, void *argv, bool 
> wait, u32 repc)
>       mutex_lock(&gsp->cmdq.mutex);
>       if (rpc_size > max_rpc_size) {
>               const u32 fn = rpc->function;
> +             u32 remain_rpc_size = rpc_size;
>  
>               /* Adjust length, and send initial RPC. */
>               rpc->length = sizeof(*rpc) + max_rpc_size;
> @@ -746,11 +815,11 @@ r535_gsp_rpc_push(struct nvkm_gsp *gsp, void *argv, 
> bool wait, u32 repc)
>                       goto done;
>  
>               argv += max_rpc_size;
> -             rpc_size -= max_rpc_size;
> +             remain_rpc_size -= max_rpc_size;
>  
>               /* Remaining chunks sent as CONTINUATION_RECORD RPCs. */
> -             while (rpc_size) {
> -                     u32 size = min(rpc_size, max_rpc_size);
> +             while (remain_rpc_size) {
> +                     u32 size = min(remain_rpc_size, max_rpc_size);
>                       void *next;
>  
>                       next = r535_gsp_rpc_get(gsp, 
> NV_VGPU_MSG_FUNCTION_CONTINUATION_RECORD, size);
> @@ -766,19 +835,20 @@ r535_gsp_rpc_push(struct nvkm_gsp *gsp, void *argv, 
> bool wait, u32 repc)
>                               goto done;
>  
>                       argv += size;
> -                     rpc_size -= size;
> +                     remain_rpc_size -= size;
>               }
>  
>               /* Wait for reply. */
> -             if (wait) {
> -                     rpc = r535_gsp_msg_recv(gsp, fn, repc);
> -                     if (!IS_ERR_OR_NULL(rpc))
> +             rpc = r535_gsp_msg_recv(gsp, fn, rpc_size);
> +             if (!IS_ERR_OR_NULL(rpc)) {
> +                     if (wait)
>                               repv = rpc->data;
> -                     else
> -                             repv = rpc;
> -             } else {
> -                     repv = NULL;
> -             }
> +                     else {
> +                             nvkm_gsp_rpc_done(gsp, rpc);
> +                             repv = NULL;
> +                     }
> +             } else
> +                     repv = wait ? rpc : NULL;
>       } else {
>               repv = r535_gsp_rpc_send(gsp, argv, wait, repc);
>       }
> -- 
> 2.34.1
> 

Reply via email to