Re: [Mesa-dev] [PATCH 06/17] gallium: add a new interface for pipe_context::launch_grid()

Marek Olšák Tue, 26 Jan 2016 02:47:58 -0800

Reviewed-by: Marek Olšák <marek.ol...@amd.com>

Marek


On Sun, Jan 24, 2016 at 10:09 PM, Samuel Pitoiset
<samuel.pitoi...@gmail.com> wrote:
> This introduces pipe_grid_info which contains all information to
> describe a launch_grid call. This will be used to implement indirect
> compute in the same fashion as indirect draw.
>
> Signed-off-by: Samuel Pitoiset <samuel.pitoi...@gmail.com>
> ---
>  src/gallium/drivers/ilo/ilo_gpgpu.c                |  8 ++----
>  src/gallium/drivers/nouveau/nv50/nv50_compute.c    | 16 +++++------
>  src/gallium/drivers/nouveau/nv50/nv50_context.h    |  3 +-
>  .../drivers/nouveau/nv50/nv50_query_hw_sm.c        | 12 ++++++--
>  src/gallium/drivers/nouveau/nvc0/nvc0_compute.c    | 19 ++++++-------
>  src/gallium/drivers/nouveau/nvc0/nvc0_context.h    |  6 ++--
>  .../drivers/nouveau/nvc0/nvc0_query_hw_sm.c        | 12 ++++++--
>  src/gallium/drivers/nouveau/nvc0/nve4_compute.c    | 10 +++----
>  src/gallium/drivers/r600/evergreen_compute.c       | 19 ++++++-------
>  src/gallium/drivers/radeonsi/si_compute.c          | 33 +++++++++++-----------
>  src/gallium/include/pipe/p_context.h               | 17 ++---------
>  src/gallium/include/pipe/p_state.h                 | 27 ++++++++++++++++++
>  src/gallium/state_trackers/clover/core/kernel.cpp  | 13 +++++----
>  src/gallium/tests/trivial/compute.c                | 11 +++++++-
>  14 files changed, 117 insertions(+), 89 deletions(-)
>
> diff --git a/src/gallium/drivers/ilo/ilo_gpgpu.c 
> b/src/gallium/drivers/ilo/ilo_gpgpu.c
> index b741590..ab165b6 100644
> --- a/src/gallium/drivers/ilo/ilo_gpgpu.c
> +++ b/src/gallium/drivers/ilo/ilo_gpgpu.c
> @@ -79,9 +79,7 @@ launch_grid(struct ilo_context *ilo,
>  }
>
>  static void
> -ilo_launch_grid(struct pipe_context *pipe,
> -                const uint *block_layout, const uint *grid_layout,
> -                uint32_t pc, const void *input)
> +ilo_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
>  {
>     struct ilo_context *ilo = ilo_context(pipe);
>     struct ilo_shader_state *cs = ilo->state_vector.cs;
> @@ -92,13 +90,13 @@ ilo_launch_grid(struct pipe_context *pipe,
>     input_buf.buffer_size =
>        ilo_shader_get_kernel_param(cs, ILO_KERNEL_CS_INPUT_SIZE);
>     if (input_buf.buffer_size) {
> -      u_upload_data(ilo->uploader, 0, input_buf.buffer_size, 16, input,
> +      u_upload_data(ilo->uploader, 0, input_buf.buffer_size, 16, info->input,
>              &input_buf.buffer_offset, &input_buf.buffer);
>     }
>
>     ilo_shader_cache_upload(ilo->shader_cache, &ilo->cp->builder);
>
> -   launch_grid(ilo, block_layout, grid_layout, &input_buf, pc);
> +   launch_grid(ilo, info->block, info->grid, &input_buf, info->pc);
>
>     ilo_render_invalidate_hw(ilo->render);
>
> diff --git a/src/gallium/drivers/nouveau/nv50/nv50_compute.c 
> b/src/gallium/drivers/nouveau/nv50/nv50_compute.c
> index 6d23fd6..04488d6 100644
> --- a/src/gallium/drivers/nouveau/nv50/nv50_compute.c
> +++ b/src/gallium/drivers/nouveau/nv50/nv50_compute.c
> @@ -270,13 +270,11 @@ nv50_compute_find_symbol(struct nv50_context *nv50, 
> uint32_t label)
>  }
>
>  void
> -nv50_launch_grid(struct pipe_context *pipe,
> -                 const uint *block_layout, const uint *grid_layout,
> -                 uint32_t label, const void *input)
> +nv50_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
>  {
>     struct nv50_context *nv50 = nv50_context(pipe);
>     struct nouveau_pushbuf *push = nv50->base.pushbuf;
> -   unsigned block_size = block_layout[0] * block_layout[1] * block_layout[2];
> +   unsigned block_size = info->block[0] * info->block[1] * info->block[2];
>     struct nv50_program *cp = nv50->compprog;
>     bool ret;
>
> @@ -286,10 +284,10 @@ nv50_launch_grid(struct pipe_context *pipe,
>        return;
>     }
>
> -   nv50_compute_upload_input(nv50, input);
> +   nv50_compute_upload_input(nv50, info->input);
>
>     BEGIN_NV04(push, NV50_COMPUTE(CP_START_ID), 1);
> -   PUSH_DATA (push, nv50_compute_find_symbol(nv50, label));
> +   PUSH_DATA (push, nv50_compute_find_symbol(nv50, info->pc));
>
>     BEGIN_NV04(push, NV50_COMPUTE(SHARED_SIZE), 1);
>     PUSH_DATA (push, align(cp->cp.smem_size + cp->parm_size + 0x10, 0x40));
> @@ -298,14 +296,14 @@ nv50_launch_grid(struct pipe_context *pipe,
>
>     /* grid/block setup */
>     BEGIN_NV04(push, NV50_COMPUTE(BLOCKDIM_XY), 2);
> -   PUSH_DATA (push, block_layout[1] << 16 | block_layout[0]);
> -   PUSH_DATA (push, block_layout[2]);
> +   PUSH_DATA (push, info->block[1] << 16 | info->block[0]);
> +   PUSH_DATA (push, info->block[2]);
>     BEGIN_NV04(push, NV50_COMPUTE(BLOCK_ALLOC), 1);
>     PUSH_DATA (push, 1 << 16 | block_size);
>     BEGIN_NV04(push, NV50_COMPUTE(BLOCKDIM_LATCH), 1);
>     PUSH_DATA (push, 1);
>     BEGIN_NV04(push, NV50_COMPUTE(GRIDDIM), 1);
> -   PUSH_DATA (push, grid_layout[1] << 16 | grid_layout[0]);
> +   PUSH_DATA (push, info->grid[1] << 16 | info->grid[0]);
>     BEGIN_NV04(push, NV50_COMPUTE(GRIDID), 1);
>     PUSH_DATA (push, 1);
>
> diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.h 
> b/src/gallium/drivers/nouveau/nv50/nv50_context.h
> index 712d00e..cb18df3 100644
> --- a/src/gallium/drivers/nouveau/nv50/nv50_context.h
> +++ b/src/gallium/drivers/nouveau/nv50/nv50_context.h
> @@ -324,7 +324,6 @@ nv98_video_buffer_create(struct pipe_context *pipe,
>
>  /* nv50_compute.c */
>  void
> -nv50_launch_grid(struct pipe_context *, const uint *, const uint *,
> -                 uint32_t, const void *);
> +nv50_launch_grid(struct pipe_context *, const struct pipe_grid_info *);
>
>  #endif
> diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c 
> b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c
> index 79c7023..da3970e 100644
> --- a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c
> +++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c
> @@ -218,11 +218,12 @@ nv50_hw_sm_end_query(struct nv50_context *nv50, struct 
> nv50_hw_query *hq)
>     struct pipe_context *pipe = &nv50->base.pipe;
>     struct nouveau_pushbuf *push = nv50->base.pushbuf;
>     struct nv50_hw_sm_query *hsq = nv50_hw_sm_query(hq);
> +   struct pipe_grid_info info;
>     uint32_t mask;
>     uint32_t input[3];
>     const uint block[3] = { 32, 1, 1 };
>     const uint grid[3] = { screen->MPsInTP, screen->TPs, 1 };
> -   int c;
> +   int c, i;
>
>     if (unlikely(!screen->pm.prog)) {
>        struct nv50_program *prog = CALLOC_STRUCT(nv50_program);
> @@ -262,7 +263,14 @@ nv50_hw_sm_end_query(struct nv50_context *nv50, struct 
> nv50_hw_query *hq)
>     pipe->bind_compute_state(pipe, screen->pm.prog);
>     input[0] = hq->bo->offset + hq->base_offset;
>     input[1] = hq->sequence;
> -   pipe->launch_grid(pipe, block, grid, 0, input);
> +
> +   for (i = 0; i < 3; i++) {
> +      info.block[i] = block[i];
> +      info.grid[i] = grid[i];
> +   }
> +   info.pc = 0;
> +   info.input = input;
> +   pipe->launch_grid(pipe, &info);
>
>     nouveau_bufctx_reset(nv50->bufctx_cp, NV50_BIND_CP_QUERY);
>
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c 
> b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
> index 7180434..5c7dc0e 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
> @@ -183,10 +183,7 @@ nvc0_compute_upload_input(struct nvc0_context *nvc0, 
> const void *input)
>  }
>
>  void
> -nvc0_launch_grid(struct pipe_context *pipe,
> -                 const uint *block_layout, const uint *grid_layout,
> -                 uint32_t label,
> -                 const void *input)
> +nvc0_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
>  {
>     struct nvc0_context *nvc0 = nvc0_context(pipe);
>     struct nouveau_pushbuf *push = nvc0->base.pushbuf;
> @@ -200,10 +197,10 @@ nvc0_launch_grid(struct pipe_context *pipe,
>        return;
>     }
>
> -   nvc0_compute_upload_input(nvc0, input);
> +   nvc0_compute_upload_input(nvc0, info->input);
>
>     BEGIN_NVC0(push, NVC0_COMPUTE(CP_START_ID), 1);
> -   PUSH_DATA (push, nvc0_program_symbol_offset(cp, label));
> +   PUSH_DATA (push, nvc0_program_symbol_offset(cp, info->pc));
>
>     BEGIN_NVC0(push, NVC0_COMPUTE(LOCAL_POS_ALLOC), 3);
>     PUSH_DATA (push, align(cp->cp.lmem_size, 0x10));
> @@ -212,18 +209,18 @@ nvc0_launch_grid(struct pipe_context *pipe,
>
>     BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_SIZE), 3);
>     PUSH_DATA (push, align(cp->cp.smem_size, 0x100));
> -   PUSH_DATA (push, block_layout[0] * block_layout[1] * block_layout[2]);
> +   PUSH_DATA (push, info->block[0] * info->block[1] * info->block[2]);
>     PUSH_DATA (push, cp->num_barriers);
>     BEGIN_NVC0(push, NVC0_COMPUTE(CP_GPR_ALLOC), 1);
>     PUSH_DATA (push, cp->num_gprs);
>
>     /* grid/block setup */
>     BEGIN_NVC0(push, NVC0_COMPUTE(GRIDDIM_YX), 2);
> -   PUSH_DATA (push, (grid_layout[1] << 16) | grid_layout[0]);
> -   PUSH_DATA (push, grid_layout[2]);
> +   PUSH_DATA (push, (info->grid[1] << 16) | info->grid[0]);
> +   PUSH_DATA (push, info->grid[2]);
>     BEGIN_NVC0(push, NVC0_COMPUTE(BLOCKDIM_YX), 2);
> -   PUSH_DATA (push, (block_layout[1] << 16) | block_layout[0]);
> -   PUSH_DATA (push, block_layout[2]);
> +   PUSH_DATA (push, (info->block[1] << 16) | info->block[0]);
> +   PUSH_DATA (push, info->block[2]);
>
>     /* launch preliminary setup */
>     BEGIN_NVC0(push, NVC0_COMPUTE(GRIDID), 1);
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h 
> b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
> index 4ab2ac4..2e726e6 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
> @@ -332,11 +332,9 @@ nvc0_video_buffer_create(struct pipe_context *pipe,
>  void nvc0_push_vbo(struct nvc0_context *, const struct pipe_draw_info *);
>
>  /* nve4_compute.c */
> -void nve4_launch_grid(struct pipe_context *,
> -                      const uint *, const uint *, uint32_t, const void *);
> +void nve4_launch_grid(struct pipe_context *, const struct pipe_grid_info *);
>
>  /* nvc0_compute.c */
> -void nvc0_launch_grid(struct pipe_context *,
> -                      const uint *, const uint *, uint32_t, const void *);
> +void nvc0_launch_grid(struct pipe_context *, const struct pipe_grid_info *);
>
>  #endif
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c 
> b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
> index 721857e..5e418fe 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
> @@ -937,11 +937,12 @@ nvc0_hw_sm_end_query(struct nvc0_context *nvc0, struct 
> nvc0_hw_query *hq)
>     struct nouveau_pushbuf *push = nvc0->base.pushbuf;
>     const bool is_nve4 = screen->base.class_3d >= NVE4_3D_CLASS;
>     struct nvc0_hw_sm_query *hsq = nvc0_hw_sm_query(hq);
> +   struct pipe_grid_info info;
>     uint32_t mask;
>     uint32_t input[3];
>     const uint block[3] = { 32, is_nve4 ? 4 : 1, 1 };
>     const uint grid[3] = { screen->mp_count, screen->gpc_count, 1 };
> -   unsigned c;
> +   unsigned c, i;
>
>     if (unlikely(!screen->pm.prog)) {
>        struct nvc0_program *prog = CALLOC_STRUCT(nvc0_program);
> @@ -989,7 +990,14 @@ nvc0_hw_sm_end_query(struct nvc0_context *nvc0, struct 
> nvc0_hw_query *hq)
>     input[0] = (hq->bo->offset + hq->base_offset);
>     input[1] = (hq->bo->offset + hq->base_offset) >> 32;
>     input[2] = hq->sequence;
> -   pipe->launch_grid(pipe, block, grid, 0, input);
> +
> +   for (i = 0; i < 3; i++) {
> +      info.block[i] = block[i];
> +      info.grid[i] = grid[i];
> +   }
> +   info.pc = 0;
> +   info.input = input;
> +   pipe->launch_grid(pipe, &info);
>
>     nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_QUERY);
>
> diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c 
> b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
> index d3e5676..1719e09 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
> @@ -429,10 +429,7 @@ nve4_compute_alloc_launch_desc(struct nouveau_context 
> *nv,
>  }
>
>  void
> -nve4_launch_grid(struct pipe_context *pipe,
> -                 const uint *block_layout, const uint *grid_layout,
> -                 uint32_t label,
> -                 const void *input)
> +nve4_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
>  {
>     struct nvc0_context *nvc0 = nvc0_context(pipe);
>     struct nouveau_pushbuf *push = nvc0->base.pushbuf;
> @@ -453,13 +450,14 @@ nve4_launch_grid(struct pipe_context *pipe,
>     if (ret)
>        goto out;
>
> -   nve4_compute_setup_launch_desc(nvc0, desc, label, block_layout, grid_layout);
> +   nve4_compute_setup_launch_desc(nvc0, desc, info->pc,
> +                                  info->block, info->grid);
>  #ifdef DEBUG
>     if (debug_get_num_option("NV50_PROG_DEBUG", 0))
>        nve4_compute_dump_launch_desc(desc);
>  #endif
>
> -   nve4_compute_upload_input(nvc0, input, block_layout, grid_layout);
> +   nve4_compute_upload_input(nvc0, info->input, info->block, info->grid);
>
>     /* upload descriptor and flush */
>  #if 0
> diff --git a/src/gallium/drivers/r600/evergreen_compute.c 
> b/src/gallium/drivers/r600/evergreen_compute.c
> index 20945ec..a53646a 100644
> --- a/src/gallium/drivers/r600/evergreen_compute.c
> +++ b/src/gallium/drivers/r600/evergreen_compute.c
> @@ -574,9 +574,7 @@ void evergreen_emit_cs_shader(
>  }
>
>  static void evergreen_launch_grid(
> -               struct pipe_context *ctx_,
> -               const uint *block_layout, const uint *grid_layout,
> -               uint32_t pc, const void *input)
> +               struct pipe_context *ctx_, const struct pipe_grid_info *info)
>  {
>         struct r600_context *ctx = (struct r600_context *)ctx_;
>  #ifdef HAVE_OPENCL
> @@ -584,7 +582,7 @@ static void evergreen_launch_grid(
>         boolean use_kill;
>
>  #if HAVE_LLVM < 0x0306
> -       struct r600_kernel *kernel = &shader->kernels[pc];
> +       struct r600_kernel *kernel = &shader->kernels[info->pc];
>         (void)use_kill;
>          if (!kernel->code_bo) {
>                  void *p;
> @@ -616,19 +614,20 @@ static void evergreen_launch_grid(
>                  ctx->b.ws->buffer_unmap(kernel->code_bo->buf);
>          }
>         shader->active_kernel = kernel;
> -       ctx->cs_shader_state.kernel_index = pc;
> +       ctx->cs_shader_state.kernel_index = info->pc;
>  #else
> -       ctx->cs_shader_state.pc = pc;
> +       ctx->cs_shader_state.pc = info->pc;
>         /* Get the config information for this kernel. */
> -       r600_shader_binary_read_config(&shader->binary, &shader->bc, pc, &use_kill);
> +       r600_shader_binary_read_config(&shader->binary, &shader->bc,
> +                                  info->pc, &use_kill);
>  #endif
>  #endif
>
> -       COMPUTE_DBG(ctx->screen, "*** evergreen_launch_grid: pc = %u\n", pc);
> +       COMPUTE_DBG(ctx->screen, "*** evergreen_launch_grid: pc = %u\n", info->pc);
>
>
> -       evergreen_compute_upload_input(ctx_, block_layout, grid_layout, input);
> -       compute_emit_cs(ctx, block_layout, grid_layout);
> +       evergreen_compute_upload_input(ctx_, info->block, info->grid, info->input);
> +       compute_emit_cs(ctx, info->block, info->grid);
>  }
>
>  static void evergreen_set_compute_resources(struct pipe_context * ctx_,
> diff --git a/src/gallium/drivers/radeonsi/si_compute.c 
> b/src/gallium/drivers/radeonsi/si_compute.c
> index 6ef6eee..fa7a9ac 100644
> --- a/src/gallium/drivers/radeonsi/si_compute.c
> +++ b/src/gallium/drivers/radeonsi/si_compute.c
> @@ -225,9 +225,7 @@ static unsigned compute_num_waves_for_scratch(
>  }
>
>  static void si_launch_grid(
> -               struct pipe_context *ctx,
> -               const uint *block_layout, const uint *grid_layout,
> -               uint32_t pc, const void *input)
> +               struct pipe_context *ctx, const struct pipe_grid_info *info)
>  {
>         struct si_context *sctx = (struct si_context*)ctx;
>         struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
> @@ -247,7 +245,7 @@ static void si_launch_grid(
>         unsigned num_waves_for_scratch;
>
>  #if HAVE_LLVM < 0x0306
> -       shader = &program->kernels[pc];
> +       shader = &program->kernels[info->pc];
>  #endif
>
>
> @@ -267,7 +265,7 @@ static void si_launch_grid(
>
>  #if HAVE_LLVM >= 0x0306
>         /* Read the config information */
> -       si_shader_binary_read_config(&shader->binary, &shader->config, pc);
> +       si_shader_binary_read_config(&shader->binary, &shader->config, info->pc);
>  #endif
>
>         /* Upload the kernel arguments */
> @@ -278,15 +276,16 @@ static void si_launch_grid(
>         kernel_args = sctx->b.ws->buffer_map(input_buffer->buf,
>                         sctx->b.gfx.cs, PIPE_TRANSFER_WRITE);
>         for (i = 0; i < 3; i++) {
> -               kernel_args[i] = grid_layout[i];
> -               kernel_args[i + 3] = grid_layout[i] * block_layout[i];
> -               kernel_args[i + 6] = block_layout[i];
> +               kernel_args[i] = info->grid[i];
> +               kernel_args[i + 3] = info->grid[i] * info->block[i];
> +               kernel_args[i + 6] = info->block[i];
>         }
>
>         num_waves_for_scratch = compute_num_waves_for_scratch(
> -               &sctx->screen->b.info, block_layout, grid_layout);
> +               &sctx->screen->b.info, info->block, info->grid);
>
> -       memcpy(kernel_args + (num_work_size_bytes / 4), input, program->input_size);
> +       memcpy(kernel_args + (num_work_size_bytes / 4), info->input,
> +          program->input_size);
>
>         if (shader->config.scratch_bytes_per_wave > 0) {
>
> @@ -327,11 +326,11 @@ static void si_launch_grid(
>         si_pm4_set_reg(pm4, R_00B818_COMPUTE_START_Z, 0);
>
>         si_pm4_set_reg(pm4, R_00B81C_COMPUTE_NUM_THREAD_X,
> -                               S_00B81C_NUM_THREAD_FULL(block_layout[0]));
> +                               S_00B81C_NUM_THREAD_FULL(info->block[0]));
>         si_pm4_set_reg(pm4, R_00B820_COMPUTE_NUM_THREAD_Y,
> -                               S_00B820_NUM_THREAD_FULL(block_layout[1]));
> +                               S_00B820_NUM_THREAD_FULL(info->block[1]));
>         si_pm4_set_reg(pm4, R_00B824_COMPUTE_NUM_THREAD_Z,
> -                               S_00B824_NUM_THREAD_FULL(block_layout[2]));
> +                               S_00B824_NUM_THREAD_FULL(info->block[2]));
>
>         /* Global buffers */
>         for (i = 0; i < MAX_GLOBAL_BUFFERS; i++) {
> @@ -361,7 +360,7 @@ static void si_launch_grid(
>         shader_va = shader->bo->gpu_address;
>
>  #if HAVE_LLVM >= 0x0306
> -       shader_va += pc;
> +       shader_va += info->pc;
>  #endif
>         radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, shader->bo,
>                                   RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER);
> @@ -413,9 +412,9 @@ static void si_launch_grid(
>                 ;
>
>         si_pm4_cmd_begin(pm4, PKT3_DISPATCH_DIRECT);
> -       si_pm4_cmd_add(pm4, grid_layout[0]); /* Thread groups DIM_X */
> -       si_pm4_cmd_add(pm4, grid_layout[1]); /* Thread groups DIM_Y */
> -       si_pm4_cmd_add(pm4, grid_layout[2]); /* Thread gropus DIM_Z */
> +       si_pm4_cmd_add(pm4, info->grid[0]); /* Thread groups DIM_X */
> +       si_pm4_cmd_add(pm4, info->grid[1]); /* Thread groups DIM_Y */
> +       si_pm4_cmd_add(pm4, info->grid[2]); /* Thread groups DIM_Z */
>         si_pm4_cmd_add(pm4, 1); /* DISPATCH_INITIATOR */
>          si_pm4_cmd_end(pm4, false);
>
> diff --git a/src/gallium/include/pipe/p_context.h 
> b/src/gallium/include/pipe/p_context.h
> index f69a75b..6698ce3 100644
> --- a/src/gallium/include/pipe/p_context.h
> +++ b/src/gallium/include/pipe/p_context.h
> @@ -48,6 +48,7 @@ struct pipe_constant_buffer;
>  struct pipe_debug_callback;
>  struct pipe_depth_stencil_alpha_state;
>  struct pipe_draw_info;
> +struct pipe_grid_info;
>  struct pipe_fence_handle;
>  struct pipe_framebuffer_state;
>  struct pipe_image_view;
> @@ -596,23 +597,9 @@ struct pipe_context {
>     /**
>      * Launch the compute kernel starting from instruction \a pc of the
>      * currently bound compute program.
> -    *
> -    * \a grid_layout and \a block_layout are arrays of size \a
> -    * PIPE_COMPUTE_CAP_GRID_DIMENSION that determine the layout of the
> -    * grid (in block units) and working block (in thread units) to be
> -    * used, respectively.
> -    *
> -    * \a pc For drivers that use PIPE_SHADER_IR_LLVM as their prefered IR,
> -    * this value will be the index of the kernel in the opencl.kernels
> -    * metadata list.
> -    *
> -    * \a input will be used to initialize the INPUT resource, and it
> -    * should point to a buffer of at least
> -    * pipe_compute_state::req_input_mem bytes.
>      */
>     void (*launch_grid)(struct pipe_context *context,
> -                       const uint *block_layout, const uint *grid_layout,
> -                       uint32_t pc, const void *input);
> +                       const struct pipe_grid_info *info);
>     /*@}*/
>
>     /**
> diff --git a/src/gallium/include/pipe/p_state.h 
> b/src/gallium/include/pipe/p_state.h
> index 051856e..b5bc45a 100644
> --- a/src/gallium/include/pipe/p_state.h
> +++ b/src/gallium/include/pipe/p_state.h
> @@ -678,6 +678,33 @@ struct pipe_blit_info
>     boolean alpha_blend; /* dst.rgb = src.rgb * src.a + dst.rgb * (1 - src.a) */
>  };
>
> +/**
> + * Information to describe a launch_grid call.
> + */
> +struct pipe_grid_info
> +{
> +   /**
> +    * For drivers that use PIPE_SHADER_IR_LLVM as their preferred IR, this value
> +    * will be the index of the kernel in the opencl.kernels metadata list.
> +    */
> +   uint32_t pc;
> +
> +   /**
> +    * Will be used to initialize the INPUT resource, and it should point to a
> +    * buffer of at least pipe_compute_state::req_input_mem bytes.
> +    */
> +   void *input;
> +
> +   /**
> +    * Determine the layout of the working block (in thread units) to be used.
> +    */
> +   uint block[3];
> +
> +   /**
> +    * Determine the layout of the grid (in block units) to be used.
> +    */
> +   uint grid[3];
> +};
>
>  /**
>   * Structure used as a header for serialized LLVM programs.
> diff --git a/src/gallium/state_trackers/clover/core/kernel.cpp 
> b/src/gallium/state_trackers/clover/core/kernel.cpp
> index a226ec1..41b3852 100644
> --- a/src/gallium/state_trackers/clover/core/kernel.cpp
> +++ b/src/gallium/state_trackers/clover/core/kernel.cpp
> @@ -55,6 +55,7 @@ kernel::launch(command_queue &q,
>     const auto reduced_grid_size =
>        map(divides(), grid_size, block_size);
>     void *st = exec.bind(&q, grid_offset);
> +   struct pipe_grid_info info;
>
>     // The handles are created during exec_context::bind(), so we need make
>     // sure to call exec_context::bind() before retrieving them.
> @@ -74,11 +75,13 @@ kernel::launch(command_queue &q,
>     q.pipe->set_global_binding(q.pipe, 0, exec.g_buffers.size(),
>                                exec.g_buffers.data(), g_handles.data());
>
> -   q.pipe->launch_grid(q.pipe,
> -                       pad_vector(q, block_size, 1).data(),
> -                       pad_vector(q, reduced_grid_size, 1).data(),
> -                       find(name_equals(_name), m.syms).offset,
> -                       exec.input.data());
> +   // Fill information for the launch_grid() call.
> +   info.block = pad_vector(q, block_size, 1).data(),
> +   info.grid = pad_vector(q, reduced_grid_size, 1).data(),
> +   info.pc = find(name_equals(_name), m.sysm).offset;
> +   info.input = exec.input.data();
> +
> +   q.pipe->launch_grid(q.pipe, &info);
>
>     q.pipe->set_global_binding(q.pipe, 0, exec.g_buffers.size(), NULL, NULL);
>     q.pipe->set_compute_resources(q.pipe, 0, exec.resources.size(), NULL);
> diff --git a/src/gallium/tests/trivial/compute.c 
> b/src/gallium/tests/trivial/compute.c
> index 5ce12ab..288cf2a 100644
> --- a/src/gallium/tests/trivial/compute.c
> +++ b/src/gallium/tests/trivial/compute.c
> @@ -424,8 +424,17 @@ static void launch_grid(struct context *ctx, const uint 
> *block_layout,
>                          const void *input)
>  {
>          struct pipe_context *pipe = ctx->pipe;
> +        struct pipe_grid_info info;
> +        int i;
> +
> +        for (i = 0; i < 3; i++) {
> +                info.block[i] = block_layout[i];
> +                info.grid[i] = grid_layout[i];
> +        }
> +        info.pc = pc;
> +        info.input = input;
>
> -        pipe->launch_grid(pipe, block_layout, grid_layout, pc, input);
> +        pipe->launch_grid(pipe, &info);
>  }
>
>  static void test_default_init(void *p, int s, int x, int y)
> --
> 2.6.4
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 06/17] gallium: add a new interface for pipe_context::launch_grid()

Reply via email to