Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu> --- docs/GL3.txt | 2 +- docs/relnotes/11.2.0.html | 1 + src/gallium/drivers/nouveau/nvc0/mme/com9097.mme | 50 ++++++++++ src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h | 34 +++++++ src/gallium/drivers/nouveau/nvc0/nvc0_macros.h | 2 + src/gallium/drivers/nouveau/nvc0/nvc0_query.c | 19 ++++ src/gallium/drivers/nouveau/nvc0/nvc0_query.h | 7 ++ src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c | 106 +++++++++++++++++++++ src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 3 +- 9 files changed, 222 insertions(+), 2 deletions(-)
diff --git a/docs/GL3.txt b/docs/GL3.txt index 7623ada..257fc73 100644 --- a/docs/GL3.txt +++ b/docs/GL3.txt @@ -186,7 +186,7 @@ GL 4.4, GLSL 4.40: - specified transform/feedback layout in progress - input/output block locations DONE GL_ARB_multi_bind DONE (all drivers) - GL_ARB_query_buffer_object not started + GL_ARB_query_buffer_object DONE (nvc0) GL_ARB_texture_mirror_clamp_to_edge DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe) GL_ARB_texture_stencil8 DONE (nv50, nvc0, r600, radeonsi, llvmpipe, softpipe) GL_ARB_vertex_type_10f_11f_11f_rev DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe) diff --git a/docs/relnotes/11.2.0.html b/docs/relnotes/11.2.0.html index 404e293..c35ee9a 100644 --- a/docs/relnotes/11.2.0.html +++ b/docs/relnotes/11.2.0.html @@ -48,6 +48,7 @@ Note: some of the new features are only available with certain drivers. <li>GL_ARB_compute_shader on i965</li> <li>GL_ARB_copy_image on r600</li> <li>GL_ARB_indirect_parameters on nvc0</li> +<li>GL_ARB_query_buffer_object on nvc0</li> <li>GL_ARB_shader_atomic_counters on nvc0</li> <li>GL_ARB_shader_draw_parameters on i965, nvc0</li> <li>GL_ARB_shader_storage_buffer_object on nvc0</li> diff --git a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme index 4daa57d..c3dba96 100644 --- a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme +++ b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme @@ -491,3 +491,53 @@ daic_runout: daic_runout_check: branz annul $r7 #daic_runout bra annul #daic_restore + +/* NVC0_3D_MACRO_QUERY_BUFFER_WRITE: + * + * This is a combination macro for all of our query buffer object needs. + * It has the option to clamp results to a configurable amount, as well as + * to write out one or two words. + * + * We use the query engine to write out the values, and expect the query + * address to point to the right place. + * + * arg = clamp value (0 means unclamped). clamped means just 1 written value. 
+ * parm[0] = LSB of end value + * parm[1] = MSB of end value + * parm[2] = LSB of start value + * parm[3] = MSB of start value + * parm[4] = desired sequence + * parm[5] = actual sequence + */ +.section #mme9097_query_buffer_write + maddrsend 0x44 /* SERIALIZE */ + parm $r2 + parm $r3 + parm $r4 + parm $r5 maddr 0x16c2 /* QUERY_SEQUENCE */ + parm $r6 + parm $r7 + mov $r6 (sub $r7 $r6) /* actual - desired */ + mov $r6 (sbb 0x0 0x0) /* if there was underflow, not reached yet */ + braz annul $r6 #qbw_ready + exit +qbw_ready: + mov $r2 (sub $r2 $r4) + braz $r1 #qbw_postclamp + mov $r3 (sbb $r3 $r5) + branz annul $r3 #qbw_clamp + mov $r4 (sub $r1 $r2) + mov $r4 (sbb 0x0 0x0) + braz annul $r4 #qbw_postclamp +qbw_clamp: + mov $r2 $r1 +qbw_postclamp: + send $r2 + mov $r4 0x1000 + branz annul $r1 #qbw_done + send (extrinsrt 0x0 $r4 0x0 0x10 0x10) + maddr 0x16c2 /* QUERY_SEQUENCE */ + send $r3 +qbw_done: + exit send (extrinsrt 0x0 $r4 0x0 0x10 0x10) + maddrsend 0x44 /* SERIALIZE */ diff --git a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h index bf8625e..ac65d4b 100644 --- a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h +++ b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h @@ -332,3 +332,37 @@ uint32_t mme9097_draw_arrays_indirect_count[] = { 0xfffef837, 0xfffdc027, }; + +uint32_t mme9097_query_buffer_write[] = { + 0x00110071, + 0x00000201, +/* 0x000b: qbw_ready */ + 0x00000301, + 0x00000401, +/* 0x0012: qbw_clamp */ +/* 0x0013: qbw_postclamp */ + 0x05b08551, + 0x00000601, +/* 0x0019: qbw_done */ + 0x00000701, + 0x0005be10, + 0x00060610, + 0x0000b027, + 0x00000091, + 0x00051210, + 0x0001c807, + 0x00075b10, + 0x00011837, + 0x00048c10, + 0x00060410, + 0x0000a027, + 0x00000a11, + 0x00001041, + 0x04000411, + 0x00010837, + 0x84010042, + 0x05b08021, + 0x00001841, + 0x840100c2, + 0x00110071, +}; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h b/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h index 
27c026b..49e176c 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h @@ -33,4 +33,6 @@ #define NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT 0x00003850 +#define NVC0_3D_MACRO_QUERY_BUFFER_WRITE 0x00003858 + #endif /* __NVC0_MACROS_H__ */ diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c index 7497317..9b07841 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c @@ -74,6 +74,24 @@ nvc0_get_query_result(struct pipe_context *pipe, struct pipe_query *pq, } static void +nvc0_get_query_result_resource(struct pipe_context *pipe, + struct pipe_query *pq, + boolean wait, + enum pipe_query_value_type result_type, + int index, + struct pipe_resource *resource, + unsigned offset) +{ + struct nvc0_query *q = nvc0_query(pq); + if (!q->funcs->get_query_result_resource) { + assert(!"Unexpected lack of get_query_result_resource"); + return; + } + q->funcs->get_query_result_resource(nvc0_context(pipe), q, wait, result_type, + index, resource, offset); +} + +static void nvc0_render_condition(struct pipe_context *pipe, struct pipe_query *pq, boolean condition, uint mode) @@ -262,6 +280,7 @@ nvc0_init_query_functions(struct nvc0_context *nvc0) pipe->begin_query = nvc0_begin_query; pipe->end_query = nvc0_end_query; pipe->get_query_result = nvc0_get_query_result; + pipe->get_query_result_resource = nvc0_get_query_result_resource; pipe->render_condition = nvc0_render_condition; nvc0->cond_condmode = NVC0_3D_COND_MODE_ALWAYS; } diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.h b/src/gallium/drivers/nouveau/nvc0/nvc0_query.h index c46361c..a887b22 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.h @@ -14,6 +14,13 @@ struct nvc0_query_funcs { void (*end_query)(struct nvc0_context *, struct nvc0_query *); boolean (*get_query_result)(struct nvc0_context *, 
struct nvc0_query *, boolean, union pipe_query_result *);
+   void (*get_query_result_resource)(struct nvc0_context *nvc0,
+                                     struct nvc0_query *q,
+                                     boolean wait,
+                                     enum pipe_query_value_type result_type,
+                                     int index,
+                                     struct pipe_resource *resource,
+                                     unsigned offset);
 };
 
 struct nvc0_query {
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
index 1bed016..34b4b46 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
@@ -358,11 +358,117 @@ nvc0_hw_get_query_result(struct nvc0_context *nvc0, struct nvc0_query *q,
    return true;
 }
 
+static void
+nvc0_hw_get_query_result_resource(struct nvc0_context *nvc0,
+                                  struct nvc0_query *q,
+                                  boolean wait,
+                                  enum pipe_query_value_type result_type,
+                                  int index,
+                                  struct pipe_resource *resource,
+                                  unsigned offset)
+{
+   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+   struct nvc0_hw_query *hq = nvc0_hw_query(q);
+   struct nv04_resource *buf = nv04_resource(resource);
+   unsigned stride;
+
+   assert(!hq->funcs || !hq->funcs->get_query_result);
+
+   if (index == -1) { /* write the ready flag instead of a result */
+      if (hq->state != NVC0_HW_QUERY_STATE_READY)
+         nvc0_hw_query_update(nvc0->screen->base.client, q);
+      uint32_t ready[2] = { hq->state == NVC0_HW_QUERY_STATE_READY, 0 };
+      nvc0->base.push_cb(&nvc0->base, buf, offset,
+                         result_type >= QUERY_I64 ? 2 : 1, ready);
+      return;
+   }
+
+   /* We either need to compute a 32- or 64-bit difference between 2 values,
+    * and then store the result as either a 32- or 64-bit value. As such let's
+    * treat all inputs as 64-bit (and just push an extra 0 for the 32-bit
+    * ones), and have one macro that clamps result to i32, u32, or just
+    * outputs the difference (no need to worry about 64-bit clamping).
+    */
+   if (wait) {
+      nvc0_hw_query_fifo_wait(push, q);
+   }
+   nouveau_pushbuf_space(push, 16, 2, 0);
+   PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+   PUSH_REFN (push, buf->bo, buf->domain | NOUVEAU_BO_WR);
+   IMMED_NVC0(push, NVC0_3D(SERIALIZE), 0);
+   BEGIN_NVC0(push, NVC0_3D(QUERY_ADDRESS_HIGH), 2);
+   PUSH_DATAh(push, buf->address + offset);
+   PUSH_DATA (push, buf->address + offset);
+   BEGIN_1IC0(push, NVC0_3D(MACRO_QUERY_BUFFER_WRITE), 7);
+   if (q->type == PIPE_QUERY_OCCLUSION_PREDICATE) /* XXX what if 64-bit? */
+      PUSH_DATA(push, 0x00000001);
+   else if (result_type == QUERY_I32)
+      PUSH_DATA(push, 0x7fffffff);
+   else if (result_type == QUERY_U32)
+      PUSH_DATA(push, 0xffffffff);
+   else
+      PUSH_DATA(push, 0x00000000);
+
+   switch (q->type) {
+   case PIPE_QUERY_SO_STATISTICS:
+      stride = 2;
+      break;
+   case PIPE_QUERY_PIPELINE_STATISTICS:
+      stride = 12;
+      break;
+   default:
+      assert(index == 0);
+      stride = 1;
+      break;
+   }
+
+   if (hq->is64bit) {
+      nouveau_pushbuf_data(push, hq->bo, hq->offset + 16 * index,
+                           8 | NVC0_IB_ENTRY_1_NO_PREFETCH);
+      nouveau_pushbuf_data(push, hq->bo, hq->offset + 16 * (index + stride),
+                           8 | NVC0_IB_ENTRY_1_NO_PREFETCH);
+   } else {
+      nouveau_pushbuf_data(push, hq->bo, hq->offset + 4,
+                           4 | NVC0_IB_ENTRY_1_NO_PREFETCH);
+      PUSH_DATA(push, 0);
+      nouveau_pushbuf_data(push, hq->bo, hq->offset + 16 + 4,
+                           4 | NVC0_IB_ENTRY_1_NO_PREFETCH);
+      PUSH_DATA(push, 0);
+   }
+
+   if (!hq->is64bit) {
+      PUSH_DATA(push, hq->sequence);
+      if (hq->state != NVC0_HW_QUERY_STATE_READY) {
+         nouveau_pushbuf_data(push, hq->bo, hq->offset,
+                              4 | NVC0_IB_ENTRY_1_NO_PREFETCH);
+      } else {
+         PUSH_DATA(push, hq->sequence);
+      }
+   } else {
+      if (nouveau_fence_signalled(hq->fence)) {
+         PUSH_DATA(push, 0);
+         PUSH_DATA(push, 0);
+      } else {
+         PUSH_DATA(push, hq->fence->sequence);
+         nouveau_pushbuf_data(push, nvc0->screen->fence.bo, 0,
+                              4 | NVC0_IB_ENTRY_1_NO_PREFETCH);
+      }
+   }
+
+   if (buf->mm) {
+      nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence);
+      nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence_wr);
+   }
+
+   nvc0->cb_dirty = true;
+}
+
 static const struct nvc0_query_funcs hw_query_funcs = {
    .destroy_query = nvc0_hw_destroy_query,
    .begin_query = nvc0_hw_begin_query,
    .end_query = nvc0_hw_end_query,
    .get_query_result = nvc0_hw_get_query_result,
+   .get_query_result_resource = nvc0_hw_get_query_result_resource,
 };
 
 struct nvc0_query *
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 2911a77..4988803 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -191,6 +191,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
    case PIPE_CAP_MULTI_DRAW_INDIRECT:
    case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
    case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
+   case PIPE_CAP_QUERY_BUFFER_OBJECT:
       return 1;
    case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
       return (class_3d >= NVE4_3D_CLASS) ? 1 : 0;
@@ -217,7 +218,6 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
    case PIPE_CAP_INVALIDATE_BUFFER:
    case PIPE_CAP_GENERATE_MIPMAP:
    case PIPE_CAP_STRING_MARKER:
-   case PIPE_CAP_QUERY_BUFFER_OBJECT:
       return 0;
 
    case PIPE_CAP_VENDOR_ID:
@@ -1047,6 +1047,7 @@ nvc0_screen_create(struct nouveau_device *dev)
    MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT, mme9097_draw_elts_indirect);
    MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT, mme9097_draw_arrays_indirect_count);
    MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT, mme9097_draw_elts_indirect_count);
+   MK_MACRO(NVC0_3D_MACRO_QUERY_BUFFER_WRITE, mme9097_query_buffer_write);
 
    BEGIN_NVC0(push, NVC0_3D(RASTERIZE_ENABLE), 1);
    PUSH_DATA (push, 1);
-- 
2.4.10

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev