On 08/27/2014 02:30 PM, Jordan Justen wrote: > From: Paul Berry <stereotype...@gmail.com> > > The hardware state for compute shaders is almost entirely orthogonal > to the hardware state for 3D rendering. To avoid sending unnecessary > state to the hardware, we'll need to have a separate set of state > atoms for the compute pipeline and the 3D pipeline. That means we > need to maintain two separate sets of dirty bits to determine which > state atoms need to be run. > > But the dirty bits are not completely independent; for example, if > BRW_NEW_SURFACES is flagged while doing 3D rendering, then not only do > we need to re-run 3D state atoms that depend on BRW_NEW_SURFACES, but > we also need to re-run compute state atoms that depend on > BRW_NEW_SURFACES. Those compute state atoms are only re-run the > next time the compute pipeline is run, so the flag has to be > remembered for the compute pipeline until then. > > To accomplish this, we record two sets of dirty bits, one for each > pipeline. When bits are dirtied (via SET_DIRTY_BIT() or > SET_DIRTY_ALL()) we set them to the dirty state in both pipelines. > When brw_state_upload() is run, we clear the dirty bits just for the > pipeline that was run. > > Note that since the number of pipelines is known at compile time to be > 2, the compiler should unroll the loops in SET_DIRTY_BIT() and > SET_DIRTY_ALL(). 
> > Reviewed-by: Jordan Justen <jordan.l.jus...@intel.com> > --- > src/mesa/drivers/dri/i965/brw_context.h | 40 > ++++++++++++++++++++++++---- > src/mesa/drivers/dri/i965/brw_draw.c | 8 +++--- > src/mesa/drivers/dri/i965/brw_state.h | 4 +-- > src/mesa/drivers/dri/i965/brw_state_upload.c | 14 ++++++---- > 4 files changed, 50 insertions(+), 16 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_context.h > b/src/mesa/drivers/dri/i965/brw_context.h > index a44b77f..bab0f39 100644 > --- a/src/mesa/drivers/dri/i965/brw_context.h > +++ b/src/mesa/drivers/dri/i965/brw_context.h > @@ -243,9 +243,33 @@ struct brw_state_flags { > > > /** > + * Enum representing the different pipelines. > + */ > +typedef enum { > + /** > + * 3D rendering pipeline (vertex through fragment shader). > + */ > + BRW_PIPELINE_3D, > + > + /** > + * Compute shader pipeline. > + */ > + BRW_PIPELINE_COMPUTE, > + > + BRW_NUM_PIPELINES > +} brw_pipeline; > + > + > +/** > * Set one of the bits in a field of brw_state_flags. > */ > -#define SET_DIRTY_BIT(FIELD, FLAG) brw->state.dirty.FIELD |= (FLAG) > +#define SET_DIRTY_BIT(FIELD, FLAG) \ > + do { \ > + int which_pipeline; \ > + for (which_pipeline = 0; which_pipeline < BRW_NUM_PIPELINES; \
We use C99 in the driver, so the loop variable can be declared inside the for statement instead of on a separate line: for (int which_pipeline = 0; which_pipeline < BRW_NUM_PIPELINES; \ I might also be tempted to shorten the variable name to something like which_pipe, so that the whole loop header fits on one line. *shrug* > + which_pipeline++) \ > + brw->state.pipeline_dirty[which_pipeline].FIELD |= (FLAG); \ > + } while (false) > > > /** > @@ -253,16 +277,21 @@ struct brw_state_flags { > */ > #define SET_DIRTY_ALL(FIELD) \ > do { \ > + int which_pipeline; \ > /* ~0 == 0xffffffff, so make sure field is <= 32 bits */ \ > - STATIC_ASSERT(sizeof(brw->state.dirty.FIELD) == 4); \ > - brw->state.dirty.FIELD = ~0; \ > + STATIC_ASSERT(sizeof(brw->state.pipeline_dirty[0].FIELD) == 4); \ > + for (which_pipeline = 0; which_pipeline < BRW_NUM_PIPELINES; \ > + which_pipeline++) \ > + brw->state.pipeline_dirty[which_pipeline].FIELD = ~0; \ > } while (false) > > > /** > * Check one of the bits in a field of brw_state_flags. > */ > -#define CHECK_DIRTY_BIT(FIELD, FLAG) ((brw->state.dirty.FIELD & (FLAG)) != 0) > +#define CHECK_DIRTY_BIT(FIELD, FLAG) \ > + ((brw->state.pipeline_dirty[brw->state.current_pipeline].FIELD & (FLAG)) \ > + != 0) > > > /** Subclass of Mesa vertex program */ > @@ -1071,7 +1100,8 @@ struct brw_context > > GLuint NewGLState; > struct { > - struct brw_state_flags dirty; > + struct brw_state_flags pipeline_dirty[BRW_NUM_PIPELINES]; > + brw_pipeline current_pipeline; > } state; > > struct brw_cache cache; > diff --git a/src/mesa/drivers/dri/i965/brw_draw.c > b/src/mesa/drivers/dri/i965/brw_draw.c > index 2b773c1..3bdbb43 100644 > --- a/src/mesa/drivers/dri/i965/brw_draw.c > +++ b/src/mesa/drivers/dri/i965/brw_draw.c > @@ -447,9 +447,9 @@ retry: > * *_set_prim or intel_batchbuffer_flush(), which only impacts > * brw->state.dirty.brw. 
> */ > - if (brw->state.dirty.brw) { > + if (brw->state.pipeline_dirty[BRW_PIPELINE_3D].brw) { > brw->no_batch_wrap = true; > - brw_upload_state(brw); > + brw_upload_state(brw, BRW_PIPELINE_3D); > } > > brw_emit_prim(brw, &prims[i], brw->primitive); > @@ -480,8 +480,8 @@ retry: > /* Now that we know we haven't run out of aperture space, we can safely > * reset the dirty bits. > */ > - if (brw->state.dirty.brw) > - brw_clear_dirty_bits(brw); > + if (brw->state.pipeline_dirty[BRW_PIPELINE_3D].brw) > + brw_clear_dirty_bits(brw, BRW_PIPELINE_3D); > } > > if (brw->always_flush_batch) > diff --git a/src/mesa/drivers/dri/i965/brw_state.h > b/src/mesa/drivers/dri/i965/brw_state.h > index bbaa85c..69657c1 100644 > --- a/src/mesa/drivers/dri/i965/brw_state.h > +++ b/src/mesa/drivers/dri/i965/brw_state.h > @@ -160,8 +160,8 @@ brw_depthbuffer_format(struct brw_context *brw); > /*********************************************************************** > * brw_state.c > */ > -void brw_upload_state(struct brw_context *brw); > -void brw_clear_dirty_bits(struct brw_context *brw); > +void brw_upload_state(struct brw_context *brw, brw_pipeline pipeline); > +void brw_clear_dirty_bits(struct brw_context *brw, brw_pipeline pipeline); > void brw_init_state(struct brw_context *brw); > void brw_destroy_state(struct brw_context *brw); > > diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c > b/src/mesa/drivers/dri/i965/brw_state_upload.c > index 7324274..8e45f29 100644 > --- a/src/mesa/drivers/dri/i965/brw_state_upload.c > +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c > @@ -389,7 +389,8 @@ void brw_init_state( struct brw_context *brw ) > /* Make sure that brw->state.dirty.brw has enough bits to hold all > possible > * dirty flags. 
> */ > - STATIC_ASSERT(BRW_NUM_STATE_BITS <= 8 * sizeof(brw->state.dirty.brw)); > + STATIC_ASSERT(BRW_NUM_STATE_BITS <= > + 8 * sizeof(brw->state.pipeline_dirty[0].brw)); > > ctx->DriverFlags.NewTransformFeedback = BRW_NEW_TRANSFORM_FEEDBACK; > ctx->DriverFlags.NewTransformFeedbackProg = BRW_NEW_TRANSFORM_FEEDBACK; > @@ -565,13 +566,16 @@ brw_print_dirty_count(struct dirty_bit_map *bit_map) > /*********************************************************************** > * Emit all state: > */ > -void brw_upload_state(struct brw_context *brw) > +void brw_upload_state(struct brw_context *brw, brw_pipeline pipeline) > { > struct gl_context *ctx = &brw->ctx; > - struct brw_state_flags *state = &brw->state.dirty; > + struct brw_state_flags *state = &brw->state.pipeline_dirty[pipeline]; > int i; > static int dirty_count = 0; > > + assert(0 <= pipeline && pipeline < BRW_NUM_PIPELINES); > + brw->state.current_pipeline = pipeline; > + > SET_DIRTY_BIT(mesa, brw->NewGLState); > brw->NewGLState = 0; > > @@ -677,8 +681,8 @@ void brw_upload_state(struct brw_context *brw) > * brw_upload_state() call. > */ > void > -brw_clear_dirty_bits(struct brw_context *brw) > +brw_clear_dirty_bits(struct brw_context *brw, brw_pipeline pipeline) > { > - struct brw_state_flags *state = &brw->state.dirty; > + struct brw_state_flags *state = &brw->state.pipeline_dirty[pipeline]; > memset(state, 0, sizeof(*state)); > } > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev