On Wed, Apr 29, 2015 at 9:14 PM, Dave Airlie <airl...@gmail.com> wrote: > From: Dave Airlie <airl...@redhat.com> > > This takes a different approach to previously, we cannot index into the > inputMapping with anything but the mesa attribute index, so we can't use > the just add one to index trick, we need more info to add one to it > after we've mapped the input.
Almost certainly a failing on my part, but the above makes little sense to me. > > (Fixed copy propgation and cleaned up a little) > > v2: drop float64 format check, just attr->Doubles. > merge enable patch. > v3: cleanup code a bit. > > Signed-off-by: Dave Airlie <airl...@redhat.com> > --- > src/mesa/state_tracker/st_atom_array.c | 171 > ++++++++++++++++++++++------- > src/mesa/state_tracker/st_extensions.c | 4 +- > src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 23 +++- > src/mesa/state_tracker/st_program.c | 5 + > src/mesa/state_tracker/st_program.h | 1 + > 5 files changed, 159 insertions(+), 45 deletions(-) > > diff --git a/src/mesa/state_tracker/st_atom_array.c > b/src/mesa/state_tracker/st_atom_array.c > index d4fb8b8..0f2720b 100644 > --- a/src/mesa/state_tracker/st_atom_array.c > +++ b/src/mesa/state_tracker/st_atom_array.c > @@ -44,7 +44,6 @@ > > #include "cso_cache/cso_context.h" > #include "util/u_math.h" > - > #include "main/bufferobj.h" > #include "main/glformats.h" > > @@ -311,6 +310,17 @@ st_pipe_vertex_format(GLenum type, GLuint size, GLenum > format, > return PIPE_FORMAT_NONE; /* silence compiler warning */ > } > > +static const struct gl_client_array * > +get_client_array(const struct st_vertex_program *vp, > + const struct gl_client_array **arrays, > + int attr) > +{ > + const GLuint mesaAttr = vp->index_to_input[attr]; > + /* st_program uses 0xffffffff to denote a double placeholder attribute */ > + if (mesaAttr == ST_DOUBLE_ATTRIB_PLACEHOLDER) > + return NULL; > + return arrays[mesaAttr]; > +} newline > /** > * Examine the active arrays to determine if we have interleaved > * vertex arrays all living in one VBO, or all living in user space. > @@ -327,11 +337,16 @@ is_interleaved_arrays(const struct st_vertex_program > *vp, > GLboolean userSpaceBuffer = GL_FALSE; > > for (attr = 0; attr < vpv->num_inputs; attr++) { > - const GLuint mesaAttr = vp->index_to_input[attr]; > - const struct gl_client_array *array = arrays[mesaAttr]; > - const struct gl_buffer_object *bufObj = array->BufferObj; > - const GLsizei stride = array->StrideB; /* in bytes */ > + const struct gl_client_array *array; > + const struct gl_buffer_object *bufObj; > + GLsizei stride; > > + array = get_client_array(vp, arrays, attr); > + if (!array) > + continue; > + > + stride = array->StrideB; /* in bytes */ > + bufObj = array->BufferObj; > if (attr == 0) { > /* save info about the first array */ > firstStride = stride; > @@ -358,6 +373,55 @@ is_interleaved_arrays(const struct st_vertex_program *vp, > return GL_TRUE; > } > > +static void init_velement(struct pipe_vertex_element *velement, > + int src_offset, int format, > + int instance_divisor, int vbo_index) > +{ > + velement->src_offset = src_offset; > + velement->src_format = format; > + velement->instance_divisor = instance_divisor; > + velement->vertex_buffer_index = vbo_index; > + assert(velement->src_format); > +} > + > +static void init_velement_lowered(struct st_context *st, > + struct pipe_vertex_element *velements, > + int src_offset, int format, > + int instance_divisor, int vbo_index, > + int nr_components, GLboolean doubles, > + GLuint *attr_idx) > +{ > + int idx = *attr_idx; > + if (doubles) { > + int lower_format; > + > + if (nr_components == 1) > + lower_format = PIPE_FORMAT_R32G32_UINT; > + else if (nr_components >= 2) > + lower_format = PIPE_FORMAT_R32G32B32A32_UINT; > + > + init_velement(&velements[idx], src_offset, > + lower_format, instance_divisor, vbo_index); > + idx++; > + > + if (nr_components > 2) { > + if (nr_components == 3) > + lower_format = PIPE_FORMAT_R32G32_UINT; > + else if (nr_components >= 4) > + lower_format = PIPE_FORMAT_R32G32B32A32_UINT; > + > + init_velement(&velements[idx], src_offset + 4 * sizeof(float), > + lower_format, instance_divisor, vbo_index); > + idx++; > + } > + } else { > + init_velement(&velements[idx], src_offset, > + format, instance_divisor, vbo_index); > + idx++; > + } > + *attr_idx = idx; > +} > + > /** > * Set up for drawing interleaved arrays that all live in one VBO > * or all live in user space. > @@ -365,13 +429,15 @@ is_interleaved_arrays(const struct st_vertex_program > *vp, > * \param velements returns vertex element info > */ > static boolean > -setup_interleaved_attribs(const struct st_vertex_program *vp, > +setup_interleaved_attribs(struct st_context *st, > + const struct st_vertex_program *vp, > const struct st_vp_variant *vpv, > const struct gl_client_array **arrays, > struct pipe_vertex_buffer *vbuffer, > - struct pipe_vertex_element velements[]) > + struct pipe_vertex_element velements[], > + unsigned *num_velements) > { > - GLuint attr; > + GLuint attr, attr_idx; > const GLubyte *low_addr = NULL; > GLboolean usingVBO; /* all arrays in a VBO? */ > struct gl_buffer_object *bufobj; > @@ -381,8 +447,10 @@ setup_interleaved_attribs(const struct st_vertex_program > *vp, > * Init bufobj and stride. > */ > if (vpv->num_inputs) { > - const GLuint mesaAttr0 = vp->index_to_input[0]; > - const struct gl_client_array *array = arrays[mesaAttr0]; > + const struct gl_client_array *array; > + > + array = get_client_array(vp, arrays, 0); > + assert(array); > > /* Since we're doing interleaved arrays, we know there'll be at most > * one buffer object and the stride will be the same for all arrays. > @@ -394,7 +462,11 @@ setup_interleaved_attribs(const struct st_vertex_program > *vp, > low_addr = arrays[vp->index_to_input[0]]->Ptr; > > for (attr = 1; attr < vpv->num_inputs; attr++) { > - const GLubyte *start = arrays[vp->index_to_input[attr]]->Ptr; > + const GLubyte *start; > + array = get_client_array(vp, arrays, attr); > + if (!array) > + continue; > + start = array->Ptr; > low_addr = MIN2(low_addr, start); > } > } > @@ -408,25 +480,33 @@ setup_interleaved_attribs(const struct > st_vertex_program *vp, > /* are the arrays in user space? */ > usingVBO = _mesa_is_bufferobj(bufobj); > > + attr_idx = 0; > for (attr = 0; attr < vpv->num_inputs; attr++) { > - const GLuint mesaAttr = vp->index_to_input[attr]; > - const struct gl_client_array *array = arrays[mesaAttr]; > - unsigned src_offset = (unsigned) (array->Ptr - low_addr); > + const struct gl_client_array *array; > + unsigned src_offset; > + unsigned src_format; > + > + array = get_client_array(vp, arrays, attr); > + if (!array) > + continue; Is this the reason you can't store attr+1 in there? > > + src_offset = (unsigned) (array->Ptr - low_addr); > assert(array->_ElementSize == > _mesa_bytes_per_vertex_attrib(array->Size, array->Type)); > > - velements[attr].src_offset = src_offset; > - velements[attr].instance_divisor = array->InstanceDivisor; > - velements[attr].vertex_buffer_index = 0; > - velements[attr].src_format = st_pipe_vertex_format(array->Type, > - array->Size, > - array->Format, > - array->Normalized, > - array->Integer); > - assert(velements[attr].src_format); > + src_format = st_pipe_vertex_format(array->Type, > + array->Size, > + array->Format, > + array->Normalized, > + array->Integer); > + > + init_velement_lowered(st, velements, src_offset, src_format, > + array->InstanceDivisor, 0, > + array->Size, array->Doubles, &attr_idx); > } > > + *num_velements = attr_idx; > + > /* > * Return the vbuffer info and setup user-space attrib info, if needed. > */ > @@ -472,17 +552,25 @@ setup_non_interleaved_attribs(struct st_context *st, > const struct st_vp_variant *vpv, > const struct gl_client_array **arrays, > struct pipe_vertex_buffer vbuffer[], > - struct pipe_vertex_element velements[]) > + struct pipe_vertex_element velements[], > + unsigned *num_velements) > { > struct gl_context *ctx = st->ctx; > - GLuint attr; > + GLuint attr, attr_idx = 0; > > for (attr = 0; attr < vpv->num_inputs; attr++) { > const GLuint mesaAttr = vp->index_to_input[attr]; > - const struct gl_client_array *array = arrays[mesaAttr]; > - struct gl_buffer_object *bufobj = array->BufferObj; > - GLsizei stride = array->StrideB; > + const struct gl_client_array *array; > + struct gl_buffer_object *bufobj; > + GLsizei stride; > + unsigned src_format; > > + array = get_client_array(vp, arrays, attr); > + if (!array) > + continue; > + > + stride = array->StrideB; > + bufobj = array->BufferObj; > assert(array->_ElementSize == > _mesa_bytes_per_vertex_attrib(array->Size, array->Type)); > > @@ -524,16 +612,19 @@ setup_non_interleaved_attribs(struct st_context *st, > /* common-case setup */ > vbuffer[attr].stride = stride; /* in bytes */ > > - velements[attr].src_offset = 0; > - velements[attr].instance_divisor = array->InstanceDivisor; > - velements[attr].vertex_buffer_index = attr; > - velements[attr].src_format = st_pipe_vertex_format(array->Type, > - array->Size, > - array->Format, > - array->Normalized, > - array->Integer); > - assert(velements[attr].src_format); > + src_format = st_pipe_vertex_format(array->Type, > + array->Size, > + array->Format, > + array->Normalized, > + array->Integer); > + > + init_velement_lowered(st, velements, 0, src_format, > + array->InstanceDivisor, attr, > + array->Size, array->Doubles, &attr_idx); > + > } > + > + *num_velements = attr_idx; > return TRUE; > } > > @@ -563,25 +654,23 @@ static void update_array(struct st_context *st) > * Setup the vbuffer[] and velements[] arrays. > */ > if (is_interleaved_arrays(vp, vpv, arrays)) { > - if (!setup_interleaved_attribs(vp, vpv, arrays, vbuffer, velements)) { > + if (!setup_interleaved_attribs(st, vp, vpv, arrays, vbuffer, > velements, &num_velements)) { > st->vertex_array_out_of_memory = TRUE; > return; > } > > num_vbuffers = 1; > - num_velements = vpv->num_inputs; > if (num_velements == 0) > num_vbuffers = 0; > } > else { > if (!setup_non_interleaved_attribs(st, vp, vpv, arrays, vbuffer, > - velements)) { > + velements, &num_velements)) { > st->vertex_array_out_of_memory = TRUE; > return; > } > > num_vbuffers = vpv->num_inputs; > - num_velements = vpv->num_inputs; > } > > cso_set_vertex_buffers(st->cso_context, 0, num_vbuffers, vbuffer); > diff --git a/src/mesa/state_tracker/st_extensions.c > b/src/mesa/state_tracker/st_extensions.c > index 82e4a30..b1057f3 100644 > --- a/src/mesa/state_tracker/st_extensions.c > +++ b/src/mesa/state_tracker/st_extensions.c > @@ -909,6 +909,8 @@ void st_init_extensions(struct pipe_screen *screen, > if (screen->get_shader_param(screen, PIPE_SHADER_VERTEX, > PIPE_SHADER_CAP_DOUBLES) && > screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT, > - PIPE_SHADER_CAP_DOUBLES)) > + PIPE_SHADER_CAP_DOUBLES)) { > extensions->ARB_gpu_shader_fp64 = GL_TRUE; > + extensions->ARB_vertex_attrib_64bit = GL_TRUE; > + } > } > diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp > b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp > index 08957dc..70fbfb1 100644 > --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp > +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp > @@ -88,6 +88,7 @@ public: > this->reladdr = NULL; > this->reladdr2 = NULL; > this->has_index2 = false; > + this->double_reg2 = false; > } > > st_src_reg(gl_register_file file, int index, int type) > @@ -101,6 +102,7 @@ public: > this->reladdr = NULL; > this->reladdr2 = NULL; > this->has_index2 = false; > + this->double_reg2 = false; > } > > st_src_reg(gl_register_file file, int index, int type, int index2D) > @@ -114,6 +116,7 @@ public: > this->reladdr = NULL; > this->reladdr2 = NULL; > this->has_index2 = false; > + this->double_reg2 = false; > } > > st_src_reg() > @@ -127,6 +130,7 @@ public: > this->reladdr = NULL; > this->reladdr2 = NULL; > this->has_index2 = false; > + this->double_reg2 = false; > } > > explicit st_src_reg(st_dst_reg reg); > @@ -141,6 +145,7 @@ public: > st_src_reg *reladdr; > st_src_reg *reladdr2; > bool has_index2; > + bool double_reg2; Perhaps a word about what this is? Unlike all the other entries, it's not self-explanatory. I guess it's to indicate a source that's the "second" piece of a double-wide value? > }; > > class st_dst_reg { > @@ -197,6 +202,7 @@ st_src_reg::st_src_reg(st_dst_reg reg) > this->index2D = 0; > this->reladdr2 = NULL; > this->has_index2 = false; > + this->double_reg2 = false; > } > > st_dst_reg::st_dst_reg(st_src_reg reg) > @@ -677,8 +683,10 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned > op, > > if (dinst->src[j].type == GLSL_TYPE_DOUBLE) { > dinst->src[j].index = initial_src_idx[j]; > - if (swz > 1) > + if (swz > 1) { > + dinst->src[j].double_reg2 = true; > dinst->src[j].index++; > + } > > if (swz & 1) > dinst->src[j].swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, > SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W); > @@ -3705,6 +3713,7 @@ glsl_to_tgsi_visitor::copy_propagate(void) > } else { > if (first->src[0].file != copy_chan->src[0].file || > first->src[0].index != copy_chan->src[0].index || > + first->src[0].double_reg2 != > copy_chan->src[0].double_reg2 || > first->src[0].index2D != copy_chan->src[0].index2D) { > good = false; > break; > @@ -3720,6 +3729,7 @@ glsl_to_tgsi_visitor::copy_propagate(void) > inst->src[r].index = first->src[0].index; > inst->src[r].index2D = first->src[0].index2D; > inst->src[r].has_index2 = first->src[0].has_index2; > + inst->src[r].double_reg2 = first->src[0].double_reg2; > > int swizzle = 0; > for (int i = 0; i < 4; i++) { > @@ -4552,6 +4562,9 @@ dst_register(struct st_translate *t, > static struct ureg_src > src_register(struct st_translate *t, const st_src_reg *reg) > { > + int index = reg->index; > + int double_reg2 = reg->double_reg2 ? 1 : 0; > + > switch(reg->file) { > case PROGRAM_UNDEFINED: > return ureg_imm4f(t->ureg, 0, 0, 0, 0); > @@ -4577,8 +4590,12 @@ src_register(struct st_translate *t, const st_src_reg > *reg) > return t->immediates[reg->index]; > > case PROGRAM_INPUT: > - assert(t->inputMapping[reg->index] < ARRAY_SIZE(t->inputs)); > - return t->inputs[t->inputMapping[reg->index]]; > + /* GLSL inputs are 64-bit containers, so we have to > + * map back to the original index and add the offset after > + * mapping. */ > + index -= double_reg2; > + assert(t->inputMapping[index] < ARRAY_SIZE(t->inputs)); > + return t->inputs[t->inputMapping[index] + double_reg2]; Just a thought -- why not make t->inputMapping[index+1] contain t->inputMapping[index]+1? That'd make these gymnastics less necessary, right? Perhaps it's complicated though... > > case PROGRAM_OUTPUT: > assert(t->outputMapping[reg->index] < ARRAY_SIZE(t->outputs)); > diff --git a/src/mesa/state_tracker/st_program.c > b/src/mesa/state_tracker/st_program.c > index d93b3c7..a9110d3 100644 > --- a/src/mesa/state_tracker/st_program.c > +++ b/src/mesa/state_tracker/st_program.c > @@ -194,6 +194,11 @@ st_prepare_vertex_program(struct gl_context *ctx, > stvp->input_to_index[attr] = stvp->num_inputs; > stvp->index_to_input[stvp->num_inputs] = attr; > stvp->num_inputs++; > + if ((stvp->Base.Base.DoubleInputsRead & BITFIELD64_BIT(attr)) != 0) > { > + /* add placeholder for second part of a double attribute */ > + stvp->index_to_input[stvp->num_inputs] = > ST_DOUBLE_ATTRIB_PLACEHOLDER; > + stvp->num_inputs++; Why not just set this to the right thing? Or is there no right thing? Seems like you end up using it as though it were attr + 1... > + } > } > } > /* bit of a hack, presetup potentially unused edgeflag input */ > diff --git a/src/mesa/state_tracker/st_program.h > b/src/mesa/state_tracker/st_program.h > index b2c86fa..a2c5606 100644 > --- a/src/mesa/state_tracker/st_program.h > +++ b/src/mesa/state_tracker/st_program.h > @@ -45,6 +45,7 @@ > extern "C" { > #endif > > +#define ST_DOUBLE_ATTRIB_PLACEHOLDER 0xffffffff > > /** Fragment program variant key */ > struct st_fp_variant_key > -- > 2.1.0 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev