Luca, In this change you've got an int value (copy_size) which has a special meaning when negative -- can you add comments explaining what a negative size means? Is there a way to use a more explicit flag value to indicate this condition?
Keith On Thu, 2010-08-12 at 10:08 -0700, Luca Barbieri wrote: > When used in GPU drivers, translate can be used to simultaneously > perform a gather operation, and convert away from unsupported formats. > > In this use case, input and output formats will often be identical: clearly > it would make sense to use a memcpy in this case. > > Instead, translate will insist to convert to and from 32-bit floating point > numbers. > > This is not only extremely expensive, but it also loses precision for > 32/64-bit integers and 64-bit floating point numbers. > > This patch changes translate_generic to just use memcpy if the formats are > identical, non-blocked, and with an integral number of bytes per pixel (note > that all sensible vertex formats are like this). > --- > .../auxiliary/translate/translate_generic.c | 93 +++++++++++++------ > 1 files changed, 63 insertions(+), 30 deletions(-) > > diff --git a/src/gallium/auxiliary/translate/translate_generic.c > b/src/gallium/auxiliary/translate/translate_generic.c > index 42cfd76..57a42b7 100644 > --- a/src/gallium/auxiliary/translate/translate_generic.c > +++ b/src/gallium/auxiliary/translate/translate_generic.c > @@ -63,6 +63,7 @@ struct translate_generic { > const uint8_t *input_ptr; > unsigned input_stride; > unsigned max_index; > + int copy_size; > > } attrib[PIPE_MAX_ATTRIBS]; > > @@ -380,9 +381,10 @@ static void PIPE_CDECL generic_run_elts( struct > translate *translate, > float data[4]; > char *dst = vert + tg->attrib[attr].output_offset; > > - if (tg->attrib[attr].type == TRANSLATE_ELEMENT_NORMAL) { > + if (tg->attrib[attr].type == TRANSLATE_ELEMENT_NORMAL) { > const uint8_t *src; > unsigned index; > + int copy_size; > > if (tg->attrib[attr].instance_divisor) { > index = instance_id / tg->attrib[attr].instance_divisor; > @@ -396,27 +398,34 @@ static void PIPE_CDECL generic_run_elts( struct > translate *translate, > src = tg->attrib[attr].input_ptr + > tg->attrib[attr].input_stride * index; > > - 
tg->attrib[attr].fetch( data, src, 0, 0 ); > - > - if (0) > - debug_printf("Fetch elt attr %d from %p stride %d div %u > max %u index %d: " > - " %f, %f, %f, %f \n", > - attr, > - tg->attrib[attr].input_ptr, > - tg->attrib[attr].input_stride, > - tg->attrib[attr].instance_divisor, > - tg->attrib[attr].max_index, > - index, > - data[0], data[1],data[2], data[3]); > + copy_size = tg->attrib[attr].copy_size; > + if(likely(copy_size >= 0)) > + memcpy(dst, src, tg->attrib[attr].copy_size); > + else > + { > + tg->attrib[attr].fetch( data, src, 0, 0 ); > + > + if (0) > + debug_printf("Fetch elt attr %d from %p stride %d div > %u max %u index %d: " > + " %f, %f, %f, %f \n", > + attr, > + tg->attrib[attr].input_ptr, > + tg->attrib[attr].input_stride, > + tg->attrib[attr].instance_divisor, > + tg->attrib[attr].max_index, > + index, > + data[0], data[1],data[2], data[3]); > + tg->attrib[attr].emit( data, dst ); > + } > } else { > - data[0] = (float)instance_id; > + if(likely(tg->attrib[attr].copy_size >= 0)) > + memcpy(data, &instance_id, 4); > + else > + { > + data[0] = (float)instance_id; > + tg->attrib[attr].emit( data, dst ); > + } > } > - > - if (0) > - debug_printf("vert %d/%d attr %d: %f %f %f %f\n", > - i, elt, attr, data[0], data[1], data[2], data[3]); > - > - tg->attrib[attr].emit( data, dst ); > } > vert += tg->translate.key.output_stride; > } > @@ -448,6 +457,7 @@ static void PIPE_CDECL generic_run( struct translate > *translate, > if (tg->attrib[attr].type == TRANSLATE_ELEMENT_NORMAL) { > const uint8_t *src; > unsigned index; > + int copy_size; > > if (tg->attrib[attr].instance_divisor) { > index = instance_id / tg->attrib[attr].instance_divisor; > @@ -462,25 +472,33 @@ static void PIPE_CDECL generic_run( struct translate > *translate, > src = tg->attrib[attr].input_ptr + > tg->attrib[attr].input_stride * index; > > - tg->attrib[attr].fetch( data, src, 0, 0 ); > + copy_size = tg->attrib[attr].copy_size; > + if(likely(copy_size >= 0)) > + memcpy(dst, src, 
tg->attrib[attr].copy_size); > + else > + { > + tg->attrib[attr].fetch( data, src, 0, 0 ); > > - if (0) > - debug_printf("Fetch linear attr %d from %p stride %d index > %d: " > + if (0) > + debug_printf("Fetch linear attr %d from %p stride %d > index %d: " > " %f, %f, %f, %f \n", > attr, > tg->attrib[attr].input_ptr, > tg->attrib[attr].input_stride, > index, > data[0], data[1],data[2], data[3]); > + > + tg->attrib[attr].emit( data, dst ); > + } > } else { > - data[0] = (float)instance_id; > + if(likely(tg->attrib[attr].copy_size >= 0)) > + memcpy(data, &instance_id, 4); > + else > + { > + data[0] = (float)instance_id; > + tg->attrib[attr].emit( data, dst ); > + } > } > - > - if (0) > - debug_printf("vert %d attr %d: %f %f %f %f\n", > - i, attr, data[0], data[1], data[2], data[3]); > - > - tg->attrib[attr].emit( data, dst ); > } > > vert += tg->translate.key.output_stride; > @@ -547,6 +565,21 @@ struct translate *translate_generic_create( const struct > translate_key *key ) > tg->attrib[i].emit = get_emit_func(key->element[i].output_format); > tg->attrib[i].output_offset = key->element[i].output_offset; > > + tg->attrib[i].copy_size = -1; > + if (tg->attrib[i].type == TRANSLATE_ELEMENT_INSTANCE_ID) > + { > + if(key->element[i].output_format == PIPE_FORMAT_R32_USCALED > + || key->element[i].output_format == > PIPE_FORMAT_R32_SSCALED) > + tg->attrib[i].copy_size = 4; > + } > + else > + { > + if(key->element[i].input_format == key->element[i].output_format > + && format_desc->block.width == 1 > + && format_desc->block.height == 1 > + && !(format_desc->block.bits & 7)) > + tg->attrib[i].copy_size = format_desc->block.bits >> 3; > + } > } > > tg->nr_attrib = key->nr_elements; _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev