On Fri, Oct 28, 2011 at 7:42 PM, Ian Romanick <i...@freedesktop.org> wrote: > diff --git a/src/mesa/main/uniform_query.cpp b/src/mesa/main/uniform_query.cpp > index db2f200..50a724b 100644 > --- a/src/mesa/main/uniform_query.cpp > +++ b/src/mesa/main/uniform_query.cpp > @@ -22,15 +22,16 @@ > * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN > * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. > */ > +#include <stdlib.h> > #include "main/core.h" > #include "main/context.h" > #include "ir.h" > #include "ir_uniform.h" > +#include "program/hash_table.h" > #include "../glsl/program.h" > #include "../glsl/ir_uniform.h" > > extern "C" { > -#include "main/image.h" > #include "main/shaderapi.h" > #include "main/shaderobj.h" > #include "uniforms.h" > @@ -44,42 +45,30 @@ _mesa_GetActiveUniformARB(GLhandleARB program, GLuint > index, > GET_CURRENT_CONTEXT(ctx); > struct gl_shader_program *shProg = > _mesa_lookup_shader_program_err(ctx, program, "glGetActiveUniform"); > - const struct gl_program_parameter *param; > > if (!shProg) > return; > > - if (!shProg->Uniforms || index >= shProg->Uniforms->NumUniforms) { > + if (index >= shProg->NumUserUniformStorage) { > _mesa_error(ctx, GL_INVALID_VALUE, "glGetActiveUniform(index)"); > return; > } > > - param = get_uniform_parameter(shProg, index); > - if (!param) > - return; > - > - const struct gl_uniform *const uni = &shProg->Uniforms->Uniforms[index]; > + const struct gl_uniform_storage *const uni = > &shProg->UniformStorage[index]; > > if (nameOut) { > - _mesa_copy_string(nameOut, maxLength, length, param->Name); > + _mesa_copy_string(nameOut, maxLength, length, uni->name); > } > > if (size) { > - GLint typeSize = _mesa_sizeof_glsl_type(uni->Type->gl_type); > - if ((GLint) param->Size > typeSize) { > - /* This is an array. > - * Array elements are placed on vector[4] boundaries so they're > - * a multiple of four floats. We round typeSize up to next multiple > - * of four to get the right size below. > - */ > - typeSize = (typeSize + 3) & ~3; > - } > - /* Note that the returned size is in units of the <type>, not bytes */ > - *size = param->Size / typeSize; > + /* array_elements is zero for non-arrays, but the API requires that 1 > be > + * returned. > + */ > + *size = MAX2(1, uni->array_elements); > } > > if (type) { > - *type = uni->Type->gl_type; > + *type = uni->type->gl_type; > } > } > > @@ -409,12 +398,21 @@ validate_uniform_parameters(struct gl_context *ctx, > > _mesa_uniform_split_location_offset(location, loc, array_index); > > - if (*loc >= shProg->Uniforms->NumUniforms) { > + if (*loc >= shProg->NumUserUniformStorage) { > _mesa_error(ctx, GL_INVALID_OPERATION, "%s(location=%d)", > caller, location); > return false; > } > > + /* This case should be impossible. The implication is that a call like > + * glGetUniformLocation(prog, "foo[8]") was successful but "foo" is not an > + * array. > + */ > + if (*array_index != 0 && shProg->UniformStorage[*loc].array_elements == > 0) { > + _mesa_error(ctx, GL_INVALID_OPERATION, "%s(location=%d)", > + caller, location); > + return false; > + } > return true; > } > > @@ -423,72 +421,81 @@ validate_uniform_parameters(struct gl_context *ctx, > */ > extern "C" void > _mesa_get_uniform(struct gl_context *ctx, GLuint program, GLint location, > - GLsizei bufSize, GLenum returnType, GLvoid *paramsOut) > + GLsizei bufSize, enum glsl_base_type returnType, > + GLvoid *paramsOut) > { > struct gl_shader_program *shProg = > _mesa_lookup_shader_program_err(ctx, program, "glGetUniformfv"); > - struct gl_program *prog; > - GLint paramPos; > + struct gl_uniform_storage *uni; > unsigned loc, offset; > > if (!validate_uniform_parameters(ctx, shProg, location, 1, > &loc, &offset, "glGetUniform", true)) > return; > > - if (!find_uniform_parameter_pos(shProg, loc, &prog, ¶mPos)) { > - _mesa_error(ctx, GL_INVALID_OPERATION, "glGetUniformfv(location)"); > - } > - else { > - const struct gl_program_parameter *p = > - &prog->Parameters->Parameters[paramPos]; > - gl_constant_value (*values)[4]; > - GLint rows, cols, i, j, k; > - GLsizei numBytes; > - GLenum storage_type; > - > - values = prog->Parameters->ParameterValues + paramPos + offset; > - > - get_uniform_rows_cols(p, &rows, &cols); > - > - numBytes = rows * cols * _mesa_sizeof_type(returnType); > - if (bufSize < numBytes) { > - _mesa_error( ctx, GL_INVALID_OPERATION, > - "glGetnUniformfvARB(out of bounds: bufSize is %d," > - " but %d bytes are required)", bufSize, numBytes ); > - return; > - } > + uni = &shProg->UniformStorage[loc]; > > - if (ctx->Const.NativeIntegers) { > - storage_type = base_uniform_type(p->DataType); > - } else { > - storage_type = GL_FLOAT; > - } > + { > + unsigned elements = (uni->type->is_sampler()) > + ? 1 : uni->type->components(); > > - k = 0; > - for (i = 0; i < rows; i++) { > - for (j = 0; j < cols; j++ ) { > - void *out = (char *)paramsOut + 4 * k; > + /* Calculate the source base address *BEFORE* modifying elements to > + * account for the size of the user's buffer. > + */ > + const union gl_constant_value *const src = > + &uni->storage[offset * elements]; > + > + unsigned bytes = sizeof(uni->storage[0]) * elements; > + if (bytes > bufSize) { > + elements = bufSize / sizeof(uni->storage[0]); > + bytes = bufSize; > + } > > + /* If the return type and the uniform's native type are "compatible," > + * just memcpy the data. If the types are not compatible, perform a > + * slower convert-and-copy process. > + */ > + if (returnType == uni->type->base_type > + || ((returnType == GLSL_TYPE_INT > + || returnType == GLSL_TYPE_UINT > + || returnType == GLSL_TYPE_SAMPLER) > + && > + (uni->type->base_type == GLSL_TYPE_INT > + || uni->type->base_type == GLSL_TYPE_UINT > + || uni->type->base_type == GLSL_TYPE_SAMPLER))) { > + memcpy(paramsOut, src, bytes); > + } else { > + union gl_constant_value *const dst = > + (union gl_constant_value *) paramsOut; > + > + /* This code could be optimized by putting the loop inside the switch > + * statements. However, this is not expected to be > + * performance-critical code. > + */ > + for (unsigned i = 0; i < elements; i++) { > switch (returnType) { > - case GL_FLOAT: > - switch (storage_type) { > - case GL_FLOAT: > - *(float *)out = values[i][j].f; > + case GLSL_TYPE_FLOAT: > + switch (uni->type->base_type) { > + case GLSL_TYPE_UINT: > + dst[i].f = (float) src[i].u; > + break; > + case GLSL_TYPE_INT: > + case GLSL_TYPE_SAMPLER: > + dst[i].f = (float) src[i].i; > break; > - case GL_INT: > - case GL_BOOL: /* boolean is just an integer 1 or 0. */ > - *(float *)out = values[i][j].i; > + case GLSL_TYPE_BOOL: > + dst[i].f = src[i].i ? 1.0f : 0.0f; > break; > - case GL_UNSIGNED_INT: > - *(float *)out = values[i][j].u; > + default: > + assert(!"Should not get here."); > break; > } > break; > > - case GL_INT: > - case GL_UNSIGNED_INT: > - switch (storage_type) { > - case GL_FLOAT: > + case GLSL_TYPE_INT: > + case GLSL_TYPE_UINT: > + switch (uni->type->base_type) { > + case GLSL_TYPE_FLOAT: > /* While the GL 3.2 core spec doesn't explicitly > * state how conversion of float uniforms to integer > * values works, in section 6.2 "State Tables" on > @@ -506,23 +513,21 @@ _mesa_get_uniform(struct gl_context *ctx, GLuint > program, GLint location, > * a floating-point value is rounded to the > * nearest integer..." > */ > - *(int *)out = IROUND(values[i][j].f); > + dst[i].i = IROUND(src[i].f); > break; > - > - case GL_INT: > - case GL_UNSIGNED_INT: > - case GL_BOOL: > - /* type conversions for these to int/uint are just > - * copying the data. > - */ > - *(int *)out = values[i][j].i; > + case GLSL_TYPE_BOOL: > + dst[i].i = src[i].i ? 1 : 0; > break; > + default: > + assert(!"Should not get here."); > break; > } > break; > - } > > - k++; > + default: > + assert(!"Should not get here."); > + break; > + } > } > } > } > @@ -925,9 +930,12 @@ _mesa_uniform(struct gl_context *ctx, struct > gl_shader_program *shProg, > GLint location, GLsizei count, > const GLvoid *values, GLenum type) > { > - struct gl_uniform *uniform; > - GLint elems; > unsigned loc, offset; > + unsigned components; > + unsigned src_components; > + unsigned i; > + enum glsl_base_type basicType; > + struct gl_uniform_storage *uni; > > ASSERT_OUTSIDE_BEGIN_END(ctx); > > @@ -935,73 +943,207 @@ _mesa_uniform(struct gl_context *ctx, struct > gl_shader_program *shProg, > &loc, &offset, "glUniform", false)) > return; > > - elems = _mesa_sizeof_glsl_type(type); > + uni = &shProg->UniformStorage[loc]; > > - FLUSH_VERTICES(ctx, _NEW_PROGRAM_CONSTANTS); > + /* Verify that the types are compatible. > + */ > + switch (type) { > + case GL_FLOAT: > + basicType = GLSL_TYPE_FLOAT; > + src_components = 1; > + break; > + case GL_FLOAT_VEC2: > + basicType = GLSL_TYPE_FLOAT; > + src_components = 2; > + break; > + case GL_FLOAT_VEC3: > + basicType = GLSL_TYPE_FLOAT; > + src_components = 3; > + break; > + case GL_FLOAT_VEC4: > + basicType = GLSL_TYPE_FLOAT; > + src_components = 4; > + break; > + case GL_UNSIGNED_INT: > + basicType = GLSL_TYPE_UINT; > + src_components = 1; > + break; > + case GL_UNSIGNED_INT_VEC2: > + basicType = GLSL_TYPE_UINT; > + src_components = 2; > + break; > + case GL_UNSIGNED_INT_VEC3: > + basicType = GLSL_TYPE_UINT; > + src_components = 3; > + break; > + case GL_UNSIGNED_INT_VEC4: > + basicType = GLSL_TYPE_UINT; > + src_components = 4; > + break; > + case GL_INT: > + basicType = GLSL_TYPE_INT; > + src_components = 1; > + break; > + case GL_INT_VEC2: > + basicType = GLSL_TYPE_INT; > + src_components = 2; > + break; > + case GL_INT_VEC3: > + basicType = GLSL_TYPE_INT; > + src_components = 3; > + break; > + case GL_INT_VEC4: > + basicType = GLSL_TYPE_INT; > + src_components = 4; > + break; > + case GL_BOOL: > + case GL_BOOL_VEC2: > + case GL_BOOL_VEC3: > + case GL_BOOL_VEC4: > + case GL_FLOAT_MAT2: > + case GL_FLOAT_MAT2x3: > + case GL_FLOAT_MAT2x4: > + case GL_FLOAT_MAT3x2: > + case GL_FLOAT_MAT3: > + case GL_FLOAT_MAT3x4: > + case GL_FLOAT_MAT4x2: > + case GL_FLOAT_MAT4x3: > + case GL_FLOAT_MAT4: > + default: > + _mesa_problem(NULL, "Invalid type in %s", __func__); > + return; > + } > > - uniform = &shProg->Uniforms->Uniforms[loc]; > + if (uni->type->is_sampler()) { > + components = 1; > + } else { > + components = uni->type->vector_elements; > + } > + > + bool match; > + switch (uni->type->base_type) { > + case GLSL_TYPE_BOOL: > + match = true; > + break; > + case GLSL_TYPE_SAMPLER: > + match = (basicType == GLSL_TYPE_INT); > + break; > + default: > + match = (basicType == uni->type->base_type); > + break; > + } > + > + if (uni->type->is_matrix() || components != src_components || !match) { > + _mesa_error(ctx, GL_INVALID_OPERATION, "glUniform(type mismatch)"); > + return; > + } > > if (ctx->Shader.Flags & GLSL_UNIFORMS) { > - const GLenum basicType = base_uniform_type(type); > - GLint i; > - printf("Mesa: set program %u uniform %s (loc %d) to: ", > - shProg->Name, uniform->Name, location); > - if (basicType == GL_INT) { > - const GLint *v = (const GLint *) values; > - for (i = 0; i < count * elems; i++) { > - printf("%d ", v[i]); > - } > - } > - else if (basicType == GL_UNSIGNED_INT) { > - const GLuint *v = (const GLuint *) values; > - for (i = 0; i < count * elems; i++) { > - printf("%u ", v[i]); > - } > - } > - else { > - const GLfloat *v = (const GLfloat *) values; > - assert(basicType == GL_FLOAT); > - for (i = 0; i < count * elems; i++) { > - printf("%g ", v[i]); > + log_uniform(values, basicType, components, 1, count, > + false, shProg, location, uni); > + } > + > + /* Validate the texture unit setting. > + */ > + /* FINISHME: I cannot find any justification for this in the GL spec. > + */ > + if (uni->type->is_sampler()) { > + for (i = 0; i < count; i++) { > + const unsigned texUnit = ((unsigned *) values)[i]; > + > + /* check that the sampler (tex unit index) is legal */ > + if (texUnit >= ctx->Const.MaxCombinedTextureImageUnits) { > + _mesa_error(ctx, GL_INVALID_VALUE, > + "glUniform1i(invalid sampler/tex unit index for " > + "uniform %d)", > + location); > + return; > } > } > - printf("\n"); > } > > - /* A uniform var may be used by both a vertex shader and a fragment > - * shader. We may need to update one or both shader's uniform here: > + /* Page 82 (page 96 of the PDF) of the OpenGL 2.1 spec says: > + * > + * "When loading N elements starting at an arbitrary position k in a > + * uniform declared as an array, elements k through k + N - 1 in the > + * array will be replaced with the new values. Values for any array > + * element that exceeds the highest array element index used, as > + * reported by GetActiveUniform, will be ignored by the GL." > + * > + * Clamp 'count' to a valid value. Note that for non-arrays a count > 1 > + * will have already generated an error. > */ > - if (shProg->_LinkedShaders[MESA_SHADER_VERTEX]) { > - /* convert uniform location to program parameter index */ > - GLint index = uniform->VertPos; > - if (index >= 0) { > - set_program_uniform(ctx, > - > shProg->_LinkedShaders[MESA_SHADER_VERTEX]->Program, > - index, offset, type, count, elems, values); > - } > + if (uni->array_elements != 0) { > + if (offset >= uni->array_elements) > + return; > + > + count = MIN2(count, (uni->array_elements - offset)); > } > > - if (shProg->_LinkedShaders[MESA_SHADER_FRAGMENT]) { > - /* convert uniform location to program parameter index */ > - GLint index = uniform->FragPos; > - if (index >= 0) { > - set_program_uniform(ctx, > - > shProg->_LinkedShaders[MESA_SHADER_FRAGMENT]->Program, > - index, offset, type, count, elems, values); > + FLUSH_VERTICES(ctx, _NEW_PROGRAM_CONSTANTS); > + > + /* Store the data in the "actual type" backing storage for the uniform. > + */ > + if (!uni->type->is_boolean()) { > + memcpy(&uni->storage[components * offset], values, > + sizeof(uni->storage[0]) * components * count); > + } else { > + const union gl_constant_value *src = > + (const union gl_constant_value *) values; > + union gl_constant_value *dst = &uni->storage[components * offset]; > + const unsigned elems = components * count; > + > + for (i = 0; i < elems; i++) { > + if (basicType == GLSL_TYPE_FLOAT) { > + dst[i].i = src[i].f != 0.0f ? 1 : 0; > + } else { > + dst[i].i = src[i].i != 0 ? 1 : 0; > + } > } > } > > - if (shProg->_LinkedShaders[MESA_SHADER_GEOMETRY]) { > - /* convert uniform location to program parameter index */ > - GLint index = uniform->GeomPos; > - if (index >= 0) { > - set_program_uniform(ctx, > - > shProg->_LinkedShaders[MESA_SHADER_GEOMETRY]->Program, > - index, offset, type, count, elems, values); > + uni->initialized = true; > + uni->dirty = true; > + > + _mesa_propagate_uniforms_to_driver_storage(uni, offset, count); > + > + /* If the uniform is a sampler, do the extra magic necessary to propagate > + * the changes through. > + */ > + if (uni->type->is_sampler()) { > + for (i = 0; i < count; i++) { > + shProg->SamplerUnits[uni->sampler + offset + i] = > + ((unsigned *) values)[i]; > } > - } > > - uniform->Initialized = GL_TRUE; > + bool flushed = false; > + for (i = 0; i < MESA_SHADER_TYPES; i++) { > + struct gl_program *prog; > + > + if (shProg->_LinkedShaders[i] == NULL) > + continue; > + > + prog = shProg->_LinkedShaders[i]->Program; > + > + assert(sizeof(prog->SamplerUnits) == sizeof(shProg->SamplerUnits)); > + > + if (memcmp(prog->SamplerUnits, > + shProg->SamplerUnits, > + sizeof(shProg->SamplerUnits)) != 0) { > + if (!flushed) { > + FLUSH_VERTICES(ctx, _NEW_TEXTURE | _NEW_PROGRAM); > + flushed = true; > + } > + > + memcpy(prog->SamplerUnits, > + shProg->SamplerUnits, > + sizeof(shProg->SamplerUnits)); > + > + _mesa_update_shader_textures_used(prog); > + (void) ctx->Driver.ProgramStringNotify(ctx, prog->Target, prog);
Hi Ian, This ProgramStringNotify call causes excessive shader recompilations in the game Cogs (from some Humble Bundle). Approximately 25% of in-game CPU time is spent on translating shaders and register allocation. If the shaders were larger, it would be much worse. Moreover, I have realized calling ProgramStringNotify is not needed for st/mesa in this case at all (I just comment it out if I want higher frame rates), because it doesn't use the prog->SamplerUnits array when translating shaders. The array is only used when we set samplers, so setting _NEW_TEXTURE is sufficient. It seems that i965 could do the same thing. Another option would be to cache precompiled shaders, but that seems more complicated than doing what st/mesa does. Marek _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev