On 01/03/2012 09:43 PM, Marek Olšák wrote: > On Fri, Oct 28, 2011 at 7:42 PM, Ian Romanick <i...@freedesktop.org> wrote: >> diff --git a/src/mesa/main/uniform_query.cpp >> b/src/mesa/main/uniform_query.cpp >> index db2f200..50a724b 100644 >> --- a/src/mesa/main/uniform_query.cpp >> +++ b/src/mesa/main/uniform_query.cpp >> @@ -22,15 +22,16 @@ >> * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN >> * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. >> */ >> +#include <stdlib.h> >> #include "main/core.h" >> #include "main/context.h" >> #include "ir.h" >> #include "ir_uniform.h" >> +#include "program/hash_table.h" >> #include "../glsl/program.h" >> #include "../glsl/ir_uniform.h" >> >> extern "C" { >> -#include "main/image.h" >> #include "main/shaderapi.h" >> #include "main/shaderobj.h" >> #include "uniforms.h" >> @@ -44,42 +45,30 @@ _mesa_GetActiveUniformARB(GLhandleARB program, GLuint >> index, >> GET_CURRENT_CONTEXT(ctx); >> struct gl_shader_program *shProg = >> _mesa_lookup_shader_program_err(ctx, program, "glGetActiveUniform"); >> - const struct gl_program_parameter *param; >> >> if (!shProg) >> return; >> >> - if (!shProg->Uniforms || index >= shProg->Uniforms->NumUniforms) { >> + if (index >= shProg->NumUserUniformStorage) { >> _mesa_error(ctx, GL_INVALID_VALUE, "glGetActiveUniform(index)"); >> return; >> } >> >> - param = get_uniform_parameter(shProg, index); >> - if (!param) >> - return; >> - >> - const struct gl_uniform *const uni = &shProg->Uniforms->Uniforms[index]; >> + const struct gl_uniform_storage *const uni = >> &shProg->UniformStorage[index]; >> >> if (nameOut) { >> - _mesa_copy_string(nameOut, maxLength, length, param->Name); >> + _mesa_copy_string(nameOut, maxLength, length, uni->name); >> } >> >> if (size) { >> - GLint typeSize = _mesa_sizeof_glsl_type(uni->Type->gl_type); >> - if ((GLint) param->Size > typeSize) { >> - /* This is an array. >> - * Array elements are placed on vector[4] boundaries so they're >> - * a multiple of four floats. We round typeSize up to next >> multiple >> - * of four to get the right size below. >> - */ >> - typeSize = (typeSize + 3) & ~3; >> - } >> - /* Note that the returned size is in units of the <type>, not bytes */ >> - *size = param->Size / typeSize; >> + /* array_elements is zero for non-arrays, but the API requires that 1 >> be >> + * returned. >> + */ >> + *size = MAX2(1, uni->array_elements); >> } >> >> if (type) { >> - *type = uni->Type->gl_type; >> + *type = uni->type->gl_type; >> } >> } >> >> @@ -409,12 +398,21 @@ validate_uniform_parameters(struct gl_context *ctx, >> >> _mesa_uniform_split_location_offset(location, loc, array_index); >> >> - if (*loc >= shProg->Uniforms->NumUniforms) { >> + if (*loc >= shProg->NumUserUniformStorage) { >> _mesa_error(ctx, GL_INVALID_OPERATION, "%s(location=%d)", >> caller, location); >> return false; >> } >> >> + /* This case should be impossible. The implication is that a call like >> + * glGetUniformLocation(prog, "foo[8]") was successful but "foo" is not >> an >> + * array. >> + */ >> + if (*array_index != 0 && shProg->UniformStorage[*loc].array_elements == >> 0) { >> + _mesa_error(ctx, GL_INVALID_OPERATION, "%s(location=%d)", >> + caller, location); >> + return false; >> + } >> return true; >> } >> >> @@ -423,72 +421,81 @@ validate_uniform_parameters(struct gl_context *ctx, >> */ >> extern "C" void >> _mesa_get_uniform(struct gl_context *ctx, GLuint program, GLint location, >> - GLsizei bufSize, GLenum returnType, GLvoid *paramsOut) >> + GLsizei bufSize, enum glsl_base_type returnType, >> + GLvoid *paramsOut) >> { >> struct gl_shader_program *shProg = >> _mesa_lookup_shader_program_err(ctx, program, "glGetUniformfv"); >> - struct gl_program *prog; >> - GLint paramPos; >> + struct gl_uniform_storage *uni; >> unsigned loc, offset; >> >> if (!validate_uniform_parameters(ctx, shProg, location, 1, >> &loc, &offset, "glGetUniform", true)) >> return; >> >> - if (!find_uniform_parameter_pos(shProg, loc, &prog, ¶mPos)) { >> - _mesa_error(ctx, GL_INVALID_OPERATION, "glGetUniformfv(location)"); >> - } >> - else { >> - const struct gl_program_parameter *p = >> - &prog->Parameters->Parameters[paramPos]; >> - gl_constant_value (*values)[4]; >> - GLint rows, cols, i, j, k; >> - GLsizei numBytes; >> - GLenum storage_type; >> - >> - values = prog->Parameters->ParameterValues + paramPos + offset; >> - >> - get_uniform_rows_cols(p, &rows, &cols); >> - >> - numBytes = rows * cols * _mesa_sizeof_type(returnType); >> - if (bufSize < numBytes) { >> - _mesa_error( ctx, GL_INVALID_OPERATION, >> - "glGetnUniformfvARB(out of bounds: bufSize is %d," >> - " but %d bytes are required)", bufSize, numBytes ); >> - return; >> - } >> + uni = &shProg->UniformStorage[loc]; >> >> - if (ctx->Const.NativeIntegers) { >> - storage_type = base_uniform_type(p->DataType); >> - } else { >> - storage_type = GL_FLOAT; >> - } >> + { >> + unsigned elements = (uni->type->is_sampler()) >> + ? 1 : uni->type->components(); >> >> - k = 0; >> - for (i = 0; i < rows; i++) { >> - for (j = 0; j < cols; j++ ) { >> - void *out = (char *)paramsOut + 4 * k; >> + /* Calculate the source base address *BEFORE* modifying elements to >> + * account for the size of the user's buffer. >> + */ >> + const union gl_constant_value *const src = >> + &uni->storage[offset * elements]; >> + >> + unsigned bytes = sizeof(uni->storage[0]) * elements; >> + if (bytes > bufSize) { >> + elements = bufSize / sizeof(uni->storage[0]); >> + bytes = bufSize; >> + } >> >> + /* If the return type and the uniform's native type are "compatible," >> + * just memcpy the data. If the types are not compatible, perform a >> + * slower convert-and-copy process. >> + */ >> + if (returnType == uni->type->base_type >> + || ((returnType == GLSL_TYPE_INT >> + || returnType == GLSL_TYPE_UINT >> + || returnType == GLSL_TYPE_SAMPLER) >> + && >> + (uni->type->base_type == GLSL_TYPE_INT >> + || uni->type->base_type == GLSL_TYPE_UINT >> + || uni->type->base_type == GLSL_TYPE_SAMPLER))) { >> + memcpy(paramsOut, src, bytes); >> + } else { >> + union gl_constant_value *const dst = >> + (union gl_constant_value *) paramsOut; >> + >> + /* This code could be optimized by putting the loop inside the >> switch >> + * statements. However, this is not expected to be >> + * performance-critical code. >> + */ >> + for (unsigned i = 0; i < elements; i++) { >> switch (returnType) { >> - case GL_FLOAT: >> - switch (storage_type) { >> - case GL_FLOAT: >> - *(float *)out = values[i][j].f; >> + case GLSL_TYPE_FLOAT: >> + switch (uni->type->base_type) { >> + case GLSL_TYPE_UINT: >> + dst[i].f = (float) src[i].u; >> + break; >> + case GLSL_TYPE_INT: >> + case GLSL_TYPE_SAMPLER: >> + dst[i].f = (float) src[i].i; >> break; >> - case GL_INT: >> - case GL_BOOL: /* boolean is just an integer 1 or 0. */ >> - *(float *)out = values[i][j].i; >> + case GLSL_TYPE_BOOL: >> + dst[i].f = src[i].i ? 1.0f : 0.0f; >> break; >> - case GL_UNSIGNED_INT: >> - *(float *)out = values[i][j].u; >> + default: >> + assert(!"Should not get here."); >> break; >> } >> break; >> >> - case GL_INT: >> - case GL_UNSIGNED_INT: >> - switch (storage_type) { >> - case GL_FLOAT: >> + case GLSL_TYPE_INT: >> + case GLSL_TYPE_UINT: >> + switch (uni->type->base_type) { >> + case GLSL_TYPE_FLOAT: >> /* While the GL 3.2 core spec doesn't explicitly >> * state how conversion of float uniforms to integer >> * values works, in section 6.2 "State Tables" on >> @@ -506,23 +513,21 @@ _mesa_get_uniform(struct gl_context *ctx, GLuint >> program, GLint location, >> * a floating-point value is rounded to the >> * nearest integer..." >> */ >> - *(int *)out = IROUND(values[i][j].f); >> + dst[i].i = IROUND(src[i].f); >> break; >> - >> - case GL_INT: >> - case GL_UNSIGNED_INT: >> - case GL_BOOL: >> - /* type conversions for these to int/uint are just >> - * copying the data. >> - */ >> - *(int *)out = values[i][j].i; >> + case GLSL_TYPE_BOOL: >> + dst[i].i = src[i].i ? 1 : 0; >> break; >> + default: >> + assert(!"Should not get here."); >> break; >> } >> break; >> - } >> >> - k++; >> + default: >> + assert(!"Should not get here."); >> + break; >> + } >> } >> } >> } >> @@ -925,9 +930,12 @@ _mesa_uniform(struct gl_context *ctx, struct >> gl_shader_program *shProg, >> GLint location, GLsizei count, >> const GLvoid *values, GLenum type) >> { >> - struct gl_uniform *uniform; >> - GLint elems; >> unsigned loc, offset; >> + unsigned components; >> + unsigned src_components; >> + unsigned i; >> + enum glsl_base_type basicType; >> + struct gl_uniform_storage *uni; >> >> ASSERT_OUTSIDE_BEGIN_END(ctx); >> >> @@ -935,73 +943,207 @@ _mesa_uniform(struct gl_context *ctx, struct >> gl_shader_program *shProg, >> &loc, &offset, "glUniform", false)) >> return; >> >> - elems = _mesa_sizeof_glsl_type(type); >> + uni = &shProg->UniformStorage[loc]; >> >> - FLUSH_VERTICES(ctx, _NEW_PROGRAM_CONSTANTS); >> + /* Verify that the types are compatible. >> + */ >> + switch (type) { >> + case GL_FLOAT: >> + basicType = GLSL_TYPE_FLOAT; >> + src_components = 1; >> + break; >> + case GL_FLOAT_VEC2: >> + basicType = GLSL_TYPE_FLOAT; >> + src_components = 2; >> + break; >> + case GL_FLOAT_VEC3: >> + basicType = GLSL_TYPE_FLOAT; >> + src_components = 3; >> + break; >> + case GL_FLOAT_VEC4: >> + basicType = GLSL_TYPE_FLOAT; >> + src_components = 4; >> + break; >> + case GL_UNSIGNED_INT: >> + basicType = GLSL_TYPE_UINT; >> + src_components = 1; >> + break; >> + case GL_UNSIGNED_INT_VEC2: >> + basicType = GLSL_TYPE_UINT; >> + src_components = 2; >> + break; >> + case GL_UNSIGNED_INT_VEC3: >> + basicType = GLSL_TYPE_UINT; >> + src_components = 3; >> + break; >> + case GL_UNSIGNED_INT_VEC4: >> + basicType = GLSL_TYPE_UINT; >> + src_components = 4; >> + break; >> + case GL_INT: >> + basicType = GLSL_TYPE_INT; >> + src_components = 1; >> + break; >> + case GL_INT_VEC2: >> + basicType = GLSL_TYPE_INT; >> + src_components = 2; >> + break; >> + case GL_INT_VEC3: >> + basicType = GLSL_TYPE_INT; >> + src_components = 3; >> + break; >> + case GL_INT_VEC4: >> + basicType = GLSL_TYPE_INT; >> + src_components = 4; >> + break; >> + case GL_BOOL: >> + case GL_BOOL_VEC2: >> + case GL_BOOL_VEC3: >> + case GL_BOOL_VEC4: >> + case GL_FLOAT_MAT2: >> + case GL_FLOAT_MAT2x3: >> + case GL_FLOAT_MAT2x4: >> + case GL_FLOAT_MAT3x2: >> + case GL_FLOAT_MAT3: >> + case GL_FLOAT_MAT3x4: >> + case GL_FLOAT_MAT4x2: >> + case GL_FLOAT_MAT4x3: >> + case GL_FLOAT_MAT4: >> + default: >> + _mesa_problem(NULL, "Invalid type in %s", __func__); >> + return; >> + } >> >> - uniform = &shProg->Uniforms->Uniforms[loc]; >> + if (uni->type->is_sampler()) { >> + components = 1; >> + } else { >> + components = uni->type->vector_elements; >> + } >> + >> + bool match; >> + switch (uni->type->base_type) { >> + case GLSL_TYPE_BOOL: >> + match = true; >> + break; >> + case GLSL_TYPE_SAMPLER: >> + match = (basicType == GLSL_TYPE_INT); >> + break; >> + default: >> + match = (basicType == uni->type->base_type); >> + break; >> + } >> + >> + if (uni->type->is_matrix() || components != src_components || !match) { >> + _mesa_error(ctx, GL_INVALID_OPERATION, "glUniform(type mismatch)"); >> + return; >> + } >> >> if (ctx->Shader.Flags & GLSL_UNIFORMS) { >> - const GLenum basicType = base_uniform_type(type); >> - GLint i; >> - printf("Mesa: set program %u uniform %s (loc %d) to: ", >> - shProg->Name, uniform->Name, location); >> - if (basicType == GL_INT) { >> - const GLint *v = (const GLint *) values; >> - for (i = 0; i < count * elems; i++) { >> - printf("%d ", v[i]); >> - } >> - } >> - else if (basicType == GL_UNSIGNED_INT) { >> - const GLuint *v = (const GLuint *) values; >> - for (i = 0; i < count * elems; i++) { >> - printf("%u ", v[i]); >> - } >> - } >> - else { >> - const GLfloat *v = (const GLfloat *) values; >> - assert(basicType == GL_FLOAT); >> - for (i = 0; i < count * elems; i++) { >> - printf("%g ", v[i]); >> + log_uniform(values, basicType, components, 1, count, >> + false, shProg, location, uni); >> + } >> + >> + /* Validate the texture unit setting. >> + */ >> + /* FINISHME: I cannot find any justification for this in the GL spec. >> + */ >> + if (uni->type->is_sampler()) { >> + for (i = 0; i < count; i++) { >> + const unsigned texUnit = ((unsigned *) values)[i]; >> + >> + /* check that the sampler (tex unit index) is legal */ >> + if (texUnit >= ctx->Const.MaxCombinedTextureImageUnits) { >> + _mesa_error(ctx, GL_INVALID_VALUE, >> + "glUniform1i(invalid sampler/tex unit index for " >> + "uniform %d)", >> + location); >> + return; >> } >> } >> - printf("\n"); >> } >> >> - /* A uniform var may be used by both a vertex shader and a fragment >> - * shader. We may need to update one or both shader's uniform here: >> + /* Page 82 (page 96 of the PDF) of the OpenGL 2.1 spec says: >> + * >> + * "When loading N elements starting at an arbitrary position k in a >> + * uniform declared as an array, elements k through k + N - 1 in the >> + * array will be replaced with the new values. Values for any array >> + * element that exceeds the highest array element index used, as >> + * reported by GetActiveUniform, will be ignored by the GL." >> + * >> + * Clamp 'count' to a valid value. Note that for non-arrays a count > 1 >> + * will have already generated an error. >> */ >> - if (shProg->_LinkedShaders[MESA_SHADER_VERTEX]) { >> - /* convert uniform location to program parameter index */ >> - GLint index = uniform->VertPos; >> - if (index >= 0) { >> - set_program_uniform(ctx, >> - >> shProg->_LinkedShaders[MESA_SHADER_VERTEX]->Program, >> - index, offset, type, count, elems, values); >> - } >> + if (uni->array_elements != 0) { >> + if (offset >= uni->array_elements) >> + return; >> + >> + count = MIN2(count, (uni->array_elements - offset)); >> } >> >> - if (shProg->_LinkedShaders[MESA_SHADER_FRAGMENT]) { >> - /* convert uniform location to program parameter index */ >> - GLint index = uniform->FragPos; >> - if (index >= 0) { >> - set_program_uniform(ctx, >> - >> shProg->_LinkedShaders[MESA_SHADER_FRAGMENT]->Program, >> - index, offset, type, count, elems, values); >> + FLUSH_VERTICES(ctx, _NEW_PROGRAM_CONSTANTS); >> + >> + /* Store the data in the "actual type" backing storage for the uniform. >> + */ >> + if (!uni->type->is_boolean()) { >> + memcpy(&uni->storage[components * offset], values, >> + sizeof(uni->storage[0]) * components * count); >> + } else { >> + const union gl_constant_value *src = >> + (const union gl_constant_value *) values; >> + union gl_constant_value *dst = &uni->storage[components * offset]; >> + const unsigned elems = components * count; >> + >> + for (i = 0; i < elems; i++) { >> + if (basicType == GLSL_TYPE_FLOAT) { >> + dst[i].i = src[i].f != 0.0f ? 1 : 0; >> + } else { >> + dst[i].i = src[i].i != 0 ? 1 : 0; >> + } >> } >> } >> >> - if (shProg->_LinkedShaders[MESA_SHADER_GEOMETRY]) { >> - /* convert uniform location to program parameter index */ >> - GLint index = uniform->GeomPos; >> - if (index >= 0) { >> - set_program_uniform(ctx, >> - >> shProg->_LinkedShaders[MESA_SHADER_GEOMETRY]->Program, >> - index, offset, type, count, elems, values); >> + uni->initialized = true; >> + uni->dirty = true; >> + >> + _mesa_propagate_uniforms_to_driver_storage(uni, offset, count); >> + >> + /* If the uniform is a sampler, do the extra magic necessary to propagate >> + * the changes through. >> + */ >> + if (uni->type->is_sampler()) { >> + for (i = 0; i < count; i++) { >> + shProg->SamplerUnits[uni->sampler + offset + i] = >> + ((unsigned *) values)[i]; >> } >> - } >> >> - uniform->Initialized = GL_TRUE; >> + bool flushed = false; >> + for (i = 0; i < MESA_SHADER_TYPES; i++) { >> + struct gl_program *prog; >> + >> + if (shProg->_LinkedShaders[i] == NULL) >> + continue; >> + >> + prog = shProg->_LinkedShaders[i]->Program; >> + >> + assert(sizeof(prog->SamplerUnits) == sizeof(shProg->SamplerUnits)); >> + >> + if (memcmp(prog->SamplerUnits, >> + shProg->SamplerUnits, >> + sizeof(shProg->SamplerUnits)) != 0) { >> + if (!flushed) { >> + FLUSH_VERTICES(ctx, _NEW_TEXTURE | _NEW_PROGRAM); >> + flushed = true; >> + } >> + >> + memcpy(prog->SamplerUnits, >> + shProg->SamplerUnits, >> + sizeof(shProg->SamplerUnits)); >> + >> + _mesa_update_shader_textures_used(prog); >> + (void) ctx->Driver.ProgramStringNotify(ctx, prog->Target, prog); > > Hi Ian, > > This ProgramStringNotify call causes excessive shader recompilations > in the game Cogs (from some Humble Bundle). Approximately 25% of > in-game CPU time is spent on translating shaders and register > allocation. If the shaders were larger, it would be much worse. > Moreover, I have realized calling ProgramStringNotify is not needed > for st/mesa in this case at all (I just comment it out if I want > higher frame rates), because it doesn't use the prog->SamplerUnits > array when translating shaders. The array is only used when we set > samplers, so setting _NEW_TEXTURE is sufficient. It seems that i965 > could do the same thing. Another option would be to cache precompiled > shaders, but that seems more complicated than doing what st/mesa does. > > Marek
Yeah, I noticed that a while back too, but we never arrived at a solution and it slipped off my radar. I agree completely, ProgramStringNotify is completely overkill and bogus. We actually already rely on _NEW_TEXTURE for all shader programs. Since the uniform code already flags _NEW_TEXTURE, we should be able to just add the SamplerUnits array to the program key (specifically, brw_sampler_prog_key_data), and that should take care of any state dependent recompiles. Then we wouldn't need ProgramStringNotify at all. Right? _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev