On Wed, Jan 4, 2012 at 12:28 PM, Kenneth Graunke <kenn...@whitecape.org> wrote: > On 01/03/2012 09:43 PM, Marek Olšák wrote: >> On Fri, Oct 28, 2011 at 7:42 PM, Ian Romanick <i...@freedesktop.org> wrote: >>> diff --git a/src/mesa/main/uniform_query.cpp >>> b/src/mesa/main/uniform_query.cpp >>> index db2f200..50a724b 100644 >>> --- a/src/mesa/main/uniform_query.cpp >>> +++ b/src/mesa/main/uniform_query.cpp >>> @@ -22,15 +22,16 @@ >>> * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN >>> * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE >>> SOFTWARE. >>> */ >>> +#include <stdlib.h> >>> #include "main/core.h" >>> #include "main/context.h" >>> #include "ir.h" >>> #include "ir_uniform.h" >>> +#include "program/hash_table.h" >>> #include "../glsl/program.h" >>> #include "../glsl/ir_uniform.h" >>> >>> extern "C" { >>> -#include "main/image.h" >>> #include "main/shaderapi.h" >>> #include "main/shaderobj.h" >>> #include "uniforms.h" >>> @@ -44,42 +45,30 @@ _mesa_GetActiveUniformARB(GLhandleARB program, GLuint >>> index, >>> GET_CURRENT_CONTEXT(ctx); >>> struct gl_shader_program *shProg = >>> _mesa_lookup_shader_program_err(ctx, program, "glGetActiveUniform"); >>> - const struct gl_program_parameter *param; >>> >>> if (!shProg) >>> return; >>> >>> - if (!shProg->Uniforms || index >= shProg->Uniforms->NumUniforms) { >>> + if (index >= shProg->NumUserUniformStorage) { >>> _mesa_error(ctx, GL_INVALID_VALUE, "glGetActiveUniform(index)"); >>> return; >>> } >>> >>> - param = get_uniform_parameter(shProg, index); >>> - if (!param) >>> - return; >>> - >>> - const struct gl_uniform *const uni = &shProg->Uniforms->Uniforms[index]; >>> + const struct gl_uniform_storage *const uni = >>> &shProg->UniformStorage[index]; >>> >>> if (nameOut) { >>> - _mesa_copy_string(nameOut, maxLength, length, param->Name); >>> + _mesa_copy_string(nameOut, maxLength, length, uni->name); >>> } >>> >>> if (size) { >>> - GLint typeSize = _mesa_sizeof_glsl_type(uni->Type->gl_type); >>> - if ((GLint) param->Size > typeSize) { >>> - /* This is an array. >>> - * Array elements are placed on vector[4] boundaries so they're >>> - * a multiple of four floats. We round typeSize up to next >>> multiple >>> - * of four to get the right size below. >>> - */ >>> - typeSize = (typeSize + 3) & ~3; >>> - } >>> - /* Note that the returned size is in units of the <type>, not bytes >>> */ >>> - *size = param->Size / typeSize; >>> + /* array_elements is zero for non-arrays, but the API requires that >>> 1 be >>> + * returned. >>> + */ >>> + *size = MAX2(1, uni->array_elements); >>> } >>> >>> if (type) { >>> - *type = uni->Type->gl_type; >>> + *type = uni->type->gl_type; >>> } >>> } >>> >>> @@ -409,12 +398,21 @@ validate_uniform_parameters(struct gl_context *ctx, >>> >>> _mesa_uniform_split_location_offset(location, loc, array_index); >>> >>> - if (*loc >= shProg->Uniforms->NumUniforms) { >>> + if (*loc >= shProg->NumUserUniformStorage) { >>> _mesa_error(ctx, GL_INVALID_OPERATION, "%s(location=%d)", >>> caller, location); >>> return false; >>> } >>> >>> + /* This case should be impossible. The implication is that a call like >>> + * glGetUniformLocation(prog, "foo[8]") was successful but "foo" is not >>> an >>> + * array. >>> + */ >>> + if (*array_index != 0 && shProg->UniformStorage[*loc].array_elements == >>> 0) { >>> + _mesa_error(ctx, GL_INVALID_OPERATION, "%s(location=%d)", >>> + caller, location); >>> + return false; >>> + } >>> return true; >>> } >>> >>> @@ -423,72 +421,81 @@ validate_uniform_parameters(struct gl_context *ctx, >>> */ >>> extern "C" void >>> _mesa_get_uniform(struct gl_context *ctx, GLuint program, GLint location, >>> - GLsizei bufSize, GLenum returnType, GLvoid *paramsOut) >>> + GLsizei bufSize, enum glsl_base_type returnType, >>> + GLvoid *paramsOut) >>> { >>> struct gl_shader_program *shProg = >>> _mesa_lookup_shader_program_err(ctx, program, "glGetUniformfv"); >>> - struct gl_program *prog; >>> - GLint paramPos; >>> + struct gl_uniform_storage *uni; >>> unsigned loc, offset; >>> >>> if (!validate_uniform_parameters(ctx, shProg, location, 1, >>> &loc, &offset, "glGetUniform", true)) >>> return; >>> >>> - if (!find_uniform_parameter_pos(shProg, loc, &prog, ¶mPos)) { >>> - _mesa_error(ctx, GL_INVALID_OPERATION, "glGetUniformfv(location)"); >>> - } >>> - else { >>> - const struct gl_program_parameter *p = >>> - &prog->Parameters->Parameters[paramPos]; >>> - gl_constant_value (*values)[4]; >>> - GLint rows, cols, i, j, k; >>> - GLsizei numBytes; >>> - GLenum storage_type; >>> - >>> - values = prog->Parameters->ParameterValues + paramPos + offset; >>> - >>> - get_uniform_rows_cols(p, &rows, &cols); >>> - >>> - numBytes = rows * cols * _mesa_sizeof_type(returnType); >>> - if (bufSize < numBytes) { >>> - _mesa_error( ctx, GL_INVALID_OPERATION, >>> - "glGetnUniformfvARB(out of bounds: bufSize is %d," >>> - " but %d bytes are required)", bufSize, numBytes ); >>> - return; >>> - } >>> + uni = &shProg->UniformStorage[loc]; >>> >>> - if (ctx->Const.NativeIntegers) { >>> - storage_type = base_uniform_type(p->DataType); >>> - } else { >>> - storage_type = GL_FLOAT; >>> - } >>> + { >>> + unsigned elements = (uni->type->is_sampler()) >>> + ? 1 : uni->type->components(); >>> >>> - k = 0; >>> - for (i = 0; i < rows; i++) { >>> - for (j = 0; j < cols; j++ ) { >>> - void *out = (char *)paramsOut + 4 * k; >>> + /* Calculate the source base address *BEFORE* modifying elements to >>> + * account for the size of the user's buffer. >>> + */ >>> + const union gl_constant_value *const src = >>> + &uni->storage[offset * elements]; >>> + >>> + unsigned bytes = sizeof(uni->storage[0]) * elements; >>> + if (bytes > bufSize) { >>> + elements = bufSize / sizeof(uni->storage[0]); >>> + bytes = bufSize; >>> + } >>> >>> + /* If the return type and the uniform's native type are "compatible," >>> + * just memcpy the data. If the types are not compatible, perform a >>> + * slower convert-and-copy process. >>> + */ >>> + if (returnType == uni->type->base_type >>> + || ((returnType == GLSL_TYPE_INT >>> + || returnType == GLSL_TYPE_UINT >>> + || returnType == GLSL_TYPE_SAMPLER) >>> + && >>> + (uni->type->base_type == GLSL_TYPE_INT >>> + || uni->type->base_type == GLSL_TYPE_UINT >>> + || uni->type->base_type == GLSL_TYPE_SAMPLER))) { >>> + memcpy(paramsOut, src, bytes); >>> + } else { >>> + union gl_constant_value *const dst = >>> + (union gl_constant_value *) paramsOut; >>> + >>> + /* This code could be optimized by putting the loop inside the >>> switch >>> + * statements. However, this is not expected to be >>> + * performance-critical code. >>> + */ >>> + for (unsigned i = 0; i < elements; i++) { >>> switch (returnType) { >>> - case GL_FLOAT: >>> - switch (storage_type) { >>> - case GL_FLOAT: >>> - *(float *)out = values[i][j].f; >>> + case GLSL_TYPE_FLOAT: >>> + switch (uni->type->base_type) { >>> + case GLSL_TYPE_UINT: >>> + dst[i].f = (float) src[i].u; >>> + break; >>> + case GLSL_TYPE_INT: >>> + case GLSL_TYPE_SAMPLER: >>> + dst[i].f = (float) src[i].i; >>> break; >>> - case GL_INT: >>> - case GL_BOOL: /* boolean is just an integer 1 or 0. */ >>> - *(float *)out = values[i][j].i; >>> + case GLSL_TYPE_BOOL: >>> + dst[i].f = src[i].i ? 1.0f : 0.0f; >>> break; >>> - case GL_UNSIGNED_INT: >>> - *(float *)out = values[i][j].u; >>> + default: >>> + assert(!"Should not get here."); >>> break; >>> } >>> break; >>> >>> - case GL_INT: >>> - case GL_UNSIGNED_INT: >>> - switch (storage_type) { >>> - case GL_FLOAT: >>> + case GLSL_TYPE_INT: >>> + case GLSL_TYPE_UINT: >>> + switch (uni->type->base_type) { >>> + case GLSL_TYPE_FLOAT: >>> /* While the GL 3.2 core spec doesn't explicitly >>> * state how conversion of float uniforms to integer >>> * values works, in section 6.2 "State Tables" on >>> @@ -506,23 +513,21 @@ _mesa_get_uniform(struct gl_context *ctx, GLuint >>> program, GLint location, >>> * a floating-point value is rounded to the >>> * nearest integer..." >>> */ >>> - *(int *)out = IROUND(values[i][j].f); >>> + dst[i].i = IROUND(src[i].f); >>> break; >>> - >>> - case GL_INT: >>> - case GL_UNSIGNED_INT: >>> - case GL_BOOL: >>> - /* type conversions for these to int/uint are just >>> - * copying the data. >>> - */ >>> - *(int *)out = values[i][j].i; >>> + case GLSL_TYPE_BOOL: >>> + dst[i].i = src[i].i ? 1 : 0; >>> break; >>> + default: >>> + assert(!"Should not get here."); >>> break; >>> } >>> break; >>> - } >>> >>> - k++; >>> + default: >>> + assert(!"Should not get here."); >>> + break; >>> + } >>> } >>> } >>> } >>> @@ -925,9 +930,12 @@ _mesa_uniform(struct gl_context *ctx, struct >>> gl_shader_program *shProg, >>> GLint location, GLsizei count, >>> const GLvoid *values, GLenum type) >>> { >>> - struct gl_uniform *uniform; >>> - GLint elems; >>> unsigned loc, offset; >>> + unsigned components; >>> + unsigned src_components; >>> + unsigned i; >>> + enum glsl_base_type basicType; >>> + struct gl_uniform_storage *uni; >>> >>> ASSERT_OUTSIDE_BEGIN_END(ctx); >>> >>> @@ -935,73 +943,207 @@ _mesa_uniform(struct gl_context *ctx, struct >>> gl_shader_program *shProg, >>> &loc, &offset, "glUniform", false)) >>> return; >>> >>> - elems = _mesa_sizeof_glsl_type(type); >>> + uni = &shProg->UniformStorage[loc]; >>> >>> - FLUSH_VERTICES(ctx, _NEW_PROGRAM_CONSTANTS); >>> + /* Verify that the types are compatible. >>> + */ >>> + switch (type) { >>> + case GL_FLOAT: >>> + basicType = GLSL_TYPE_FLOAT; >>> + src_components = 1; >>> + break; >>> + case GL_FLOAT_VEC2: >>> + basicType = GLSL_TYPE_FLOAT; >>> + src_components = 2; >>> + break; >>> + case GL_FLOAT_VEC3: >>> + basicType = GLSL_TYPE_FLOAT; >>> + src_components = 3; >>> + break; >>> + case GL_FLOAT_VEC4: >>> + basicType = GLSL_TYPE_FLOAT; >>> + src_components = 4; >>> + break; >>> + case GL_UNSIGNED_INT: >>> + basicType = GLSL_TYPE_UINT; >>> + src_components = 1; >>> + break; >>> + case GL_UNSIGNED_INT_VEC2: >>> + basicType = GLSL_TYPE_UINT; >>> + src_components = 2; >>> + break; >>> + case GL_UNSIGNED_INT_VEC3: >>> + basicType = GLSL_TYPE_UINT; >>> + src_components = 3; >>> + break; >>> + case GL_UNSIGNED_INT_VEC4: >>> + basicType = GLSL_TYPE_UINT; >>> + src_components = 4; >>> + break; >>> + case GL_INT: >>> + basicType = GLSL_TYPE_INT; >>> + src_components = 1; >>> + break; >>> + case GL_INT_VEC2: >>> + basicType = GLSL_TYPE_INT; >>> + src_components = 2; >>> + break; >>> + case GL_INT_VEC3: >>> + basicType = GLSL_TYPE_INT; >>> + src_components = 3; >>> + break; >>> + case GL_INT_VEC4: >>> + basicType = GLSL_TYPE_INT; >>> + src_components = 4; >>> + break; >>> + case GL_BOOL: >>> + case GL_BOOL_VEC2: >>> + case GL_BOOL_VEC3: >>> + case GL_BOOL_VEC4: >>> + case GL_FLOAT_MAT2: >>> + case GL_FLOAT_MAT2x3: >>> + case GL_FLOAT_MAT2x4: >>> + case GL_FLOAT_MAT3x2: >>> + case GL_FLOAT_MAT3: >>> + case GL_FLOAT_MAT3x4: >>> + case GL_FLOAT_MAT4x2: >>> + case GL_FLOAT_MAT4x3: >>> + case GL_FLOAT_MAT4: >>> + default: >>> + _mesa_problem(NULL, "Invalid type in %s", __func__); >>> + return; >>> + } >>> >>> - uniform = &shProg->Uniforms->Uniforms[loc]; >>> + if (uni->type->is_sampler()) { >>> + components = 1; >>> + } else { >>> + components = uni->type->vector_elements; >>> + } >>> + >>> + bool match; >>> + switch (uni->type->base_type) { >>> + case GLSL_TYPE_BOOL: >>> + match = true; >>> + break; >>> + case GLSL_TYPE_SAMPLER: >>> + match = (basicType == GLSL_TYPE_INT); >>> + break; >>> + default: >>> + match = (basicType == uni->type->base_type); >>> + break; >>> + } >>> + >>> + if (uni->type->is_matrix() || components != src_components || !match) { >>> + _mesa_error(ctx, GL_INVALID_OPERATION, "glUniform(type mismatch)"); >>> + return; >>> + } >>> >>> if (ctx->Shader.Flags & GLSL_UNIFORMS) { >>> - const GLenum basicType = base_uniform_type(type); >>> - GLint i; >>> - printf("Mesa: set program %u uniform %s (loc %d) to: ", >>> - shProg->Name, uniform->Name, location); >>> - if (basicType == GL_INT) { >>> - const GLint *v = (const GLint *) values; >>> - for (i = 0; i < count * elems; i++) { >>> - printf("%d ", v[i]); >>> - } >>> - } >>> - else if (basicType == GL_UNSIGNED_INT) { >>> - const GLuint *v = (const GLuint *) values; >>> - for (i = 0; i < count * elems; i++) { >>> - printf("%u ", v[i]); >>> - } >>> - } >>> - else { >>> - const GLfloat *v = (const GLfloat *) values; >>> - assert(basicType == GL_FLOAT); >>> - for (i = 0; i < count * elems; i++) { >>> - printf("%g ", v[i]); >>> + log_uniform(values, basicType, components, 1, count, >>> + false, shProg, location, uni); >>> + } >>> + >>> + /* Validate the texture unit setting. >>> + */ >>> + /* FINISHME: I cannot find any justification for this in the GL spec. >>> + */ >>> + if (uni->type->is_sampler()) { >>> + for (i = 0; i < count; i++) { >>> + const unsigned texUnit = ((unsigned *) values)[i]; >>> + >>> + /* check that the sampler (tex unit index) is legal */ >>> + if (texUnit >= ctx->Const.MaxCombinedTextureImageUnits) { >>> + _mesa_error(ctx, GL_INVALID_VALUE, >>> + "glUniform1i(invalid sampler/tex unit index for " >>> + "uniform %d)", >>> + location); >>> + return; >>> } >>> } >>> - printf("\n"); >>> } >>> >>> - /* A uniform var may be used by both a vertex shader and a fragment >>> - * shader. We may need to update one or both shader's uniform here: >>> + /* Page 82 (page 96 of the PDF) of the OpenGL 2.1 spec says: >>> + * >>> + * "When loading N elements starting at an arbitrary position k in a >>> + * uniform declared as an array, elements k through k + N - 1 in the >>> + * array will be replaced with the new values. Values for any array >>> + * element that exceeds the highest array element index used, as >>> + * reported by GetActiveUniform, will be ignored by the GL." >>> + * >>> + * Clamp 'count' to a valid value. Note that for non-arrays a count > 1 >>> + * will have already generated an error. >>> */ >>> - if (shProg->_LinkedShaders[MESA_SHADER_VERTEX]) { >>> - /* convert uniform location to program parameter index */ >>> - GLint index = uniform->VertPos; >>> - if (index >= 0) { >>> - set_program_uniform(ctx, >>> - >>> shProg->_LinkedShaders[MESA_SHADER_VERTEX]->Program, >>> - index, offset, type, count, elems, values); >>> - } >>> + if (uni->array_elements != 0) { >>> + if (offset >= uni->array_elements) >>> + return; >>> + >>> + count = MIN2(count, (uni->array_elements - offset)); >>> } >>> >>> - if (shProg->_LinkedShaders[MESA_SHADER_FRAGMENT]) { >>> - /* convert uniform location to program parameter index */ >>> - GLint index = uniform->FragPos; >>> - if (index >= 0) { >>> - set_program_uniform(ctx, >>> - >>> shProg->_LinkedShaders[MESA_SHADER_FRAGMENT]->Program, >>> - index, offset, type, count, elems, values); >>> + FLUSH_VERTICES(ctx, _NEW_PROGRAM_CONSTANTS); >>> + >>> + /* Store the data in the "actual type" backing storage for the uniform. >>> + */ >>> + if (!uni->type->is_boolean()) { >>> + memcpy(&uni->storage[components * offset], values, >>> + sizeof(uni->storage[0]) * components * count); >>> + } else { >>> + const union gl_constant_value *src = >>> + (const union gl_constant_value *) values; >>> + union gl_constant_value *dst = &uni->storage[components * offset]; >>> + const unsigned elems = components * count; >>> + >>> + for (i = 0; i < elems; i++) { >>> + if (basicType == GLSL_TYPE_FLOAT) { >>> + dst[i].i = src[i].f != 0.0f ? 1 : 0; >>> + } else { >>> + dst[i].i = src[i].i != 0 ? 1 : 0; >>> + } >>> } >>> } >>> >>> - if (shProg->_LinkedShaders[MESA_SHADER_GEOMETRY]) { >>> - /* convert uniform location to program parameter index */ >>> - GLint index = uniform->GeomPos; >>> - if (index >= 0) { >>> - set_program_uniform(ctx, >>> - >>> shProg->_LinkedShaders[MESA_SHADER_GEOMETRY]->Program, >>> - index, offset, type, count, elems, values); >>> + uni->initialized = true; >>> + uni->dirty = true; >>> + >>> + _mesa_propagate_uniforms_to_driver_storage(uni, offset, count); >>> + >>> + /* If the uniform is a sampler, do the extra magic necessary to >>> propagate >>> + * the changes through. >>> + */ >>> + if (uni->type->is_sampler()) { >>> + for (i = 0; i < count; i++) { >>> + shProg->SamplerUnits[uni->sampler + offset + i] = >>> + ((unsigned *) values)[i]; >>> } >>> - } >>> >>> - uniform->Initialized = GL_TRUE; >>> + bool flushed = false; >>> + for (i = 0; i < MESA_SHADER_TYPES; i++) { >>> + struct gl_program *prog; >>> + >>> + if (shProg->_LinkedShaders[i] == NULL) >>> + continue; >>> + >>> + prog = shProg->_LinkedShaders[i]->Program; >>> + >>> + assert(sizeof(prog->SamplerUnits) == sizeof(shProg->SamplerUnits)); >>> + >>> + if (memcmp(prog->SamplerUnits, >>> + shProg->SamplerUnits, >>> + sizeof(shProg->SamplerUnits)) != 0) { >>> + if (!flushed) { >>> + FLUSH_VERTICES(ctx, _NEW_TEXTURE | _NEW_PROGRAM); >>> + flushed = true; >>> + } >>> + >>> + memcpy(prog->SamplerUnits, >>> + shProg->SamplerUnits, >>> + sizeof(shProg->SamplerUnits)); >>> + >>> + _mesa_update_shader_textures_used(prog); >>> + (void) ctx->Driver.ProgramStringNotify(ctx, prog->Target, prog); >> >> Hi Ian, >> >> This ProgramStringNotify call causes excessive shader recompilations >> in the game Cogs (from some Humble Bundle). Approximately 25% of >> in-game CPU time is spent on translating shaders and register >> allocation. If the shaders were larger, it would be much worse. >> Moreover, I have realized calling ProgramStringNotify is not needed >> for st/mesa in this case at all (I just comment it out if I want >> higher frame rates), because it doesn't use the prog->SamplerUnits >> array when translating shaders. The array is only used when we set >> samplers, so setting _NEW_TEXTURE is sufficient. It seems that i965 >> could do the same thing. Another option would be to cache precompiled >> shaders, but that seems more complicated than doing what st/mesa does. >> >> Marek > > Yeah, I noticed that a while back too, but we never arrived at a > solution and it slipped off my radar. I agree completely, > ProgramStringNotify is completely overkill and bogus. > > We actually already rely on _NEW_TEXTURE for all shader programs. Since > the uniform code already flags _NEW_TEXTURE, we should be able to just > add the SamplerUnits array to the program key (specifically, > brw_sampler_prog_key_data), and that should take care of any state > dependent recompiles. Then we wouldn't need ProgramStringNotify at all. > > Right?
Yeah, that could work. Marek _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev