Ian Romanick <i...@freedesktop.org> writes: > On 04/28/2016 09:46 PM, Francisco Jerez wrote: >> Matt Turner <matts...@gmail.com> writes: >> >>> On Thu, Apr 28, 2016 at 12:19 AM, Francisco Jerez <curroje...@riseup.net> >>> wrote: >>>> --- >>>> src/mesa/drivers/dri/i965/brw_disasm.c | 90 >>>> ++++++++++++++++++++++++++-------- >>>> 1 file changed, 69 insertions(+), 21 deletions(-) >>>> >>>> diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c >>>> b/src/mesa/drivers/dri/i965/brw_disasm.c >>>> index 15d9383..0125434 100644 >>>> --- a/src/mesa/drivers/dri/i965/brw_disasm.c >>>> +++ b/src/mesa/drivers/dri/i965/brw_disasm.c >>>> @@ -30,9 +30,8 @@ >>>> #include "brw_inst.h" >>>> #include "brw_eu.h" >>>> >>>> -static const struct opcode_desc opcode_descs[128] = { >>>> +static const struct opcode_desc gen4_opcode_descs[128] = { >>>> [BRW_OPCODE_MOV] = { .name = "mov", .nsrc = 1, .ndst = 1 }, >>>> - [BRW_OPCODE_MOVI] = { .name = "movi", .nsrc = 2, .ndst = 1 }, >>>> [BRW_OPCODE_FRC] = { .name = "frc", .nsrc = 1, .ndst = 1 }, >>>> [BRW_OPCODE_RNDU] = { .name = "rndu", .nsrc = 1, .ndst = 1 }, >>>> [BRW_OPCODE_RNDD] = { .name = "rndd", .nsrc = 1, .ndst = 1 }, >>>> @@ -40,27 +39,17 @@ static const struct opcode_desc opcode_descs[128] = { >>>> [BRW_OPCODE_RNDZ] = { .name = "rndz", .nsrc = 1, .ndst = 1 }, >>>> [BRW_OPCODE_NOT] = { .name = "not", .nsrc = 1, .ndst = 1 }, >>>> [BRW_OPCODE_LZD] = { .name = "lzd", .nsrc = 1, .ndst = 1 }, >>>> - [BRW_OPCODE_F32TO16] = { .name = "f32to16", .nsrc = 1, .ndst = 1 }, >>>> - [BRW_OPCODE_F16TO32] = { .name = "f16to32", .nsrc = 1, .ndst = 1 }, >>>> - [BRW_OPCODE_BFREV] = { .name = "bfrev", .nsrc = 1, .ndst = 1 }, >>>> - [BRW_OPCODE_FBH] = { .name = "fbh", .nsrc = 1, .ndst = 1 }, >>>> - [BRW_OPCODE_FBL] = { .name = "fbl", .nsrc = 1, .ndst = 1 }, >>>> - [BRW_OPCODE_CBIT] = { .name = "cbit", .nsrc = 1, .ndst = 1 }, >>>> >>>> [BRW_OPCODE_MUL] = { .name = "mul", .nsrc = 2, .ndst = 1 }, >>>> [BRW_OPCODE_MAC] = { .name = "mac", .nsrc = 2, .ndst = 1 }, 
>>>> [BRW_OPCODE_MACH] = { .name = "mach", .nsrc = 2, .ndst = 1 }, >>>> [BRW_OPCODE_LINE] = { .name = "line", .nsrc = 2, .ndst = 1 }, >>>> - [BRW_OPCODE_PLN] = { .name = "pln", .nsrc = 2, .ndst = 1 }, >>>> - [BRW_OPCODE_MAD] = { .name = "mad", .nsrc = 3, .ndst = 1 }, >>>> - [BRW_OPCODE_LRP] = { .name = "lrp", .nsrc = 3, .ndst = 1 }, >>>> [BRW_OPCODE_SAD2] = { .name = "sad2", .nsrc = 2, .ndst = 1 }, >>>> [BRW_OPCODE_SADA2] = { .name = "sada2", .nsrc = 2, .ndst = 1 }, >>>> [BRW_OPCODE_DP4] = { .name = "dp4", .nsrc = 2, .ndst = 1 }, >>>> [BRW_OPCODE_DPH] = { .name = "dph", .nsrc = 2, .ndst = 1 }, >>>> [BRW_OPCODE_DP3] = { .name = "dp3", .nsrc = 2, .ndst = 1 }, >>>> [BRW_OPCODE_DP2] = { .name = "dp2", .nsrc = 2, .ndst = 1 }, >>>> - [BRW_OPCODE_MATH] = { .name = "math", .nsrc = 2, .ndst = 1 }, >>>> >>>> [BRW_OPCODE_AVG] = { .name = "avg", .nsrc = 2, .ndst = 1 }, >>>> [BRW_OPCODE_ADD] = { .name = "add", .nsrc = 2, .ndst = 1 }, >>>> @@ -73,17 +62,9 @@ static const struct opcode_desc opcode_descs[128] = { >>>> [BRW_OPCODE_ASR] = { .name = "asr", .nsrc = 2, .ndst = 1 }, >>>> [BRW_OPCODE_CMP] = { .name = "cmp", .nsrc = 2, .ndst = 1 }, >>>> [BRW_OPCODE_CMPN] = { .name = "cmpn", .nsrc = 2, .ndst = 1 }, >>>> - [BRW_OPCODE_CSEL] = { .name = "csel", .nsrc = 3, .ndst = 1 }, >>>> - [BRW_OPCODE_BFE] = { .name = "bfe", .nsrc = 3, .ndst = 1 }, >>>> - [BRW_OPCODE_BFI1] = { .name = "bfi1", .nsrc = 2, .ndst = 1 }, >>>> - [BRW_OPCODE_BFI2] = { .name = "bfi2", .nsrc = 3, .ndst = 1 }, >>>> - [BRW_OPCODE_ADDC] = { .name = "addc", .nsrc = 2, .ndst = 1 }, >>>> - [BRW_OPCODE_SUBB] = { .name = "subb", .nsrc = 2, .ndst = 1 }, >>>> >>>> [BRW_OPCODE_SEND] = { .name = "send", .nsrc = 1, .ndst = 1 }, >>>> [BRW_OPCODE_SENDC] = { .name = "sendc", .nsrc = 1, .ndst = 1 }, >>>> - [BRW_OPCODE_SENDS] = { .name = "sends", .nsrc = 2, .ndst = 1 }, >>>> - [BRW_OPCODE_SENDSC] = { .name = "sendsc", .nsrc = 2, .ndst = 1 }, >>>> [BRW_OPCODE_ILLEGAL] = { .name = "illegal", .nsrc = 0, .ndst = 0 }, >>>> 
[BRW_OPCODE_NOP] = { .name = "nop", .nsrc = 0, .ndst = 0 }, >>>> [BRW_OPCODE_NENOP] = { .name = "nenop", .nsrc = 0, .ndst = 0 }, >>>> @@ -104,6 +85,70 @@ static const struct opcode_desc opcode_descs[128] = { >>>> [BRW_OPCODE_ENDIF] = { .name = "endif", .nsrc = 0, .ndst = 0 }, >>>> }; >>>> >>>> +static const struct opcode_desc g45_opcode_descs[128] = { >>>> + [BRW_OPCODE_MOVI] = { .name = "movi", .nsrc = 2, .ndst = 1 }, >>>> + [BRW_OPCODE_PLN] = { .name = "pln", .nsrc = 2, .ndst = 1 }, >>>> +}; >>>> + >>>> +static const struct opcode_desc gen6_opcode_descs[128] = { >>>> + [BRW_OPCODE_MATH] = { .name = "math", .nsrc = 2, .ndst = 1 }, >>>> + [BRW_OPCODE_MAD] = { .name = "mad", .nsrc = 3, .ndst = 1 }, >>>> + [BRW_OPCODE_LRP] = { .name = "lrp", .nsrc = 3, .ndst = 1 }, >>>> +}; >>>> + >>>> +static const struct opcode_desc gen7_opcode_descs[128] = { >>>> + [BRW_OPCODE_F32TO16] = { .name = "f32to16", .nsrc = 1, .ndst = 1 }, >>>> + [BRW_OPCODE_F16TO32] = { .name = "f16to32", .nsrc = 1, .ndst = 1 }, >>>> + [BRW_OPCODE_BFREV] = { .name = "bfrev", .nsrc = 1, .ndst = 1 }, >>>> + [BRW_OPCODE_BFE] = { .name = "bfe", .nsrc = 3, .ndst = 1 }, >>>> + [BRW_OPCODE_BFI1] = { .name = "bfi1", .nsrc = 2, .ndst = 1 }, >>>> + [BRW_OPCODE_BFI2] = { .name = "bfi2", .nsrc = 3, .ndst = 1 }, >>>> + [BRW_OPCODE_FBH] = { .name = "fbh", .nsrc = 1, .ndst = 1 }, >>>> + [BRW_OPCODE_FBL] = { .name = "fbl", .nsrc = 1, .ndst = 1 }, >>>> + [BRW_OPCODE_CBIT] = { .name = "cbit", .nsrc = 1, .ndst = 1 }, >>>> + [BRW_OPCODE_ADDC] = { .name = "addc", .nsrc = 2, .ndst = 1 }, >>>> + [BRW_OPCODE_SUBB] = { .name = "subb", .nsrc = 2, .ndst = 1 }, >>>> +}; >>>> + >>>> +static const struct opcode_desc gen8_opcode_descs[128] = { >>>> + [BRW_OPCODE_CSEL] = { .name = "csel", .nsrc = 3, .ndst = 1 }, >>>> +}; >>>> + >>>> +static const struct opcode_desc gen9_opcode_descs[128] = { >>>> + [BRW_OPCODE_SENDS] = { .name = "sends", .nsrc = 2, .ndst = 1 }, >>>> + [BRW_OPCODE_SENDSC] = { .name = "sendsc", .nsrc = 2, .ndst = 1 }, 
>>> >>> A 128*16-byte array for each differing generation seems really bad to >>> me, especially when they add 1-3 opcodes. Half a page for... one >>> opcode. >>> >> No, it's not; an array bounded by 128*16B for each major hardware >> generation is a tiny amount of memory by (even not so) modern standards. >> I have the suspicion you're trying to optimize prematurely, or do you >> have any evidence that your suggestion can substantially improve >> performance? > > I don't think performance is the issue. I think the issue is storage > space. How does the output of 'size i965_dri.so' change from the > beginning to the end of the series? > No surprises there... Comparing two local release builds, this patch increases the size of the i965_dri.so binary by roughly 11 kB, or 0.04% of the size of the driver...
>>> Your idea to order the array by opcode (but not index by opcode) and >>> then by generation seems more appealing to me than this. Especially >>> since you could make an initial guess that the opcode you want lives >>> at opcode_desc[opcode] and start your (linear) search from there. >>> >>> For the common case (accessing an opcode that always exists) I think >>> that's actually fewer operations than searching five opcode_descs >>> arrays before finding it. >>> >>> _______________________________________________ >>> mesa-dev mailing list >>> mesa-dev@lists.freedesktop.org >>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
signature.asc
Description: PGP signature
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev