Hi guys,
On 21.12.2011 17:27, Maarten Lankhorst wrote:
Also a remark about your patch, wish you had inlined it..
Sorry, not at home right now and I can't get this Thunderbird here to
inline the patches correctly.
diff --git a/src/gallium/auxiliary/vl/vl_vlc.h
b/src/gallium/auxiliary/vl/vl_vlc.h
index dc4faed..f961b8b 100644
--- a/src/gallium/auxiliary/vl/vl_vlc.h
+++ b/src/gallium/auxiliary/vl/vl_vlc.h
@@ -115,22 +164,24 @@ vl_vlc_init(struct vl_vlc *vlc, const uint8_t *data,
unsigned len)
static INLINE unsigned
vl_vlc_bits_left(struct vl_vlc *vlc)
{
+ unsigned i;
signed bytes_left = ((uint8_t*)vlc->end)-((uint8_t*)vlc->data);
+ for (i = 0; i< vlc->num_inputs; ++i)
+ bytes_left += vlc->sizes[i];
Calculating this more than once is wasteful, shouldn't this be done once in
init?
Good idea, but doesn't make so much of a difference, since
vl_vlc_bits_left is only called once per macroblock.
return bytes_left * 8 + vlc->valid_bits;
}
....
@@ -86,28 +123,40 @@ vl_vlc_fillbits(struct vl_vlc *vlc)
vlc->buffer |= (uint64_t)value<< (32 - vlc->valid_bits);
++vlc->data;
vlc->valid_bits += 32;
+
+ /* if this input is depleted, go to next one */
+ if (vlc->data>= vlc->end&& vlc->num_inputs) {
+ unsigned bytes_overdue = ((uint8_t*)vlc->data)-((uint8_t*)vlc->end);
+
+ /* adjust valid bits */
+ vlc->valid_bits -= bytes_overdue * 8;
+
+ /* clear not valid bits */
+ vlc->buffer&= ~((1LL<< (64 - vlc->valid_bits)) - 1);
+
+ /* go on to next input */
+ vl_vlc_next_input(vlc);
+
+ /* and retry to fill the buffer */
+ vl_vlc_fillbits(vlc);
Is this recursion necessary? Can't you simply change the if to a while?
Also if you are evil and put in a zero-sized buffer, this code will not work,
so the if checks needs to be reworked. Same problem with vl_vlc_next_input and
unaligned pointers, you can cause it to crash with alignment 4n+1 and len<= 2.
Yeah, all the corner cases like len < 4, badly aligned start and end of
buffer etc where not really covered by the last version of the patch.
Take a look at the attached one, does it now cover all cases?
On 21.12.2011 17:24, Younes Manton wrote:
2011/12/21 Maarten Lankhorst<m.b.lankho...@gmail.com>:
Hey Christian,
On 12/21/2011 04:41 PM, Christian König wrote:
Those functions are called a couple of million times a second, so even if the
assertion is only tested in debug builds it has quite an effect on performance,
sometimes even masquerading real performance problems. So my practice was to
only uncomment them when I work on that part of the code. I don't want to
change the Assert semantics in any way, just a simple define enabling certain
assertions only under certain conditions should be sufficient.
I think it makes sense to have the debug builds enabling those asserts by
default.
You could always do this in vl_mpeg12_bitstream.c when testing:
#include "u_debug.h"
#undef assert
#include "vl_vlc.h"
#define assert debug_assert
On a related note, why is the vl code using assert.h directly instead of
u_debug.h ?
~Maarten
u_debug.h didn't always have assert wrappers.
I wasn't even aware that gallium has its own assert implementation in
u_debug.h, cause a whole bunch of state trackers and drivers are using
assert.h instead.
Anyway, I have no idea what I tried the last time to make it perform so
awfuly that I need to comment it out. So I think just leaving the
assertions enabled for now should be ok. As Maarten pointed out
disabling it is just a two liner.
Christian.
>From 740ce9bf4e4703f143c1fe29b8d62c7d237b7f9a Mon Sep 17 00:00:00 2001
From: Maarten Lankhorst <m.b.lankho...@gmail.com>
Date: Tue, 20 Dec 2011 12:43:23 +0100
Subject: [PATCH] vl: Only initialize vlc once
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
And add more sanity checks to stream. This shouldn't
break things beyond those that aren't broken already.
v2: also implement multiple inputs for the vlc functions
v3: some bug fixes for buffer size and alignment corner cases
Signed-off-by: Maarten Lankhorst <m.b.lankho...@gmail.com>
Signed-off-by: Christian König <deathsim...@vodafone.de>
---
src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c | 50 ++++------
src/gallium/auxiliary/vl/vl_vlc.h | 123 ++++++++++++++++++-----
2 files changed, 117 insertions(+), 56 deletions(-)
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
index 936cf2c..cf75000 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
@@ -786,7 +786,7 @@ entry:
}
}
-static INLINE bool
+static INLINE void
decode_slice(struct vl_mpg12_bs *bs)
{
struct pipe_mpeg12_macroblock mb;
@@ -800,20 +800,25 @@ decode_slice(struct vl_mpg12_bs *bs)
mb.blocks = dct_blocks;
reset_predictor(bs);
+ vl_vlc_fillbits(&bs->vlc);
dct_scale = quant_scale[bs->desc.q_scale_type][vl_vlc_get_uimsbf(&bs->vlc, 5)];
- if (vl_vlc_get_uimsbf(&bs->vlc, 1))
+ if (vl_vlc_get_uimsbf(&bs->vlc, 1)) {
+ vl_vlc_fillbits(&bs->vlc);
while (vl_vlc_get_uimsbf(&bs->vlc, 9) & 1)
vl_vlc_fillbits(&bs->vlc);
+ }
vl_vlc_fillbits(&bs->vlc);
+ assert(vl_vlc_bits_left(&bs->vlc) > 23 && vl_vlc_peekbits(&bs->vlc, 23));
do {
int inc = 0;
- while (vl_vlc_peekbits(&bs->vlc, 11) == 15) {
- vl_vlc_eatbits(&bs->vlc, 11);
- vl_vlc_fillbits(&bs->vlc);
- }
+ if (bs->decoder->profile == PIPE_VIDEO_PROFILE_MPEG1)
+ while (vl_vlc_peekbits(&bs->vlc, 11) == 15) {
+ vl_vlc_eatbits(&bs->vlc, 11);
+ vl_vlc_fillbits(&bs->vlc);
+ }
while (vl_vlc_peekbits(&bs->vlc, 11) == 8) {
vl_vlc_eatbits(&bs->vlc, 11);
@@ -928,7 +933,6 @@ decode_slice(struct vl_mpg12_bs *bs)
mb.num_skipped_macroblocks = 0;
bs->decoder->decode_macroblock(bs->decoder, &mb.base, 1);
- return true;
}
void
@@ -959,32 +963,20 @@ void
vl_mpg12_bs_decode(struct vl_mpg12_bs *bs, unsigned num_bytes, const uint8_t *buffer)
{
assert(bs);
- assert(buffer && num_bytes);
- while(num_bytes > 2) {
- if (buffer[0] == 0x00 && buffer[1] == 0x00 && buffer[2] == 0x01 &&
- buffer[3] >= 0x01 && buffer[3] < 0xAF) {
- unsigned consumed;
+ vl_vlc_init(&bs->vlc, 1, (const void * const *)&buffer, &num_bytes);
+ while (vl_vlc_bits_left(&bs->vlc) > 32) {
+ uint32_t code = vl_vlc_peekbits(&bs->vlc, 32);
- buffer += 3;
- num_bytes -= 3;
-
- vl_vlc_init(&bs->vlc, buffer, num_bytes);
-
- if (!decode_slice(bs))
- return;
-
- consumed = num_bytes - vl_vlc_bits_left(&bs->vlc) / 8;
-
- /* crap, this is a bug we have consumed more bytes than left in the buffer */
- assert(consumed <= num_bytes);
-
- num_bytes -= consumed;
- buffer += consumed;
+ if (code >= 0x101 && code <= 0x1AF) {
+ vl_vlc_eatbits(&bs->vlc, 24);
+ decode_slice(bs);
+ vl_vlc_bytealign(&bs->vlc);
} else {
- ++buffer;
- --num_bytes;
+ vl_vlc_eatbits(&bs->vlc, 8);
}
+
+ vl_vlc_fillbits(&bs->vlc);
}
}
diff --git a/src/gallium/auxiliary/vl/vl_vlc.h b/src/gallium/auxiliary/vl/vl_vlc.h
index dc4faed..cf6156c 100644
--- a/src/gallium/auxiliary/vl/vl_vlc.h
+++ b/src/gallium/auxiliary/vl/vl_vlc.h
@@ -28,19 +28,23 @@
#ifndef vl_vlc_h
#define vl_vlc_h
-#include <assert.h>
-
#include "pipe/p_compiler.h"
#include "util/u_math.h"
#include "util/u_pointer.h"
+#include "util/u_debug.h"
struct vl_vlc
{
uint64_t buffer;
unsigned valid_bits;
- uint32_t *data;
- uint32_t *end;
+ const uint32_t *data;
+ const uint32_t *end;
+
+ unsigned num_inputs;
+ const void *const *inputs;
+ const unsigned *sizes;
+ unsigned bytes_left;
};
struct vl_vlc_entry
@@ -60,6 +64,9 @@ vl_vlc_init_table(struct vl_vlc_entry *dst, unsigned dst_size, const struct vl_v
{
unsigned i, bits = util_logbase2(dst_size);
+ assert(dst && dst_size);
+ assert(src && src_size);
+
for (i=0;i<dst_size;++i) {
dst[i].length = 0;
dst[i].value = 0;
@@ -72,42 +79,97 @@ vl_vlc_init_table(struct vl_vlc_entry *dst, unsigned dst_size, const struct vl_v
}
static INLINE void
+vl_vlc_next_input(struct vl_vlc *vlc)
+{
+ const uint8_t* data = vlc->inputs[0];
+ unsigned len = vlc->sizes[0];
+
+ assert(vlc);
+ assert(vlc->num_inputs);
+
+ vlc->bytes_left -= len;
+
+ /* align the data pointer */
+ while (len && pointer_to_uintptr(data) & 3) {
+ vlc->buffer |= (uint64_t)*data << (56 - vlc->valid_bits);
+ ++data;
+ --len;
+ vlc->valid_bits += 8;
+ }
+ vlc->data = (const uint32_t*)data;
+ vlc->end = (const uint32_t*)(data + len);
+
+ --vlc->num_inputs;
+ ++vlc->inputs;
+ ++vlc->sizes;
+}
+
+static INLINE void
vl_vlc_fillbits(struct vl_vlc *vlc)
{
- if (vlc->valid_bits < 32) {
- uint32_t value = *vlc->data;
+ assert(vlc);
- //assert(vlc->data <= vlc->end);
+ while (vlc->valid_bits < 32) {
+ /* if this input is depleted */
+ while (vlc->data >= vlc->end) {
+
+ if (vlc->num_inputs)
+ /* go on to next input */
+ vl_vlc_next_input(vlc);
+ else
+ /* or give up since we don't have anymore inputs */
+ return;
+ }
+
+ if (vlc->valid_bits < 32) {
+ /* we now have input, but still not enough bits in buffer */
+ uint32_t value = *vlc->data;
#ifndef PIPE_ARCH_BIG_ENDIAN
- value = util_bswap32(value);
+ value = util_bswap32(value);
#endif
- vlc->buffer |= (uint64_t)value << (32 - vlc->valid_bits);
- ++vlc->data;
- vlc->valid_bits += 32;
+ vlc->buffer |= (uint64_t)value << (32 - vlc->valid_bits);
+ ++vlc->data;
+ vlc->valid_bits += 32;
+
+ /* gone over the end of the buffer? */
+ if (vlc->data > vlc->end) {
+ unsigned bytes_overdue = ((uint8_t*)vlc->data)-((uint8_t*)vlc->end);
+
+ /* adjust valid bits */
+ vlc->valid_bits -= bytes_overdue * 8;
+
+ /* clear not valid bits */
+ vlc->buffer &= ~((1LL << (64 - vlc->valid_bits)) - 1);
+
+ /* set to buffers end */
+ vlc->data = vlc->end;
+ }
+ }
}
}
static INLINE void
-vl_vlc_init(struct vl_vlc *vlc, const uint8_t *data, unsigned len)
+vl_vlc_init(struct vl_vlc *vlc, unsigned num_inputs,
+ const void *const *inputs, const unsigned *sizes)
{
+ unsigned i;
+
assert(vlc);
- assert(data && len);
+ assert(num_inputs);
vlc->buffer = 0;
vlc->valid_bits = 0;
+ vlc->num_inputs = num_inputs;
+ vlc->inputs = inputs;
+ vlc->sizes = sizes;
+ vlc->bytes_left = 0;
- /* align the data pointer */
- while (pointer_to_uintptr(data) & 3) {
- vlc->buffer |= (uint64_t)*data << (56 - vlc->valid_bits);
- ++data;
- --len;
- vlc->valid_bits += 8;
- }
- vlc->data = (uint32_t*)data;
- vlc->end = (uint32_t*)(data + len);
+ for (i = 0; i < num_inputs; ++i)
+ vlc->bytes_left += sizes[i];
+ vl_vlc_next_input(vlc);
vl_vlc_fillbits(vlc);
vl_vlc_fillbits(vlc);
}
@@ -116,21 +178,21 @@ static INLINE unsigned
vl_vlc_bits_left(struct vl_vlc *vlc)
{
signed bytes_left = ((uint8_t*)vlc->end)-((uint8_t*)vlc->data);
+ bytes_left += vlc->bytes_left;
return bytes_left * 8 + vlc->valid_bits;
}
static INLINE unsigned
vl_vlc_peekbits(struct vl_vlc *vlc, unsigned num_bits)
{
- //assert(vlc->valid_bits >= num_bits);
-
+ assert(vlc->valid_bits >= num_bits || vlc->data >= vlc->end);
return vlc->buffer >> (64 - num_bits);
}
static INLINE void
vl_vlc_eatbits(struct vl_vlc *vlc, unsigned num_bits)
{
- //assert(vlc->valid_bits > num_bits);
+ assert(vlc->valid_bits >= num_bits);
vlc->buffer <<= num_bits;
vlc->valid_bits -= num_bits;
@@ -141,7 +203,7 @@ vl_vlc_get_uimsbf(struct vl_vlc *vlc, unsigned num_bits)
{
unsigned value;
- //assert(vlc->valid_bits >= num_bits);
+ assert(vlc->valid_bits >= num_bits);
value = vlc->buffer >> (64 - num_bits);
vl_vlc_eatbits(vlc, num_bits);
@@ -154,7 +216,7 @@ vl_vlc_get_simsbf(struct vl_vlc *vlc, unsigned num_bits)
{
signed value;
- //assert(vlc->valid_bits >= num_bits);
+ assert(vlc->valid_bits >= num_bits);
value = ((int64_t)vlc->buffer) >> (64 - num_bits);
vl_vlc_eatbits(vlc, num_bits);
@@ -167,7 +229,14 @@ vl_vlc_get_vlclbf(struct vl_vlc *vlc, const struct vl_vlc_entry *tbl, unsigned n
{
tbl += vl_vlc_peekbits(vlc, num_bits);
vl_vlc_eatbits(vlc, tbl->length);
+ assert(tbl->length);
return tbl->value;
}
+static INLINE void
+vl_vlc_bytealign(struct vl_vlc *vlc)
+{
+ vl_vlc_eatbits(vlc, vlc->valid_bits & 7);
+}
+
#endif /* vl_vlc_h */
--
1.7.5.4
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev