Hi guys, With some help from the nouveau team I managed to get video acceleration working for my nv96 card. The video buffer API works well enough for nouveau: I added flags to vl_video_buffer_create_ex so I could force a linear surface with a nouveau-specific resource flag, which I only specified when hardware that potentially supported hardware decoding was found. With the video buffer API, I only needed to specify that and I could get it to work. This made it easy for me; I only had to write code to talk to the decoder.
I'm less happy about the API for implementing the decoder. I know this is because there is no real support yet for other decoders, but I think the pipe_video_decode_buffer API is wrong right now. It assumes that the state tracker knows enough about how the decoder wants to interpret the macroblocks. The nouveau hardware decoder has to interpret them in its own way, so it needs a different API. I think the best thing would be to pass information about the macroblock with a pointer to the data blocks, and then let the decoder buffer decide how to interpret it. Also, is it the intention to only start decoding when XvMCPutSurface is called? If the reference surfaces are passed, I can start decoding in XvMCRenderSurface. I'd also like flush_buffer to be removed, with the video buffers passed to end_frame instead. Some of the methods of pipe_video_buffer also appear to be g3dvl-specific, so could they be split out? I was thinking of something like this for pipe_video_decode_buffer, with flush_buffer in the decoder gone: struct pipe_video_decode_buffer { struct pipe_video_decoder *decoder; /* Should not leak even when begin_frame was called */ void (*destroy)(struct pipe_video_decode_buffer *decbuf); void (*begin_frame)(struct pipe_video_decode_buffer *decbuf); /* *ONLY* called on bitstream acceleration, makes no sense to * call for XvMC, this allows it to be set to NULL */ void (*set_quant_matrix)(struct pipe_video_decode_buffer *decbuf, const uint8_t intra_matrix[64], const uint8_t non_intra_matrix[64]); /* Same story here */ void (*decode_bitstream)(struct pipe_video_decode_buffer *decbuf, unsigned num_bytes, const void *data, struct pipe_picture_desc *picture, unsigned num_ycbcr_blocks[3]); /* Can be NULL when bitstream acceleration is used. 
* Append a single macroblock to the list for decoding */ void (*decode_macroblock)(struct pipe_video_decode_buffer *decbuf, struct pipe_video_macroblock *mb, short *datablocks); /* If end_frame is not set, it means more macroblocks may be * queued after this, and this is just an intermediate render, * if it's beneficial to do so. Otherwise just return without * doing anything. */ void (*render_frame)(struct pipe_video_decode_buffer *decbuf, struct pipe_video_buffer *frames[3], bool end_frame); }; Comments are welcome. The functions I removed should probably just be moved to a g3dvl-specific struct vl_mpeg12_video_decode_buffer. If you feel like testing xvmc with a capable card, I put my tree at http://repo.or.cz/w/mesa/nouveau-pmpeg.git . I attached 2 patches: one cleans up xvmc/test_rendering.c, and the other allows me to specify a custom flag to force a linear surface. They should be mergeable right now. Special thanks to calim and mwk for their patience and help, and to jb17bsome for the original code which I based this on, even though this code is significantly different from the original. :) Cheers, Maarten
>From 254b659ad0f36039687e01c5ab37d9e2e4804ad7 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst <m.b.lankho...@gmail.com> Date: Fri, 29 Jul 2011 13:26:45 +0200 Subject: [PATCH 2/7] xvmc tests: Clean up test_rendering slightly --- .../xorg/xvmc/tests/test_rendering.c | 38 ++++++++++++------- 1 files changed, 24 insertions(+), 14 deletions(-) diff --git a/src/gallium/state_trackers/xorg/xvmc/tests/test_rendering.c b/src/gallium/state_trackers/xorg/xvmc/tests/test_rendering.c index 6058783..94ac7d4 100644 --- a/src/gallium/state_trackers/xorg/xvmc/tests/test_rendering.c +++ b/src/gallium/state_trackers/xorg/xvmc/tests/test_rendering.c @@ -40,8 +40,8 @@ #define MACROBLOCK_HEIGHT_IN_BLOCKS (MACROBLOCK_HEIGHT / BLOCK_HEIGHT) #define BLOCKS_PER_MACROBLOCK 6 -#define INPUT_WIDTH 16 -#define INPUT_HEIGHT 16 +#define INPUT_WIDTH 64 +#define INPUT_HEIGHT 64 #define INPUT_WIDTH_IN_MACROBLOCKS (INPUT_WIDTH / MACROBLOCK_WIDTH) #define INPUT_HEIGHT_IN_MACROBLOCKS (INPUT_HEIGHT / MACROBLOCK_HEIGHT) #define NUM_MACROBLOCKS (INPUT_WIDTH_IN_MACROBLOCKS * INPUT_HEIGHT_IN_MACROBLOCKS) @@ -51,7 +51,6 @@ #define DEFAULT_ACCEPTABLE_ERR 0.01 void ParseArgs(int argc, char **argv, unsigned int *output_width, unsigned int *output_height, double *acceptable_error, int *prompt); -void Gradient(short *block, unsigned int start, unsigned int stop, int horizontal); void ParseArgs(int argc, char **argv, unsigned int *output_width, unsigned int *output_height, double *acceptable_error, int *prompt) { @@ -59,7 +58,7 @@ void ParseArgs(int argc, char **argv, unsigned int *output_width, unsigned int * int i; *output_width = DEFAULT_OUTPUT_WIDTH; - *output_height = DEFAULT_OUTPUT_WIDTH; + *output_height = DEFAULT_OUTPUT_HEIGHT; *acceptable_error = DEFAULT_ACCEPTABLE_ERR; *prompt = 1; @@ -101,7 +100,7 @@ void ParseArgs(int argc, char **argv, unsigned int *output_width, unsigned int * ); } -void Gradient(short *block, unsigned int start, unsigned int stop, int horizontal) +static void Gradient(short 
*block, unsigned int start, unsigned int stop, int horizontal, unsigned int intra_unsigned) { unsigned int x, y; unsigned int range = stop - start; @@ -109,14 +108,22 @@ void Gradient(short *block, unsigned int start, unsigned int stop, int horizonta if (horizontal) { for (y = 0; y < BLOCK_HEIGHT; ++y) - for (x = 0; x < BLOCK_WIDTH; ++x) - block[y * BLOCK_WIDTH + x] = (short)(start + range * (x / (float)(BLOCK_WIDTH - 1))); + for (x = 0; x < BLOCK_WIDTH; ++x) { + *block = (short)(start + range * (x / (float)(BLOCK_WIDTH - 1))); + if (intra_unsigned) + *block += 1 << 10; + block++; + } } else { for (y = 0; y < BLOCK_HEIGHT; ++y) - for (x = 0; x < BLOCK_WIDTH; ++x) - block[y * BLOCK_WIDTH + x] = (short)(start + range * (y / (float)(BLOCK_HEIGHT - 1))); + for (x = 0; x < BLOCK_WIDTH; ++x) { + *block = (short)(start + range * (y / (float)(BLOCK_WIDTH - 1))); + if (intra_unsigned) + *block += 1 << 10; + block++; + } } } @@ -128,7 +135,7 @@ int main(int argc, char **argv) int prompt; Display *display; Window root, window; - const unsigned int mc_types[2] = {XVMC_MOCOMP | XVMC_MPEG_2, XVMC_IDCT | XVMC_MPEG_2}; + const unsigned int mc_types[] = {XVMC_MOCOMP | XVMC_MPEG_2}; XvPortID port_num; int surface_type_id; unsigned int is_overlay, intra_unsigned; @@ -153,7 +160,7 @@ int main(int argc, char **argv) INPUT_HEIGHT, XVMC_CHROMA_FORMAT_420, mc_types, - 2, + sizeof(mc_types)/sizeof(*mc_types), &port_num, &surface_type_id, &is_overlay, @@ -213,7 +220,8 @@ int main(int argc, char **argv) blocks, (short)(start + range * ((mbx * MACROBLOCK_WIDTH + bx * BLOCK_WIDTH) / (float)(INPUT_WIDTH - 1))), (short)(start + range * ((mbx * MACROBLOCK_WIDTH + bx * BLOCK_WIDTH + BLOCK_WIDTH - 1) / (float)(INPUT_WIDTH - 1))), - 1 + 1, + intra_unsigned ); blocks += BLOCK_SIZE; @@ -229,7 +237,8 @@ int main(int argc, char **argv) blocks, (short)(start + range * ((mbx * MACROBLOCK_WIDTH + bx * BLOCK_WIDTH) / (float)(INPUT_WIDTH - 1))), (short)(start + range * ((mbx * MACROBLOCK_WIDTH + bx * 
BLOCK_WIDTH + BLOCK_WIDTH - 1) / (float)(INPUT_WIDTH - 1))), - 1 + 1, + intra_unsigned ); blocks += BLOCK_SIZE; @@ -239,7 +248,8 @@ int main(int argc, char **argv) blocks, (short)(start + range * ((mbx * MACROBLOCK_WIDTH + bx * BLOCK_WIDTH) / (float)(INPUT_WIDTH - 1))), (short)(start + range * ((mbx * MACROBLOCK_WIDTH + bx * BLOCK_WIDTH + BLOCK_WIDTH - 1) / (float)(INPUT_WIDTH - 1))), - 1 + 1, + intra_unsigned ); blocks += BLOCK_SIZE; -- 1.7.6
>From eefb2ada3902cb4198b8f57acd57e2ec42402499 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst <m.b.lankho...@gmail.com> Date: Fri, 29 Jul 2011 13:30:45 +0200 Subject: [PATCH 3/7] g3dvl: Add resource flags to vl_video_buffer_create_ex Needed to make a linear texture for nouveau vpe acceleration --- src/gallium/auxiliary/vl/vl_mpeg12_decoder.c | 8 ++++---- src/gallium/auxiliary/vl/vl_video_buffer.c | 5 +++-- src/gallium/auxiliary/vl/vl_video_buffer.h | 2 +- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c index 61d947c..0aa02f7 100644 --- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c +++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c @@ -97,7 +97,7 @@ init_zscan_buffer(struct vl_mpeg12_buffer *buffer) dec->base.context, dec->blocks_per_line * BLOCK_WIDTH * BLOCK_HEIGHT, align(dec->num_blocks, dec->blocks_per_line) / dec->blocks_per_line, - 1, PIPE_VIDEO_CHROMA_FORMAT_444, formats, PIPE_USAGE_STATIC + 1, PIPE_VIDEO_CHROMA_FORMAT_444, formats, PIPE_USAGE_STATIC, 0 ); if (!buffer->zscan_source) @@ -737,7 +737,7 @@ init_idct(struct vl_mpeg12_decoder *dec, const struct format_config* format_conf dec->idct_source = vl_video_buffer_create_ex ( dec->base.context, dec->base.width / 4, dec->base.height, 1, - dec->base.chroma_format, formats, PIPE_USAGE_STATIC + dec->base.chroma_format, formats, PIPE_USAGE_STATIC, 0 ); if (!dec->idct_source) @@ -748,7 +748,7 @@ init_idct(struct vl_mpeg12_decoder *dec, const struct format_config* format_conf ( dec->base.context, dec->base.width / nr_of_idct_render_targets, dec->base.height / 4, nr_of_idct_render_targets, - dec->base.chroma_format, formats, PIPE_USAGE_STATIC + dec->base.chroma_format, formats, PIPE_USAGE_STATIC, 0 ); if (!dec->mc_source) @@ -794,7 +794,7 @@ init_mc_source_widthout_idct(struct vl_mpeg12_decoder *dec, const struct format_ dec->mc_source = vl_video_buffer_create_ex ( dec->base.context, dec->base.width, 
dec->base.height, 1, - dec->base.chroma_format, formats, PIPE_USAGE_STATIC + dec->base.chroma_format, formats, PIPE_USAGE_STATIC, 0 ); return dec->mc_source != NULL; diff --git a/src/gallium/auxiliary/vl/vl_video_buffer.c b/src/gallium/auxiliary/vl/vl_video_buffer.c index 4d8b664..f4e2d75 100644 --- a/src/gallium/auxiliary/vl/vl_video_buffer.c +++ b/src/gallium/auxiliary/vl/vl_video_buffer.c @@ -253,7 +253,7 @@ vl_video_buffer_create(struct pipe_context *pipe, result = vl_video_buffer_create_ex ( pipe, buffer_width, buffer_height, 1, - chroma_format, resource_formats, PIPE_USAGE_STATIC + chroma_format, resource_formats, PIPE_USAGE_STATIC, 0 ); if (result) result->buffer_format = buffer_format; @@ -266,7 +266,7 @@ vl_video_buffer_create_ex(struct pipe_context *pipe, unsigned width, unsigned height, unsigned depth, enum pipe_video_chroma_format chroma_format, const enum pipe_format resource_formats[VL_MAX_PLANES], - unsigned usage) + unsigned usage, unsigned resource_flags) { struct vl_video_buffer *buffer; struct pipe_resource templ; @@ -295,6 +295,7 @@ vl_video_buffer_create_ex(struct pipe_context *pipe, templ.array_size = 1; templ.bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET; templ.usage = usage; + templ.flags = resource_flags; buffer->resources[0] = pipe->screen->resource_create(pipe->screen, &templ); if (!buffer->resources[0]) diff --git a/src/gallium/auxiliary/vl/vl_video_buffer.h b/src/gallium/auxiliary/vl/vl_video_buffer.h index 291d15c..ec5a5b9 100644 --- a/src/gallium/auxiliary/vl/vl_video_buffer.h +++ b/src/gallium/auxiliary/vl/vl_video_buffer.h @@ -86,6 +86,6 @@ vl_video_buffer_create_ex(struct pipe_context *pipe, unsigned width, unsigned height, unsigned depth, enum pipe_video_chroma_format chroma_format, const enum pipe_format resource_formats[VL_MAX_PLANES], - unsigned usage); + unsigned usage, unsigned resource_flags); #endif /* vl_ycbcr_buffer_h */ -- 1.7.6
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev