I have attached an updated patch.

On Thu, 2009-12-10 at 19:52 +0000, Andre Draszik wrote:
> Hi,
> 
> Using libjpeg's raw decode facility, we can make it output planar YUV
> 4:4:4 (the new DSPF_YUV444P) or 4:2:0 (DSPF_I420) and then use the
> graphics card to do the YUV->RGB conversion.
> 
> This will greatly increase speed when this type of acceleration is
> available in the gfxdriver.
> 
> 
> Andre'
> 

From a6558a896ac28675e593487cf26d20fb74067a67 Mon Sep 17 00:00:00 2001
From: =?utf-8?q?Andr=C3=A9=20Draszik?= <andre.dras...@st.com>
Date: Thu, 3 Dec 2009 04:34:56 +0000
Subject: [PATCH 1/1] jpeg: implement raw libjpeg decode for possible HW acceleration
MIME-Version: 1.0
Content-Type: text/plain; charset=utf-8
Content-Transfer-Encoding: 8bit

Using libjpeg's raw decode facility, we can make it output planar YUV 4:4:4
or 4:2:0 and then use the graphics card to do the YUV->RGB conversion.

This will greatly increase speed when this type of acceleration is available in the
gfxdriver.

Signed-off-by: André Draszik <andre.dras...@st.com>
---
 .../idirectfbimageprovider_jpeg.c                  |  346 +++++++++++++++++++-
 1 files changed, 328 insertions(+), 18 deletions(-)

diff --git a/interfaces/IDirectFBImageProvider/idirectfbimageprovider_jpeg.c b/interfaces/IDirectFBImageProvider/idirectfbimageprovider_jpeg.c
index 205ceca..80fa794 100644
--- a/interfaces/IDirectFBImageProvider/idirectfbimageprovider_jpeg.c
+++ b/interfaces/IDirectFBImageProvider/idirectfbimageprovider_jpeg.c
@@ -72,6 +72,8 @@ Construct( IDirectFBImageProvider *thiz,
 
 DIRECT_INTERFACE_IMPLEMENTATION( IDirectFBImageProvider, JPEG )
 
+D_DEBUG_DOMAIN( JPEG, "image/jpeg", "ImageProvider: JPEG");
+
 /*
  * private data struct of IDirectFBImageProvider_JPEG
  */
@@ -91,6 +93,8 @@ typedef struct {
      int                  image_height; /*  height of image data  */
 
      CoreDFB             *core;
+
+     CoreSurface         *decode_surface;
 } IDirectFBImageProvider_JPEG_data;
 
 static DirectResult
@@ -368,6 +372,9 @@ IDirectFBImageProvider_JPEG_Destruct( IDirectFBImageProvider *thiz )
      if (data->image)
           D_FREE( data->image );
 
+     if (data->decode_surface)
+          dfb_surface_unref( data->decode_surface );
+
      DIRECT_DEALLOCATE_INTERFACE( thiz );
 }
 
@@ -393,6 +400,20 @@ IDirectFBImageProvider_JPEG_Release( IDirectFBImageProvider *thiz )
      return DFB_OK;
 }
 
+static void
+JPEG_stretchblit (CardState    *state,
+                  DFBRectangle *src_rect,
+                  DFBRectangle *dst_rect)
+{
+  D_DEBUG_AT (JPEG, "StretchBlit %dx%d -> %dx%d (%s -> %s)\n",
+              src_rect->w, src_rect->h, dst_rect->w, dst_rect->h,
+              dfb_pixelformat_name (state->source->config.format),
+              dfb_pixelformat_name (state->destination->config.format));
+
+  /* hand off to the gfxcard layer (note: state is its LAST argument); per the original author it does a simple blit if possible */
+  dfb_gfxcard_stretchblit (src_rect, dst_rect, state);
+}
+
 static DFBResult
 IDirectFBImageProvider_JPEG_RenderTo( IDirectFBImageProvider *thiz,
                                       IDirectFBSurface       *destination,
@@ -405,8 +426,9 @@ IDirectFBImageProvider_JPEG_RenderTo( IDirectFBImageProvider *thiz,
      DFBSurfacePixelFormat  format;
      IDirectFBSurface_data *dst_data;
      CoreSurface           *dst_surface;
-     CoreSurfaceBufferLock  lock;
+     CoreSurfaceBufferLock  lock = { .pitch = 0 };
      DIRenderCallbackResult cb_result = DIRCR_OK;
+     bool                   try_raw = true;
 
      DIRECT_INTERFACE_GET_DATA(IDirectFBImageProvider_JPEG)
 
@@ -439,10 +461,6 @@ IDirectFBImageProvider_JPEG_RenderTo( IDirectFBImageProvider *thiz,
           rect = dst_data->area.wanted;
      }
 
-     ret = dfb_surface_lock_buffer( dst_surface, CSBR_BACK, CSAID_CPU, CSAF_WRITE, &lock );
-     if (ret)
-          return ret;
-
      if (data->image &&
          (rect.x || rect.y || rect.w != data->image_width || rect.h != data->image_height)) {
            D_FREE( data->image );
@@ -452,7 +470,8 @@ IDirectFBImageProvider_JPEG_RenderTo( IDirectFBImageProvider *thiz,
      }
 
      /* actual loading and rendering */
-     if (!data->image) {
+     if (!data->image
+         && !data->decode_surface) {
           struct jpeg_decompress_struct cinfo;
           struct my_error_mgr jerr;
           JSAMPARRAY buffer;      /* Output row buffer */
@@ -464,6 +483,7 @@ IDirectFBImageProvider_JPEG_RenderTo( IDirectFBImageProvider *thiz,
           cinfo.err = jpeg_std_error(&jerr.pub);
           jerr.pub.error_exit = jpeglib_panic;
 
+          /* we don't get here in case of 'raw decode' errors */
           if (setjmp(jerr.setjmp_buffer)) {
                D_ERROR( "ImageProvider/JPEG: Error during decoding!\n" );
 
@@ -472,7 +492,8 @@ IDirectFBImageProvider_JPEG_RenderTo( IDirectFBImageProvider *thiz,
                if (data->image) {
                     dfb_scale_linear_32( data->image, data->image_width, data->image_height,
                                          lock.addr, lock.pitch, &rect, dst_surface, &clip );
-                    dfb_surface_unlock_buffer( dst_surface, &lock );
+                    if (lock.pitch)
+                         dfb_surface_unlock_buffer( dst_surface, &lock );
                     if (data->render_callback) {
                          DFBRectangle r = { 0, 0, data->image_width, data->image_height };
 
@@ -482,12 +503,13 @@ IDirectFBImageProvider_JPEG_RenderTo( IDirectFBImageProvider *thiz,
 
                     return DFB_INCOMPLETE;
                }
-               else
+               else if (lock.pitch)
                     dfb_surface_unlock_buffer( dst_surface, &lock );
 
                return DFB_FAILURE;
           }
 
+restart:
           jpeg_create_decompress(&cinfo);
           jpeg_buffer_src(&cinfo, data->buffer, 0);
           jpeg_read_header(&cinfo, TRUE);
@@ -501,10 +523,7 @@ IDirectFBImageProvider_JPEG_RenderTo( IDirectFBImageProvider *thiz,
 #endif
           jpeg_calc_output_dimensions(&cinfo);
 
-          if (cinfo.output_width == rect.w && cinfo.output_height == rect.h) {
-               direct = true;
-          }
-          else if (rect.x == 0 && rect.y == 0) {
+          if (rect.x == 0 && rect.y == 0) {
 #if JPEG_LIB_VERSION >= 70
                cinfo.scale_num = 16;
                while (cinfo.scale_num > 1) {
@@ -526,6 +545,267 @@ IDirectFBImageProvider_JPEG_RenderTo( IDirectFBImageProvider *thiz,
 
           cinfo.output_components = 3;
 
+          data->image_width = cinfo.output_width;
+          data->image_height = cinfo.output_height;
+
+          /* can we do a raw decode? */
+          if (cinfo.num_components == 3
+              && cinfo.jpeg_color_space == JCS_YCbCr
+              && try_raw) {
+               CoreSurfaceConfig config;
+               int pitch_y, pitch_cb, pitch_cr;
+               int offset_cb, offset_cr;
+
+               pitch_cb = (cinfo.cur_comp_info[1]->downsampled_width + 7L) & ~7L;
+               pitch_cr = (cinfo.cur_comp_info[2]->downsampled_width + 7L) & ~7L;
+
+               if (cinfo.max_h_samp_factor == 1
+                   && cinfo.max_v_samp_factor == 1
+                   && cinfo.cur_comp_info[0]->h_samp_factor == 1
+                   && cinfo.cur_comp_info[0]->v_samp_factor == 1
+                   && cinfo.cur_comp_info[1]->h_samp_factor == 1
+                   && cinfo.cur_comp_info[1]->v_samp_factor == 1
+                   && cinfo.cur_comp_info[2]->h_samp_factor == 1
+                   && cinfo.cur_comp_info[2]->v_samp_factor == 1) {
+                    D_DEBUG_AT (JPEG, "doing raw YCbCr 4:4:4 JPEG decode\n");
+                    config.format = DSPF_YUV444P;
+                    config.size.w = (data->image_width + 7L) & ~7L;
+                    config.size.h = (data->image_height + 7L) & ~7L;
+                    pitch_y = (cinfo.cur_comp_info[0]->downsampled_width + 7L) & ~7L;
+                    offset_cb = pitch_y * config.size.h;
+                    offset_cr = pitch_y * config.size.h;
+               }
+               else if (cinfo.max_h_samp_factor == 2
+                   && cinfo.max_v_samp_factor == 2
+                   && cinfo.cur_comp_info[0]->h_samp_factor == 2
+                   && cinfo.cur_comp_info[0]->v_samp_factor == 2
+                   && cinfo.cur_comp_info[1]->h_samp_factor == 1
+                   && cinfo.cur_comp_info[1]->v_samp_factor == 1
+                   && cinfo.cur_comp_info[2]->h_samp_factor == 1
+                   && cinfo.cur_comp_info[2]->v_samp_factor == 1) {
+                    D_DEBUG_AT (JPEG, "doing raw YCbCr 4:2:0 JPEG decode\n");
+                    config.format = DSPF_I420;
+                    config.size.w = (data->image_width + 15L) & ~15L;
+                    config.size.h = (data->image_height + 15L) & ~15L;
+                    pitch_y = (cinfo.cur_comp_info[0]->downsampled_width + 15L) & ~15L;
+                    offset_cb = pitch_y * config.size.h;
+                    offset_cr = pitch_cb * config.size.h/2;
+               }
+#if 0
+               else if (cinfo.max_h_samp_factor == 2
+                   && cinfo.max_v_samp_factor == 1
+                   && cinfo.cur_comp_info[0]->h_samp_factor == 2
+                   && cinfo.cur_comp_info[0]->v_samp_factor == 1
+                   && cinfo.cur_comp_info[1]->h_samp_factor == 1
+                   && cinfo.cur_comp_info[1]->v_samp_factor == 1
+                   && cinfo.cur_comp_info[2]->h_samp_factor == 1
+                   && cinfo.cur_comp_info[2]->v_samp_factor == 1) {
+                    D_DEBUG_AT (JPEG, "doing raw YCbCr 4:2:2 JPEG decode\n");
+                    config.format = DSPF_YUV422P;
+                    config.size.w = (data->image_width + 15L) & ~15L;
+                    config.size.h = (data->image_height + 15L) & ~15L;
+                    pitch_y = (cinfo.cur_comp_info[0]->downsampled_width + 15L) & ~15L;
+                    offset_cb = pitch_y * config.size.h;
+                    offset_cr = pitch_cb * config.size.h/2;
+               }
+#endif
+               else
+                    goto pure_software;
+
+
+               /* yes, we can handle this raw format! */
+               cinfo.raw_data_out = true;
+
+               CoreSurfaceBufferLock lock2;
+
+               D_DEBUG_AT (JPEG, "  -> output w/h: %d/%d "
+                           "downsampled(0,1,2) w/h: %d/%d %d/%d %d/%d "
+                           "pitches y/cb/cr: %d/%d/%d\n",
+                           data->image_width, data->image_height,
+                           cinfo.cur_comp_info[0]->downsampled_width,
+                           cinfo.cur_comp_info[0]->downsampled_height,
+                           cinfo.cur_comp_info[1]->downsampled_width,
+                           cinfo.cur_comp_info[1]->downsampled_height,
+                           cinfo.cur_comp_info[2]->downsampled_width,
+                           cinfo.cur_comp_info[2]->downsampled_height,
+                           pitch_y, pitch_cb, pitch_cr);
+
+               config.flags = CSCONF_SIZE | CSCONF_FORMAT | CSCONF_CAPS;
+               config.caps  = DSCAPS_VIDEOONLY;
+               #ifdef DIRECT_BUILD_DEBUG
+               config.caps |= DSCAPS_SHARED;
+               #endif
+               if (dfb_surface_create (data->core,
+                                       &config,
+                                       CSTF_NONE,
+                                       0,
+                                       NULL,
+                                       &data->decode_surface)) {
+                    D_ERROR ("failed to create temporary decode surface\n");
+                    goto pure_software;
+               }
+
+               ret = dfb_surface_lock_buffer (data->decode_surface,
+                                              CSBR_BACK,
+                                              CSAID_CPU, CSAF_WRITE, &lock2);
+               if (ret) {
+                    dfb_surface_unref (data->decode_surface);
+                    data->decode_surface = NULL;
+                    goto pure_software;
+               }
+
+               /* Worst case, 2x2 chroma subsampling where an MCU is
+                  16 lines. libjpeg won't decode more than one MCU in one
+                  go. */
+#define MAX_MCULINES 16
+               JSAMPROW Yrows[MAX_MCULINES];
+               JSAMPROW Cbrows[MAX_MCULINES];
+               JSAMPROW Crrows[MAX_MCULINES];
+
+               JSAMPARRAY jpeg_buffer[3] = {
+                    [0] = Yrows,
+                    [1] = Cbrows,
+                    [2] = Crrows,
+               };
+
+               /* Initialize the various pointers to build a planar YUV
+                  buffer. */
+               void *yaddr  = lock2.addr;
+               void *cbaddr = yaddr  + offset_cb;
+               void *craddr = cbaddr + offset_cr;
+               int   l;
+
+               for (l = 0; l < MAX_MCULINES; ++l) {
+                    Yrows[l]  = yaddr  + l * pitch_y;
+                    Cbrows[l] = cbaddr + l * pitch_cb;
+                    Crrows[l] = craddr + l * pitch_cr;
+               }
+
+               jpeg_start_decompress (&cinfo);
+
+               /* init a state, so that we can use gfxcard/blit to convert
+                  YUV to requested destination format */
+               CardState state;
+               dfb_state_init (&state, data->core);
+               dfb_state_set_source (&state, data->decode_surface);
+               dfb_state_set_destination (&state, dst_surface);
+               dfb_state_set_clip (&state, &clip);
+
+               while (cinfo.output_scanline < data->image_height
+                      && cb_result == DIRCR_OK) {
+                    int x = jpeg_read_raw_data (&cinfo, jpeg_buffer,
+                                                MAX_MCULINES);
+                    if (x <= 0)
+                         /* Actually, x == 0 means that we don't have enough
+                            data to continue decoding the picture. */
+                         break;
+
+                    D_DEBUG_AT (JPEG, "  -> decoded %d scanlines (out of %d)\n",
+                                cinfo.output_scanline, data->image_height);
+
+                    for (l = 0; l < MAX_MCULINES ; l++) {
+                         Yrows[l]  += ((x * cinfo.cur_comp_info[0]->h_samp_factor) / cinfo.max_h_samp_factor) * pitch_y;
+                         Cbrows[l] += ((x * cinfo.cur_comp_info[1]->h_samp_factor) / cinfo.max_h_samp_factor) * pitch_cb;
+                         Crrows[l] += ((x * cinfo.cur_comp_info[2]->h_samp_factor) / cinfo.max_h_samp_factor) * pitch_cr;
+                    }
+
+                    if (data->render_callback) {
+                         DFBRectangle src_rect = {
+                              .x = 0,
+                              .y = cinfo.output_scanline - x,
+                              .w = data->image_width,
+                              .h = x,
+                         };
+                         DFBRectangle r = src_rect;
+                         float factor = (rect.h
+                                         / (float) data->image_height);
+                         DFBRectangle dst_rect = {
+                              .x = rect.x,
+                              .y = (int) (src_rect.y * factor),
+                              .w = rect.w,
+                              .h = (int) (src_rect.h * factor),
+                         };
+
+                         D_DEBUG_AT (JPEG, "  -> render callback %d,%d %dx%d -> %d,%d %dx%d\n",
+                                     src_rect.x, src_rect.y, src_rect.w,
+                                     src_rect.h, dst_rect.x, dst_rect.y,
+                                     dst_rect.w, dst_rect.h);
+
+                         JPEG_stretchblit (&state, &src_rect, &dst_rect);
+
+                         cb_result = data->render_callback (&r,
+                                                            data->render_callback_context);
+                    }
+               }
+
+               D_DEBUG_AT (JPEG, "  -> decoded %d scanlines (out of %d)\n",
+                           cinfo.output_scanline, data->image_height);
+
+               if (cinfo.output_scanline < data->image_height
+                   || cb_result != DIRCR_OK) {
+                    if (cb_result != DIRCR_OK)
+                         D_ERROR ("raw decode failed after %d of %d scanlines, "
+                                  "trying pure software\n",
+                                  cinfo.output_scanline, cinfo.output_height);
+                    jpeg_abort_decompress (&cinfo);
+                    jpeg_destroy_decompress (&cinfo);
+                    dfb_surface_unlock_buffer (data->decode_surface, &lock2);
+                    dfb_surface_unref (data->decode_surface);
+                    data->decode_surface = NULL;
+
+                    dfb_state_set_source (&state, NULL);
+                    dfb_state_set_destination (&state, NULL);
+                    dfb_state_destroy (&state);
+
+                    if (cb_result != DIRCR_OK)
+                         return DFB_INTERRUPTED;
+
+                    try_raw = false;
+                    goto restart;
+               }
+
+               jpeg_finish_decompress (&cinfo);
+               jpeg_destroy_decompress (&cinfo);
+
+               dfb_surface_unlock_buffer (data->decode_surface, &lock2);
+
+               /* use DFB to convert raw YUV to destination format, and
+                  apply any necessary additional clip/stretch */
+               {
+               DFBRectangle src_rect, r;
+
+               src_rect.x = 0;
+               src_rect.y = 0;
+               src_rect.w = data->image_width;
+               src_rect.h = cinfo.output_height;
+               r = src_rect;
+
+               JPEG_stretchblit (&state, &src_rect, &rect);
+
+               /* remove the state */
+               dfb_state_set_source (&state, NULL);
+               dfb_state_set_destination (&state, NULL);
+               dfb_state_destroy (&state);
+
+               if (data->render_callback)
+                    data->render_callback (&src_rect,
+                                           data->render_callback_context);
+               }
+
+               return DFB_OK;
+          }
+
+pure_software:
+          ret = dfb_surface_lock_buffer( dst_surface, CSBR_BACK, CSAID_CPU, CSAF_WRITE, &lock );
+          if (ret) {
+               jpeg_abort_decompress(&cinfo);
+               jpeg_destroy_decompress(&cinfo);
+               return ret;
+          }
+
+          if (cinfo.output_width == rect.w && cinfo.output_height == rect.h)
+               direct = true;
+
           switch (dst_surface->config.format) {
                case DSPF_NV16:
                     uv_offset = dst_surface->config.size.h * lock.pitch;
@@ -546,9 +826,6 @@ IDirectFBImageProvider_JPEG_RenderTo( IDirectFBImageProvider *thiz,
 
           jpeg_start_decompress(&cinfo);
 
-          data->image_width = cinfo.output_width;
-          data->image_height = cinfo.output_height;
-
           row_stride = cinfo.output_width * 3;
 
           buffer = (*cinfo.mem->alloc_sarray)((j_common_ptr) &cinfo,
@@ -618,17 +895,51 @@ IDirectFBImageProvider_JPEG_RenderTo( IDirectFBImageProvider *thiz,
                jpeg_finish_decompress(&cinfo);
           }
           jpeg_destroy_decompress(&cinfo);
+
+          dfb_surface_unlock_buffer( dst_surface, &lock );
+     }
+     else if (data->decode_surface) {
+          CardState    state;
+          DFBRectangle src_rect = {
+               .x = 0,
+               .y = 0,
+               .w = data->image_width,
+               .h = data->image_height
+          };
+
+          /* use DFB to convert raw YUV to destination format, and
+             apply any necessary additional clip/stretch */
+          dfb_state_init (&state, data->core);
+          dfb_state_set_source (&state, data->decode_surface);
+          dfb_state_set_destination (&state, dst_surface);
+          dfb_state_set_clip (&state, &clip);
+
+          JPEG_stretchblit (&state, &src_rect, &rect);
+
+          /* remove the state */
+          dfb_state_set_source (&state, NULL);
+          dfb_state_set_destination (&state, NULL);
+          dfb_state_destroy (&state);
+
+          if (data->render_callback) {
+               DFBRectangle r = { 0, 0, data->image_width, data->image_height };
+               data->render_callback (&r, data->render_callback_context);
+          }
      }
      else {
+          ret = dfb_surface_lock_buffer( dst_surface, CSBR_BACK, CSAID_CPU, CSAF_WRITE, &lock );
+          if (ret)
+            return ret;
+
           dfb_scale_linear_32( data->image, data->image_width, data->image_height,
                                lock.addr, lock.pitch, &rect, dst_surface, &clip );
           if (data->render_callback) {
                DFBRectangle r = { 0, 0, data->image_width, data->image_height };
                data->render_callback( &r, data->render_callback_context );
           }
-     }
 
-     dfb_surface_unlock_buffer( dst_surface, &lock );
+          dfb_surface_unlock_buffer( dst_surface, &lock );
+     }
 
      if (cb_result != DIRCR_OK)
          return DFB_INTERRUPTED;
@@ -676,4 +987,3 @@ IDirectFBImageProvider_JPEG_GetImageDescription( IDirectFBImageProvider *thiz,
 
      return DFB_OK;
 }
-
-- 
1.6.3.3

_______________________________________________
directfb-dev mailing list
directfb-dev@directfb.org
http://mail.directfb.org/cgi-bin/mailman/listinfo/directfb-dev

Reply via email to