On 2021-06-17 20:28, Lingjiang Fang wrote:
Fix bugs in the previous patch; ping for review.
---
  doc/filters.texi     |  7 +++++++
  libavfilter/vf_ocr.c | 35 ++++++++++++++++++++++++++++++++++-
  2 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/doc/filters.texi b/doc/filters.texi
index da8f7d7726..a955cf46e0 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -15451,6 +15451,13 @@ Set character whitelist.
@item blacklist
  Set character blacklist.
+
+@item x, y
+Set top point position of subregion, not support expression now

This wording isn't idiomatic, and the documentation should state what the option accepts, not what it doesn't.

Change it to:

    Set the position of the top-left corner of the subregion, in pixels.


+
+@item w, h
+Set width and height of subregion, in pixels. 0 (the default) extends the subregion to the right/bottom edge of the frame.
+
  @end table
The filter exports recognized text as the frame metadata @code{lavfi.ocr.text}.
diff --git a/libavfilter/vf_ocr.c b/libavfilter/vf_ocr.c
index 6de474025a..e96dce2d87 100644
--- a/libavfilter/vf_ocr.c
+++ b/libavfilter/vf_ocr.c
@@ -33,6 +33,8 @@ typedef struct OCRContext {
      char *language;
      char *whitelist;
      char *blacklist;
+    int x, y;
+    int w, h;
TessBaseAPI *tess;
  } OCRContext;
@@ -45,6 +47,10 @@ static const AVOption ocr_options[] = {
      { "language",  "set language",            OFFSET(language),  AV_OPT_TYPE_STRING, 
{.str="eng"}, 0, 0, FLAGS },
      { "whitelist", "set character whitelist", OFFSET(whitelist), AV_OPT_TYPE_STRING, 
{.str="0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.:;,-+_!?\"'[]{}()<>|/\\=*&%$#@!~ 
"}, 0, 0, FLAGS },
      { "blacklist", "set character blacklist", OFFSET(blacklist), AV_OPT_TYPE_STRING, 
{.str=""},    0, 0, FLAGS },
+    { "x",         "top x of sub region",     OFFSET(x),         
AV_OPT_TYPE_INT,    {.i64=0},     0, INT_MAX, FLAGS },
+    { "y",         "top y of sub region",     OFFSET(y),         
AV_OPT_TYPE_INT,    {.i64=0},     0, INT_MAX, FLAGS },
+    { "w",         "width of sub region",     OFFSET(w),         
AV_OPT_TYPE_INT,    {.i64=0},     0, INT_MAX, FLAGS },
+    { "h",         "height of sub region",    OFFSET(h),         
AV_OPT_TYPE_INT,    {.i64=0},     0, INT_MAX, FLAGS },
      { NULL }
  };
@@ -93,6 +99,21 @@ static int query_formats(AVFilterContext *ctx)
      return ff_set_common_formats(ctx, fmts_list);
  }
+static void check_fix(int *x, int *y, int *w, int *h, int pic_w, int pic_h)
+{
+    // 0 <= x < pic_w
+    if (*x >= pic_w)
+        *x = 0;
+    // 0 <= y < pic_h
+    if (*y >= pic_h)
+        *y = 0;
+
+    if (*w == 0 || *w + *x > pic_w)
+        *w = pic_w - *x;
+    if (*h == 0 || *h + *y > pic_h)
+        *h = pic_h - *y;
+}
+
  static int filter_frame(AVFilterLink *inlink, AVFrame *in)
  {
      AVDictionary **metadata = &in->metadata;
@@ -102,8 +123,20 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
      char *result;
      int *confs;
+ // TODO: support expression
+    int x = s->x;
+    int y = s->y;
+    int w = s->w;
+    int h = s->h;
+    check_fix(&x, &y, &w, &h, in->width, in->height);
+    if ( x != s->x || y != s->y  ||
+        (s->w != 0 && w != s->w) || (s->h != 0 && h != s->h)) {
+        av_log(s, AV_LOG_WARNING, "config error, subregion changed to x=%d, y=%d, 
w=%d, h=%d\n",
+                                                                    x, y, w, 
h);
+    }
+
      result = TessBaseAPIRect(s->tess, in->data[0], 1,
-                             in->linesize[0], 0, 0, in->width, in->height);
+                             in->linesize[0], x, y, w, h);
      confs = TessBaseAPIAllWordConfidences(s->tess);
      av_dict_set(metadata, "lavfi.ocr.text", result, 0);
      for (int i = 0; confs[i] != -1; i++) {

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

Reply via email to