Dňa 14.03.2012 19:49, Curtis wrote / napísal(a):
> I am using the vs 3 .net wrapper.
> When I run the function Recognize it ocrs the image fine and I can get
> the string.
> I need the confidence level of each character, but it is always 0.
> What am I doing wrong?
>
>
>
>         Dim image As New Bitmap("C:\MyImage.tif")
>         Dim ocr As New TesseractProcessor
>
>         ocr.Init(Nothing, "eng", False)
>         Console.WriteLine(ocr.Recognize(image))
>
>
>         ocr.InitForAnalysePage()
>         ocr.SetVariable("tessedit_thresholding_method", "1")
>         ocr.SetVariable("save_best_choices", "T")
>
>
>         Dim doc As DocumentLayout = ocr.AnalyseLayout(image)
>         For Each blk As OCR.TesseractWrapper.Block In doc.Blocks
>             Console.WriteLine("Block Confidence: " & blk.Confidence)
>
>
>             For Each para As Paragraph In blk.Paragraphs
>                 Console.WriteLine("para Confidence: " &
> para.Confidence)
>
>                 For Each ln As TextLine In para.Lines
>                     Console.WriteLine("ln Confidence: " &
> ln.Confidence)
>
>                     For Each wrd As Word In ln.Words
>                         Console.WriteLine("wrd Confidence: " &
> wrd.Confidence)
>                         Console.WriteLine("wrd Text: " & wrd.Text)
>
>                         For Each ch As Character In wrd.CharList
>                             Console.WriteLine("V:" & ch.Value)
>                             Console.WriteLine("C:" & ch.Confidence)
>                         Next
>
>                     Next
>
>                 Next
>             Next
>         Next
>
Hi,

I am not familiar with .net, so I cannot help you directly.

It looks like the .net wrapper has not been updated for quite a long time
(revision 590 without 3.01 code)...
Anyway, if somebody is interested in character confidence, they can try to
use (in C++) GetComponentImages with tesseract::RIL_SYMBOL together with
SetPageSegMode(tesseract::PSM_SINGLE_CHAR). A simple test file is attached.
Tested with the 3.02 (svn) code.

Zdenko

-- 
You received this message because you are subscribed to the Google
Groups "tesseract-ocr" group.
To post to this group, send email to tesseract-ocr@googlegroups.com
To unsubscribe from this group, send email to
tesseract-ocr+unsubscr...@googlegroups.com
For more options, visit this group at
http://groups.google.com/group/tesseract-ocr?hl=en
/*
  compile:
  $ g++ test_confidence.cpp -I/usr/local/include/tesseract/ -I/usr/include/leptonica/ \
    -ltesseract -llept -o test_confidence
  run:
  $ ./test_confidence
*/

#include <cstdio>
#include <cstring>

#include <baseapi.h>
#include <allheaders.h>

int main() {
    Pix *image;
    BOX *box;
    l_int32 i, nwords;
    char* outText;

    tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI();
    if (api->Init(NULL, "eng")) {
        fprintf(stderr, "Could not initialize tesseract.\n");
        exit(1);
    }

    image = pixRead("/usr/src/tesseract-3.02/phototest.tif");
    api->SetImage(image);

    // split image to symbols
    Boxa* boxes =  api->GetComponentImages(tesseract::RIL_SYMBOL, true,
                                           NULL, NULL);
    api->SetPageSegMode(tesseract::PSM_SINGLE_CHAR);

    nwords = boxaGetCount(boxes);
    printf("Boxa count: %d\n", nwords);
    for (i = 0; i < nwords; i++) {
        box = boxaGetBox(boxes, i, L_CLONE);
        api->SetRectangle(box->x, box->y, box->w, box->h);
        outText = api->GetUTF8Text();
        // remove "\n" from outText
        outText[strcspn(outText, "\n")] = '\0';
        int conf = api->MeanTextConf();
        printf("Box[%d]: x=%d, y=%d, string='%s', confidence: %d\n",
               i, box->x, box->y, outText, conf);
    }

    api->Clear();
    api->End();
    delete [] outText;
    pixDestroy(&image);
    return 0;
}

Reply via email to