Using the C++ API, we can extract each character recognized in the image using the following program:
``` #include <tesseract/baseapi.h> #include <leptonica/allheaders.h> int main() { char *outText; tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI(); // Initialize tesseract-ocr with English, without specifying tessdata path if (api->Init(NULL, "eng")) { fprintf(stderr, "Could not initialize tesseract.\n"); exit(1); } Pix *image = pixRead("/usr/src/tesseract/testing/phototest.tif"); api->SetImage(image); api->Recognize(0); tesseract::ResultIterator* ri = api->GetIterator(); tesseract::PageIteratorLevel level = tesseract:: PSM_SINGLE_CHAR ; if (ri != 0) { do { const char* word = ri->GetUTF8Text(level); float conf = ri->Confidence(level); int x1, y1, x2, y2; ri->BoundingBox(level, &x1, &y1, &x2, &y2); printf("word: '%s'; \tconf: %.2f; BoundingBox: %d,%d,%d,%d;\n", word, conf, x1, y1, x2, y2); delete[] word; } while (ri->Next(level)); } // Destroy used object and release memory api->End(); delete api; delete [] outText; pixDestroy(&image); return 0; } ``` This will only report a single recognized character though. However, I would assume that in the backend, there can be multiple possible matches for a position and only the most likely one is reported in the code above. Is there a way to get all possible matches for an image? -- You received this message because you are subscribed to the Google Groups "tesseract-ocr" group. To unsubscribe from this group and stop receiving emails from it, send an email to tesseract-ocr+unsubscr...@googlegroups.com. To view this discussion on the web visit https://groups.google.com/d/msgid/tesseract-ocr/5ca0c728-9e29-4fa3-b459-e1d6eee84963n%40googlegroups.com.