• C++实例 调用Tesseract OCR的API


    1. 前言

    Tesseract OCR支持不同调用方式(详情请看具体实例),同一种调用方式也可以设置不同模式。
    调用方法或模式不同,对OCR识别结果的精度有一定影响。模式设置不同,输出的结果格式也不一致。
    实际项目中,需要根据需求比较各方法的优劣从而选择最合适的。

    2. 模式

    枚举类型定义
    // Levels of the page layout hierarchy at which results can be iterated.
    // Each level is nested within the one listed before it
    // (block > paragraph > text line > word > symbol).
    enum PageIteratorLevel {
      RIL_BLOCK,    // Block of text/image/separator line.
      RIL_PARA,     // Paragraph within a block.
      RIL_TEXTLINE, // Line within a paragraph.
      RIL_WORD,     // Word within a textline.
      RIL_SYMBOL    // Symbol/character within a word.
    };
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8

    RIL_BLOCK:把原稿分割成不同区域,按区域识别文字,OCR结果是每个区域的字符串
    RIL_TEXTLINE:按行识别文字,OCR结果是一行一行的字符串
    RIL_WORD: 按单词识别文字,OCR结果是一个一个的单词
    RIL_SYMBOL:按字符识别文字,OCR结果是一个一个的字符

    3. 调用方式:C++ Examples【转自官网】

    C++API实例:https://tesseract-ocr.github.io/tessdoc/Examples_C++.html
    API实例:https://tesseract-ocr.github.io/tessdoc/#api-examples

    注意
    如果C++的实例代码编译不通过,可能是需要添加以下头文件

    #include 
    
    • 1

    3.1 Basic_example

    #include <tesseract/baseapi.h>
    #include <leptonica/allheaders.h>
    
    // Basic example: run OCR on a whole image and print the recognized text.
    // Returns 0 on success, 1 if Tesseract cannot be initialized or the image
    // cannot be loaded.
    int main()
    {
        tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI();
        // Initialize tesseract-ocr with English, without specifying tessdata path
        if (api->Init(nullptr, "eng")) {
            fprintf(stderr, "Could not initialize tesseract.\n");
            delete api;
            return 1;
        }

        // Open input image with leptonica library
        Pix *image = pixRead("/usr/src/tesseract/testing/phototest.tif");
        if (image == nullptr) {
            // Passing a null image on to SetImage would crash; bail out instead.
            fprintf(stderr, "Could not read input image.\n");
            api->End();
            delete api;
            return 1;
        }
        api->SetImage(image);

        // Get OCR result; the caller owns the returned buffer
        char *outText = api->GetUTF8Text();
        if (outText != nullptr) {
            printf("OCR output:\n%s", outText);
        } else {
            fprintf(stderr, "OCR produced no text.\n");
        }

        // Destroy used object and release memory
        api->End();
        delete api;
        delete [] outText;   // delete[] on a null pointer is a no-op
        pixDestroy(&image);

        return 0;
    }
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29

    3.2 SetRectangle_example

    如果只想识别特定区域的文字,可以用这个方法。需提前设定指定区域的坐标。

    #include <tesseract/baseapi.h>
    #include <leptonica/allheaders.h>
    
    // SetRectangle example: restrict OCR to one rectangular region of the image
    // and print the text found there.
    // Returns 0 on success, 1 if Tesseract cannot be initialized or the image
    // cannot be loaded.
    int main()
    {
        tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI();
        // Initialize tesseract-ocr with English, without specifying tessdata path
        if (api->Init(nullptr, "eng")) {
            fprintf(stderr, "Could not initialize tesseract.\n");
            delete api;
            return 1;
        }

        // Open input image with leptonica library
        Pix *image = pixRead("/usr/src/tesseract/testing/phototest.tif");
        if (image == nullptr) {
            // Passing a null image on to SetImage would crash; bail out instead.
            fprintf(stderr, "Could not read input image.\n");
            api->End();
            delete api;
            return 1;
        }
        api->SetImage(image);

        // Restrict recognition to a sub-rectangle of the image
        // SetRectangle(left, top, width, height)
        api->SetRectangle(30, 86, 590, 100);

        // Get OCR result; the caller owns the returned buffer
        char *outText = api->GetUTF8Text();
        if (outText != nullptr) {
            printf("OCR output:\n%s", outText);
        } else {
            fprintf(stderr, "OCR produced no text.\n");
        }

        // Destroy used object and release memory
        api->End();
        delete api;
        delete [] outText;   // delete[] on a null pointer is a no-op
        pixDestroy(&image);

        return 0;
    }
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32

    3.3 GetComponentImages_example

    以Box的形式返回OCR结果

    #include <tesseract/baseapi.h>
    #include <leptonica/allheaders.h>
    
    int main()
    {
      char *outText;
      tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI();
      // Initialize tesseract-ocr with English, without specifying tessdata path
      if (api->Init(NULL, "eng")) {
          fprintf(stderr, "Could not initialize tesseract.\n");
          exit(1);
      }
      Pix *image = pixRead("/usr/src/tesseract/testing/phototest.tif");
      api->SetImage(image);
      Boxa* boxes = api->GetComponentImages(tesseract::RIL_TEXTLINE, true, NULL, NULL);
      printf("Found %d textline image components.\n", boxes->n);
      for (int i = 0; i < boxes->n; i++) {
        BOX* box = boxaGetBox(boxes, i, L_CLONE);
        api->SetRectangle(box->x, box->y, box->w, box->h);
        char* ocrResult = api->GetUTF8Text();
        int conf = api->MeanTextConf();
        fprintf(stdout, "Box[%d]: x=%d, y=%d, w=%d, h=%d, confidence: %d, text: %s",
                        i, box->x, box->y, box->w, box->h, conf, ocrResult);
        boxDestroy(&box);
      }
      // Destroy used object and release memory
      api->End();
      delete api;
      delete [] outText;
      pixDestroy(&image);
    
      return 0;
    }
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33

    3.4 ResultIterator_example

    以迭代器的形式返回OCR结果

    #include <tesseract/baseapi.h>
    #include <leptonica/allheaders.h>
    
    // ResultIterator example: recognize the page, then walk the result word by
    // word, printing each word with its confidence and bounding box.
    // Returns 0 on success, 1 on any initialization / image / recognition failure.
    int main()
    {
      tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI();
      // Initialize tesseract-ocr with English, without specifying tessdata path
      if (api->Init(nullptr, "eng")) {
          fprintf(stderr, "Could not initialize tesseract.\n");
          delete api;
          return 1;
      }
      Pix *image = pixRead("/usr/src/tesseract/testing/phototest.tif");
      if (image == nullptr) {
          fprintf(stderr, "Could not read input image.\n");
          api->End();
          delete api;
          return 1;
      }
      api->SetImage(image);
      if (api->Recognize(0) != 0) {
          fprintf(stderr, "Recognition failed.\n");
          api->End();
          delete api;
          pixDestroy(&image);
          return 1;
      }

      tesseract::ResultIterator* ri = api->GetIterator();
      tesseract::PageIteratorLevel level = tesseract::RIL_WORD;
      if (ri != nullptr) {
        do {
          // The returned string is owned by the caller and freed with delete[].
          const char* word = ri->GetUTF8Text(level);
          float conf = ri->Confidence(level);
          int x1, y1, x2, y2;
          ri->BoundingBox(level, &x1, &y1, &x2, &y2);
          printf("word: '%s';  \tconf: %.2f; BoundingBox: %d,%d,%d,%d;\n",
                   word != nullptr ? word : "", conf, x1, y1, x2, y2);
          delete[] word;
        } while (ri->Next(level));
        // Release the iterator before End(): it refers to the API's results.
        delete ri;
      }

      // Destroy used object and release memory
      api->End();
      delete api;
      pixDestroy(&image);
      return 0;
    }
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35

    3.5 OSD_example

    如果需要判断文字的方向,可以参考这个方法

    #include <tesseract/baseapi.h>
    #include <leptonica/allheaders.h>
    
    // OSD example: detect the orientation and script of the text in an image and
    // print both together with their confidence values.
    // Returns 0 on success, 1 if the image cannot be read, Tesseract cannot be
    // initialized, or detection fails.
    int main()
    {
        const char* inputfile = "/tesseract/testing/devatest-rotated-270.png";
        PIX *image = pixRead(inputfile);
        if (image == nullptr) {
            fprintf(stderr, "Could not read input image.\n");
            return 1;
        }

        tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI();
        // "osd" loads the orientation-and-script-detection data
        if (api->Init(nullptr, "osd")) {
            fprintf(stderr, "Could not initialize tesseract.\n");
            delete api;
            pixDestroy(&image);
            return 1;
        }
        api->SetPageSegMode(tesseract::PSM_OSD_ONLY);
        api->SetImage(image);

        int orient_deg = 0;
        float orient_conf = 0.0f;
        const char* script_name = nullptr;
        float script_conf = 0.0f;
        int status = 0;
        // Only print the outputs when detection reports success; otherwise they
        // would not have been filled in.
        if (api->DetectOrientationScript(&orient_deg, &orient_conf, &script_name, &script_conf)) {
            printf("************\n Orientation in degrees: %d\n Orientation confidence: %.2f\n"
            " Script: %s\n Script confidence: %.2f\n",
            orient_deg, orient_conf,
            script_name != nullptr ? script_name : "", script_conf);
        } else {
            fprintf(stderr, "Could not detect orientation and script.\n");
            status = 1;
        }

        // Destroy used object and release memory
        api->End();
        delete api;
        pixDestroy(&image);

        return status;
    }
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29

    3.6 LSTM_Choices_example

    #include <tesseract/baseapi.h>
    #include <leptonica/allheaders.h>
    // LSTM choices example: for every recognized word, print the alternative
    // symbol choices (and their confidence) produced by the LSTM engine.
    // Returns 0 on success, 1 on any initialization / image / recognition failure.
    int main()
    {
        tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI();
        // Initialize tesseract-ocr with English, without specifying tessdata path
        if (api->Init(nullptr, "eng")) {
            fprintf(stderr, "Could not initialize tesseract.\n");
            delete api;
            return 1;
        }
        // Open input image with leptonica library
        Pix *image = pixRead("choices.png");
        if (image == nullptr) {
            fprintf(stderr, "Could not read input image.\n");
            api->End();
            delete api;
            return 1;
        }
        api->SetImage(image);
        // Set lstm_choice_mode to alternative symbol choices per character, bbox is at word level.
        api->SetVariable("lstm_choice_mode", "2");
        if (api->Recognize(0) != 0) {
            fprintf(stderr, "Recognition failed.\n");
            api->End();
            delete api;
            pixDestroy(&image);
            return 1;
        }

        tesseract::PageIteratorLevel level = tesseract::RIL_WORD;
        tesseract::ResultIterator* res_it = api->GetIterator();
        // Get confidence level for alternative symbol choices. Code is based on
        // https://github.com/tesseract-ocr/tesseract/blob/main/src/api/hocrrenderer.cpp#L325-L344
        if (res_it != nullptr) {
            do {
                int x1, y1, x2, y2;
                res_it->BoundingBox(level, &x1, &y1, &x2, &y2);
                const std::vector<std::vector<std::pair<const char*, float>>>* choiceMap =
                    res_it->GetBestLSTMSymbolChoices();
                if (choiceMap == nullptr) {
                    // No choice data for this word; nothing to print.
                    continue;
                }
                // Index of the current timestep within this word.
                int wcnt = 0;
                // Iterate by reference: copying each timestep vector is unnecessary.
                for (const auto &timestep : *choiceMap) {
                    for (const auto &choice : timestep) {
                        // Confidence as a percentage, truncated to a whole number.
                        const float conf = static_cast<int>(choice.second * 100);
                        const char* word = choice.first;
                        printf("%d  symbol: '%s';  \tconf: %.2f; BoundingBox: %d,%d,%d,%d;\n",
                               wcnt, word, conf, x1, y1, x2, y2);
                    }
                    wcnt++;
                    printf("\n");
                }
            } while (res_it->Next(level));
            // Release the iterator before End(): it refers to the API's results.
            delete res_it;
        }
        // Destroy used object and release memory
        api->End();
        delete api;
        pixDestroy(&image);
        return 0;
    }
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    • 38
    • 39
    • 40
    • 41
    • 42
    • 43
    • 44
    • 45
    • 46
    • 47
    • 48
    • 49
    • 50

    3.7 OpenCV_example

    /*
    Windows compile example:

    SET TESS_INSTALATION=C:/win64
    SET OPENCV_INSTALATION=C:/opencv/build
    cl OpenCV_example.cc -I %TESS_INSTALATION%/include -I %OPENCV_INSTALATION%/include /link /LIBPATH:%TESS_INSTALATION%/lib /LIBPATH:%OPENCV_INSTALATION%/x64/vc14/lib tesseract51.lib leptonica-1.83.0.lib opencv_world460.lib /machine:x64

    */

    #include <string>
    #include <tesseract/baseapi.h>
    #include <leptonica/allheaders.h>
    #include <opencv2/opencv.hpp>

    // OpenCV example: load the image named by the first command-line argument
    // with OpenCV, hand its pixel buffer to Tesseract and print the OCR text.
    // Returns 0 on success, 1 on a missing argument, unreadable image, or
    // Tesseract initialization failure.
    int main(int argc, char *argv[]) {
        if (argc < 2) {
            std::cerr << "Usage: OpenCV_example <image path>\n";
            return 1;
        }

        const std::string imPath = argv[1];
        cv::Mat im = cv::imread(imPath, cv::IMREAD_COLOR);
        if (im.empty()) {
            std::cerr << "Could not read input image: " << imPath << "\n";
            return 1;
        }

        tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI();
        if (api->Init(nullptr, "eng", tesseract::OEM_LSTM_ONLY)) {
            std::cerr << "Could not initialize tesseract.\n";
            delete api;
            return 1;
        }
        api->SetPageSegMode(tesseract::PSM_AUTO);
        // 3 bytes per pixel: the image was loaded with IMREAD_COLOR.
        api->SetImage(im.data, im.cols, im.rows, 3, im.step);

        // GetUTF8Text() returns a buffer owned by the caller: copy it into a
        // std::string, then free it so it does not leak.
        std::string outText;
        char *rawText = api->GetUTF8Text();
        if (rawText != nullptr) {
            outText = rawText;
            delete [] rawText;
        }
        std::cout << outText;

        api->End();
        delete api;
        return 0;
    }

    3.8 UserPatterns_example

    #include <tesseract/baseapi.h>
    #include <leptonica/allheaders.h>
    
    // User patterns example: initialize Tesseract with an extra config file
    // (intended to point at user patterns), OCR an image and print the text.
    // Returns 0 on success, 1 if Tesseract cannot be initialized or the image
    // cannot be loaded.
    int main()
    {
        // Init() takes the config list as char**, so the path must live in a
        // mutable array; binding a string literal to char* is ill-formed in C++11.
        char configPath[] = "path/to/my.patterns.config";
        char *configs[] = {configPath};
        int configs_size = 1;

        tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI();
        if (api->Init(nullptr, "eng", tesseract::OEM_LSTM_ONLY, configs, configs_size, nullptr, nullptr, false)) {
          fprintf(stderr, "Could not initialize tesseract.\n");
          delete api;
          return 1;
        }

        Pix *image = pixRead("Arial.png");
        if (image == nullptr) {
          fprintf(stderr, "Could not read input image.\n");
          api->End();
          delete api;
          return 1;
        }
        api->SetImage(image);

        char *outText = api->GetUTF8Text();
        if (outText != nullptr) {
          // Never pass OCR output as the format string: any '%' in the
          // recognized text would be interpreted as a conversion specifier.
          printf("%s", outText);
        }

        api->End();
        delete api;
        delete [] outText;   // delete[] on a null pointer is a no-op
        pixDestroy(&image);
        return 0;
    }
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
  • 相关阅读:
    JavaScript 70 JavaScript Ajax 70.4 AJAX - 服务器响应
    Spring Cloud(十二):Spring Cloud Security
    docker-容器相关:运行、启动、停止、进入、导出和导入、删除
    大型化工企业数字化转型建议
    【电压质量】提高隔离电源系统的电压质量(Simulink实现)
    Redis缓存穿透,背八股文 居然没用!!!
    docker部署es+kibana
    代码随想录算法训练营Day 46 || 139.单词拆分、多重背包
    线程的状态
    Jenkins 如何玩转接口自动化测试?
  • 原文地址:https://blog.csdn.net/xiaofeizai1116/article/details/134326438