How to use tesseract-ocr

来源:互联网 发布:java常用算法手册pdf 编辑:程序博客网 时间:2024/05/29 17:38

使用动态库的调用方式:

1. 从 http://code.google.com/p/tesseract-ocr/downloads/list 下载 tesseract-3.02.02-win32-lib-include-dirs 和 leptonica-1.68-win32-lib-include-dirs。

2. 创建名为 TesseractSample 的工程,将 tesseract-3.02.02-win32-lib-include-dirs 里的 include 和 lib 文件夹拷贝到 TesseractSample 下;再将 leptonica-1.68-win32-lib-include-dirs 的 lib 文件夹内的 lept168d.lib 和 liblept168d.dll 拷贝到 TesseractSample 工程的 lib 文件夹下。

3. 配置工程

项目->属性->c/c++->附加包含目录:..\include\tesseract

项目->属性->链接器->附加库目录:..\lib

代码:

// Minimal Tesseract 3.02 sample for Windows: runs OCR on a fixed image file
// and prints the recognised text, first as the raw UTF-8 bytes returned by
// tesseract and then converted to the system ANSI code page so that it is
// readable in a console that uses that code page.

#include "strngs.h"
#include "baseapi.h"

#pragma comment(lib, "libtesseract302d.lib")

#include <windows.h>  // MultiByteToWideChar, WideCharToMultiByte, CP_UTF8, CP_ACP
#include <tchar.h>    // _tmain, _TCHAR

#include <cstdlib>    // system, EXIT_FAILURE
#include <iostream>
#include <string>
#include <vector>

using namespace std;

// Image to recognise and the directory handed to TessBaseAPI::Init.
#define FILEPATH "F:\\TesseractSample\\Debug\\eng.bmp"
#define DIRPATH "F:\\TesseractSample\\Debug"
//#define FILEPATH "newpic.tif"

/// Converts a UTF-8 encoded string to the system ANSI code page (CP_ACP).
///
/// The conversion goes UTF-8 -> UTF-16 -> CP_ACP. CP_ACP is GBK only when the
/// system locale is Simplified Chinese; on other locales the result is in
/// whatever ANSI code page the system uses.
///
/// @param strUTF8  NUL-terminated UTF-8 text; anything after an embedded NUL
///                 is ignored because the input is passed as a C string.
/// @return The converted text, or an empty string if either conversion step
///         fails.
string UTF8ToGBK(const std::string& strUTF8)
{
    // First call with a null output buffer asks for the required length in
    // wide characters, including the terminating NUL (input length is -1).
    const int wideLen = MultiByteToWideChar(CP_UTF8, 0, strUTF8.c_str(), -1, NULL, 0);
    if (wideLen <= 0)
    {
        return std::string();
    }

    // std::vector owns the buffer, so nothing leaks on an early return or if
    // a later allocation throws.
    std::vector<wchar_t> wide(static_cast<size_t>(wideLen), L'\0');
    if (MultiByteToWideChar(CP_UTF8, 0, strUTF8.c_str(), -1, &wide[0], wideLen) <= 0)
    {
        return std::string();
    }

    // Same two-step pattern for UTF-16 -> ANSI: query the size, then convert.
    const int ansiLen = WideCharToMultiByte(CP_ACP, 0, &wide[0], -1, NULL, 0, NULL, NULL);
    if (ansiLen <= 0)
    {
        return std::string();
    }

    std::vector<char> ansi(static_cast<size_t>(ansiLen), '\0');
    if (WideCharToMultiByte(CP_ACP, 0, &wide[0], -1, &ansi[0], ansiLen, NULL, NULL) <= 0)
    {
        return std::string();
    }

    // The buffer is NUL-terminated, so construct from the C string.
    return std::string(&ansi[0]);
}

/// Program entry point: initialises tesseract, OCRs FILEPATH and prints the
/// result.
///
/// @return 0 on success, EXIT_FAILURE if tesseract cannot be initialised or
///         the image cannot be processed.
int _tmain(int argc, _TCHAR* argv[])
{
    /*
    string tmp = "";
    if (getenv("TESSDATA_PREFIX"))
    {
        tmp = getenv("TESSDATA_PREFIX");
    }
    */

    tesseract::TessBaseAPI api;

    // DIRPATH is the directory the program runs from. If this argument is
    // NULL instead, the TESSDATA_PREFIX environment variable must be set.
    // Language codes: "chi_sim" for Simplified Chinese, "eng" for English.
    // Init returns 0 on success; running OCR after a failed Init is invalid.
    if (api.Init(DIRPATH, "eng", tesseract::OEM_DEFAULT) != 0)
    {
        cerr << "Failed to initialise tesseract (data path: " << DIRPATH << ")" << endl;
        return EXIT_FAILURE;
    }

    STRING text_out;
    if (!api.ProcessPages(FILEPATH, NULL, 0, &text_out))
    {
        cerr << "Failed to process image: " << FILEPATH << endl;
        return EXIT_FAILURE;
    }

    // Raw UTF-8 output first, then the ANSI-converted text for the console.
    cout << text_out.string() << endl;
    cout << UTF8ToGBK(text_out.string()).c_str() << endl;

    // Keep the console window open when launched from the IDE or Explorer.
    system("pause");
    return 0;
}



 

原创粉丝点击