基于tesseract的多线程OCR服务器的JAVA实现

来源:互联网 发布:高校hiv检测包 知乎 编辑:程序博客网 时间:2024/06/04 19:39

本文探讨基于tesseract的多线程OCR服务器的JAVA实现,可同时对多个android手机客户端提供图片OCR服务 

project源码下载 http://download.csdn.net/user/yangliuy   

最近接手一个项目,项目的背景是要开发一个C/S架构的发票真伪识别系统:客户端为Android手机,采集发票图像并传到服务器做OCR识别;服务器把识别出来的发票号码和发票密码提交到国税局官网的发票真伪查询页面,然后将真伪信息返回给手机用户。为了开发一个多线程OCR服务器,我研究了JAVA图像处理及OCR技术。JAVA强大的图形处理相关库(如java.awt.image等)为采集图像的裁剪、缩放、二值化、去噪等提供了良好的基础;OCR主要采用了Google tesseract开源OCR引擎,tesseract安装在本地后可以用cmd命令行调用,而JAVA支持cmd命令的调用。此外还用到了JAVA线程池、互斥锁等多线程编程技术及socket等网络编程技术。源码如下:

多线程Server端 Server.java

package com.serverMain;

import java.net.ServerSocket;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.io.IOException;

/**
 * Accepts client connections on a TCP port and hands each accepted socket to a
 * {@link ServerRun} task executed on a cached thread pool.
 *
 * <p>The accept loop runs on this object's own thread, which is started by
 * {@link #startServer()}.
 *
 * @author yangliuis@pku.edu.cn
 */
public class Server extends Thread {
    private final int port;
    private ServerSocket server;
    /** Pool that runs one {@link ServerRun} task per accepted connection. */
    private ExecutorService threadPool;

    /**
     * @param port TCP port the server will listen on
     */
    public Server(int port) {
        super();
        this.port = port;
    }

    /**
     * Opens the listening socket, creates the worker pool and starts the accept thread.
     *
     * @throws IOException if the server socket cannot be opened on the configured port
     */
    public void startServer() throws IOException {
        server = new ServerSocket(port);
        threadPool = Executors.newCachedThreadPool();
        System.out.println("欢迎使用Helios系统,服务器启动");
        this.start();
    }

    /**
     * Accept loop: blocks in {@code accept()} and submits one task per connection.
     * The loop ends once the server socket has been closed; the pool is then shut
     * down so already-submitted tasks can finish and the worker threads can exit.
     */
    @Override
    public void run() {
        try {
            while (!server.isClosed()) {
                try {
                    ServerRun task = new ServerRun(server.accept());
                    threadPool.execute(task);
                } catch (IOException e) {
                    if (server.isClosed()) {
                        // accept() fails permanently on a closed socket; stop instead of spinning.
                        break;
                    }
                    e.printStackTrace();
                }
            }
        } finally {
            threadPool.shutdown();
        }
    }

    /**
     * Starts the server on port 8089.
     *
     * @param args unused
     * @throws IOException if the server socket cannot be opened
     */
    public static void main(String[] args) throws IOException {
        Server server = new Server(8089);
        server.startServer();
    }
}

Server端任务ServerRun.java

package com.serverMain;

import java.net.Socket;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.io.BufferedReader;
import java.io.DataInputStream;
import java.io.BufferedOutputStream;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.util.concurrent.atomic.AtomicInteger;

import com.imageHandle.OperateImage;
import com.imageHandle.SoundBinImage;

/**
 * Per-connection task: receives one invoice image from the client socket, crops and
 * binarizes two regions of it, runs the tesseract command line on both regions and
 * posts the recognized values to the tax-bureau verification page.
 *
 * <p>Each task claims its own sequence number from a shared atomic counter, so the
 * files written by concurrently running tasks never share a name.
 *
 * @author yangliuis@pku.edu.cn
 */
public class ServerRun extends Thread implements Runnable {
    /** Shared counter used to give every received image a distinct file-name suffix. */
    private static final AtomicInteger invoicePicNum = new AtomicInteger(0);

    private final Socket socket;
    private final String invoiceDir = "F://Helios//data//invoice_image//";

    /** Sequence number claimed by this task at the start of {@link #run()}. */
    private int picNum;

    /**
     * @param socket connected client socket from which the image bytes are read
     */
    public ServerRun(Socket socket) {
        this.socket = socket;
    }

    /**
     * Receives the image, runs OCR and the verification request, and prints the result.
     * I/O failures are reported on stderr; the socket is always closed at the end.
     */
    @Override
    public void run() {
        // Claim the number once, atomically, and use only this value for all file names.
        picNum = invoicePicNum.getAndIncrement();
        String invoicePicFilename = invoiceDir + "invoice_image_" + picNum + ".jpg";
        try {
            receiveImage(invoicePicFilename);
            System.out.println("收到图片" + invoicePicFilename + "开始识别该图片");
            // Recognized lines: expected to hold invoice code, invoice number, invoice password.
            String[] invoiceInfo = doOCRInvoice(invoicePicFilename);
            String invoiceResult = postCheckInvoice(invoiceInfo);
            System.out.println("发票验证结束,验证结果为:" + invoiceResult);
        } catch (IOException e) {
            e.printStackTrace();
        } catch (InterruptedException e) {
            // Preserve the interrupt status for the executor that owns this thread.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        } finally {
            try {
                socket.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Copies everything the client sends (until end of stream) into the given file.
     * The file is flushed and closed before this method returns, so later steps see
     * the complete image.
     */
    private void receiveImage(String targetFilename) throws IOException {
        InputStream in = socket.getInputStream();
        try (OutputStream out = new BufferedOutputStream(new FileOutputStream(targetFilename))) {
            byte[] buffer = new byte[1024];
            int read;
            while ((read = in.read(buffer, 0, buffer.length)) != -1) {
                out.write(buffer, 0, read);
            }
        }
    }

    /**
     * Posts the recognized invoice fields to the verification page and scans the reply.
     *
     * @param invoiceInfo recognized values; indexes 0, 1 and 2 are sent as
     *                    {@code fpdm}, {@code fphm} and {@code fpmm}
     * @return "发票为真!" if any response line contains "正确查询", otherwise "发票为假!"
     * @throws IOException if the HTTP exchange fails
     */
    private String postCheckInvoice(String[] invoiceInfo) throws IOException {
        String fpdm = invoiceInfo[0];
        String fphm = invoiceInfo[1];
        String fpmm = invoiceInfo[2];
        String result = "发票为假!";

        URL url = new URL("http://www.bjtax.gov.cn/ptfp/turn.jsp");
        URLConnection connection = url.openConnection();
        connection.setDoOutput(true);

        // OCR output is arbitrary text, so every value is form-encoded before being sent.
        String post = "fpdm=" + formEncode(fpdm) + "&fphm=" + formEncode(fphm) + "&fpmm=" + formEncode(fpmm)
                + "&yzms=111111&sfzh=11111111111111111111&ip=127.0.0.1&isFrist=1";
        try (OutputStreamWriter out = new OutputStreamWriter(connection.getOutputStream(), "8859_1")) {
            out.write(post);
            out.flush();
        }

        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(connection.getInputStream(), "utf-8"))) {
            String currentLine;
            while ((currentLine = reader.readLine()) != null) {
                if (currentLine.indexOf("正确查询") != -1) {
                    result = "发票为真!";
                    break;
                }
            }
        }
        return result;
    }

    /** Form-encodes one POST value; a missing (null) value is sent as an empty string. */
    private static String formEncode(String value) throws IOException {
        return value == null ? "" : URLEncoder.encode(value, "UTF-8");
    }

    /**
     * Crops the number and password regions out of the invoice image, binarizes them,
     * runs tesseract on both and collects the recognized text lines.
     *
     * @param invoicePicFilename path of the received invoice image
     * @return array of length 10 holding the non-blank lines of the number result
     *         followed by those of the password result; unused slots are {@code null}
     * @throws InterruptedException if interrupted while waiting for tesseract
     * @throws IOException if cropping fails, tesseract exits with a non-zero code,
     *                     or a result file cannot be read
     */
    private String[] doOCRInvoice(String invoicePicFilename) throws InterruptedException, IOException {
        String[] invoiceInfo = new String[10];

        String passwordCrop = invoiceDir + "password" + picNum + ".jpg";
        String numberCrop = invoiceDir + "number" + picNum + ".jpg";
        String passwordBin = invoiceDir + "binpassword" + picNum + ".png";
        String numberBin = invoiceDir + "binnumber" + picNum + ".png";

        // Crop the two regions of interest; a failure here is propagated because
        // every later step needs these files.
        OperateImage oPassword = new OperateImage(700, 2450, 400, 170);
        oPassword.setSrcpath(invoicePicFilename);
        oPassword.setSubpath(passwordCrop);
        oPassword.cut();
        OperateImage oNumber = new OperateImage(320, 80, 800, 300);
        oNumber.setSrcpath(invoicePicFilename);
        oNumber.setSubpath(numberCrop);
        oNumber.cut();

        // Filter the background colour and binarize; the original author notes PNG
        // gave better recognition accuracy.
        new SoundBinImage().releaseSound(passwordCrop, passwordBin, 60);
        new SoundBinImage().releaseSound(numberCrop, numberBin, 130);

        // OCR: tesseract appends ".txt" to the output base name it is given.
        String invoiceBinNumberFileName = invoiceDir + "binnumbertxt" + picNum;
        String invoiceBinPasswordFileName = invoiceDir + "binpasswordtxt" + picNum;
        Process numberOcr = startTesseract(numberBin, invoiceBinNumberFileName);
        Process passwordOcr = startTesseract(passwordBin, invoiceBinPasswordFileName);
        // Block until both runs are finished; the .txt files do not exist before that.
        int numberExit = numberOcr.waitFor();
        int passwordExit = passwordOcr.waitFor();
        if (numberExit != 0 || passwordExit != 0) {
            throw new IOException("tesseract failed (exit codes " + numberExit + ", " + passwordExit + ")");
        }

        int next = appendNonBlankLines(invoiceBinNumberFileName + ".txt", invoiceInfo, 0);
        appendNonBlankLines(invoiceBinPasswordFileName + ".txt", invoiceInfo, next);

        System.out.println("OCR识别结果:" + invoiceInfo[0] + " " + invoiceInfo[1] + " " + invoiceInfo[2]);
        return invoiceInfo;
    }

    /**
     * Starts {@code cmd.exe /c tesseract <image> <outputBase> -l eng} with an explicit
     * argument list. The child inherits this process's standard streams so it can never
     * block on an undrained pipe.
     */
    private static Process startTesseract(String imagePath, String outputBase) throws IOException {
        return new ProcessBuilder("cmd.exe", "/c", "tesseract", imagePath, outputBase, "-l", "eng")
                .inheritIO()
                .start();
    }

    /**
     * Appends the trimmed, non-blank lines of a text file to {@code target}, starting at
     * index {@code next} and never writing past the end of the array.
     *
     * @return the index following the last slot written
     */
    private static int appendNonBlankLines(String path, String[] target, int next) throws IOException {
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(path), "UTF-8"))) {
            String line;
            while (next < target.length && (line = reader.readLine()) != null) {
                String trimmed = line.trim();
                if (!trimmed.isEmpty()) {
                    target[next++] = trimmed;
                }
            }
        }
        return next;
    }
}

图形处理类
图像过滤背景色及黑白二值化 SoundBinImage.java

package com.imageHandle;

import java.awt.image.BufferedImage;
import java.awt.image.Raster;
import java.awt.image.WritableRaster;
import java.io.File;
import java.io.IOException;
import javax.imageio.ImageIO;

/**
 * Removes the background of an image by thresholding it into pure black and white.
 */
public class SoundBinImage {
    /** Opaque white in ARGB. */
    private static final int WHITE = 0xFFFFFFFF;
    /** Opaque black in ARGB. */
    private static final int BLACK = 0xFF000000;

    /**
     * Reads an image, binarizes it and writes the result as PNG.
     *
     * <p>A pixel becomes white when the integer average of its red, green and blue
     * components is greater than {@code Threshold}, otherwise black. Pixels are read
     * through {@code getRGB}, so grayscale and palette images are handled as well as
     * RGB ones. I/O failures are reported on stderr and no exception is thrown.
     *
     * @param filepath  path of the source image
     * @param destpath  path of the PNG file to write
     * @param Threshold brightness threshold in the range 0-255
     */
    public void releaseSound(String filepath, String destpath, int Threshold) {
        try {
            BufferedImage source = ImageIO.read(new File(filepath));
            if (source == null) {
                // ImageIO.read returns null when no registered reader understands the file.
                throw new IOException("Unsupported or unreadable image: " + filepath);
            }
            int width = source.getWidth();
            int height = source.getHeight();
            BufferedImage binary = new BufferedImage(width, height, BufferedImage.TYPE_INT_ARGB);
            for (int x = 0; x < width; x++) {
                for (int y = 0; y < height; y++) {
                    int rgb = source.getRGB(x, y);
                    int red = (rgb >> 16) & 0xFF;
                    int green = (rgb >> 8) & 0xFF;
                    int blue = rgb & 0xFF;
                    boolean bright = (red + green + blue) / 3 > Threshold;
                    binary.setRGB(x, y, bright ? WHITE : BLACK);
                }
            }
            if (!ImageIO.write(binary, "PNG", new File(destpath))) {
                throw new IOException("No PNG writer available for: " + destpath);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    public static void main(String[] args) {
        new SoundBinImage().releaseSound("C:\\deletesound\\password1.jpg", "C:\\deletesound\\result.png", 60);
    }
}

图片裁剪 OperateImage.java

package com.imageHandle;

import java.awt.Rectangle;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.Iterator;
import javax.imageio.ImageIO;
import javax.imageio.ImageReadParam;
import javax.imageio.ImageReader;
import javax.imageio.stream.ImageInputStream;

/**
 * Cuts a rectangular region out of a JPEG file and saves it as a new JPEG file.
 *
 * <p>Configure the rectangle through the constructor or setters, set the source and
 * destination paths, then call {@link #cut()}.
 */
public class OperateImage {

    /** Path of the source image, e.g. c:/1.jpg. */
    private String srcpath;

    /** Path where the cropped image is stored, e.g. c:/2.jpg. */
    private String subpath;

    /** X coordinate of the crop rectangle's upper-left corner. */
    private int x;

    /** Y coordinate of the crop rectangle's upper-left corner. */
    private int y;

    /** Width of the crop rectangle. */
    private int width;

    /** Height of the crop rectangle. */
    private int height;

    public OperateImage() {
    }

    /**
     * @param x      x coordinate of the upper-left corner of the crop rectangle
     * @param y      y coordinate of the upper-left corner of the crop rectangle
     * @param width  width of the crop rectangle
     * @param height height of the crop rectangle
     */
    public OperateImage(int x, int y, int width, int height) {
        this.x = x;
        this.y = y;
        this.width = width;
        this.height = height;
    }

    /**
     * Crops the configured rectangle out of {@code srcpath} and writes it to {@code subpath}.
     *
     * @throws IOException if the source cannot be read, no JPEG reader or writer is
     *                     registered, or the destination cannot be written
     */
    public void cut() throws IOException {
        // Readers that claim to decode the informal format name "jpg".
        Iterator<ImageReader> readers = ImageIO.getImageReadersByFormatName("jpg");
        if (!readers.hasNext()) {
            throw new IOException("No ImageIO reader registered for format jpg");
        }
        ImageReader reader = readers.next();
        try (FileInputStream is = new FileInputStream(srcpath);
             ImageInputStream iis = ImageIO.createImageInputStream(is)) {
            if (iis == null) {
                throw new IOException("Cannot create an image input stream for " + srcpath);
            }
            // seekForwardOnly = true: the input is read sequentially, which lets the
            // reader avoid caching data for parts it has already passed.
            reader.setInput(iis, true);

            // Restrict decoding to the rectangle given by (x, y, width, height).
            ImageReadParam param = reader.getDefaultReadParam();
            param.setSourceRegion(new Rectangle(x, y, width, height));

            // Decode image index 0 using the region parameter, then save the result.
            BufferedImage cropped = reader.read(0, param);
            if (!ImageIO.write(cropped, "jpg", new File(subpath))) {
                throw new IOException("No ImageIO writer registered for format jpg");
            }
        } finally {
            // Release the resources held by the reader.
            reader.dispose();
        }
    }

    public int getHeight() {
        return height;
    }

    public void setHeight(int height) {
        this.height = height;
    }

    public String getSrcpath() {
        return srcpath;
    }

    public void setSrcpath(String srcpath) {
        this.srcpath = srcpath;
    }

    public String getSubpath() {
        return subpath;
    }

    public void setSubpath(String subpath) {
        this.subpath = subpath;
    }

    public int getWidth() {
        return width;
    }

    public void setWidth(int width) {
        this.width = width;
    }

    public int getX() {
        return x;
    }

    public void setX(int x) {
        this.x = x;
    }

    public int getY() {
        return y;
    }

    public void setY(int y) {
        this.y = y;
    }

    public static void main(String[] args) throws Exception {
        String name = "C:\\caijian\\bb.jpg";

        OperateImage oPassword = new OperateImage(700, 2450, 400, 170);
        oPassword.setSrcpath(name);
        oPassword.setSubpath("C:\\caijian\\bbpassword.jpg");
        oPassword.cut();

        OperateImage oNumber = new OperateImage(320, 80, 800, 300);
        oNumber.setSrcpath(name);
        oNumber.setSubpath("C:\\caijian\\bbnumber.jpg");
        oNumber.cut();
    }
}

测试客户端Client.java 大家测试的话注意修改服务器地址为本机地址,待识别图片为F://Helios//android//invoice_test.jpg

package com.serverMain;

import java.io.BufferedInputStream;
import java.io.DataOutputStream;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.Socket;

/**
 * Test client that uploads one image file to the OCR server.
 *
 * @author yangliuis@pku.edu.cn
 */
public class Client {
    private Socket socket;

    public Client() {
    }

    /**
     * Sends the test image to the server and closes the connection; closing the
     * connection is what ends the upload on the receiving side.
     *
     * <p>The file is opened before the connection, so a missing file is reported on
     * stderr without opening a socket. The file, the socket and the output stream are
     * always closed, including on failure.
     *
     * @throws IOException if connecting or sending fails
     */
    public void SendImage() throws IOException {
        String imageFileName = "F://Helios//android//invoice_test.jpg";
        try (BufferedInputStream bis = new BufferedInputStream(new FileInputStream(imageFileName));
             Socket connection = new Socket("192.168.1.102", 8089);
             DataOutputStream dos = new DataOutputStream(connection.getOutputStream())) {
            socket = connection;
            // Copy the image in 1 KB chunks.
            byte[] buffer = new byte[1024];
            int read;
            while ((read = bis.read(buffer, 0, buffer.length)) != -1) {
                dos.write(buffer, 0, read);
            }
            dos.flush();
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        }
    }

    public static void main(String s[]) throws IOException {
        Client client = new Client();
        client.SendImage();
    }
}





原创粉丝点击
热门问题 老师的惩罚 人脸识别 我在镇武司摸鱼那些年 重生之率土为王 我在大康的咸鱼生活 盘龙之生命进化 天生仙种 凡人之先天五行 春回大明朝 姑娘不必设防,我是瞎子 手切了山药很痒怎么办 山药弄的身上痒怎么办 疣迪去除疣体怎么办 尖锐湿庞出血了怎么办 尿道口周围烂了怎么办 尖锐湿庞复发了怎么办 宝宝脸上长湿疹怎么办如何治疗 孕妇得尖锐湿庞怎么办 痘痘留下的小坑怎么办 花洒固定座坏了怎么办 脚上起水泡烂了怎么办 月经期吃了芒果怎么办 月经量少又黑怎么办 来月经黑色的血怎么办 月经来的是黑色怎么办 来月经有血块是怎么办 月经又少又黑怎么办 来月经发黑又少怎么办 月经血发黑量少怎么办 做人流后肚子胀怎么办 怀孕见红了肚子不痛怎么办 月经来是黑色的怎么办 怀孕了长了痔疮怎么办 怀孕了有外痔疮怎么办 孕妇长痔疮很痛怎么办 孕9个月尿路感染怎么办 旁边有人尿不出来怎么办 外阴破皮了应该怎么办 脸上长脂肪粒怎么办怎么能消除 挤黑头留下的坑怎么办 长痘留下的坑怎么办 鼻子上留下黑印怎么办 狗狗眼里长息肉怎么办 狗狗眼角长息肉怎么办 脸上长了好多脂肪粒怎么办 脸上毛孔粗大有黑头怎么办 脸颊毛孔粗有黑头怎么办 鼻子上有黑头怎么办小窍门 脸上很多粉刺和油脂粒怎么办 毛孔里都是角栓怎么办 脸上长了很多脂肪粒怎么办