使用谷歌开源组件tesseract-OCR识别身份证(windows版本)

来源:互联网 发布:属于新顶级域名的有 编辑:程序博客网 时间:2024/05/29 21:35

1,前期准备(软件安装)

下载tesseract-OCR,我下载的是3.02.02,注意各个版本之间兼容性不太好,需要对应。

2,安装tesseract-OCR,一路下一步即可。我安装在默认路径(这个路径很重要,后面程序中的 tessPath 变量需要用到)

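3,java代码实现识别方法一:通过调用系统命令完成识别,java代码参考如下。首先创建一个java工程,在lib目录下导入两个jar包(即下面代码中 org.jdesktop.swingx.util.OS 和 com.sun.media.imageio.plugins.tiff.TIFFImageWriteParam 所在的jar包,记得build path)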

以下是我的测试目录介绍

话不多说,上代码,亲测可用(经过整合已经部署到公司项目)

package com.lcm.test;

import java.io.File;
import java.io.IOException;

/**
 * Minimal driver that runs {@link OCR} over one hard-coded JPEG and prints
 * whatever text was recognized.
 */
public class TestOcr {

    /**
     * Recognizes the sample image and writes the result to standard output.
     * Every failure is reported as a stack trace on standard error.
     *
     * @param args not used
     */
    public static void main(String[] args) {
        // Location of the input image
        final String samplePath = "E://cxf//develop-workspace-new//testImageRecognition//src//2.jpg";
        try {
            final OCR engine = new OCR();
            final File sampleImage = new File(samplePath);
            System.out.println(engine.recognizeText(sampleImage, "jpg"));
        } catch (Exception e) {
            // I/O failures and all other exceptions are reported the same way
            e.printStackTrace();
        }
    }
}

package com.lcm.test;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;

import org.jdesktop.swingx.util.OS;

/**
 * Recognizes text in an image by running the external {@code tesseract}
 * command-line program and reading the text file it produces.
 */
public class OCR {
    /** Language switch of the tesseract command line (lowercase letter "l", not the digit one). */
    private final String LANG_OPTION = "-l";
    private final String EOL = System.getProperty("line.separator");
    /** Installation directory of Tesseract; used on every platform except Linux. */
    private String tessPath = "C://Program Files (x86)//Tesseract-OCR";

    /**
     * Converts the image to a temporary TIFF, runs tesseract on it with the
     * {@code eng} language and returns the recognized text.
     *
     * <p>The command is run in the image's parent directory and has the shape
     * {@code tesseract <tempImage> output -l eng}. The temporary image and the
     * generated {@code output.txt} are deleted before this method returns,
     * whether it succeeds or fails.
     *
     * @param imageFile   image to recognize
     * @param imageFormat ImageIO format name of {@code imageFile}, e.g. {@code "jpg"}
     * @return the recognized text, each line terminated by the platform line
     *         separator; an empty string when tesseract exits with a non-zero code
     * @throws IOException if the temporary TIFF cannot be created, the process
     *         cannot be started, or the result file cannot be read
     * @throws InterruptedException if the thread is interrupted while waiting
     *         for tesseract
     */
    public String recognizeText(File imageFile, String imageFormat) throws Exception {
        File tempImage = ImageIOHelper.createImage(imageFile, imageFormat);
        if (tempImage == null) {
            // createImage reports conversion failures by returning null
            throw new IOException("Could not convert " + imageFile + " to a temporary TIFF image");
        }

        File workDir = imageFile.getParentFile();
        File outputFile = new File(workDir, "output");
        File resultFile = new File(outputFile.getAbsolutePath() + ".txt");
        StringBuilder text = new StringBuilder();

        try {
            List<String> cmd = new ArrayList<>();
            // On Linux tesseract is expected on the PATH; elsewhere use the install directory
            cmd.add(OS.isLinux() ? "tesseract" : tessPath + "//tesseract");
            cmd.add(tempImage.getName());
            cmd.add(outputFile.getName());
            cmd.add(LANG_OPTION);
            // Swap in another language code such as "chi_sim" here if needed;
            // presumably that requires the matching trained data to be installed.
            cmd.add("eng");

            ProcessBuilder pb = new ProcessBuilder(cmd);
            pb.directory(workDir);
            pb.redirectErrorStream(true);
            Process process = pb.start();

            int exitCode;
            String diagnostics;
            try {
                // Read the merged stdout/stderr to EOF first: an unread pipe can
                // fill up and block the child, so waitFor() would never return.
                diagnostics = drain(process);
                exitCode = process.waitFor();
            } catch (InterruptedException e) {
                process.destroy();
                Thread.currentThread().interrupt();
                throw e;
            }

            if (exitCode == 0) {
                try (BufferedReader in = new BufferedReader(
                        new InputStreamReader(new FileInputStream(resultFile), "UTF-8"))) {
                    String line;
                    while ((line = in.readLine()) != null) {
                        text.append(line).append(EOL);
                    }
                }
            } else {
                // Best effort: keep returning an empty result, but do not hide why
                System.err.println(describeExitCode(exitCode) + " (exit code " + exitCode + ")");
                if (!diagnostics.isEmpty()) {
                    System.err.print(diagnostics);
                }
            }
        } finally {
            // Remove the temporary working files on every path
            tempImage.delete();
            resultFile.delete();
        }
        return text.toString();
    }

    /** Reads the process's (merged) output to EOF and returns it. */
    private static String drain(Process process) throws IOException {
        StringBuilder out = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(process.getInputStream()))) {
            String line;
            while ((line = reader.readLine()) != null) {
                out.append(line).append('\n');
            }
        }
        return out.toString();
    }

    /** Maps a tesseract exit code to the human-readable message used by this class. */
    private static String describeExitCode(int exitCode) {
        switch (exitCode) {
            case 1:
                return "Errors accessing files.There may be spaces in your image's filename.";
            case 29:
                return "Cannot recongnize the image or its selected region.";
            case 31:
                return "Unsupported image format.";
            default:
                return "Errors occurred.";
        }
    }
}

package com.lcm.test;

import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import java.util.Locale;
import java.util.NoSuchElementException;

import javax.imageio.IIOImage;
import javax.imageio.ImageIO;
import javax.imageio.ImageReader;
import javax.imageio.ImageWriteParam;
import javax.imageio.ImageWriter;
import javax.imageio.metadata.IIOMetadata;
import javax.imageio.stream.ImageInputStream;
import javax.imageio.stream.ImageOutputStream;

import com.sun.media.imageio.plugins.tiff.TIFFImageWriteParam;

/**
 * Helper that re-encodes an image as an uncompressed TIFF file using ImageIO.
 */
public class ImageIOHelper {

    /**
     * Converts an image file to TIFF format.
     *
     * <p>The TIFF is written next to the source image; its name is the source
     * name with {@code 0} appended to the base name and the extension replaced
     * by {@code tif} (for example {@code 2.jpg} becomes {@code 20.tif}).
     *
     * @param imageFile   the image to convert
     * @param imageFormat ImageIO format name of {@code imageFile}, e.g. {@code "jpg"}
     * @return the newly written TIFF file, or {@code null} if an I/O error
     *         occurred (the error is printed to standard error)
     * @throws NoSuchElementException if ImageIO has no reader for
     *         {@code imageFormat} or no writer for {@code tiff}
     */
    public static File createImage(File imageFile, String imageFormat) {
        Iterator<ImageReader> readers = ImageIO.getImageReadersByFormatName(imageFormat);
        if (!readers.hasNext()) {
            throw new NoSuchElementException("No ImageIO reader for format: " + imageFormat);
        }
        // Get the TIFF writer up front so a missing plugin fails before any stream is opened
        Iterator<ImageWriter> writers = ImageIO.getImageWritersByFormatName("tiff");
        if (!writers.hasNext()) {
            throw new NoSuchElementException("No ImageIO writer for format: tiff");
        }
        ImageReader reader = readers.next();
        ImageWriter writer = writers.next();

        File tempFile = null;
        try {
            IIOMetadata streamMetadata;
            IIOImage image;
            try (ImageInputStream iis = ImageIO.createImageInputStream(imageFile)) {
                if (iis == null) {
                    throw new IOException("Cannot open image for reading: " + imageFile);
                }
                reader.setInput(iis);
                // Read the stream metadata
                streamMetadata = reader.getStreamMetadata();
                BufferedImage bi = reader.read(0);
                image = new IIOImage(bi, null, reader.getImageMetadata(0));
            }

            // Set up the write parameters: no compression
            TIFFImageWriteParam tiffWriteParam = new TIFFImageWriteParam(Locale.CHINESE);
            tiffWriteParam.setCompressionMode(ImageWriteParam.MODE_DISABLED);

            tempFile = tempImageFile(imageFile);
            try (ImageOutputStream ios = ImageIO.createImageOutputStream(tempFile)) {
                if (ios == null) {
                    throw new IOException("Cannot open file for writing: " + tempFile);
                }
                writer.setOutput(ios);
                writer.write(streamMetadata, image, tiffWriteParam);
            }
            return tempFile;
        } catch (IOException e) {
            e.printStackTrace();
            if (tempFile != null) {
                // Do not leave a partially written file behind
                tempFile.delete();
            }
            return null;
        } finally {
            writer.dispose();
            reader.dispose();
        }
    }

    /**
     * Derives the temporary TIFF file for an image: same directory, base name
     * with {@code 0} appended, extension {@code tif}. A name without an
     * extension simply gets {@code 0.tif} appended.
     */
    private static File tempImageFile(File imageFile) {
        // Work on the file name only, so a dot in a directory name is never
        // mistaken for the extension separator.
        String name = imageFile.getName();
        int dot = name.lastIndexOf('.');
        String baseName = dot >= 0 ? name.substring(0, dot) : name;
        return new File(imageFile.getParentFile(), baseName + "0.tif");
    }
}

0 0
原创粉丝点击