Windows下Java调用OCR进行图片识别

来源:互联网 发布:dr3插件mac安装 编辑:程序博客网 时间:2024/04/28 04:49

使用Java语言,通过Tesseract-OCR对图片进行识别。

1.Tesseract-OCR

下载 Windows 版本并安装(下文 OCR 类默认的安装目录为 C:\Program Files\Tesseract-OCR,如安装到其他位置,需要相应修改 tessPath)。

2.程序如下:

a.ImageIOHelper类

package OCR;

import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import java.util.Locale;

import javax.imageio.IIOImage;
import javax.imageio.ImageIO;
import javax.imageio.ImageReader;
import javax.imageio.ImageWriteParam;
import javax.imageio.ImageWriter;
import javax.imageio.metadata.IIOMetadata;
import javax.imageio.stream.ImageInputStream;
import javax.imageio.stream.ImageOutputStream;

import com.sun.media.imageio.plugins.tiff.TIFFImageWriteParam;

/**
 * Converts an image file into an uncompressed TIFF file stored next to the
 * original, so that it can be handed to an external OCR program.
 */
public class ImageIOHelper {

    /**
     * Converts an image file to TIFF format.
     *
     * <p>The converted file is written next to the source file; its name is the
     * source name with {@code 0} appended to the base name and the extension
     * replaced by {@code tif} (for example {@code test.bmp} becomes
     * {@code test0.tif}).
     *
     * @param imageFile   the image file to convert
     * @param imageFormat the informal format name of the source image (for
     *                    example {@code "bmp"}), used to look up an image reader
     * @return the converted TIFF file, or {@code null} if an I/O error occurred
     *         (the stack trace is printed and any partial output is removed)
     * @throws IllegalArgumentException if no image reader is registered for
     *                                  {@code imageFormat}
     * @throws IllegalStateException    if no TIFF image writer is registered
     */
    public static File createImage(File imageFile, String imageFormat) {
        Iterator<ImageReader> readers = ImageIO.getImageReadersByFormatName(imageFormat);
        if (!readers.hasNext()) {
            throw new IllegalArgumentException(
                    "No image reader available for format: " + imageFormat);
        }
        Iterator<ImageWriter> writers = ImageIO.getImageWritersByFormatName("tiff");
        if (!writers.hasNext()) {
            throw new IllegalStateException("No TIFF image writer is registered");
        }

        ImageReader reader = readers.next();
        ImageWriter writer = writers.next();
        File tempFile = tempImageFile(imageFile);
        ImageInputStream iis = null;
        ImageOutputStream ios = null;
        boolean converted = false;
        try {
            iis = ImageIO.createImageInputStream(imageFile);
            if (iis == null) {
                // createImageInputStream reports "cannot open" by returning null.
                throw new IOException("Cannot open image for reading: " + imageFile);
            }
            reader.setInput(iis);

            // Read the stream metadata
            IIOMetadata streamMetadata = reader.getStreamMetadata();

            // Set up the write parameters: no compression
            TIFFImageWriteParam tiffWriteParam = new TIFFImageWriteParam(Locale.CHINESE);
            tiffWriteParam.setCompressionMode(ImageWriteParam.MODE_DISABLED);

            BufferedImage bi = reader.read(0);
            IIOImage image = new IIOImage(bi, null, reader.getImageMetadata(0));

            // Point the TIFF writer at the output file and write the image
            ios = ImageIO.createImageOutputStream(tempFile);
            if (ios == null) {
                throw new IOException("Cannot open file for writing: " + tempFile);
            }
            writer.setOutput(ios);
            writer.write(streamMetadata, image, tiffWriteParam);

            // Close here so that a failing close is reported as a failed conversion.
            ios.close();
            ios = null;
            converted = true;
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeQuietly(ios);
            closeQuietly(iis);
            writer.dispose();
            reader.dispose();
        }

        if (!converted) {
            // Do not hand a missing or half-written file back to the caller.
            tempFile.delete();
            return null;
        }
        return tempFile;
    }

    /**
     * Builds the name of the temporary TIFF file for the given image: same
     * directory, base name with {@code 0} appended, extension {@code tif}.
     * A name without an extension simply gets {@code 0.tif} appended.
     */
    private static File tempImageFile(File imageFile) {
        String name = imageFile.getName();
        int dot = name.lastIndexOf('.');
        String baseName = dot < 0 ? name : name.substring(0, dot);
        return new File(imageFile.getParentFile(), baseName + "0.tif");
    }

    /** Closes the stream if it is open; a failure here cannot be handled usefully. */
    private static void closeQuietly(ImageInputStream stream) {
        if (stream == null) {
            return;
        }
        try {
            stream.close();
        } catch (IOException ignored) {
            // Cleanup path only: the conversion result has already been decided.
        }
    }
}
b.OCR核心类


package OCR;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;

import org.jdesktop.swingx.util.OS;

/**
 * Runs the external Tesseract-OCR program on an image and returns the
 * recognized text.
 */
public class OCR {
    /** Tesseract's language switch: the lowercase letter "l", not the digit 1. */
    private final String LANG_OPTION = "-l";
    private final String EOL = System.getProperty("line.separator");
    /** Tesseract installation directory used on every platform except Linux. */
    private String tessPath = "C://Program Files//Tesseract-OCR";
    // Alternative: a "tesseract" directory relative to the working directory,
    // i.e. new File("tesseract").getAbsolutePath().

    /**
     * Recognizes the text contained in an image.
     *
     * <p>The image is first converted to a temporary TIFF file, then Tesseract
     * is run in the image's directory with {@code output} as its output base
     * name, and finally {@code output.txt} is read back as UTF-8. Both the
     * temporary TIFF and {@code output.txt} are deleted before returning.
     *
     * @param imageFile   the image to recognize
     * @param imageFormat the informal format name of the image, e.g. {@code "bmp"}
     * @return the recognized text, one line per recognized line, each terminated
     *         by the platform line separator; an empty string if Tesseract exits
     *         with a non-zero code (the reason is printed to standard error)
     * @throws Exception if the image cannot be converted, the process cannot be
     *                   started or is interrupted, or the result cannot be read
     */
    public String recognizeText(File imageFile, String imageFormat) throws Exception {
        File tempImage = ImageIOHelper.createImage(imageFile, imageFormat);
        if (tempImage == null) {
            throw new IOException("Could not convert image to TIFF: " + imageFile);
        }

        File workDir = imageFile.getParentFile();
        File outputFile = new File(workDir, "output");
        File resultFile = new File(outputFile.getAbsolutePath() + ".txt");
        StringBuffer strB = new StringBuffer();
        try {
            // Equivalent command line: tesseract <image>0.tif output
            List<String> cmd = new ArrayList<String>();
            if (OS.isLinux()) {
                // On Linux the executable is expected to be on the PATH.
                cmd.add("tesseract");
            } else {
                cmd.add(tessPath + "//tesseract");
            }
            cmd.add(tempImage.getName());
            cmd.add(outputFile.getName());
            // To choose a language, append LANG_OPTION followed by the language
            // name, e.g. "chi_sim" or "eng".

            ProcessBuilder pb = new ProcessBuilder(cmd);
            pb.directory(workDir);
            pb.redirectErrorStream(true);
            Process process = pb.start();

            // The merged stdout/stderr must be consumed, otherwise the child can
            // block on a full pipe and waitFor() would never return.
            drain(process.getInputStream());
            int w = process.waitFor();

            if (w == 0) {
                BufferedReader in = new BufferedReader(
                        new InputStreamReader(new FileInputStream(resultFile), "UTF-8"));
                try {
                    String str;
                    while ((str = in.readLine()) != null) {
                        strB.append(str).append(EOL);
                    }
                } finally {
                    in.close();
                }
            } else {
                // Best effort: report the reason but still return an empty result.
                System.err.println(describeExitCode(w));
            }
        } finally {
            // Remove the temporary working files whatever happened above.
            tempImage.delete();
            resultFile.delete();
        }
        return strB.toString();
    }

    /** Reads and discards everything from the stream, then closes it. */
    private static void drain(InputStream stream) throws IOException {
        try {
            byte[] buffer = new byte[4096];
            while (stream.read(buffer) != -1) {
                // Output is intentionally discarded.
            }
        } finally {
            stream.close();
        }
    }

    /** Maps a non-zero Tesseract exit code to a human-readable message. */
    private static String describeExitCode(int exitCode) {
        switch (exitCode) {
        case 1:
            return "Errors accessing files.There may be spaces in your image's filename.";
        case 29:
            return "Cannot recongnize the image or its selected region.";
        case 31:
            return "Unsupported image format.";
        default:
            return "Errors occurred.";
        }
    }
}

c.测试类(main 方法)

package OCR;

import java.io.File;
import java.io.IOException;

/**
 * Command-line entry point that runs OCR on one image and prints the
 * recognized text.
 */
public class TestOcr {

    /**
     * Recognizes the text of an image and prints it to standard output.
     *
     * @param args optional arguments: {@code args[0]} is the image path
     *             (default {@code d://test//test.bmp}) and {@code args[1]} is
     *             the image format name (default {@code bmp})
     */
    public static void main(String[] args) {
        // Path of the input image
        String path = args.length > 0 ? args[0] : "d://test//test.bmp";
        String format = args.length > 1 ? args[1] : "bmp";
        try {
            String valCode = new OCR().recognizeText(new File(path), format);
            System.out.println(valCode);
        } catch (IOException e) {
            System.err.println("I/O failure while running OCR on " + path);
            e.printStackTrace();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}


0 0