Windows下Java调用OCR进行图片识别
来源:互联网 发布:dr3插件mac安装 编辑:程序博客网 时间:2024/04/28 04:49
使用 Java 语言,通过调用 Tesseract-OCR 对图片中的文字进行识别。
1.Tesseract-OCR
下载 Windows 版本的 Tesseract-OCR 并安装(下面的示例代码假定安装目录为 C:\Program Files\Tesseract-OCR)。
2.程序如下:
a.ImageIOHelper类
package OCR;

import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import java.util.Locale;

import javax.imageio.IIOImage;
import javax.imageio.ImageIO;
import javax.imageio.ImageReader;
import javax.imageio.ImageWriteParam;
import javax.imageio.ImageWriter;
import javax.imageio.metadata.IIOMetadata;
import javax.imageio.stream.ImageInputStream;
import javax.imageio.stream.ImageOutputStream;

import com.sun.media.imageio.plugins.tiff.TIFFImageWriteParam;

/**
 * Converts an image file into an uncompressed TIFF copy stored next to the
 * original file.
 */
public class ImageIOHelper {

    /**
     * Converts an image file to TIFF format.
     *
     * <p>The first frame of {@code imageFile} is decoded with an ImageIO reader
     * registered for {@code imageFormat} and written, uncompressed, to a sibling
     * file whose name is the original base name followed by {@code 0.tif}
     * (for example {@code test.bmp} becomes {@code test0.tif}).
     *
     * @param imageFile   the image file to convert
     * @param imageFormat the ImageIO format name of {@code imageFile}, e.g. {@code "bmp"}
     * @return the TIFF file, or {@code null} when the conversion could not even be
     *         started (no reader/writer for the format, or the input could not be
     *         opened); the error is printed to standard error. If the failure happens
     *         while writing, the (possibly incomplete) target file is still returned.
     */
    public static File createImage(File imageFile, String imageFormat) {
        File tempFile = null;
        ImageReader reader = null;
        ImageWriter writer = null;
        ImageInputStream iis = null;
        ImageOutputStream ios = null;
        try {
            Iterator<ImageReader> readers = ImageIO.getImageReadersByFormatName(imageFormat);
            if (!readers.hasNext()) {
                throw new IOException("No image reader available for format: " + imageFormat);
            }
            reader = readers.next();

            iis = ImageIO.createImageInputStream(imageFile);
            if (iis == null) {
                throw new IOException("Cannot open image input stream for: " + imageFile);
            }
            reader.setInput(iis);

            // Read the stream metadata
            IIOMetadata streamMetadata = reader.getStreamMetadata();

            // Set up the write parameters: compression disabled
            TIFFImageWriteParam tiffWriteParam = new TIFFImageWriteParam(Locale.CHINESE);
            tiffWriteParam.setCompressionMode(ImageWriteParam.MODE_DISABLED);

            // Get the TIFF writer
            Iterator<ImageWriter> writers = ImageIO.getImageWritersByFormatName("tiff");
            if (!writers.hasNext()) {
                throw new IOException("No image writer available for format: tiff");
            }
            writer = writers.next();

            BufferedImage bi = reader.read(0);
            IIOImage image = new IIOImage(bi, null, reader.getImageMetadata(0));

            tempFile = tempImageFile(imageFile);
            ios = ImageIO.createImageOutputStream(tempFile);
            if (ios == null) {
                throw new IOException("Cannot open image output stream for: " + tempFile);
            }
            writer.setOutput(ios);
            writer.write(streamMetadata, image, tiffWriteParam);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Release streams and codecs on every path, not only on success.
            closeQuietly(ios);
            closeQuietly(iis);
            if (writer != null) {
                writer.dispose();
            }
            if (reader != null) {
                reader.dispose();
            }
        }
        return tempFile;
    }

    /**
     * Builds the path of the temporary TIFF file for {@code imageFile}: same
     * directory, base name followed by {@code 0}, extension {@code tif}.
     * A file name without an extension simply gets {@code 0.tif} appended.
     */
    private static File tempImageFile(File imageFile) {
        String name = imageFile.getName();
        int dot = name.lastIndexOf('.');
        String baseName = (dot >= 0) ? name.substring(0, dot) : name;
        return new File(imageFile.getParentFile(), baseName + "0.tif");
    }

    /** Closes an image stream, reporting (but not propagating) a close failure. */
    private static void closeQuietly(ImageInputStream stream) {
        if (stream == null) {
            return;
        }
        try {
            stream.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
b.OCR核心类
package OCR;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;

import org.jdesktop.swingx.util.OS;

/**
 * Runs the external {@code tesseract} command on an image and returns the
 * text it wrote.
 */
public class OCR {
    private final String LANG_OPTION = "-l"; // lowercase letter L, not the digit 1
    private final String EOL = System.getProperty("line.separator");
    private String tessPath = "C://Program Files//Tesseract-OCR";
    //private String tessPath = new File("tesseract").getAbsolutePath();

    /**
     * Recognizes the text contained in an image.
     *
     * <p>The image is first converted to a temporary TIFF file via
     * {@link ImageIOHelper#createImage(File, String)}; {@code tesseract} is then
     * started in the image's directory with the arguments
     * {@code <temp image name> output}, and the resulting {@code output.txt} is
     * read as UTF-8. The temporary image and {@code output.txt} are deleted
     * before returning.
     *
     * @param imageFile   the image to recognize
     * @param imageFormat the ImageIO format name of {@code imageFile}, e.g. {@code "bmp"}
     * @return the recognized text, each line terminated by the platform line
     *         separator; an empty string when tesseract exits with a non-zero
     *         code (a diagnostic is printed to standard error in that case)
     * @throws Exception if the image could not be converted, the process could
     *                   not be started, the wait was interrupted, or the result
     *                   file could not be read
     */
    public String recognizeText(File imageFile, String imageFormat) throws Exception {
        File tempImage = ImageIOHelper.createImage(imageFile, imageFormat);
        if (tempImage == null) {
            throw new IOException("Could not convert image to TIFF: " + imageFile);
        }
        File outputFile = new File(imageFile.getParentFile(), "output");
        File resultFile = new File(outputFile.getAbsolutePath() + ".txt");
        StringBuilder text = new StringBuilder();

        try {
            List<String> cmd = new ArrayList<String>();
            // Linux: rely on tesseract being on the PATH; otherwise use the install directory.
            if (OS.isLinux()) {
                cmd.add("tesseract");
            } else {
                cmd.add(tessPath + "//tesseract");
            }
            cmd.add(tempImage.getName());
            cmd.add(outputFile.getName());
            // To select a recognition language, append for example:
            //cmd.add(LANG_OPTION);
            //cmd.add("chi_sim");
            //cmd.add("eng");

            ProcessBuilder pb = new ProcessBuilder();
            pb.directory(imageFile.getParentFile());
            pb.command(cmd);
            pb.redirectErrorStream(true);
            // Resulting command line looks like: tesseract.exe 1.jpg 1 -l chi_sim
            Process process = pb.start();

            // Consume the merged stdout/stderr before waiting; an unread pipe can
            // fill up and block the child process forever.
            String console = drainOutput(process);
            int exitCode = process.waitFor();

            if (exitCode == 0) {
                BufferedReader in = new BufferedReader(
                        new InputStreamReader(new FileInputStream(resultFile), "UTF-8"));
                try {
                    String line;
                    while ((line = in.readLine()) != null) {
                        text.append(line).append(EOL);
                    }
                } finally {
                    in.close();
                }
            } else {
                String msg;
                switch (exitCode) {
                case 1:
                    msg = "Errors accessing files.There may be spaces in your image's filename.";
                    break;
                case 29:
                    msg = "Cannot recongnize the image or its selected region.";
                    break;
                case 31:
                    msg = "Unsupported image format.";
                    break;
                default:
                    msg = "Errors occurred.";
                }
                // Failure stays non-fatal (empty result), but is no longer silent.
                System.err.println("tesseract exited with code " + exitCode + ": " + msg);
                if (console.length() > 0) {
                    System.err.println(console);
                }
                //throw new RuntimeException(msg);
            }
        } finally {
            // Remove the temporary working files on every path.
            tempImage.delete();
            resultFile.delete();
        }
        return text.toString();
    }

    /** Reads the process's (merged) output to the end and returns it. */
    private String drainOutput(Process process) throws IOException {
        StringBuilder console = new StringBuilder();
        BufferedReader out = new BufferedReader(new InputStreamReader(process.getInputStream()));
        try {
            String line;
            while ((line = out.readLine()) != null) {
                console.append(line).append(EOL);
            }
        } finally {
            out.close();
        }
        return console.toString();
    }
}
c.main
package OCR;

import java.io.File;
import java.io.IOException;
import java.util.Locale;

/**
 * Command-line entry point that runs {@link OCR} on one image and prints the
 * recognized text.
 */
public class TestOcr {

    /**
     * Recognizes one image and prints the result to standard output.
     *
     * @param args optional; {@code args[0]} is the image path (default
     *             {@code d://test//test.bmp}) and {@code args[1]} is the image
     *             format name (default: the path's file extension, or
     *             {@code bmp} when it has none)
     */
    public static void main(String[] args) {
        // Image path: first argument, or the sample path when none is given.
        String path = args.length > 0 ? args[0] : "d://test//test.bmp";
        String format = args.length > 1 ? args[1] : extensionOf(path);
        try {
            String valCode = new OCR().recognizeText(new File(path), format);
            System.out.println(valCode);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Returns the lower-cased file extension of {@code path}, or {@code "bmp"} if it has none. */
    private static String extensionOf(String path) {
        String name = new File(path).getName();
        int dot = name.lastIndexOf('.');
        if (dot < 0 || dot == name.length() - 1) {
            return "bmp";
        }
        return name.substring(dot + 1).toLowerCase(Locale.ENGLISH);
    }
}
0 0
- Windows下Java调用OCR进行图片识别
- Java调用OCR进行图片识别
- java 调用tesseract-ocr识别图片
- C++调用Asprise OCR识别图片
- ocr识别图片文字,调用OneNote
- wxPython:调用OCR模块实现图片识别
- ubuntu 下安装 PyTesser 进行OCR识别
- java图片开源框架tesseract调用OCR实现图片文件识别代码下载
- linux系统如何使用tess4j(java)进行ocr图片文字识别
- OpenCV进行OCR识别
- C#OCR图片识别
- tesseract-ocr图片识别
- 屏幕图片OCR识别
- OCR图片识别
- ocr训练图片识别
- java调用tensorflow模型进行图片分类识别
- C++ 调用Asprise OCR识别图片中的文字
- ubuntu 12.04 下安装 PyTesser 进行OCR识别
- 计算机英语之分布式数据库系统
- windows 系统: 使用命令行启动服务
- android中如何禁止Scrollbar滑动操作?
- Ecilpse 常用快捷键、使用技巧
- 二叉树的遍历
- Windows下Java调用OCR进行图片识别
- 移动web开发经验总结
- 第四章作业
- uva 11752 - The Super Powers(数论+枚举技巧)
- 20140411
- 面试经典(13)--二叉树非递归遍历
- Ubuntu NFS 安装与配置
- 实时开发框架Meteor API解读系列<五>Session
- 下班后遇到ORA-00257archiver error. Connect internal only, until freed错误