调用ocr解析图片内容
来源:互联网 发布:xcode写c语言 编辑:程序博客网 时间:2024/06/06 05:51
package net.wocai.tools.spider;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import org.jdesktop.swingx.util.OS;
public class OCR {
private final String LANG_OPTION = "-l"; //英文字母小写l,并非数字1
private final String EOL = System.getProperty("line.separator");
private String tessPath = "D://orc//Tesseract-OCR";
//private String tessPath = new File("tesseract").getAbsolutePath();
public String recognizeText(File imageFile)throws Exception{
// File tempImage = ImageIoHelper.createImage(imageFile,imageFormat);
File outputFile = new File(imageFile.getParentFile(),"output");
// String s=imageFile.getParentFile().getParentFile().toString();
//System.out.println(imageFile.getParentFile().getParentFile());
StringBuffer strB = new StringBuffer();
List cmd = new ArrayList();
if(OS.isWindowsXP()){
cmd.add(tessPath+"//tesseract");
}else if(OS.isLinux()){
cmd.add("tesseract");
}else{
cmd.add(tessPath+"//tesseract");
}
cmd.add("");
cmd.add(outputFile.getName());
cmd.add(LANG_OPTION);
// cmd.add("chi_sim");
cmd.add("eng");
// cmd.add("E:");
// cmd.add(LANG_OPTION);
// cmd.add("tesseract"+" "+imageFile+" "+"output");
// cmd.add("eng");
ProcessBuilder pb = new ProcessBuilder();
pb.directory(imageFile.getParentFile());
cmd.set(1, imageFile.getName());
pb.command(cmd);
pb.redirectErrorStream(true);
Process process = pb.start();
//tesseract.exe 1.jpg 1 -l chi_sim
int w = process.waitFor();
//删除临时正在工作文件
// tempImage.delete();
if(w==0){
BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(outputFile.getAbsolutePath()+".txt"),"UTF-8"));
String str;
while((str = in.readLine())!=null){
strB.append(str).append(EOL);
}
in.close();
}else{
String msg;
switch(w){
case 1:
msg = "Errors accessing files.There may be spaces in your image's filename.";
break;
case 29:
msg = "Cannot recongnize the image or its selected region.";
break;
case 31:
msg = "Unsupported image format.";
break;
default:
msg = "Errors occurred.";
}
//tempImage.delete();
throw new RuntimeException(msg);
}
new File(outputFile.getAbsolutePath()+".txt").delete();
// System.out.println(strB.toString());
return strB.toString();
}
}
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import org.jdesktop.swingx.util.OS;
public class OCR {
private final String LANG_OPTION = "-l"; //英文字母小写l,并非数字1
private final String EOL = System.getProperty("line.separator");
private String tessPath = "D://orc//Tesseract-OCR";
//private String tessPath = new File("tesseract").getAbsolutePath();
public String recognizeText(File imageFile)throws Exception{
// File tempImage = ImageIoHelper.createImage(imageFile,imageFormat);
File outputFile = new File(imageFile.getParentFile(),"output");
// String s=imageFile.getParentFile().getParentFile().toString();
//System.out.println(imageFile.getParentFile().getParentFile());
StringBuffer strB = new StringBuffer();
List cmd = new ArrayList();
if(OS.isWindowsXP()){
cmd.add(tessPath+"//tesseract");
}else if(OS.isLinux()){
cmd.add("tesseract");
}else{
cmd.add(tessPath+"//tesseract");
}
cmd.add("");
cmd.add(outputFile.getName());
cmd.add(LANG_OPTION);
// cmd.add("chi_sim");
cmd.add("eng");
// cmd.add("E:");
// cmd.add(LANG_OPTION);
// cmd.add("tesseract"+" "+imageFile+" "+"output");
// cmd.add("eng");
ProcessBuilder pb = new ProcessBuilder();
pb.directory(imageFile.getParentFile());
cmd.set(1, imageFile.getName());
pb.command(cmd);
pb.redirectErrorStream(true);
Process process = pb.start();
//tesseract.exe 1.jpg 1 -l chi_sim
int w = process.waitFor();
//删除临时正在工作文件
// tempImage.delete();
if(w==0){
BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(outputFile.getAbsolutePath()+".txt"),"UTF-8"));
String str;
while((str = in.readLine())!=null){
strB.append(str).append(EOL);
}
in.close();
}else{
String msg;
switch(w){
case 1:
msg = "Errors accessing files.There may be spaces in your image's filename.";
break;
case 29:
msg = "Cannot recongnize the image or its selected region.";
break;
case 31:
msg = "Unsupported image format.";
break;
default:
msg = "Errors occurred.";
}
//tempImage.delete();
throw new RuntimeException(msg);
}
new File(outputFile.getAbsolutePath()+".txt").delete();
// System.out.println(strB.toString());
return strB.toString();
}
}
0 0
- 调用ocr解析图片内容
- Java调用OCR进行图片识别
- C++调用Asprise OCR识别图片
- java 调用tesseract-ocr识别图片
- ocr识别图片文字,调用OneNote
- wxPython:调用OCR模块实现图片识别
- Windows下Java调用OCR进行图片识别
- C++ 调用Asprise OCR识别图片中的文字
- java图片开源框架tesseract调用OCR实现图片文件识别代码下载
- 百度OCR的调用
- ocr图片转文字
- C#OCR图片识别
- tesseract-ocr图片识别
- 屏幕图片OCR识别
- OCR图片识别
- ocr训练图片识别
- 解析验证码OCR
- 内容解析者调用提供者套路
- mysql数据库备份--java代码
- Uva - 489 - Hangman Judge
- windows下cygwin/mingw编译ffmpeg库
- Myeclipse注释模板设置的方法
- C++const成员规则
- 调用ocr解析图片内容
- Spring MVC中使用jackson的MixInAnnotations方法动态过滤JSON字段
- spring事务配置方法(BeanNameAutoProxyCreator)
- UVA11388:GCD LCM(已知GCD和lLCM求a,b)
- za压缩图片的几种方法
- ShowWindow不起作用
- 数的计算
- Jquery使用Ajax构建方法返回值
- 进程间通信---------有名管道(named pipe/FIFO)