解决一些网站文字不能复制的问题——Java应用

来源:互联网 发布:男人的例假 知乎 编辑:程序博客网 时间:2024/05/17 14:15
package we;


import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import java.util.regex.Matcher;
import java.util.regex.Pattern;


/**
 * Extracts the inner content of {@code <p>...</p>} elements from a saved HTML
 * page and writes each one to a text file, one paragraph per CRLF-terminated
 * line. Paragraphs that contain the markers {@code <p><img} or {@code <p><em>}
 * are skipped.
 *
 * <p>Matching is done line by line, so a paragraph whose opening and closing
 * tags are on different input lines is not extracted.
 */
public class Copy {

    /** Matches one paragraph element on a line; group 1 is its inner content. */
    private static final Pattern PARAGRAPH = Pattern.compile("<p.*?>(.*?)</p>");

    /** Marker for a paragraph that directly wraps an image. */
    private static final Pattern IMAGE_PARAGRAPH = Pattern.compile("<p><img");

    /** Marker for a paragraph that directly wraps an emphasis element. */
    private static final Pattern EMPHASIS_PARAGRAPH = Pattern.compile("<p><em>");

    /**
     * Reads {@code H:\333.htm} and writes the extracted paragraphs to
     * {@code 321.txt} (relative to the working directory) encoded as UTF-8.
     *
     * @param args ignored
     * @throws Exception if the source file cannot be opened or an I/O error
     *         occurs while reading or writing
     */
    public static void main(String[] args) throws Exception {
        File source = new File("H:\\333.htm");
        File desc = new File("321.txt");

        // NOTE(review): no charset is passed to InputStreamReader, so the page
        // is decoded with the platform default charset while the output is
        // always UTF-8. Confirm the encoding of the saved page and pass it
        // explicitly if it differs from the platform default.
        //
        // try-with-resources closes both streams even when reading or writing
        // fails part-way; closing the writer also flushes it.
        try (BufferedReader in = new BufferedReader(new InputStreamReader(
                     new FileInputStream(source)));
             BufferedWriter out = new BufferedWriter(new OutputStreamWriter(
                     new FileOutputStream(desc), StandardCharsets.UTF_8))) {
            extractParagraphs(in, out);
        }
    }

    /**
     * Copies the inner content of every paragraph element found in {@code in}
     * to {@code out}, each followed by {@code "\r\n"}.
     *
     * <p>The image/emphasis filter is applied to the text of each matched
     * paragraph individually, so one paragraph on a line cannot cause a
     * different paragraph on the same line to be skipped or kept.
     *
     * @param in  source of HTML text, consumed line by line until exhausted
     * @param out destination for the extracted text; not flushed or closed here
     * @throws IOException if reading from {@code in} or writing to {@code out} fails
     */
    static void extractParagraphs(BufferedReader in, BufferedWriter out) throws IOException {
        String line;
        while ((line = in.readLine()) != null) {
            Matcher paragraph = PARAGRAPH.matcher(line);
            while (paragraph.find()) {
                // Test only the text of the current match, not the whole line.
                String element = paragraph.group();
                if (IMAGE_PARAGRAPH.matcher(element).find()
                        || EMPHASIS_PARAGRAPH.matcher(element).find()) {
                    continue;
                }
                out.write(paragraph.group(1) + "\r\n");
            }
        }
    }
}
0 0
原创粉丝点击