Java 读取 TXT 文件的方法,以及检索其中的 <a> 和 [url] 链接

来源:互联网 发布:起点网络交易怎么下单 编辑:程序博客网 时间:2024/06/13 02:35
import java.util.*;
import java.io.*;
import java.util.regex.*;

/**
 * Reads the GBK-encoded text file {@code getUrl.txt} from the working directory and prints
 * every HTML anchor element ({@code <a ...>...</a>}) and every BBCode link
 * ({@code [url...]...[/url]}) found in it, one match per output line.
 */
public class AddressDectect {

    /** Name of the input file, resolved against the current working directory. */
    private static final String INPUT_FILE = "getUrl.txt";

    /** Character encoding used to decode the input file. */
    private static final String ENCODING = "GBK";

    /**
     * Matches a complete anchor element or a complete BBCode url element.
     *
     * <p>The word boundary after the tag name keeps tags that merely start with the same
     * letters (for example {@code <abbr>} or {@code <article>}) from opening a match.
     * Whitespace is tolerated around the tag name and the closing slash, and matching is
     * case-insensitive so that {@code <A HREF=...>} and {@code [URL]} are found as well.
     * Compiled once because the expression never changes.
     */
    private static final Pattern LINK_PATTERN = Pattern.compile(
            "(<\\s*a\\b[^>]*>.*?<\\s*/\\s*a\\s*>)"
                    + "|(\\[\\s*url\\b[^\\]]*\\].*?\\[\\s*/\\s*url\\s*\\])",
            Pattern.CASE_INSENSITIVE);

    /**
     * Program entry point: loads the input file and prints each link found in it.
     *
     * @param args command-line arguments; not used
     * @throws IOException kept in the signature for compatibility with existing callers;
     *         read failures are reported on the console instead of being propagated
     */
    public static void main(String[] args) throws IOException {
        String content = readContent(new File(INPUT_FILE));
        for (String link : findLinks(content)) {
            System.out.println(link);
        }
    }

    /**
     * Reads the whole file into one string, joining its lines with a single space.
     *
     * <p>The space stands in for the line break so that a link spanning several lines can
     * still be matched as a whole, without fusing the tokens on either side of the break
     * (for example {@code <a} at the end of one line and {@code href=...} on the next).
     *
     * @param file the file to read
     * @return the text read so far; empty when the file is missing, partial when reading
     *         fails midway
     */
    private static String readContent(File file) {
        StringBuilder content = new StringBuilder();
        if (!file.isFile()) {
            // Message text: "the specified file could not be found".
            System.out.println("找不到指定的文件");
            return content.toString();
        }
        // Each stream is declared as its own resource so the file handle is released even
        // when the charset lookup or a later read throws.
        try (InputStream in = new FileInputStream(file);
                Reader decoder = new InputStreamReader(in, ENCODING);
                BufferedReader reader = new BufferedReader(decoder)) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (content.length() > 0) {
                    content.append(' ');
                }
                content.append(line);
            }
        } catch (IOException e) {
            // Message text: "error while reading the file content".
            System.out.println("读取文件内容出错");
            e.printStackTrace();
        }
        return content.toString();
    }

    /**
     * Collects every anchor element and BBCode url element contained in {@code text}.
     *
     * @param text the text to scan
     * @return the matched elements in order of appearance; empty when nothing matches
     */
    private static List<String> findLinks(CharSequence text) {
        List<String> links = new ArrayList<String>();
        Matcher matcher = LINK_PATTERN.matcher(text);
        while (matcher.find()) {
            links.add(matcher.group());
        }
        return links;
    }
}
目前还只是检索 <a> 和 [url] 这两种链接标记。
0 0