Java 正则表达式抽取 HTML 数据

来源:互联网 发布:航天金税盘开票软件 编辑:程序博客网 时间:2024/06/04 18:27
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Downloads a search result page and prints the stock quote figures (current price, change,
 * opening price, high, low) that a regular expression finds in its HTML.
 *
 * <p>The extraction is tied to the exact inline {@code style} attributes of the page markup; if
 * the markup changes, the pattern simply stops matching and nothing is printed.
 */
public class PersonalData {

    /** Page that {@link #main} downloads and scans. */
    private static final String QUOTE_URL = "http://www.baidu.com/s?wd=000897";

    /**
     * Pattern for one quote block. Capture groups used by {@link #extractQuoteLines}:
     * 1 = current price, 3 = change, 6 = opening price, 8 = high, 11 = low.
     *
     * <p>The colour values are matched with {@code [^"]*} rather than a greedy {@code .*} so that
     * they cannot run past the end of their own {@code style} attribute. With a greedy
     * {@code .*} the first match would stretch to the last quote block in the page and report its
     * values instead of the first block's.
     */
    private static final Pattern QUOTE_PATTERN = Pattern.compile(
            "<b style=\"font-size: 1.4em; text-align:center;color:#[^\"]*;\">(.*?)</b>(.*?)"
            + "<b style=\"font-size: 1.1em; color:#[^\"]*;\">(.*?)</b>(.*?)"
            + "开盘:</td> <td style=\"(.*?)\">(.*?)</td>(.*?)"
            + "<td style=\"  color:#[^\"]*;width:91px;\">(.*?)</td>(.*?)"
            + "<td style=\"  color:(.*?);width:91px;\">(.*?)</td>");

    /** Finds the {@code charset=...} parameter of a Content-Type header value. */
    private static final Pattern CHARSET_PARAM =
            Pattern.compile("charset\\s*=\\s*\"?([^\\s;\"]+)", Pattern.CASE_INSENSITIVE);

    /**
     * Fetches {@link #QUOTE_URL} and prints every extracted quote line to standard output.
     *
     * @param args unused
     * @throws Exception if the page cannot be downloaded
     */
    public static void main(String[] args) throws Exception {
        String html = fetchPage(QUOTE_URL);
        for (String line : extractQuoteLines(html)) {
            System.out.println(line);
        }
    }

    /**
     * Downloads a page and returns its text with all lines joined together.
     *
     * <p>Lines are concatenated without any separator, so line breaks in the response never end
     * up in the returned text (the quote pattern's {@code .} would not match across them).
     *
     * @param address URL of the page to download
     * @return the page content as a single line of text
     * @throws IOException if connecting or reading fails
     */
    private static String fetchPage(String address) throws IOException {
        URLConnection connection = new URL(address).openConnection();
        StringBuilder content = new StringBuilder();
        // try-with-resources closes the reader even when readLine() throws.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(connection.getInputStream(), responseCharset(connection)))) {
            String line;
            while ((line = reader.readLine()) != null) {
                content.append(line);
            }
        }
        return content.toString();
    }

    /**
     * Picks the charset used to decode the response: the one named in the Content-Type header when
     * present and supported, otherwise UTF-8. Decoding with the platform default charset could
     * garble the non-ASCII marker text that the quote pattern relies on.
     *
     * @param connection connection whose Content-Type header is inspected
     * @return the charset to decode the response body with
     */
    private static Charset responseCharset(URLConnection connection) {
        String contentType = connection.getContentType();
        if (contentType != null) {
            Matcher charsetMatcher = CHARSET_PARAM.matcher(contentType);
            if (charsetMatcher.find()) {
                try {
                    return Charset.forName(charsetMatcher.group(1));
                } catch (IllegalArgumentException ignored) {
                    // Unknown or malformed charset name: fall through to the UTF-8 default.
                }
            }
        }
        return StandardCharsets.UTF_8;
    }

    /**
     * Extracts the quote figures from page HTML.
     *
     * @param html page content, expected to contain no line breaks inside a quote block
     * @return five labelled lines (current price, change, opening price, high, low) per quote
     *     block found, in document order; empty if the pattern does not match
     */
    static List<String> extractQuoteLines(String html) {
        List<String> lines = new ArrayList<>();
        Matcher matcher = QUOTE_PATTERN.matcher(html);
        while (matcher.find()) {
            lines.add("现价:" + matcher.group(1).trim());
            lines.add("幅度:" + matcher.group(3).trim());
            lines.add("开盘价:" + matcher.group(6).trim());
            lines.add("最高:" + matcher.group(8).trim());
            lines.add("最低:" + matcher.group(11).trim());
        }
        return lines;
    }
}
0 0