Java 解析 HTML 示例

来源:互联网 发布:腾讯视频for mac官网 编辑:程序博客网 时间:2024/04/30 22:10

import java.io.*;
import java.util.*;
import javax.swing.text.*;
import javax.swing.text.html.*;
import javax.swing.text.html.parser.*;
import javax.swing.text.html.HTMLEditorKit.ParserCallback;

/**
 * Example HTML scanner built on the Swing HTML parser.
 *
 * <p>The parser drives the callback methods below. Text found between a {@code <p>} start tag
 * and its end tag, and the {@code src} attribute of every {@code <img>} tag, are appended in
 * document order to the shared {@link #element} list. Paragraph text is additionally accumulated
 * in {@link #paragraphText}, one paragraph per line.
 *
 * <p>Results live in static fields, so the output of separate parses (and of separate
 * {@code Parser} instances) is mixed together.
 */
public class Parser extends ParserCallback {
    /** Not read or written by this class. */
    protected String base;
    /** Set once an {@code <img>} tag carrying a {@code src} attribute has been reported. */
    protected boolean isImg = false;
    /** True while the parser is between a {@code <p>} start tag and the matching end tag. */
    protected boolean isParagraph = false;
    /** Paragraph text fragments and image sources, in the order the parser reported them. */
    protected static Vector<String> element = new Vector<String>();
    /** Text of all paragraphs seen so far; each finished paragraph ends with a newline. */
    protected static String paragraphText = "";

    public Parser() {
    }

    /**
     * Returns the paragraph text collected so far by all parses.
     *
     * @return the accumulated paragraph text, one paragraph per line; empty if none was seen
     */
    public static String getParagraphText() {
        return paragraphText;
    }

    /** Comments are ignored. */
    @Override
    public void handleComment(char[] data, int pos) {
    }

    /**
     * Leaves paragraph (or image) state when the corresponding end tag is reported.
     *
     * @param t   the tag being closed
     * @param pos position reported by the parser (unused)
     */
    @Override
    public void handleEndTag(HTML.Tag t, int pos) {
        if (t == HTML.Tag.P) {
            // Terminate the paragraph's line, but do not emit blank lines for paragraphs
            // that contributed no text (e.g. one that only wraps an image).
            if (isParagraph
                    && paragraphText != null
                    && paragraphText.length() > 0
                    && !paragraphText.endsWith("\n")) {
                paragraphText += "\n";
            }
            isParagraph = false;
        } else if (t == HTML.Tag.IMG) {
            // NOTE(review): <img> is typically reported via handleSimpleTag, which has no
            // end-tag callback, so this branch is presumably rarely reached.
            isImg = false;
        }
    }

    /** Parse errors are ignored. */
    @Override
    public void handleError(String errorMsg, int pos) {
    }

    /**
     * Treats a simple (empty) tag exactly like a start tag, so that tags such as
     * {@code <img>} go through the same handling as {@link #handleStartTag}.
     */
    @Override
    public void handleSimpleTag(HTML.Tag t, MutableAttributeSet a, int pos) {
        handleStartTag(t, a, pos);
    }

    /**
     * Enters paragraph state on {@code <p>}; records the {@code src} of an {@code <img>}.
     *
     * @param t   the tag being opened
     * @param a   the attributes of the tag
     * @param pos position reported by the parser (unused)
     */
    @Override
    public void handleStartTag(HTML.Tag t, MutableAttributeSet a, int pos) {
        if (t == HTML.Tag.P) {
            isParagraph = true;
        } else if (t == HTML.Tag.IMG) {
            // Avoid a blind cast: the attribute set stores values as Object.
            Object src = a.getAttribute(HTML.Attribute.SRC);
            if (src != null) {
                element.addElement(src.toString());
                isImg = true;
            }
        }
    }

    /**
     * Collects text that appears inside a paragraph; text outside paragraphs is ignored.
     *
     * @param data the characters of the text run
     * @param pos  position reported by the parser (unused)
     */
    @Override
    public void handleText(char[] data, int pos) {
        if (!isParagraph || data == null) {
            return;
        }
        String fragment = new String(data);
        element.addElement(fragment);
        appendParagraphText(fragment);
    }

    /** Appends a fragment to {@link #paragraphText}, tolerating a field reset to null. */
    private static void appendParagraphText(String fragment) {
        paragraphText = (paragraphText == null) ? fragment : paragraphText + fragment;
    }

    /**
     * Parses the given HTML with a fresh {@code Parser} callback and prints every collected
     * entry of {@link #element} to standard output, each preceded by a separator line.
     *
     * @param sHtml the HTML document text
     */
    private static void startParse(String sHtml) {
        try {
            // ParserDelegator starts a new DocumentParser on every call to parse().
            ParserDelegator delegator = new ParserDelegator();
            // The parse results drive the callback methods of this class.
            HTMLEditorKit.ParserCallback callback = new Parser();
            // Parse the stream and feed the results to the callback (charset directives ignored).
            delegator.parse(new StringReader(sHtml), callback, true);

            for (String item : element) {
                System.out.println("----haha-----");
                System.out.println(item);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Reads everything the reader provides, however long, into a single string. */
    private static String readAll(Reader reader) throws IOException {
        StringBuilder content = new StringBuilder();
        char[] buffer = new char[8192];
        int count;
        // A single read() may return fewer characters than requested, so loop until EOF
        // and append only the characters actually read.
        while ((count = reader.read(buffer)) != -1) {
            content.append(buffer, 0, count);
        }
        return content.toString();
    }

    /**
     * Parses an HTML file and prints its paragraph text and image sources.
     *
     * @param args optional: {@code args[0]} is the file to parse; when absent the original
     *             hard-coded path {@code D://blogbaby.htm} is used
     */
    public static void main(String args[]) {
        String filename = (args != null && args.length > 0) ? args[0] : "D://blogbaby.htm";
        // FileReader decodes with the platform default charset, as the original code did.
        try (BufferedReader reader = new BufferedReader(new FileReader(filename))) {
            startParse(readAll(reader));
        } catch (Exception e) {
            // Top-level boundary of the example program: report and exit normally.
            e.printStackTrace();
        }
    }

}

原创粉丝点击