综合运用httpClient和Swt Browser实现网页抓取,修改和显示

来源:互联网 发布:linux工程师 编辑:程序博客网 时间:2024/05/21 03:58

httpClient是apache的开源框架,封装了http协议,实现了对http访问的强大管理。

SWT Browser 是 Java 下一个对 JavaScript 支持比较好的浏览器控件,可用它在 applet 中显示网页。

 

以下为源代码

MyBrowser.java: 用 Canvas、Display、Shell、Browser 在 applet 中显示页面。

 

import java.awt.BorderLayout;import java.awt.Canvas; import javax.swing.JButton;import javax.swing.JFrame;import javax.swing.JPanel;import javax.swing.JTextField; import org.eclipse.swt.SWT;import org.eclipse.swt.awt.SWT_AWT;import org.eclipse.swt.browser.Browser;import org.eclipse.swt.layout.FillLayout;import org.eclipse.swt.widgets.Display;import org.eclipse.swt.widgets.Shell; public class MyBrowser extends JFrame {  public static final int BOARD_WIDTH = 600; public static final int BOARD_HEIGHT = 500; public static final int LOCATION_X = 100; public static final int LOCATION_Y = 50;  JTextField txtField = new JTextField(30);  JButton startButton = new JButton("Search"); JButton exitButton = new JButton("Exit");  public Shell shell; public Browser browser; public Display display; public Canvas canvas;  public MyFrame frame;  public void init(){  System.setProperty("sun.awt.xembedserver", "true");  display = Display.getDefault();    canvas = new Canvas();     frame = new MyFrame("BrowserListener");     frame.init(this);       frame.add(canvas,BorderLayout.CENTER);       frame.pack();              shell = SWT_AWT.new_Shell(display, canvas);     shell.setLayout(new FillLayout(SWT.DOWN));     browser = new Browser(shell, SWT.EMBEDDED);      //browser.setUrl("www.google.com");    String html = "<html><head>"+     "<base href=/"http://www.eclipse.org/swt//" >"+     "<title>HTML Test</title></head>"+     "<body><a href=/"faq.php/">local link</a></body></html>";      browser.setText(html);     browser.setVisible(true);        shell.open();      frame.setSize(800, 600);         frame.setVisible(true);        while (!shell.isDisposed()) {          if (!display.readAndDispatch())            display.sleep();        }     display.dispose();   }  public void run(final String script){     String html = "<html><head>"+     "<base href=/"http://www.eclipse.org/swt//" >"+     "<title>HTML Test</title></head>"+     "<body><a href=/"faq.php/">local link</a></body></html>";    
this.display.asyncExec(new Runnable(){     public void run(){      //browser.setUrl("www.google.com");      browser.setText(script);     }    });    }    public static void main(String[] args) {  MyBrowser myBrowser = new MyBrowser();  myBrowser.init(); } }

 

MyFrame.java : Frame 类

 

import java.awt.BorderLayout;import java.awt.Frame;import java.awt.event.ActionEvent;import java.awt.event.ActionListener;import java.io.IOException; import javax.swing.JButton;import javax.swing.JFrame;import javax.swing.JPanel;import javax.swing.JTextField; import org.apache.http.client.ClientProtocolException; public class MyFrame extends JFrame implements ActionListener{    JTextField txtField = new JTextField(30);  JButton startButton = new JButton("Search"); //JButton exitButton = new JButton("Exit");  SearchEngine searchEngine;  public MyBrowser myBrowser;  public MyFrame(String title){  super(title);     }  public MyFrame(){  super();   }   public void init(MyBrowser myBrowser){    this.myBrowser = myBrowser;     searchEngine = new SearchEngine();    JPanel northPanel = new JPanel();  northPanel.add(txtField);  northPanel.add(startButton);  //northPanel.add(exitButton);  this.add(northPanel,BorderLayout.NORTH);    startButton.addActionListener(this);  //exitButton.addActionListener(this);     setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE); }  public void actionPerformed(ActionEvent e){  if(e.getSource()==startButton){   String text = txtField.getText();   String responseBody;   try {    responseBody = searchEngine.search(text);    myBrowser.run(responseBody);   } catch (ClientProtocolException e1) {    // TODO Auto-generated catch block    e1.printStackTrace();   } catch (IOException e1) {    // TODO Auto-generated catch block    e1.printStackTrace();   }        }   }}


 


    

SearchEngine.java: 搜索类,实现自动搜索

 


  

import java.io.IOException;import java.io.InputStream;import java.net.URLEncoder; import org.apache.http.client.ClientProtocolException;import org.apache.http.client.ResponseHandler;import org.apache.http.client.methods.HttpGet;import org.apache.http.impl.client.BasicResponseHandler;import org.apache.http.impl.client.DefaultHttpClient;   public class SearchEngine { public static String UrlString = "http://hk.rd.yahoo.com/homeb/search/t1/*-http://hk.search.yahoo.com/search?"; public static int Port = 80;   public String search(String text) throws ClientProtocolException, IOException{    DefaultHttpClient httpClient = new DefaultHttpClient();   String queryString = "";  text = URLEncoder.encode(text, "UTF-8");   queryString = queryString + "p=" + text + "&fr=FP-tab-web-t&ei=UTF-8&meta=rst%3Dhk";       String url = UrlString + queryString;    HttpGet req = new HttpGet(url);   ResponseHandler<String> responseHandler = new BasicResponseHandler();  String responseBody = httpClient.execute(req, responseHandler);  //System.out.println(responseBody);  /*  StringBuilder buffer = new StringBuilder(responseBody);  int index1 = 0;  int index2 = 0;  while((index1 = buffer.indexOf("<script>"))>0){   index2 = buffer.indexOf("</script>");   if(index2>0){    index2 = index2 + 9;    buffer.delete(index1, index2);       }  }     responseBody = buffer.toString();     System.out.println(responseBody);  */    return responseBody;           }  }



  
  

 

原创粉丝点击