综合运用httpClient和Swt Browser实现网页抓取,修改和显示
来源:互联网 发布:linux工程师 编辑:程序博客网 时间:2024/05/21 03:58
httpClient是apache的开源框架,封装了http协议,实现了对http访问的强大管理。
Swt Browser是java下一个对javascript支持比较好的模拟浏览器,可用它在applet中显示网页。
以下为源代码
MyBrowser.java: 用 Canvas、Display、Shell、Browser 在 applet 中显示页面。
import java.awt.BorderLayout;import java.awt.Canvas; import javax.swing.JButton;import javax.swing.JFrame;import javax.swing.JPanel;import javax.swing.JTextField; import org.eclipse.swt.SWT;import org.eclipse.swt.awt.SWT_AWT;import org.eclipse.swt.browser.Browser;import org.eclipse.swt.layout.FillLayout;import org.eclipse.swt.widgets.Display;import org.eclipse.swt.widgets.Shell; public class MyBrowser extends JFrame { public static final int BOARD_WIDTH = 600; public static final int BOARD_HEIGHT = 500; public static final int LOCATION_X = 100; public static final int LOCATION_Y = 50; JTextField txtField = new JTextField(30); JButton startButton = new JButton("Search"); JButton exitButton = new JButton("Exit"); public Shell shell; public Browser browser; public Display display; public Canvas canvas; public MyFrame frame; public void init(){ System.setProperty("sun.awt.xembedserver", "true"); display = Display.getDefault(); canvas = new Canvas(); frame = new MyFrame("BrowserListener"); frame.init(this); frame.add(canvas,BorderLayout.CENTER); frame.pack(); shell = SWT_AWT.new_Shell(display, canvas); shell.setLayout(new FillLayout(SWT.DOWN)); browser = new Browser(shell, SWT.EMBEDDED); //browser.setUrl("www.google.com"); String html = "<html><head>"+ "<base href=/"http://www.eclipse.org/swt//" >"+ "<title>HTML Test</title></head>"+ "<body><a href=/"faq.php/">local link</a></body></html>"; browser.setText(html); browser.setVisible(true); shell.open(); frame.setSize(800, 600); frame.setVisible(true); while (!shell.isDisposed()) { if (!display.readAndDispatch()) display.sleep(); } display.dispose(); } public void run(final String script){ String html = "<html><head>"+ "<base href=/"http://www.eclipse.org/swt//" >"+ "<title>HTML Test</title></head>"+ "<body><a href=/"faq.php/">local link</a></body></html>"; this.display.asyncExec(new Runnable(){ public void run(){ //browser.setUrl("www.google.com"); browser.setText(script); } }); } public static void main(String[] args) { 
MyBrowser myBrowser = new MyBrowser(); myBrowser.init(); } }
MyFrame.java : Frame 类
import java.awt.BorderLayout;import java.awt.Frame;import java.awt.event.ActionEvent;import java.awt.event.ActionListener;import java.io.IOException; import javax.swing.JButton;import javax.swing.JFrame;import javax.swing.JPanel;import javax.swing.JTextField; import org.apache.http.client.ClientProtocolException; public class MyFrame extends JFrame implements ActionListener{ JTextField txtField = new JTextField(30); JButton startButton = new JButton("Search"); //JButton exitButton = new JButton("Exit"); SearchEngine searchEngine; public MyBrowser myBrowser; public MyFrame(String title){ super(title); } public MyFrame(){ super(); } public void init(MyBrowser myBrowser){ this.myBrowser = myBrowser; searchEngine = new SearchEngine(); JPanel northPanel = new JPanel(); northPanel.add(txtField); northPanel.add(startButton); //northPanel.add(exitButton); this.add(northPanel,BorderLayout.NORTH); startButton.addActionListener(this); //exitButton.addActionListener(this); setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE); } public void actionPerformed(ActionEvent e){ if(e.getSource()==startButton){ String text = txtField.getText(); String responseBody; try { responseBody = searchEngine.search(text); myBrowser.run(responseBody); } catch (ClientProtocolException e1) { // TODO Auto-generated catch block e1.printStackTrace(); } catch (IOException e1) { // TODO Auto-generated catch block e1.printStackTrace(); } } }}
SearchEngine.java: 搜索类,实现自动搜索
import java.io.IOException;import java.io.InputStream;import java.net.URLEncoder; import org.apache.http.client.ClientProtocolException;import org.apache.http.client.ResponseHandler;import org.apache.http.client.methods.HttpGet;import org.apache.http.impl.client.BasicResponseHandler;import org.apache.http.impl.client.DefaultHttpClient; public class SearchEngine { public static String UrlString = "http://hk.rd.yahoo.com/homeb/search/t1/*-http://hk.search.yahoo.com/search?"; public static int Port = 80; public String search(String text) throws ClientProtocolException, IOException{ DefaultHttpClient httpClient = new DefaultHttpClient(); String queryString = ""; text = URLEncoder.encode(text, "UTF-8"); queryString = queryString + "p=" + text + "&fr=FP-tab-web-t&ei=UTF-8&meta=rst%3Dhk"; String url = UrlString + queryString; HttpGet req = new HttpGet(url); ResponseHandler<String> responseHandler = new BasicResponseHandler(); String responseBody = httpClient.execute(req, responseHandler); //System.out.println(responseBody); /* StringBuilder buffer = new StringBuilder(responseBody); int index1 = 0; int index2 = 0; while((index1 = buffer.indexOf("<script>"))>0){ index2 = buffer.indexOf("</script>"); if(index2>0){ index2 = index2 + 9; buffer.delete(index1, index2); } } responseBody = buffer.toString(); System.out.println(responseBody); */ return responseBody; } }
- 综合运用httpClient和Swt Browser实现网页抓取,修改和显示
- HttpClient+jsoup实现网页数据抓取和处理
- 利用URL和httpclient抓取网页内容
- java学习-GET方式抓取网页(UrlConnection和HttpClient) 参考
- httpclient实现代理登录和信息抓取--2011初
- 利用HttpClient和Jsoup实现从网站中抓取数据
- Android中网页数据的抓取和修改
- 网页爬虫,HttpClient+Jericho HTML Parser 实现网页的抓取
- httpclient 网页抓取 几个问题
- HTTPCLIENT抓取网页内容
- HttpClient抓取网页
- HttpClient 抓取网页
- httpclient学习抓取网页
- Python豆瓣静态网页抓取,lxml解析和显示(实验)
- 使用 Apache HttpClient 工具模拟百度蜘蛛或浏览器抓取和解压gzip网页
- apache HttpClient 4.3.4自动登录并抓取中国联通网页用户基本信息和账单数据
- HttpClient+Jericho HTML Parser 实现网页的抓取
- HttpClient+Jericho HTML Parser 实现网页的抓取
- C语言中结构体大小计算即存储分配
- 枚举变量扩展
- 算法设计之迭代法
- 世界上最快的浮点数开放算法
- Windows加载器与模块初始化
- 综合运用httpClient和Swt Browser实现网页抓取,修改和显示
- 关于VirtualPC扩容问题的又一种方法
- 内核对象 转载
- C和C++中main函数里面参数的意义
- 在Window XP上搭建Android开发环境
- oracle死锁查询及处理
- 使当前用户具有超级管理员权限
- Struts2中使用总结
- 本人译著《iPhone SDK编程入门经典:使用Objective-C》现已出版