Java 采集 AJAX 页面

来源:互联网 发布:mysql和oracle的分页 编辑:程序博客网 时间:2024/05/16 14:27

 

package com;

/**
 * <p>Title: </p>
 *
 * <p>Description: </p>
 *
 * <p>Copyright: Copyright (c) 2012</p>
 *
 * <p>Company: </p>
 *
 * @author not attributable
 * @version 1.0
 */

 

import java.net.URL;
import java.math.BigDecimal;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import com.gargoylesoftware.htmlunit.BrowserVersion;
import com.gargoylesoftware.htmlunit.CollectingAlertHandler;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.DomNodeList;
import com.gargoylesoftware.htmlunit.html.HtmlDivision;
import com.gargoylesoftware.htmlunit.html.HtmlElement;
import com.gargoylesoftware.htmlunit.html.HtmlForm;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
import com.gargoylesoftware.htmlunit.html.HtmlPasswordInput;
import com.gargoylesoftware.htmlunit.html.HtmlSubmitInput;
import com.gargoylesoftware.htmlunit.html.HtmlTable;
import com.gargoylesoftware.htmlunit.html.HtmlTableCell;
import com.gargoylesoftware.htmlunit.html.HtmlTableRow;
import com.gargoylesoftware.htmlunit.html.HtmlTextInput;
import com.gargoylesoftware.htmlunit.NicelyResynchronizingAjaxController;

 

 


public class test {

 

    public static void main(String[] argv) {
            getHomeTile();
    }

 

    public static void getHomeTile(){
        try {
            final URL url = new URL("http://test.com");

 

            final WebClient webClient = new WebClient(BrowserVersion.FIREFOX_3_6); // tried also FIREFOX_3
            webClient.setAjaxController(new NicelyResynchronizingAjaxController());

 

            final HtmlPage page = webClient.getPage(url);
            webClient.waitForBackgroundJavaScript(10000);
            System.out.println(page.getElementById("a-table-tr").asXml());

          } catch(Exception ex) {           
        }
    }

}

 

 

其中用到了 HtmlUnit 来模拟浏览器,以便执行页面中的 JavaScript 并获取 AJAX 加载后的内容。


0 0
原创粉丝点击