HtmlUnit爬虫模拟登录LinkedIn

来源:互联网 发布:360软件手机助手 编辑:程序博客网 时间:2024/06/05 20:47

public static void loginLinkedin(String URL) throws IOException,SQLException {

        try {

            WebClient webClient =new WebClient();//创建WebClient

            webClient.getOptions().setJavaScriptEnabled(false);

            webClient.getOptions().setCssEnabled(false);

            // 获取页面

            HtmlPage page = webClient.getPage("https://www.linkedin.com/uas/login");//打开linkedin

 

            // 获得name"session_key"html元素

            HtmlElement usernameEle =page.getElementByName("session_key");

            // 获得id"session_password"html元素

            HtmlElement passwordEle = (HtmlElement)page.getElementById

 

            ("session_password-login");

            usernameEle.focus();//设置输入焦点

            usernameEle.type("z_hao1975@hotmail.com");//填写值

 

            passwordEle.focus();//设置输入焦点

            passwordEle.type("XXXXX");//填写值

            // 获得name"submit"的元素

            HtmlElement submitEle =page.getElementByName("signin");

            // 点击登陆

            page = submitEle.click();

            String result = page.asXml();// 获得click()后的html页面(包括标签)

            if (result.contains("Sign Out")){

                System.out.println("登陆成功");

                HtmlPage page2 =webClient.getPage(URL);

                String pageXml =page2.asXml();

                Document doc2 = Jsoup.parse(pageXml);

                Element background_text =doc2.getElementById("background");

                Elements name =doc2.getElementsByClass("full-name");

                if (background_text !=null) {

                    //System.out.println(background_text.toString());

 

                    // iMatch method to check if including sensitive content

                    booleaniMatch =iContentMatch(background_text.toString());

                    if (iMatch) {

                        // String res = downloadFromUrl(URL, getDir());

                        writefile(filePath("linkedin"),name.toString()+"<br>"+background_text.toString(),false);

                        //writefile(filePath("linkedin"),result, false);

                        Sub_ID++;

                    }

 

                    // write into database

                    /*

                     * str_SQL =

                     * " INSERT INTO waverly.tsearch(ID,cliuid_2,unedname,Sub_ID,SearchResult) VALUES (88,"

                     * + Integer.valueOf(cliuid_2) + ","+ "\"" +unedname +

                     * "\"" + "," + Sub_ID+ "," + "\"" +

                     *background_text.toString().replace("\"","\\\"") +"\"" +

                     * ")";

                     *

                     * try {Class.forName("com.mysql.jdbc.Driver");

                     * java.sql.Connectionconn =DriverManager.getConnection(

                     * "jdbc:mysql://localhost:3306/waverly?user=root&password=197544"

                     * ); java.sql.Statementstmt =conn.createStatement();

                     * stmt.executeUpdate(str_SQL); Sub_ID++; }catch

                     * (SQLException e) { e.printStackTrace(); }catch

                     * (ClassNotFoundException e) { //TODO Auto-generated catch

                     * block e.printStackTrace(); }

                     */

 

                    // 创建目录

                    // str_m=str_trim.substring(0,

                    // 4-cliuid_2.length())+cliuid_2;

                    // writefile("c:/1.txt",background_text.toString(), false);

                } else {

                    System.out.println("登陆失败");

                }

            }

        } catch (FailingHttpStatusCodeExceptione) {

            // TODO Auto-generated catch block

            e.printStackTrace();

        } catch (Exceptione) {

            // TODO Auto-generated catch block

            e.printStackTrace();

        }

    }


0 0
原创粉丝点击