Learning Web Crawling with Perl

来源:互联网 发布:梓潼淘宝运营招聘 编辑:程序博客网 时间:2024/06/03 10:58
#!/usr/bin/perl
######
# Overview of web-crawling related modules.
#
# Note: the snippets below are an API overview, not a program meant to be
# run end-to-end. The placeholder variables declared next carry no real
# values; fill them in before executing any individual snippet.
######

# Placeholders referenced by the snippets. They are declared up front because
# `use 5.12.1` (below) implicitly turns on `strict`, which rejects undeclared
# variables.
my ( $url, $method, $form_data, $data, $file_path, %attributes, %options );
my ( $number, $name, $value, $field_values, @criteria, $button, $expected );

######
# HTTP::Thin
######
use 5.12.1;
use HTTP::Request::Common;
use HTTP::Thin;

say HTTP::Thin->new()->request( GET 'http://example.com' )->as_string;

######
# HTTP::Tiny
######
use HTTP::Tiny;

my $response = HTTP::Tiny->new->get('http://example.com/');
die "Failed! \n" unless $response->{success};

print "$response->{status} $response->{reason} \n";

# A header value is either a plain string or an array ref when the header
# appeared more than once; print one line per value in both cases.
while ( my ( $k, $v ) = each %{ $response->{headers} } ) {
    for ( ref($v) eq 'ARRAY' ? @$v : $v ) {
        print "$k: $_ \n";
    }
}

print $response->{content} if length $response->{content};

# new
my $http = HTTP::Tiny->new(%attributes);

# Valid attributes include:
#   - agent
#   - cookie_jar
#   - default_headers
#   - local_address
#   - keep_alive
#   - max_redirect
#   - max_size
#   - https_proxy
#   - proxy
#   - no_proxy
#   - timeout
#   - verify_SSL
#   - SSL_options

# get / head / put / post / delete
$response = $http->get($url);
$response = $http->get( $url, \%options );
$response = $http->head($url);

# post_form
$response = $http->post_form( $url, $form_data );
$response = $http->post_form( $url, $form_data, \%options );

# request
$response = $http->request( $method, $url );
$response = $http->request( $method, $url, \%options );

# Credentials embedded in the URL ("user:password@host").
# NOTE(review): these example URLs are reproduced as found and look garbled;
# confirm the intended user/password/host before relying on them.
$http->request( 'GET', 'http://user:pwd hk.mars@aol.com' );
# or
$http->request( 'GET', 'http://mars%40:pwd hk.mars@aol.com' );

# www_form_urlencode
my $params = $http->www_form_urlencode($data);
$response = $http->get("http://example.com/query?$params");

# SSL support: SSL_options is passed as a constructor attribute.
$http = HTTP::Tiny->new(
    SSL_options => {
        SSL_ca_file => $file_path,
    },
);

# proxy support

######
# WWW::Mechanize
#
# Stateful programmatic web browsing, used for automating interaction with
# websites.
######
use WWW::Mechanize;

my $mech = WWW::Mechanize->new();

$mech->get($url);

$mech->follow_link( n          => 3 );
$mech->follow_link( text_regex => qr/download this/i );
$mech->follow_link( url        => 'http://host.com/index.html' );

$mech->submit_form(
    form_number => 3,
    fields      => {
        username => 'banana',
        password => 'lost-and-alone',
    },
);

$mech->submit_form(
    form_name => 'search',
    fields    => { query => 'pot of gold', },
    button    => 'search now',
);

# testing web applications
use Test::More;
like( $mech->content(), qr/$expected/, "Got expected content" );

# page traverse
$mech->back();

# finer control over page
$mech->find_link( n => $number );
$mech->form_number($number);
$mech->form_name($name);
$mech->field( $name, $value );
$mech->set_fields($field_values);
$mech->set_visible(@criteria);
$mech->click($button);

# subclass of LWP::UserAgent, e.g.:
$mech->add_header( $name => $value );

# Method groups, as listed in the original notes:
#   - page-fetching methods
#   - status methods
#   - content-handling methods
#   - link methods
#   - image methods
#   - form methods
#   - field methods
#   - miscellaneous methods
#   - overridden LWP::UserAgent methods
#   - inherited unchanged LWP::UserAgent methods

# With these pieces it is easy to implement a spider project for future
# integration use.

>> More on Perl Web Crawling

Mars

0 0
原创粉丝点击