63 lines
1.8 KiB
Perl
Executable File
63 lines
1.8 KiB
Perl
Executable File
#!/usr/bin/env perl
|
|
use 5.016;
|
|
use common::sense;
|
|
use utf8::all;
|
|
use Data::Dumper;
|
|
|
|
# Use fast binary libraries
|
|
use EV;
|
|
use Web::Scraper::LibXML;
|
|
use YADA 0.039;
|
|
use Scalar::Util qw( blessed );
|
|
|
|
|
|
# Scraper for the forum index page: every anchor matched by the selector
# contributes its href attribute to the `threads` list of the result.
my $rootScraper = scraper {
    process(
        'li > div.listBlock.main > div > h3 > a.PreviewTooltip',
        'threads[]' => '@href',
    );
};
|
|
|
|
|
|
# Fetch the forum index and pull out the thread links; fall back to an
# empty list when the scraper found no `threads` entry.
my $uri  = URI->new('http://foo.qa/forum.root');
my @list = do {
    my $index_page = $rootScraper->scrape($uri);
    @{ $index_page->{threads} // [] };
};

# Page 2 (disabled):
# $uri = URI->new("http://www.se7ensins.com/forums/forums/grand-theft-auto-v-modding.428/page-2");
# my @list2 = @{($rootScraper->scrape($uri))->{'threads'} // []};
# push @list, @list2;

# Debug output: the first scraped link, then a dump of every link.
say $list[0];
print Dumper(@list);

# NOTE(review): the scraped links are discarded here and replaced by the same
# fixed URL twice -- this looks like leftover debugging; confirm before relying
# on the index scrape above.
@list = ('http://foo.qa/release') x 2;
|
|
|
|
|
|
# Crawl every URL in @list through YADA and scrape the opening post of each
# thread page. The call blocks in ->wait until the queue has been processed.
YADA->new(
    common_opts => {
        # Available opts @ http://curl.haxx.se/libcurl/c/curl_easy_setopt.html
        encoding       => '',
        followlocation => 1,
        maxredirs      => 5,
    },
    http_response => 1,
    max           => 4,    # presumably the parallel-connection cap -- confirm in YADA docs
)->append(
    [@list] => sub {
        my ($self) = @_;

        # Skip failed transfers, non-success HTTP statuses and non-HTML bodies.
        return if $self->has_error;
        my $response = $self->response;
        return unless $response->is_success;
        return unless $response->content_is_html;

        # Declare the scraper once and then reuse it across callbacks.
        state $scraper = scraper {
            # Nested scraper gathering the href of every anchor in the post.
            my $anchors = scraper {
                process('a', 'shit[]' => '@href');
            };

            # NOTE(review): `links[]` is used as a key of a hash-ref value here;
            # verify Web::Scraper treats the `[]` suffix as intended in this position.
            process(
                'ol > .message > .innerContainer > .messageInfo > .messageContent',
                'op' => {
                    'links[]' => $anchors,
                    'post'    => 'TEXT',
                },
            );
        };

        # Scrape the charset-decoded body, using the final (post-redirect) URL
        # as the base for the document.
        my @thread_data = $scraper->scrape(
            $response->decoded_content,
            $self->final_url,
        );

        # print Data::Dumper->Dump( \@thread_data );

        # TODO: email the scraped thread data to us.
    }
)->wait;