# Files
# 2025-09-29 00:52:08 +02:00
#
# 63 lines
# 1.8 KiB
# Perl
# Executable File
#
#!/usr/bin/env perl
use 5.016;
use common::sense;
use utf8::all;
use Data::Dumper;
# Use fast binary libraries
use EV;
use Web::Scraper::LibXML;
use YADA 0.039;
use Scalar::Util qw( blessed );
# Scrape the forum root page and collect the href of every thread link.
my $rootScraper = scraper {
    process q(li > div.listBlock.main > div > h3 > a.PreviewTooltip), q(threads[]) => q(@href);
};
my $uri  = URI->new("http://foo.qa/forum.root");
my @list = @{ ($rootScraper->scrape($uri))->{'threads'} // [] };
# Page 2 (disabled). NOTE: the original commented code had a `= =` typo and
# re-declared `my $uri`; corrected here so it can be uncommented safely.
# my $uri2  = URI->new("http://www.se7ensins.com/forums/forums/grand-theft-auto-v-modding.428/page-2");
# my @list2 = @{ ($rootScraper->scrape($uri2))->{'threads'} // [] };
# push @list, @list2;
# Guard the debug output: an empty scrape would otherwise trigger an
# uninitialized-value warning on $list[0].
print "$list[0]\n" if @list;
print Data::Dumper->Dump( \@list ) if @list;
# TODO(debug): hard-coded override of the scraped thread list — remove for production.
@list = ("http://foo.qa/release", "http://foo.qa/release");
# Fetch every URL in @list concurrently (at most 4 parallel transfers) and
# scrape each successfully-fetched HTML page for post content and links.
YADA->new(
    common_opts => {
        # Available opts @ http://curl.haxx.se/libcurl/c/curl_easy_setopt.html
        encoding       => '',   # empty string: accept any encoding curl supports
        followlocation => 1,
        maxredirs      => 5,
    },
    http_response => 1,         # expose an HTTP::Response object on $self->response
    max           => 4,         # concurrency limit
)->append([@list] => sub {
    my ($self) = @_;

    # Skip failed transfers, non-2xx responses, and non-HTML payloads.
    return if $self->has_error
        or not $self->response->is_success
        or not $self->response->content_is_html;

    # Declare the scraper once and reuse it across all callback invocations.
    state $scraper = scraper {
        process q(ol > .message > .innerContainer > .messageInfo > .messageContent), q(op) => {
            q(links[]) => scraper { process q(a), q(shit[]) => q(@href); },
            post => q(TEXT)
        };
    };

    # Web::Scraper's scrape() returns a single hash reference, so keep it in a
    # scalar — the original `my @doc = ...` wrapped that ref in a one-element
    # array. decoded_content handles the HTML charset (en|de)coding for us.
    my $doc = $scraper->scrape(
        $self->response->decoded_content,
        $self->final_url,
    );
    # print Data::Dumper->Dump( [ $doc ] );

    # TODO: email the scraped thread data ($doc) to us!
})->wait;