diff options
author | franck cuny <franck@lumberjaph.net> | 2009-06-25 14:49:11 +0200 |
---|---|---|
committer | franck cuny <franck@lumberjaph.net> | 2009-06-25 14:49:11 +0200 |
commit | 5dd71a777832ccaddd3d37fb1c8abfa4be004b55 (patch) | |
tree | 4685a8ae9bf00bf3c77b1737cea1f69732232e49 | |
parent | basic tests (diff) | |
download | moosex-useragent-5dd71a777832ccaddd3d37fb1c8abfa4be004b55.tar.gz |
POD
-rw-r--r-- | lib/MooseX/UserAgent.pm | 56 | ||||
-rw-r--r-- | lib/MooseX/UserAgent/Async.pm | 2 | ||||
-rw-r--r-- | lib/MooseX/UserAgent/Cache.pm | 2 | ||||
-rw-r--r-- | lib/MooseX/UserAgent/Config.pm | 64 | ||||
-rw-r--r-- | lib/MooseX/UserAgent/Content.pm | 2 |
5 files changed, 105 insertions, 21 deletions
diff --git a/lib/MooseX/UserAgent.pm b/lib/MooseX/UserAgent.pm index edcd5dd..d43312a 100644 --- a/lib/MooseX/UserAgent.pm +++ b/lib/MooseX/UserAgent.pm @@ -2,27 +2,30 @@ package MooseX::UserAgent; our $VERSION = '0.2.0'; -use Moose::Role; -with qw/MooseX::UserAgent::Config MooseX::UserAgent::Content - MooseX::UserAgent::Cache/; - use URI; use HTTP::Request; use HTTP::Response; use LWP::UserAgent; +use Moose::Role; +with qw/ + MooseX::UserAgent::Config + MooseX::UserAgent::Content + MooseX::UserAgent::Cache + /; + sub fetch { my ( $self, $url ) = @_; - my $req = HTTP::Request->new( GET => URI->new( $url ) ); + my $req = HTTP::Request->new( GET => URI->new($url) ); $req->header( 'Accept-Encoding', 'gzip' ); my $last_modified = $self->get_ua_cache($url); $req->header( 'If-Modified-Since' => $last_modified ) if $last_modified; - my $res = $self->agent->request( $req ); - $self->store_ua_cache($url, $res); + my $res = $self->agent->request($req); + $self->store_ua_cache( $url, $res ); $res; } @@ -69,18 +72,40 @@ This is a role which provides a useragent to a Moose Class. The role will do the caching for you if you need it, using Cache::*Cache modules. By default it uses Cache::FileCache, but you can use any Cache -modules you want. +modules you want: + + my $cache = new Cache::MemoryCache( + { + 'namespace' => 'mymemorycacheforbot', + 'default_expires_in' => 600 + } + ); + + my $class = $MyClassUsingUA->new( + useragent_conf => { + cache => { + use_cache => 1, + namespace => 'testua', + } + }, + ua_cache => $cache, + ); =head2 METHODS +=head3 useragent_conf + +This is an attribute you need to add to your Class. It's a HashRef that +contains all the required configuration for the useragent. + =over 4 =item B<agent> -The default useragent is a LWPx::ParanoidAgent object. In the -configuration, the name, mail of the useragent have to be defined. The -default size of a page manipulated can't excess 3 000 000 octets and the -timeout is set to 30 seconds. 
+The default useragent is a LWP::UserAgent object. In the configuration, the name and mail of the useragent have to be defined. The default size of a page manipulated can't exceed 3 000 000 octets and the timeout is set to 30 seconds. =item B<fetch> @@ -88,10 +113,7 @@ This method will fetch a given URL. This method handle only the http protocol. If there is a cache configuration, the url will be checked in the cache, -and if there is a match, the content will be returned. - -In the case of scraping search engines, a delay may be given, so we will -not hammer the server. +and if there is a match, a 304 HTTP code will be returned. =item B<get_content> @@ -103,7 +125,7 @@ This method will return a content in utf8. =head1 AUTHOR -franck cuny C<< <franck@lumberjaph.net> >> +franck cuny C<< <franck.cuny@rtgi.fr> >> =head1 LICENCE AND COPYRIGHT diff --git a/lib/MooseX/UserAgent/Async.pm b/lib/MooseX/UserAgent/Async.pm index 3c9a09d..186a183 100644 --- a/lib/MooseX/UserAgent/Async.pm +++ b/lib/MooseX/UserAgent/Async.pm @@ -63,7 +63,7 @@ RTGI::Role::UserAgent::Async - Fetch an url using AnyEvent::HTTP =head1 AUTHOR -franck cuny C<< <franck@lumberjaph.net> >> +franck cuny C<< <franck.cuny@rtgi.fr> >> =head1 LICENCE AND COPYRIGHT diff --git a/lib/MooseX/UserAgent/Cache.pm b/lib/MooseX/UserAgent/Cache.pm index de9b24d..fa235d7 100644 --- a/lib/MooseX/UserAgent/Cache.pm +++ b/lib/MooseX/UserAgent/Cache.pm @@ -56,7 +56,7 @@ RTGI::Role::UserAgent::Cache =head1 AUTHOR -franck cuny C<< <franck@lumberjaph.net> >> +franck cuny C<< <franck.cuny@rtgi.fr> >> =head1 LICENCE AND COPYRIGHT diff --git a/lib/MooseX/UserAgent/Config.pm b/lib/MooseX/UserAgent/Config.pm index 40b0720..d5d6730 100644 --- a/lib/MooseX/UserAgent/Config.pm +++ b/lib/MooseX/UserAgent/Config.pm @@ -10,6 +10,8 @@ has 'agent' => ( my $self = shift; my $ua = LWP::UserAgent->new; + if (!$self->can('useragent_conf')) { + } my $conf = $self->useragent_conf; $ua->agent( $conf->{name} ) if $conf->{name}; $ua->from( 
$conf->{mail} ) if $conf->{mail}; @@ -27,13 +29,73 @@ __END__ RTGI::Role::UserAgent::Config +=head1 SYNOPSIS + + has useragent_conf => ( + isa => 'HashRef', + default => sub { + { + name => 'myownbot', + mail => 'mail@bot.com', + timeout => 60, + max_size => 50000, + cache => { + use_cache => 1, + namespace => 'mybotua', + root => '/tmp', + } + }; + } + ); + =head1 DESCRIPTION +=over 4 + +=item B<name> + +UserAgent string used by the HTTP client. Default is to use the LWP or +AnyEvent::HTTP string. + +=item B<mail> + +Mail string used by the HTTP client (only for LWP). Default is to use the +LWP string. + +=item B<max_size> + +Max size that will be fetched by the useragent, in octets (only for LWP). +Default is set to 3 000 000. + +=item B<timeout> + +Timeout, in seconds. Default is set to 30. + +=item B<cache> + +=over 2 + +=item B<use_cache> + +If you need caching, set to 1. Default is no cache. + +=item B<root> + +Where to store the cache. + +=item B<default_expires_in> + +=item B<namespace> + +=back + +=back + =head1 BUGS AND LIMITATIONS =head1 AUTHOR -franck cuny C<< <franck@lumberjaph.net> >> +franck cuny C<< <franck.cuny@rtgi.fr> >> =head1 LICENCE AND COPYRIGHT diff --git a/lib/MooseX/UserAgent/Content.pm b/lib/MooseX/UserAgent/Content.pm index 024531f..1b3f5ee 100644 --- a/lib/MooseX/UserAgent/Content.pm +++ b/lib/MooseX/UserAgent/Content.pm @@ -37,7 +37,7 @@ RTGI::Role::UserAgent::Content =head1 AUTHOR -franck cuny C<< <franck@lumberjaph.net> >> +franck cuny C<< <franck.cuny@rtgi.fr> >> =head1 LICENCE AND COPYRIGHT |