summary refs log tree commit diff
path: root/lib/MooseX/UserAgent.pm
blob: 2e1b2f27fe01e57a6e4cd5232ec5c9ef9e3da8e5 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
package MooseX::UserAgent;

our $VERSION = '0.2.0';

use URI;
use HTTP::Request;
use HTTP::Response;
use LWP::UserAgent;

use Moose::Role;
# Compose the companion roles into any class that consumes this one.
# fetch() below calls agent, get_ua_cache and store_ua_cache, none of which
# are defined in this file -- presumably they are supplied by these roles
# (NOTE(review): confirm against the role sources).
with qw/
    MooseX::UserAgent::Config
    MooseX::UserAgent::Content
    MooseX::UserAgent::Cache
    /;

# fetch($url)
#
# Issue a GET request for $url through $self->agent and return whatever
# that call yields. The request always carries "Accept-Encoding: gzip".
# When $self->get_ua_cache($url) returns a true value, that value is sent
# as the If-Modified-Since header. The result is handed to
# $self->store_ua_cache together with the URL before being returned.
sub fetch {
    my ( $self, $url ) = @_;

    my $request = HTTP::Request->new( GET => URI->new($url) );
    $request->header( 'Accept-Encoding', 'gzip' );

    # Only make the request conditional when the cache knows this URL.
    if ( my $cached_stamp = $self->get_ua_cache($url) ) {
        $request->header( 'If-Modified-Since' => $cached_stamp );
    }

    my $response = $self->agent->request($request);

    # Record the outcome before handing it back to the caller.
    $self->store_ua_cache( $url, $response );

    return $response;
}

1;

__END__

=head1 NAME

MooseX::UserAgent - Fetch a URL using LWP as the HTTP library

=head1 SYNOPSIS

    package Foo;

    use Moose;
    with qw/MooseX::UserAgent/;

    has useragent_conf => (
        isa     => 'HashRef',
        default => sub {
            { name => 'myownbot', };
        }
    );

    my $res = $self->fetch($url);
    ...
    my $content = $self->get_content($res);

    --- yaml configuration
    name: 'Mozilla/5.0 (compatible; RTGI; http://rtgi.fr/)'
    mail: 'bot@rtgi.fr'
    max_size: 3000000
    timeout: 30
    cache:
      use_cache: 1
      root: /tmp
      default_expires_in: 5 days
      namespace: my::useragent

=head1 DESCRIPTION

This is a role which provides a user agent to a Moose class.

The role will do the caching for you if you need it, using Cache::*Cache
modules. By default it uses Cache::FileCache, but you can use any Cache
modules you want:

    my $cache = Cache::MemoryCache->new(
        {
            'namespace'          => 'mymemorycacheforbot',
            'default_expires_in' => 600
        }
    );

    my $class = $MyClassUsingUA->new(
        useragent_conf => {
            cache => {
                use_cache => 1,
                namespace => 'testua',
            }
        },
        ua_cache => $cache,
    );

=head2 METHODS

=head3 useragent_conf

This is an attribute you need to add to your class. It's a HashRef that
contains all the required configuration for the useragent.

=over 4

=item B<agent>

The default useragent is a LWP::UserAgent object. In the configuration,
the name and mail of the useragent have to be defined. By default, the size
of a fetched page cannot exceed 3,000,000 bytes and the timeout is set to
30 seconds.

=item B<fetch>

This method fetches a given URL. It handles only the HTTP protocol.

If there is a cache configuration, the URL is looked up in the cache. If
there is a match, the request is sent with an C<If-Modified-Since> header,
so the server may answer with a 304 HTTP code when the page has not changed.

Return a HTTP::Response object.

=item B<get_content>

This method returns the content of the response in UTF-8.

=back

=head1 BUGS AND LIMITATIONS

=head1 AUTHOR

franck cuny  C<< <franck.cuny@rtgi.fr> >>

=head1 LICENCE AND COPYRIGHT

Copyright (c) 2009, RTGI
All rights reserved.

This module is free software; you can redistribute it and/or
modify it under the same terms as Perl itself. See L<perlartistic>.