summary refs log tree commit diff
path: root/lib/MooseX/UserAgent.pm
blob: 3ed384b31a19cfcdffbafb34518237974cd245c8 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
package MooseX::UserAgent;

our $VERSION = '0.2.1';

# Moose::Role turns on strict and warnings for this package.
use Moose::Role;

# Umbrella role: consumers apply MooseX::UserAgent and get all four
# sub-roles composed in a single with() call.
with(
    'MooseX::UserAgent::Config',
    'MooseX::UserAgent::Content',
    'MooseX::UserAgent::Cache',
    'MooseX::UserAgent::Generic',
);

# Read-only string attribute defaulting to 'LWP::UserAgent'.
# NOTE(review): presumably the class name of the HTTP library that the
# composed sub-roles load and instantiate -- confirm against those roles.
has '_LWPLIB' => (
    is      => 'ro',
    isa     => 'Str',
    default => 'LWP::UserAgent',
);

1;

__END__

=head1 NAME

MooseX::UserAgent - Fetch a URL using LWP as the HTTP library

=head1 SYNOPSIS

    package Foo;

    use Moose;
    with qw/MooseX::UserAgent/;

    has useragent_conf => (
        isa     => 'HashRef',
        default => sub {
            { name => 'myownbot', };
        }
    );

    my $res = $self->fetch($url, $cache);
    ...
    my $content = $self->get_content($res);

    --- yaml configuration
    name: 'Mozilla/5.0 (compatible; RTGI; http://rtgi.fr/)'
    mail: 'bot@rtgi.fr'
    max_size: 3000000
    timeout: 30
    cache:
      use_cache: 1
      root: /tmp
      default_expires_in: 5 days
      namespace: my::useragent

=head1 DESCRIPTION

This is a role which provides a useragent to a Moose Class. 

The role will do the caching for you if you need it, using Cache::*Cache
modules. By default it uses Cache::FileCache, but you can use any Cache
modules you want:

    my $cache = Cache::MemoryCache->new(
        {
            'namespace'          => 'mymemorycacheforbot',
            'default_expires_in' => 600
        }
    );

    my $class = $MyClassUsingUA->new(
        useragent_conf => {
            cache => {
                use_cache => 1,
                namespace => 'testua',
            }
        },
        ua_cache => $cache,
    );

=head2 METHODS

=head3 useragent_conf

This is an attribute you need to add to your class. It's a HashRef that
contains all the required configuration for the useragent.

=over 4

=item B<agent>

The default useragent is an LWP::UserAgent object. In the configuration,
the name and mail of the useragent have to be defined. By default, the size
of a page cannot exceed 3,000,000 bytes and the timeout is set to
30 seconds.

=item B<fetch>

This method will fetch a given URL. This method handles only the HTTP
protocol.

If there is a cache configuration, the url will be checked in the cache,
and if there is a match, a 304 HTTP code will be returned.

Returns an HTTP::Response object.

=item B<get_content>

This method will return the content in UTF-8.

=back

=head1 BUGS AND LIMITATIONS

=head1 AUTHOR

franck cuny  C<< <franck.cuny@rtgi.fr> >>

=head1 LICENCE AND COPYRIGHT

Copyright (c) 2009, RTGI
All rights reserved.
L<http://rtgi.fr/>

This module is free software; you can redistribute it and/or
modify it under the same terms as Perl itself. See L<perlartistic>.