summary refs log tree commit diff
path: root/lib/LWPx/ParanoidAgent.pm
diff options
context:
space:
mode:
Diffstat (limited to 'lib/LWPx/ParanoidAgent.pm')
-rw-r--r--lib/LWPx/ParanoidAgent.pm578
1 files changed, 578 insertions, 0 deletions
diff --git a/lib/LWPx/ParanoidAgent.pm b/lib/LWPx/ParanoidAgent.pm
new file mode 100644
index 0000000..bbd0d77
--- /dev/null
+++ b/lib/LWPx/ParanoidAgent.pm
@@ -0,0 +1,578 @@
+package LWPx::ParanoidAgent;
+require LWP::UserAgent;
+
+use vars qw(@ISA $VERSION);
+@ISA = qw(LWP::UserAgent);
+$VERSION = '1.04';
+
+require HTTP::Request;
+require HTTP::Response;
+
+use HTTP::Status ();
+use strict;
+use Net::DNS;
+
+sub new {
+    my $class = shift;
+    my %opts = @_;
+
+    my $blocked_hosts     = delete $opts{blocked_hosts}     || [];
+    my $whitelisted_hosts = delete $opts{whitelisted_hosts} || [];
+    my $resolver          = delete $opts{resolver};
+    my $paranoid_proxy    = delete $opts{paranoid_proxy};
+    $opts{timeout}      ||= 15;
+
+    my $self = LWP::UserAgent->new( %opts );
+
+    $self->{'blocked_hosts'}     = $blocked_hosts;
+    $self->{'whitelisted_hosts'} = $whitelisted_hosts;
+    $self->{'resolver'}          = $resolver;
+    $self->{'paranoid_proxy'}    = $paranoid_proxy;
+
+    $self = bless $self, $class;
+    return $self;
+}
+
+# returns seconds remaining given a request
+sub _time_remain {
+    my $self = shift;
+    my $req = shift;
+
+    my $now = time();
+    my $start_time = $req->{_time_begin} || $now;
+    return $start_time + $self->{timeout} - $now;
+}
+
+sub _resolve {
+    my ($self, $host, $request, $timeout, $depth) = @_;
+    my $res = $self->resolver;
+    $depth ||= 0;
+
+    die "CNAME recursion depth limit exceeded.\n" if $depth > 10;
+    die "DNS lookup resulted in bad host." if $self->_bad_host($host);
+
+    # return the IP address if it looks like one and wasn't marked bad
+    return ($host) if $host =~ /^\d+\.\d+\.\d+\.\d+$/;
+
+    my $sock = $res->bgsend($host)
+        or die "No sock from bgsend";
+
+    # wait for the socket to become readable, unless this is from our test
+    # mock resolver.
+    unless ($sock && $sock eq "MOCK") {
+        my $rin = '';
+        vec($rin, fileno($sock), 1) = 1;
+        my $nf = select($rin, undef, undef, $self->_time_remain($request));
+        die "DNS lookup timeout" unless $nf;
+    }
+
+    my $packet = $res->bgread($sock)
+        or die "DNS bgread failure";
+    $sock = undef;
+
+    my @addr;
+    my $cname;
+    foreach my $rr ($packet->answer) {
+        if ($rr->type eq "A") {
+            die "Suspicious DNS results from A record\n" if $self->_bad_host($rr->address);
+            # untaints the address:
+            push @addr, join(".", ($rr->address =~ /^(\d+)\.(\d+)\.(\d+)\.(\d+)$/));
+        } elsif ($rr->type eq "CNAME") {
+            # will be checked for validity in the recursion path
+            $cname = $rr->cname;
+        }
+    }
+
+    return @addr if @addr;
+    return () unless $cname;
+    return $self->_resolve($cname, $request, $timeout, $depth + 1);
+}
+
+sub _host_list_match {
+    my $self = shift;
+    my $list_name = shift;
+    my $host = shift;
+
+    foreach my $rule (@{ $self->{$list_name} || [] }) {
+        if (ref $rule eq "CODE") {
+            return 1 if $rule->($host);
+        } elsif (ref $rule) {
+            # assume regexp
+            return 1 if $host =~ /$rule/;
+        } else {
+            return 1 if $host eq $rule;
+        }
+    }
+}
+
+sub _bad_host {
+    my $self = shift;
+    my $host = lc(shift);
+
+    return 0 if $self->_host_list_match("whitelisted_hosts", $host);
+    return 1 if $self->_host_list_match("blocked_hosts", $host);
+    return 1 if
+        $host =~ /^localhost$/i ||    # localhost is bad.  even though it'd be stopped in
+                                      #    a later call to _bad_host with the IP address
+        $host =~ /\s/i;               # any whitespace is questionable
+
+    # Let's assume it's an IP address now, and get it into 32 bits.
+    # Uf at any time something doesn't look like a number, then it's
+    # probably a hostname and we've already either whitelisted or
+    # blacklisted those, so we'll just say it's okay and it'll come
+    # back here later when the resolver finds an IP address.
+    my @parts = split(/\./, $host);
+    return 0 if @parts > 4;
+
+    # un-octal/un-hex the parts, or return if there's a non-numeric part
+    my $overflow_flag = 0;
+    foreach (@parts) {
+        return 0 unless /^\d+$/ || /^0x[a-f\d]+$/;
+        local $SIG{__WARN__} = sub { $overflow_flag = 1; };
+        $_ = oct($_) if /^0/;
+    }
+
+    # a purely numeric address shouldn't overflow.
+    return 1 if $overflow_flag;
+
+    my $addr;  # network order packed IP address
+
+    if (@parts == 1) {
+        # a - 32 bits
+        return 1 if
+            $parts[0] > 0xffffffff;
+        $addr = pack("N", $parts[0]);
+    } elsif (@parts == 2) {
+        # a.b - 8.24 bits
+        return 1 if
+            $parts[0] > 0xff ||
+            $parts[1] > 0xffffff;
+        $addr = pack("N", $parts[0] << 24 | $parts[1]);
+    } elsif (@parts == 3) {
+        # a.b.c - 8.8.16 bits
+        return 1 if
+            $parts[0] > 0xff ||
+            $parts[1] > 0xff ||
+            $parts[2] > 0xffff;
+        $addr = pack("N", $parts[0] << 24 | $parts[1] << 16 | $parts[2]);
+    } else {
+        # a.b.c.d - 8.8.8.8 bits
+        return 1 if
+            $parts[0] > 0xff ||
+            $parts[1] > 0xff ||
+            $parts[2] > 0xff ||
+            $parts[3] > 0xff;
+        $addr = pack("N", $parts[0] << 24 | $parts[1] << 16 | $parts[2] << 8 | $parts[3]);
+    }
+
+    my $haddr = unpack("N", $addr); # host order IP address
+    return 1 if
+        ($haddr & 0xFF000000) == 0x00000000 || # 0.0.0.0/8
+        ($haddr & 0xFF000000) == 0x0A000000 || # 10.0.0.0/8
+        ($haddr & 0xFF000000) == 0x7F000000 || # 127.0.0.0/8
+        ($haddr & 0xFFF00000) == 0xAC100000 || # 172.16.0.0/12
+        ($haddr & 0xFFFF0000) == 0xA9FE0000 || # 169.254.0.0/16
+        ($haddr & 0xFFFF0000) == 0xC0A80000 || # 192.168.0.0/16
+        ($haddr & 0xFFFFFF00) == 0xC0000200 || # 192.0.2.0/24  "TEST-NET" docs/example code
+        ($haddr & 0xFFFFFF00) == 0xC0586300 || # 192.88.99.0/24 6to4 relay anycast addresses
+         $haddr               == 0xFFFFFFFF || # 255.255.255.255
+        ($haddr & 0xF0000000) == 0xE0000000;  # multicast addresses
+
+    # as final IP address check, pass in the canonical a.b.c.d decimal form
+    # to the blacklisted host check to see if matches as bad there.
+    my $can_ip = join(".", map { ord } split //, $addr);
+    return 1 if $self->_host_list_match("blocked_hosts", $can_ip);
+
+    # looks like an okay IP address
+    return 0;
+}
+
+sub request {
+    my ($self, $req, $arg, $size, $previous) = @_;
+
+    # walk back to the first request, and set our _time_begin to its _time_begin, or if
+    # we're the first, then use current time.  used by LWPx::Protocol::http_paranoid
+    my $first_res = $previous;  # previous is the previous response that invoked this request
+    $first_res = $first_res->previous while $first_res && $first_res->previous;
+    $req->{_time_begin} = $first_res ? $first_res->request->{_time_begin} : time();
+
+    my $host = $req->uri->host;
+    if ($self->_bad_host($host)) {
+        my $err_res = HTTP::Response->new(403, "Unauthorized access to blocked host");
+        $err_res->request($req);
+        $err_res->header("Client-Date" => HTTP::Date::time2str(time));
+        $err_res->header("Client-Warning" => "Internal response");
+        $err_res->header("Content-Type" => "text/plain");
+        $err_res->content("403 Unauthorized access to blocked host\n");
+        return $err_res;
+    }
+
+    if (my $pp = $self->{paranoid_proxy}) {
+        $req->uri("$pp?url="   . eurl($req->uri) .
+                  "&timeout="  . ($self->{timeout}  + 0) .
+                  "&max_size=" . ($self->{max_size} + 0));
+    }
+
+    return $self->SUPER::request($req, $arg, $size, $previous);
+}
+
+# taken from LWP::UserAgent and modified slightly.  (proxy support removed,
+# and map http and https schemes to separate protocol handlers)
+sub send_request
+{
+    my ($self, $request, $arg, $size) = @_;
+    $self->_request_sanity_check($request);
+
+    my ($method, $url) = ($request->method, $request->uri);
+
+    local($SIG{__DIE__});  # protect against user defined die handlers
+
+    # Check that we have a METHOD and a URL first
+    return _new_response($request, &HTTP::Status::RC_BAD_REQUEST, "Method missing")
+        unless $method;
+    return _new_response($request, &HTTP::Status::RC_BAD_REQUEST, "URL missing")
+        unless $url;
+    return _new_response($request, &HTTP::Status::RC_BAD_REQUEST, "URL must be absolute")
+        unless $url->scheme;
+    return _new_response($request, &HTTP::Status::RC_BAD_REQUEST,
+                         "ParanoidAgent doesn't support going through proxies.  ".
+                         "In that case, do your paranoia at your proxy instead.")
+        if $self->_need_proxy($url);
+
+    my $scheme = $url->scheme;
+    return _new_response($request, &HTTP::Status::RC_BAD_REQUEST, "Only http and https are supported by ParanoidAgent")
+        unless $scheme eq "http" || $scheme eq "https";
+
+    LWP::Debug::trace("$method $url");
+
+    my $protocol;
+
+    {
+      # Honor object-specific restrictions by forcing protocol objects
+      #  into class LWP::Protocol::nogo.
+        my $x;
+        if($x       = $self->protocols_allowed) {
+            if(grep lc($_) eq $scheme, @$x) {
+                LWP::Debug::trace("$scheme URLs are among $self\'s allowed protocols (@$x)");
+              }
+            else {
+                LWP::Debug::trace("$scheme URLs aren't among $self\'s allowed protocols (@$x)");
+                  require LWP::Protocol::nogo;
+                  $protocol = LWP::Protocol::nogo->new;
+              }
+        }
+        elsif ($x = $self->protocols_forbidden) {
+            if(grep lc($_) eq $scheme, @$x) {
+                LWP::Debug::trace("$scheme URLs are among $self\'s forbidden protocols (@$x)");
+                  require LWP::Protocol::nogo;
+                  $protocol = LWP::Protocol::nogo->new;
+              }
+            else {
+                LWP::Debug::trace("$scheme URLs aren't among $self\'s forbidden protocols (@$x)");
+              }
+        }
+      # else fall thru and create the protocol object normally
+    }
+
+    unless ($protocol) {
+        LWP::Protocol::implementor("${scheme}_paranoid",  "LWPx::Protocol::${scheme}_paranoid");
+        eval "require LWPx::Protocol::${scheme}_paranoid;";
+        if ($@) {
+            $@ =~ s/ at .* line \d+.*//s;  # remove file/line number
+            my $response =  _new_response($request, &HTTP::Status::RC_NOT_IMPLEMENTED, $@);
+            return $response;
+        }
+
+        $protocol = eval { LWP::Protocol::create($scheme eq "http" ? "http_paranoid" : "https_paranoid", $self) };
+        if ($@) {
+            $@ =~ s/ at .* line \d+.*//s;  # remove file/line number
+            my $response =  _new_response($request, &HTTP::Status::RC_NOT_IMPLEMENTED, $@);
+            if ($scheme eq "https") {
+                $response->message($response->message . " (Crypt::SSLeay not installed)");
+                $response->content_type("text/plain");
+                $response->content(<<EOT);
+LWP will support https URLs if the Crypt::SSLeay module is installed.
+More information at <http://www.linpro.no/lwp/libwww-perl/README.SSL>.
+EOT
+}
+            return $response;
+        }
+    }
+
+    # Extract fields that will be used below
+    my ($timeout, $cookie_jar, $use_eval, $parse_head, $max_size) =
+        @{$self}{qw(timeout cookie_jar use_eval parse_head max_size)};
+
+    my $response;
+    my $proxy = undef;
+    if ($use_eval) {
+        # we eval, and turn dies into responses below
+        eval {
+            $response = $protocol->request($request, $proxy,
+                                           $arg, $size, $timeout);
+        };
+        if ($@) {
+            $@ =~ s/ at .* line \d+.*//s;  # remove file/line number
+            $response = _new_response($request,
+                                      &HTTP::Status::RC_INTERNAL_SERVER_ERROR,
+                                      $@);
+        }
+    }
+    else {
+        $response = $protocol->request($request, $proxy,
+                                       $arg, $size, $timeout);
+        # XXX: Should we die unless $response->is_success ???
+    }
+
+    $response->request($request);  # record request for reference
+    $cookie_jar->extract_cookies($response) if $cookie_jar;
+    $response->header("Client-Date" => HTTP::Date::time2str(time));
+    return $response;
+}
+
+# blocked hostnames, compiled patterns, or subrefs
+sub blocked_hosts
+{
+    my $self = shift;
+    if (@_) {
+        my @hosts = @_;
+        $self->{'blocked_hosts'} = \@hosts;
+        return;
+    }
+    return @{ $self->{'blocked_hosts'} || [] };
+}
+
+# whitelisted hostnames, compiled patterns, or subrefs
+sub whitelisted_hosts
+{
+    my $self = shift;
+    if (@_) {
+        my @hosts = @_;
+        $self->{'whitelisted_hosts'} = \@hosts;
+        return;
+    }
+    return @{ $self->{'whitelisted_hosts'} || [] };
+}
+
+# get/set Net::DNS resolver object
+sub resolver
+{
+    my $self = shift;
+    if (@_) {
+        $self->{'resolver'} = shift;
+        require UNIVERSAL ;
+        die "Not a Net::DNS::Resolver object" unless
+            UNIVERSAL::isa($self->{'resolver'}, "Net::DNS::Resolver");
+    }
+    return $self->{'resolver'} ||= Net::DNS::Resolver->new;
+}
+
+# Taken directly from LWP::UserAgent because it was private there, and we can't depend on it
+# staying there in future versions:  needed by our modified version of send_request
+sub _need_proxy
+{
+    my($self, $url) = @_;
+    $url = $HTTP::URI_CLASS->new($url) unless ref $url;
+
+    my $scheme = $url->scheme || return;
+    if (my $proxy = $self->{'proxy'}{$scheme}) {
+        if ($self->{'no_proxy'} && @{ $self->{'no_proxy'} }) {
+            if (my $host = eval { $url->host }) {
+                for my $domain (@{ $self->{'no_proxy'} }) {
+                    if ($host =~ /\Q$domain\E$/) {
+                        LWP::Debug::trace("no_proxy configured");
+                          return;
+                      }
+                }
+            }
+        }
+        LWP::Debug::debug("Proxied to $proxy");
+        return $HTTP::URI_CLASS->new($proxy);
+    }
+    LWP::Debug::debug('Not proxied');
+    undef;
+}
+
+# Taken directly from LWP::UserAgent because it was private there, and we can't depend on it
+# staying there in future versions:  needed by our modified version of send_request
+sub _request_sanity_check {
+    my($self, $request) = @_;
+    # some sanity checking
+    if (defined $request) {
+        if (ref $request) {
+            Carp::croak("You need a request object, not a " . ref($request) . " object")
+              if ref($request) eq 'ARRAY' or ref($request) eq 'HASH' or
+              !$request->can('method') or !$request->can('uri');
+          }
+        else {
+            Carp::croak("You need a request object, not '$request'");
+          }
+    }
+    else {
+        Carp::croak("No request object passed in");
+      }
+}
+
+# Taken directly from LWP::UserAgent because it was private there, and we can't depend on it
+# staying there in future versions:  needed by our modified version of send_request
+sub _new_response {
+    my($request, $code, $message) = @_;
+    my $response = HTTP::Response->new($code, $message);
+    $response->request($request);
+    $response->header("Client-Date" => HTTP::Date::time2str(time));
+    $response->header("Client-Warning" => "Internal response");
+    $response->header("Content-Type" => "text/plain");
+    $response->content("$code $message\n");
+    return $response;
+}
+
+sub eurl {
+    my $a = $_[0];
+    $a =~ s/([^a-zA-Z0-9_\,\-.\/\\\: ])/uc sprintf("%%%02x",ord($1))/eg;
+    $a =~ tr/ /+/;
+    return $a;
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+LWPx::ParanoidAgent - subclass of LWP::UserAgent that protects you from harm
+
+=head1 SYNOPSIS
+
+ require LWPx::ParanoidAgent;
+
+ my $ua = LWPx::ParanoidAgent->new;
+
+ # this is 10 seconds overall, from start to finish.  not just between
+ # socket reads.  and it includes all redirects.  so attackers telling
+ # you to download from a malicious tarpit webserver can only stall
+ # you for $n seconds
+
+ $ua->timeout(10);
+
+ # setup extra block lists, in addition to the always-enforced blocking
+ # of private IP addresses, loopbacks, and multicast addresses
+
+ $ua->blocked_hosts(
+    "foo.com",
+    qr/\.internal\.company\.com$/i,
+    sub { my $host = shift;  return 1 if is_bad($host); },
+ );
+
+ $ua->whitelisted_hosts(
+    "brad.lj",
+    qr/^192\.168\.64\.3?/,
+    sub { ... },
+ );
+
+ # get/set the DNS resolver object that's used
+ my $resolver = $ua->resolver;
+ $ua->resolver(Net::DNS::Resolver->new(...));
+
+ # and then just like a normal LWP::UserAgent, because it is one.
+ my $response = $ua->get('http://search.cpan.org/');
+ ...
+ if ($response->is_success) {
+     print $response->content;  # or whatever
+ }
+ else {
+     die $response->status_line;
+ }
+
+=head1 DESCRIPTION
+
+The C<LWPx::ParanoidAgent> is a class subclassing C<LWP::UserAgent>,
+but paranoid against attackers.  It's to be used when you're fetching
+a remote resource on behalf of a possibly malicious user.
+
+This class can do whatever C<LWP::UserAgent> can (callbacks, uploads from
+files, etc), except proxy support is explicitly removed, because in
+that case you should do your paranoia at your proxy.
+
+Also, the schemes are limited to http and https, which are mapped to
+C<LWPx::Protocol::http_paranoid> and
+C<LWPx::Protocol::https_paranoid>, respectively, which are forked
+versions of the same ones without the "_paranoid".  Subclassing them
+didn't look possible, as they were essentially just one huge function.
+
+This class protects you from connecting to internal IP ranges (unless you
+whitelist them), hostnames/IPs that you blacklist, remote webserver
+tarpitting your process (the timeout parameter is changed to be a global
+timeout over the entire process), and all combinations of redirects and
+DNS tricks to otherwise tarpit and/or connect to internal resources.
+
+=head1 CONSTRUCTOR
+
+=over 4
+
+=item C<new>
+
+my $ua = LWPx::ParanoidAgent->new([ %opts ]);
+
+In addition to any constructor options from L<LWP::UserAgent>, you may
+also set C<blocked_hosts> (to an arrayref), C<whitelisted_hosts> (also
+an arrayref), and C<resolver>, a Net::DNS::Resolver object.
+
+=back
+
+=head1 METHODS
+
+=over 4
+
+=item $csr->B<resolver>($net_dns_resolver)
+
+=item $csr->B<resolver>
+
+Get/set the L<Net::DNS::Resolver> object used to lookup hostnames.
+
+=item $csr->B<blocked_hosts>(@host_list)
+
+=item $csr->B<blocked_hosts>
+
+Get/set the the list of blocked hosts.  The items in @host_list may be
+compiled regular expressions (with qr//), code blocks, or scalar
+literals.  In any case, the thing that is match, passed in, or
+compared (respectively), is all of the given hostname, given IP
+address, and IP address in canonical a.b.c.d decimal notation.  So if
+you want to block "1.2.3.4" and the user entered it in a mix of
+network/host form in a mix of decimal/octal/hex, you need only block
+"1.2.3.4" and not worry about the details.
+
+=item $csr->B<whitelisted_hosts>(@host_list)
+
+=item $csr->B<whitelisted_hosts>
+
+Like blocked hosts, but matching the hosts/IPs that bypass blocking
+checks.  The only difference is the IP address isn't canonicalized
+before being whitelisted-matched, mostly because it doesn't make sense
+for somebody to enter in a good address in a subversive way.
+
+=back
+
+=head1 SEE ALSO
+
+See L<LWP::UserAgent> to see how to use this class.
+
+=head1 WARRANTY
+
+This module is supplied "as-is" and comes with no warranty, expressed
+or implied.  It tries to protect you from harm, but maybe it will.
+Maybe it will destroy your data and your servers.  You'd better audit
+it and send me bug reports.
+
+=head1 BUGS
+
+Maybe.  See the warranty above.
+
+=head1 COPYRIGHT
+
+Copyright 2005 Brad Fitzpatrick
+
+Lot of code from the the base class, copyright 1995-2004 Gisle Aas.
+
+This library is free software; you can redistribute it and/or
+modify it under the same terms as Perl itself.