diff options
Diffstat (limited to '')
-rw-r--r-- | .gitignore | 4 | ||||
-rw-r--r-- | extract-seed.pl | 6 | ||||
-rw-r--r-- | lib/githubexplorer.pm | 2 | ||||
-rw-r--r-- | lib/githubexplorer/Gexf.pm | 99 | ||||
-rw-r--r-- | lib/githubexplorer/Network.pm | 9 |
5 files changed, 98 insertions, 22 deletions
diff --git a/.gitignore b/.gitignore index 4358bbd..19f1f1e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,6 @@ *.sqlite *.conf *.sql -*.yml \ No newline at end of file +*.yml +*.gexf +*.csv \ No newline at end of file diff --git a/extract-seed.pl b/extract-seed.pl new file mode 100644 index 0000000..293c270 --- /dev/null +++ b/extract-seed.pl @@ -0,0 +1,6 @@ +#!/usr/bin/env perl +use strict; +use warnings; +use githubexplorer::Schema; + +my $schema = githubexplorer::Schema->connect(); \ No newline at end of file diff --git a/lib/githubexplorer.pm b/lib/githubexplorer.pm index 4fc5aa5..67c789b 100644 --- a/lib/githubexplorer.pm +++ b/lib/githubexplorer.pm @@ -69,7 +69,7 @@ sub gen_graph { my $self = shift; $self->_connect unless $self->has_schema; my $graph = githubexplorer::Gexf->new( schema => $self->schema ); - my $xml = $graph->profiles; + my $xml = $graph->gen_gexf; $xml > io('crawl.gexf'); } diff --git a/lib/githubexplorer/Gexf.pm b/lib/githubexplorer/Gexf.pm index c3be9dc..f7e38cb 100644 --- a/lib/githubexplorer/Gexf.pm +++ b/lib/githubexplorer/Gexf.pm @@ -2,8 +2,11 @@ package githubexplorer::Gexf; use Moose; use XML::Simple; +use 5.010; has schema => (is => 'ro', isa => 'Object', required => 1); +has id_edges => (is => 'rw', isa => 'Num', traits => ['Counter'], default => +0, handles => {inc_edges => 'inc'}); has graph => ( is => 'rw', @@ -11,8 +14,8 @@ has graph => ( default => sub { my $graph = { gexf => { - version => "1.0", - meta => { creator => ['rtgi'] }, + version => "1.1", + meta => { creator => ['linkfluence'] }, graph => { type => 'static', attributes => { @@ -21,7 +24,7 @@ has graph => ( attribute => [ { id => 0, - type => 'string', + type => 'float', title => 'name' }, { @@ -31,17 +34,17 @@ has graph => ( }, { id => 2, - type => 'string', + type => 'float', title => 'followers_count' }, { id => 3, - type => 'string', + type => 'float', title => 'following_count' }, { id => 4, - type => 'string', + type => 'float', title => 'forks', }, { @@ -51,12 +54,12 @@ has graph => ( }, { id => 6, - type => 'string', + type => 'float', title => 'public_gist_count', }, { id => 7, - type => 'string', + type => 'float', title => 'public_repo_count', }, { @@ -71,7 +74,7 @@ has graph => ( }, { id => 10, - type => 'string', + type => 'float', title => 'watchers', } ] @@ -82,8 +85,19 @@ has graph => ( } ); +sub gen_gexf { + my $self = shift; + $self->profiles; + #$self->repositories; + say "total nodes : ".scalar (@{ $self->graph->{gexf}->{graph}->{nodes}->{node} }); + say "total edges : ".scalar (@{ $self->graph->{gexf}->{graph}->{edges}->{edge} }); + my $xml_out = XMLout( $self->graph, AttrIndent => 1, keepRoot => 1 ); + return $xml_out; +} + sub profiles { my $self = shift; + say "start profiles ..."; my $profiles = $self->schema->resultset('Profiles')->search(); while ( my $profile = $profiles->next ) { @@ -92,9 +106,13 @@ sub profiles { label => $profile->login, attvalues => { attvalue => [ - {name => $profile->name}, - {followers_count => $profile->followers_count}, - {following_count => $profile->following_count}, + { for => 0, value => $profile->name}, + { for => 1, value => "profile"}, + { for => 2, value => $profile->followers_count}, + { for => 3, value => $profile->following_count}, + { for => 5, value => $profile->location}, + { for => 6, value => $profile->public_gist_count}, + { for => 7, value => $profile->public_repo_count}, ] }, }; @@ -105,25 +123,66 @@ sub profiles { my $id = 0; while ( my $edge = $edges->next ) { my $e = { - cardinal => 1, source => $edge->origin->id, target => $edge->dest->id, - type => 'dir', - id => $id++, + id => $self->inc_edges, }; push @{ $self->graph->{gexf}->{graph}->{edges}->{edge} }, $e; } - - my $xml_out = XMLout( $self->graph, AttrIndent => 1, keepRoot => 1 ); - return $xml_out; + say " done"; } sub repositories { my $self = shift; - my $repositories = $self->schema->resultset('Repositories')->search(); - while (my $repo = $repositories->next) { + say "start repositories ..."; + my $repositories = $self->schema->resultset('Repositories')->search({fork => 0}); + while (my $repos = $repositories->next) { + + next if $repos->name =~ /dotfiles/i; + # available in forks ? + my $check_fork = $self->schema->resultset('Fork')->search({repos => $repos->id}); + next if $check_fork->count < 1; + + if (!grep {$_->{id} eq "repos_".$repos->name} @{$self->graph->{gexf}->{graph}->{nodes}->{node}}) { + my $language = $self->schema->resultset('RepoLang')->search({repository => $repos->id}, {order_by => 'size'})->first; + my $lang = $language ? $language->language->name : 'none'; + my $node = { + id => "repos_".$repos->name, + label => $repos->name, + attvalues => { + attvalue => [ + { for => 0, value => $repos->name}, + { for => 1, value => "repository"}, + { for => 4, value => $repos->forks}, + { for => 9, value => $repos->description}, + { for => 10, value => $repos->watchers}, + { for => 8, value => $lang}, + ], + }, + }; + push @{ $self->graph->{gexf}->{graph}->{nodes}->{node} }, $node; + } + my $e = { + source => $repos->id_profile->id, + target => "repos_".$repos->name, + id => $self->inc_edges, + }; + push @{ $self->graph->{gexf}->{graph}->{edges}->{edge} }, $e; + } + + my $forks = $self->schema->resultset('Fork')->search(); + + while (my $fork = $forks->next) { + next if $fork->repos->name =~ /dotfiles/i; + my $e = { + source => $fork->profile->id, + target => "repos_".$fork->repos->name, + id => $self->inc_edges, + }; + push @{ $self->graph->{gexf}->{graph}->{edges}->{edge} }, $e; } + say " done"; } 1; diff --git a/lib/githubexplorer/Network.pm b/lib/githubexplorer/Network.pm index eb6253f..dde08a4 100644 --- a/lib/githubexplorer/Network.pm +++ b/lib/githubexplorer/Network.pm @@ -7,6 +7,11 @@ use YAML::Syck; sub fetch_network { my ( $self, $repos ) = @_; + # check fork + my $check = $self->schema->resultset('Fork')->search({repos=> + $repos->id}); + return if $check->count > 0; + say ">> start on ".$repos->name; my $api_repos = Net::GitHub::V2::Repositories->new( owner => $repos->id_profile->login, @@ -16,6 +21,10 @@ sub fetch_network { ); my $edges = $api_repos->network(); + if (ref $edges ne 'ARRAY') { + sleep 60; + return; + } sleep(1); foreach my $edge (@$edges) { next if $edge->{owner} eq $repos->id_profile->login; |