diff options
Diffstat (limited to '')
-rw-r--r-- | clean-country.pl | 20 | ||||
-rw-r--r-- | crawl.pl | 2 | ||||
-rw-r--r-- | lib/githubexplorer.pm | 40 | ||||
-rw-r--r-- | lib/githubexplorer/Gexf.pm | 270 |
4 files changed, 179 insertions, 153 deletions
diff --git a/clean-country.pl b/clean-country.pl index 9fb0b5c..8d26d12 100644 --- a/clean-country.pl +++ b/clean-country.pl @@ -9,26 +9,32 @@ use YAML::Syck; my $conf = LoadFile(shift); -my $schema = githubexplorer::Schema->connect(@{$conf->{connect_info}}); +my $schema = githubexplorer::Schema->connect( @{ $conf->{connect_info} } ); -my $profiles = $schema->resultset('Profiles')->search({id => {'>' => 55781}, location => {'!=' => - undef}, location => {'!=' => ''}}); +my $profiles = $schema->resultset('Profiles')->search( + { + id => { '>' => 55781 }, + location => { '!=' => undef }, + location => { '!=' => '' } + } +); my $geo = Geo::GeoNames->new(); -while (my $pr = $profiles->next) { +while ( my $pr = $profiles->next ) { next if $pr->location =~ /^http/; next if $pr->country; next if $pr->location =~ /earth/i; - say "-> process ".$pr->login." with ".$pr->location; + say "-> process " . $pr->login . " with " . $pr->location; my $result = $geo->search( q => $pr->location, maxRows => 1 ); my $res = shift @$result; if ($res) { eval { - $pr->update({city => $res->{name}, country => $res->{countryName}}); + $pr->update( + { city => $res->{name}, country => $res->{countryName} } ); }; next if $@; - say "** fix with ".$pr->city . " in ".$pr->country; + say "** fix with " . $pr->city . " in " . $pr->country; } sleep(1); } \ No newline at end of file diff --git a/crawl.pl b/crawl.pl index d844893..300cfd5 100644 --- a/crawl.pl +++ b/crawl.pl @@ -12,7 +12,7 @@ GetOptions( 'repo' => \my $repo, 'graph' => \my $graph, 'network' => \my $network, - 'seed' => \my $seed, + 'seed' => \my $seed, 'conf=s' => \my $conf, ); diff --git a/lib/githubexplorer.pm b/lib/githubexplorer.pm index 5744e08..7b9c252 100644 --- a/lib/githubexplorer.pm +++ b/lib/githubexplorer.pm @@ -7,7 +7,7 @@ use githubexplorer::Gexf; use IO::All; with qw/githubexplorer::Profile githubexplorer::Repository -githubexplorer::Network/; + githubexplorer::Network/; has seed => ( isa => 'ArrayRef', @@ -25,10 +25,10 @@ has seed => ( return \@seeds; } ); -has api_login => ( isa => 'Str|Undef', is => 'ro', required => 1 ); -has api_token => ( isa => 'Str|Undef', is => 'ro', required => 1 ); -has connect_info => ( isa => 'ArrayRef', is => 'ro', required => 1 ); -has with_repo => ( isa => 'Bool', is => 'ro', default => sub {0} ); +has api_login => ( isa => 'Str|Undef', is => 'ro', required => 1 ); +has api_token => ( isa => 'Str|Undef', is => 'ro', required => 1 ); +has connect_info => ( isa => 'ArrayRef', is => 'ro', required => 1 ); +has with_repo => ( isa => 'Bool', is => 'ro', default => sub {0} ); has schema => ( isa => 'githubexplorer::Schema', is => 'rw', @@ -75,7 +75,8 @@ sub gen_graph { sub graph_repo { my $self = shift; $self->_connect unless $self->has_schema; - my $repos = $self->schema->resultset('Repositories')->search({fork => 0}); + my $repos + = $self->schema->resultset('Repositories')->search( { fork => 0 } ); while ( my $r = $repos->next ) { $self->fetch_network($r); } @@ -90,20 +91,25 @@ sub gen_seed { open my $fh, '>', 'seed.csv'; while ( my $pr = $profiles->next ) { my %languages; - my $forks = $self->schema->resultset('Fork')->search({profile => - $pr->id}); - while (my $fork = $forks->next) { - my $languages = - $self->schema->resultset('RepoLang')->search({repository => - $fork->repos->id}); - while (my $lang = $languages->next) { - $languages{$lang->language->name}+=$lang->size; + my $forks = $self->schema->resultset('Fork') + ->search( { profile => $pr->id } ); + while ( my $fork = $forks->next ) { + my $languages = $self->schema->resultset('RepoLang') + ->search( { repository => $fork->repos->id } ); + while ( my $lang = $languages->next ) { + $languages{ $lang->language->name } += $lang->size; } } - my @sorted_lang = sort {$languages{$b} <=> $languages{$a}} keys %languages; + my @sorted_lang + = sort { $languages{$b} <=> $languages{$a} } keys %languages; my $main_lang = shift @sorted_lang; - my $other_lang = join('|', @sorted_lang); - my $str = $profiles->blog.";;;github;".$main_lang.";".$other_lang.";".$profile->country."\n"; + my $other_lang = join( '|', @sorted_lang ); + my $str + = $profiles->blog + . ";;;github;" + . $main_lang . ";" + . $other_lang . ";" + . $profile->country . "\n"; print $fh $str; } close $fh; diff --git a/lib/githubexplorer/Gexf.pm b/lib/githubexplorer/Gexf.pm index 58281d4..de0da49 100644 --- a/lib/githubexplorer/Gexf.pm +++ b/lib/githubexplorer/Gexf.pm @@ -4,85 +4,90 @@ use Moose; use XML::Simple; use 5.010; -has schema => (is => 'ro', isa => 'Object', required => 1); -has id_edges => (is => 'rw', isa => 'Num', traits => ['Counter'], default => -0, handles => {inc_edges => 'inc'}); +has schema => ( is => 'ro', isa => 'Object', required => 1 ); +has id_edges => ( + is => 'rw', + isa => 'Num', + traits => ['Counter'], + default => 0, + handles => { inc_edges => 'inc' } +); has graph => ( -is => 'rw', -isa => 'HashRef', -default => sub { - my $graph = { - gexf => { - version => "1.1", - meta => { creator => ['linkfluence'] }, - graph => { - type => 'static', - attributes => { - class => 'node', - type => 'static', - attribute => [ - { - id => 0, - type => 'float', - title => 'name' - }, - { - id => 1, - type => 'string', - title => 'type', - }, - { - id => 2, - type => 'float', - title => 'followers_count' - }, - { - id => 3, - type => 'float', - title => 'following_count' - }, - { - id => 4, - type => 'float', - title => 'forks', - }, - { - id => 5, - type => 'string', - title => 'location', - }, - { - id => 6, - type => 'float', - title => 'public_gist_count', - }, - { - id => 7, - type => 'float', - title => 'public_repo_count', - }, - { - id => 8, - type => 'string', - title => 'language', - }, - { - id => 9, - type => 'string', - title => 'description', - }, - { - id => 10, - type => 'float', - title => 'watchers', - } - ] + is => 'rw', + isa => 'HashRef', + default => sub { + my $graph = { + gexf => { + version => "1.1", + meta => { creator => ['linkfluence'] }, + graph => { + type => 'static', + attributes => { + class => 'node', + type => 'static', + attribute => [ + { + id => 0, + type => 'float', + title => 'name' + }, + { + id => 1, + type => 'string', + title => 'type', + }, + { + id => 2, + type => 'float', + title => 'followers_count' + }, + { + id => 3, + type => 'float', + title => 'following_count' + }, + { + id => 4, + type => 'float', + title => 'forks', + }, + { + id => 5, + type => 'string', + title => 'location', + }, + { + id => 6, + type => 'float', + title => 'public_gist_count', + }, + { + id => 7, + type => 'float', + title => 'public_repo_count', + }, + { + id => 8, + type => 'string', + title => 'language', + }, + { + id => 9, + type => 'string', + title => 'description', + }, + { + id => 10, + type => 'float', + title => 'watchers', + } + ] + } } } - } - }; -} + }; + } ); sub gen_gexf { @@ -94,11 +99,11 @@ sub gen_gexf { $self->profiles_from_repositories; my $profiles_from_repositories = $self->dump_gexf; - $profiles_from_repositories > io ('profiles_from_repositories.gexf'); + $profiles_from_repositories > io('profiles_from_repositories.gexf'); $self->repositories_from_profiles; my $repositories_from_profiles = $self->dump_gexf; - $profiles_from_repositories > io ('repositories_from_profiles.gexf'); + $profiles_from_repositories > io('repositories_from_profiles.gexf'); } sub dump_gefx { @@ -110,7 +115,7 @@ sub dump_gefx { } sub basic_profiles { - my $self = shift; + my $self = shift; $self->id_edges(0); say "start basic_profiles ..."; my $profiles = $self->schema->resultset('Profiles')->search(); @@ -124,9 +129,9 @@ sub basic_profiles { my $id = 0; while ( my $edge = $edges->next ) { my $e = { - source => $edge->origin->id, - target => $edge->dest->id, - id => $self->inc_edges, + source => $edge->origin->id, + target => $edge->dest->id, + id => $self->inc_edges, }; push @{ $self->graph->{gexf}->{graph}->{edges}->{edge} }, $e; } @@ -140,15 +145,16 @@ sub profiles_from_repositories { my ($nodes); my $profiles = $self->schema->resultset('Profiles')->search(); - while (my $profile = $profiles->next) { + while ( my $profile = $profiles->next ) { my $node = $self->_get_node_for_profile($profile); push @{ $self->graph->{gexf}->{graph}->{nodes}->{node} }, $node; } my $repositories = $self->schema->resultset('Repositories')->search(); - while (my $repos = $repositories->next) { - my $forks = $self->schema->resultset('Fork')->search({repos => $repos->id}); + while ( my $repos = $repositories->next ) { + my $forks = $self->schema->resultset('Fork') + ->search( { repos => $repos->id } ); my @profiles; - while (my $fork = $forks->next) { + while ( my $fork = $forks->next ) { push @profiles, $fork->profile->id; } foreach my $p (@profiles) { @@ -157,7 +163,7 @@ sub profiles_from_repositories { my $e = { source => $p, target => $_, - id => $self->inc_edges, + id => $self->inc_edges, }; push @{ $self->graph->{gexf}->{graph}->{edges}->{edge} }, $e; } @profiles; @@ -173,38 +179,45 @@ sub repositories_from_profiles { my ($nodes); my $repositories = $self->schema->resultset('Repositories')->search(); - while (my $repos = $repositories->next) { + while ( my $repos = $repositories->next ) { next if $repos->name =~ /dotfiles/; - if (!exists $nodes->{$repos->name}) { - my $language = $self->schema->resultset('RepoLang')->search({repository => $repos->id}, {order_by => 'size'})->first; + if ( !exists $nodes->{ $repos->name } ) { + my $language + = $self->schema->resultset('RepoLang') + ->search( { repository => $repos->id }, + { order_by => 'size' } )->first; my $lang = $language ? $language->language->name : 'none'; - $nodes->{$repos->name} = { - id => $repos->name, - label => $repos->name, + $nodes->{ $repos->name } = { + id => $repos->name, + label => $repos->name, attvalues => { attvalue => [ - { for => 0, value => $repos->name}, - { for => 1, value => "repository"}, - { for => 4, value => $repos->forks}, - { for => 9, value => $repos->description}, - { for => 10, value => $repos->watchers}, - { for => 8, value => $lang}, + { for => 0, value => $repos->name }, + { for => 1, value => "repository" }, + { for => 4, value => $repos->forks }, + { for => 9, value => $repos->description }, + { for => 10, value => $repos->watchers }, + { for => 8, value => $lang }, ], }, }; } - my $forks = $self->schema->resultset('Fork')->search({repos => $repos->id}); - while (my $fork = $forks->next) { + my $forks = $self->schema->resultset('Fork') + ->search( { repos => $repos->id } ); + while ( my $fork = $forks->next ) { my $e = { - source => $fork->profile->id, - target => $fork->repos->name, - id => $self->inc_edges, + source => $fork->profile->id, + target => $fork->repos->name, + id => $self->inc_edges, }; push @{ $self->graph->{gexf}->{graph}->{edges}->{edge} }, $e; } } - map {push @{ $self->graph->{gexf}->{graph}->{nodes}->{node} }, $nodes->{$_} keys %$nodes; + map { + push @{ $self->graph->{gexf}->{graph}->{nodes}->{node} }, + $nodes->{$_} + } keys %$nodes; say "repositories_from_profiles done"; } @@ -213,22 +226,23 @@ sub stats_languages_by_country { } sub _get_node_for_profile { - my ($self, $profile) = @_; - my ($languages, $ordered_languages) = $self->_get_languages_for_profile($profile); + my ( $self, $profile ) = @_; + my ( $languages, $ordered_languages ) + = $self->_get_languages_for_profile($profile); my $main_lang = shift @$ordered_languages; - my $node = { - id => $profile->id, - label => $profile->login, + my $node = { + id => $profile->id, + label => $profile->login, attvalues => { attvalue => [ - { for => 0, value => $profile->name}, - { for => 1, value => "profile"}, - { for => 2, value => $profile->followers_count}, - { for => 3, value => $profile->following_count}, - { for => 5, value => $profile->country}, - { for => 6, value => $profile->public_gist_count}, - { for => 7, value => $profile->public_repo_count}, - { for => 8, value => $main_lang}, + { for => 0, value => $profile->name }, + { for => 1, value => "profile" }, + { for => 2, value => $profile->followers_count }, + { for => 3, value => $profile->following_count }, + { for => 5, value => $profile->country }, + { for => 6, value => $profile->public_gist_count }, + { for => 7, value => $profile->public_repo_count }, + { for => 8, value => $main_lang }, ] }, }; @@ -236,22 +250,22 @@ sub _get_node_for_profile { } sub _get_languages_for_profile { - my ($self, $profile) = shift; + my ( $self, $profile ) = shift; - my $forks = $self->schema->resultset('Fork')->search({profile => - $profile->id}); + my $forks = $self->schema->resultset('Fork') + ->search( { profile => $profile->id } ); my %languages; - while (my $fork = $forks->next) { - my $languages = - $self->schema->resultset('RepoLang')->search({repository => - $fork->repos->id}); - while (my $lang = $languages->next) { - $languages{$lang->language->name}+=$lang->size; + while ( my $fork = $forks->next ) { + my $languages = $self->schema->resultset('RepoLang') + ->search( { repository => $fork->repos->id } ); + while ( my $lang = $languages->next ) { + $languages{ $lang->language->name } += $lang->size; } } - my @sorted_lang = sort {$languages{$b} <=> $languages{$a}} keys %languages; - return (\%languages, \@sorted_lang); + my @sorted_lang + = sort { $languages{$b} <=> $languages{$a} } keys %languages; + return ( \%languages, \@sorted_lang ); } #sub repositories { |