summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- clean-country.pl 21
-rw-r--r-- crawl.pl 2
-rw-r--r-- lib/githubexplorer.pm 40
-rw-r--r-- lib/githubexplorer/Gexf.pm 270
4 files changed, 179 insertions, 154 deletions
diff --git a/clean-country.pl b/clean-country.pl
index 7a99e99..460b29f 100644
--- a/clean-country.pl
+++ b/clean-country.pl
@@ -9,27 +9,32 @@ use YAML::Syck;
 
 my $conf = LoadFile(shift);
 
-my $schema = githubexplorer::Schema->connect(@{$conf->{connect_info}});
+my $schema = githubexplorer::Schema->connect( @{ $conf->{connect_info} } );
 
-my $profiles = $schema->resultset('Profiles')->search({id => {'>' => 61498}, location => {'!=' =>
-            undef}, location => {'!=' => ''}});
+my $profiles = $schema->resultset('Profiles')->search(
+    {
+        id       => { '>'  => 55781 },
+        location => { '!=' => undef },
+        location => { '!=' => '' }
+    }
+);
 
 my $geo = Geo::GeoNames->new();
 
-my $i = 0;
-while (my $pr = $profiles->next) {
+while ( my $pr = $profiles->next ) {
     next if $pr->location =~ /^http/;
     next if $pr->country;
     next if $pr->location =~ /earth/i;
-    say "-> process ".$pr->login." with ".$pr->location;
+    say "-> process " . $pr->login . " with " . $pr->location;
     my $result = $geo->search( q => $pr->location, maxRows => 1 );
     my $res = shift @$result;
     if ($res) {
         eval {
-            $pr->update({city => $res->{name}, country => $res->{countryName}});
+            $pr->update(
+                { city => $res->{name}, country => $res->{countryName} } );
         };
         next if $@;
-        say "** fix with ".$pr->city . " in ".$pr->country;
+        say "** fix with " . $pr->city . " in " . $pr->country;
     }
     if (++$i == 10) {
         sleep(2);
diff --git a/crawl.pl b/crawl.pl
index d844893..300cfd5 100644
--- a/crawl.pl
+++ b/crawl.pl
@@ -12,7 +12,7 @@ GetOptions(
     'repo'     => \my $repo,
     'graph'    => \my $graph,
     'network'  => \my $network,
-    'seed'  => \my $seed,
+    'seed'     => \my $seed,
     'conf=s'   => \my $conf,
 );
 
diff --git a/lib/githubexplorer.pm b/lib/githubexplorer.pm
index 5744e08..7b9c252 100644
--- a/lib/githubexplorer.pm
+++ b/lib/githubexplorer.pm
@@ -7,7 +7,7 @@ use githubexplorer::Gexf;
 use IO::All;
 
 with qw/githubexplorer::Profile githubexplorer::Repository
-githubexplorer::Network/;
+    githubexplorer::Network/;
 
 has seed => (
     isa      => 'ArrayRef',
@@ -25,10 +25,10 @@ has seed => (
         return \@seeds;
     }
 );
-has api_login    => ( isa => 'Str|Undef',      is => 'ro', required => 1 );
-has api_token    => ( isa => 'Str|Undef',      is => 'ro', required => 1 );
-has connect_info => ( isa => 'ArrayRef', is => 'ro', required => 1 );
-has with_repo    => ( isa => 'Bool',     is => 'ro', default  => sub {0} );
+has api_login    => ( isa => 'Str|Undef', is => 'ro', required => 1 );
+has api_token    => ( isa => 'Str|Undef', is => 'ro', required => 1 );
+has connect_info => ( isa => 'ArrayRef',  is => 'ro', required => 1 );
+has with_repo    => ( isa => 'Bool',      is => 'ro', default  => sub {0} );
 has schema => (
     isa       => 'githubexplorer::Schema',
     is        => 'rw',
@@ -75,7 +75,8 @@ sub gen_graph {
 sub graph_repo {
     my $self = shift;
     $self->_connect unless $self->has_schema;
-    my $repos = $self->schema->resultset('Repositories')->search({fork => 0});
+    my $repos
+        = $self->schema->resultset('Repositories')->search( { fork => 0 } );
     while ( my $r = $repos->next ) {
         $self->fetch_network($r);
     }
@@ -90,20 +91,25 @@ sub gen_seed {
     open my $fh, '>', 'seed.csv';
     while ( my $pr = $profiles->next ) {
         my %languages;
-        my $forks = $self->schema->resultset('Fork')->search({profile =>
-                $pr->id});
-        while (my $fork = $forks->next) {
-            my $languages =
-            $self->schema->resultset('RepoLang')->search({repository =>
-                    $fork->repos->id});
-            while (my $lang = $languages->next) {
-                $languages{$lang->language->name}+=$lang->size;
+        my $forks = $self->schema->resultset('Fork')
+            ->search( { profile => $pr->id } );
+        while ( my $fork = $forks->next ) {
+            my $languages = $self->schema->resultset('RepoLang')
+                ->search( { repository => $fork->repos->id } );
+            while ( my $lang = $languages->next ) {
+                $languages{ $lang->language->name } += $lang->size;
             }
         }
-        my @sorted_lang = sort {$languages{$b} <=> $languages{$a}} keys %languages;
+        my @sorted_lang
+            = sort { $languages{$b} <=> $languages{$a} } keys %languages;
         my $main_lang = shift @sorted_lang;
-        my $other_lang = join('|', @sorted_lang);
-        my $str = $profiles->blog.";;;github;".$main_lang.";".$other_lang.";".$profile->country."\n";
+        my $other_lang = join( '|', @sorted_lang );
+        my $str
+            = $profiles->blog
+            . ";;;github;"
+            . $main_lang . ";"
+            . $other_lang . ";"
+            . $profile->country . "\n";
         print $fh $str;
     }
     close $fh;
diff --git a/lib/githubexplorer/Gexf.pm b/lib/githubexplorer/Gexf.pm
index 58281d4..de0da49 100644
--- a/lib/githubexplorer/Gexf.pm
+++ b/lib/githubexplorer/Gexf.pm
@@ -4,85 +4,90 @@ use Moose;
 use XML::Simple;
 use 5.010;
 
-has schema => (is => 'ro', isa => 'Object', required => 1);
-has id_edges => (is => 'rw', isa => 'Num', traits  => ['Counter'], default =>
-0, handles => {inc_edges => 'inc'});
+has schema => ( is => 'ro', isa => 'Object', required => 1 );
+has id_edges => (
+    is      => 'rw',
+    isa     => 'Num',
+    traits  => ['Counter'],
+    default => 0,
+    handles => { inc_edges => 'inc' }
+);
 
 has graph => (
-is      => 'rw',
-isa     => 'HashRef',
-default => sub {
-    my $graph = {
-        gexf => {
-            version => "1.1",
-            meta    => { creator => ['linkfluence'] },
-            graph   => {
-                type       => 'static',
-                attributes => {
-                    class     => 'node',
-                    type      => 'static',
-                    attribute => [
-                        {
-                            id    => 0,
-                            type  => 'float',
-                            title => 'name'
-                        },
-                        {
-                            id => 1,
-                            type => 'string',
-                            title => 'type',
-                        },
-                        {
-                            id    => 2,
-                            type  => 'float',
-                            title => 'followers_count'
-                        },
-                        {
-                            id    => 3,
-                            type  => 'float',
-                            title => 'following_count'
-                        },
-                        {
-                            id => 4,
-                            type => 'float',
-                            title => 'forks',
-                        },
-                        {
-                            id => 5,
-                            type => 'string',
-                            title => 'location',
-                        },
-                        {
-                            id => 6,
-                            type => 'float',
-                            title => 'public_gist_count',
-                        },
-                        {
-                            id => 7,
-                            type => 'float',
-                            title => 'public_repo_count',
-                        },
-                        {
-                            id => 8,
-                            type => 'string',
-                            title => 'language',
-                        },
-                        {
-                            id => 9,
-                            type => 'string',
-                            title => 'description',
-                        },
-                        {
-                            id => 10,
-                            type => 'float',
-                            title => 'watchers',
-                        }
-                    ]
+    is      => 'rw',
+    isa     => 'HashRef',
+    default => sub {
+        my $graph = {
+            gexf => {
+                version => "1.1",
+                meta    => { creator => ['linkfluence'] },
+                graph   => {
+                    type       => 'static',
+                    attributes => {
+                        class     => 'node',
+                        type      => 'static',
+                        attribute => [
+                            {
+                                id    => 0,
+                                type  => 'float',
+                                title => 'name'
+                            },
+                            {
+                                id    => 1,
+                                type  => 'string',
+                                title => 'type',
+                            },
+                            {
+                                id    => 2,
+                                type  => 'float',
+                                title => 'followers_count'
+                            },
+                            {
+                                id    => 3,
+                                type  => 'float',
+                                title => 'following_count'
+                            },
+                            {
+                                id    => 4,
+                                type  => 'float',
+                                title => 'forks',
+                            },
+                            {
+                                id    => 5,
+                                type  => 'string',
+                                title => 'location',
+                            },
+                            {
+                                id    => 6,
+                                type  => 'float',
+                                title => 'public_gist_count',
+                            },
+                            {
+                                id    => 7,
+                                type  => 'float',
+                                title => 'public_repo_count',
+                            },
+                            {
+                                id    => 8,
+                                type  => 'string',
+                                title => 'language',
+                            },
+                            {
+                                id    => 9,
+                                type  => 'string',
+                                title => 'description',
+                            },
+                            {
+                                id    => 10,
+                                type  => 'float',
+                                title => 'watchers',
+                            }
+                        ]
+                    }
                 }
             }
-        }
-    };
-}
+        };
+    }
 );
 
 sub gen_gexf {
@@ -94,11 +99,11 @@ sub gen_gexf {
 
     $self->profiles_from_repositories;
     my $profiles_from_repositories = $self->dump_gexf;
-    $profiles_from_repositories > io ('profiles_from_repositories.gexf');
+    $profiles_from_repositories > io('profiles_from_repositories.gexf');
 
     $self->repositories_from_profiles;
     my $repositories_from_profiles = $self->dump_gexf;
-    $profiles_from_repositories > io ('repositories_from_profiles.gexf');
+    $profiles_from_repositories > io('repositories_from_profiles.gexf');
 }
 
 sub dump_gefx {
@@ -110,7 +115,7 @@ sub dump_gefx {
 }
 
 sub basic_profiles {
-    my $self     = shift;
+    my $self = shift;
     $self->id_edges(0);
     say "start basic_profiles ...";
     my $profiles = $self->schema->resultset('Profiles')->search();
@@ -124,9 +129,9 @@ sub basic_profiles {
     my $id    = 0;
     while ( my $edge = $edges->next ) {
         my $e = {
-            source   => $edge->origin->id,
-            target   => $edge->dest->id,
-            id       => $self->inc_edges,
+            source => $edge->origin->id,
+            target => $edge->dest->id,
+            id     => $self->inc_edges,
         };
         push @{ $self->graph->{gexf}->{graph}->{edges}->{edge} }, $e;
     }
@@ -140,15 +145,16 @@ sub profiles_from_repositories {
 
     my ($nodes);
     my $profiles = $self->schema->resultset('Profiles')->search();
-    while (my $profile = $profiles->next) {
+    while ( my $profile = $profiles->next ) {
         my $node = $self->_get_node_for_profile($profile);
         push @{ $self->graph->{gexf}->{graph}->{nodes}->{node} }, $node;
     }
     my $repositories = $self->schema->resultset('Repositories')->search();
-    while (my $repos = $repositories->next) {
-        my $forks = $self->schema->resultset('Fork')->search({repos => $repos->id});
+    while ( my $repos = $repositories->next ) {
+        my $forks = $self->schema->resultset('Fork')
+            ->search( { repos => $repos->id } );
         my @profiles;
-        while (my $fork = $forks->next) {
+        while ( my $fork = $forks->next ) {
             push @profiles, $fork->profile->id;
         }
         foreach my $p (@profiles) {
@@ -157,7 +163,7 @@ sub profiles_from_repositories {
                 my $e = {
                     source => $p,
                     target => $_,
-                    id => $self->inc_edges,
+                    id     => $self->inc_edges,
                 };
                 push @{ $self->graph->{gexf}->{graph}->{edges}->{edge} }, $e;
             } @profiles;
@@ -173,38 +179,45 @@ sub repositories_from_profiles {
 
     my ($nodes);
     my $repositories = $self->schema->resultset('Repositories')->search();
-    while (my $repos = $repositories->next) {
+    while ( my $repos = $repositories->next ) {
         next if $repos->name =~ /dotfiles/;
 
-        if (!exists $nodes->{$repos->name}) {
-            my $language = $self->schema->resultset('RepoLang')->search({repository => $repos->id}, {order_by => 'size'})->first;
+        if ( !exists $nodes->{ $repos->name } ) {
+            my $language
+                = $self->schema->resultset('RepoLang')
+                ->search( { repository => $repos->id },
+                { order_by => 'size' } )->first;
             my $lang = $language ? $language->language->name : 'none';
-            $nodes->{$repos->name} = {
-                id => $repos->name,
-                label => $repos->name,
+            $nodes->{ $repos->name } = {
+                id        => $repos->name,
+                label     => $repos->name,
                 attvalues => {
                     attvalue => [
-                        { for => 0,  value => $repos->name},
-                        { for => 1,  value => "repository"},
-                        { for => 4,  value => $repos->forks},
-                        { for => 9,  value => $repos->description},
-                        { for => 10, value => $repos->watchers},
-                        { for => 8,  value => $lang},
+                        { for => 0,  value => $repos->name },
+                        { for => 1,  value => "repository" },
+                        { for => 4,  value => $repos->forks },
+                        { for => 9,  value => $repos->description },
+                        { for => 10, value => $repos->watchers },
+                        { for => 8,  value => $lang },
                     ],
                 },
             };
         }
-        my $forks = $self->schema->resultset('Fork')->search({repos => $repos->id});
-        while (my $fork = $forks->next) {
+        my $forks = $self->schema->resultset('Fork')
+            ->search( { repos => $repos->id } );
+        while ( my $fork = $forks->next ) {
             my $e = {
-                source   => $fork->profile->id,
-                target   => $fork->repos->name,
-                id       => $self->inc_edges,
+                source => $fork->profile->id,
+                target => $fork->repos->name,
+                id     => $self->inc_edges,
             };
             push @{ $self->graph->{gexf}->{graph}->{edges}->{edge} }, $e;
         }
     }
-    map {push @{ $self->graph->{gexf}->{graph}->{nodes}->{node} }, $nodes->{$_} keys %$nodes;
+    map {
+        push @{ $self->graph->{gexf}->{graph}->{nodes}->{node} },
+            $nodes->{$_}
+    } keys %$nodes;
     say "repositories_from_profiles done";
 }
 
@@ -213,22 +226,23 @@ sub stats_languages_by_country {
 }
 
 sub _get_node_for_profile {
-    my ($self, $profile) = @_;
-    my ($languages, $ordered_languages) = $self->_get_languages_for_profile($profile);
+    my ( $self, $profile ) = @_;
+    my ( $languages, $ordered_languages )
+        = $self->_get_languages_for_profile($profile);
     my $main_lang = shift @$ordered_languages;
-    my $node = {
-        id              => $profile->id,
-        label           => $profile->login,
+    my $node      = {
+        id        => $profile->id,
+        label     => $profile->login,
         attvalues => {
             attvalue => [
-                { for => 0, value => $profile->name},
-                { for => 1, value => "profile"},
-                { for => 2, value => $profile->followers_count},
-                { for => 3, value => $profile->following_count},
-                { for => 5, value => $profile->country},
-                { for => 6, value => $profile->public_gist_count},
-                { for => 7, value => $profile->public_repo_count},
-                { for => 8, value => $main_lang},
+                { for => 0, value => $profile->name },
+                { for => 1, value => "profile" },
+                { for => 2, value => $profile->followers_count },
+                { for => 3, value => $profile->following_count },
+                { for => 5, value => $profile->country },
+                { for => 6, value => $profile->public_gist_count },
+                { for => 7, value => $profile->public_repo_count },
+                { for => 8, value => $main_lang },
             ]
         },
     };
@@ -236,22 +250,22 @@ sub _get_node_for_profile {
 }
 
 sub _get_languages_for_profile {
-    my ($self, $profile) = shift;
+    my ( $self, $profile ) = shift;
 
-    my $forks = $self->schema->resultset('Fork')->search({profile =>
-        $profile->id});
+    my $forks = $self->schema->resultset('Fork')
+        ->search( { profile => $profile->id } );
 
     my %languages;
-    while (my $fork = $forks->next) {
-        my $languages =
-        $self->schema->resultset('RepoLang')->search({repository =>
-                $fork->repos->id});
-        while (my $lang = $languages->next) {
-            $languages{$lang->language->name}+=$lang->size;
+    while ( my $fork = $forks->next ) {
+        my $languages = $self->schema->resultset('RepoLang')
+            ->search( { repository => $fork->repos->id } );
+        while ( my $lang = $languages->next ) {
+            $languages{ $lang->language->name } += $lang->size;
         }
     }
-    my @sorted_lang = sort {$languages{$b} <=> $languages{$a}} keys %languages;
-    return (\%languages, \@sorted_lang);
+    my @sorted_lang
+        = sort { $languages{$b} <=> $languages{$a} } keys %languages;
+    return ( \%languages, \@sorted_lang );
 }
 
 #sub repositories {