summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- .gitignore 4
-rw-r--r-- extract-seed.pl 6
-rw-r--r-- lib/githubexplorer.pm 2
-rw-r--r-- lib/githubexplorer/Gexf.pm 137
-rw-r--r-- lib/githubexplorer/Network.pm 9
-rw-r--r-- lib/githubexplorer/Repository.pm 9
6 files changed, 146 insertions, 21 deletions
diff --git a/.gitignore b/.gitignore
index 4358bbd..19f1f1e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,6 @@
 *.sqlite
 *.conf
 *.sql
-*.yml
\ No newline at end of file
+*.yml
+*.gexf
+*.csv
\ No newline at end of file
diff --git a/extract-seed.pl b/extract-seed.pl
new file mode 100644
index 0000000..293c270
--- /dev/null
+++ b/extract-seed.pl
@@ -0,0 +1,6 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use githubexplorer::Schema;
+
+my $schema = githubexplorer::Schema->connect();
\ No newline at end of file
diff --git a/lib/githubexplorer.pm b/lib/githubexplorer.pm
index 9e5e134..4260842 100644
--- a/lib/githubexplorer.pm
+++ b/lib/githubexplorer.pm
@@ -69,7 +69,7 @@ sub gen_graph {
     my $self = shift;
     $self->_connect unless $self->has_schema;
     my $graph = githubexplorer::Gexf->new( schema => $self->schema );
-    my $xml = $graph->profiles;
+    my $xml = $graph->gen_gexf;
     $xml > io('crawl.gexf');
 }
 
diff --git a/lib/githubexplorer/Gexf.pm b/lib/githubexplorer/Gexf.pm
index 503eebe..f7e38cb 100644
--- a/lib/githubexplorer/Gexf.pm
+++ b/lib/githubexplorer/Gexf.pm
@@ -2,8 +2,11 @@ package githubexplorer::Gexf;
 
 use Moose;
 use XML::Simple;
+use 5.010;
 
 has schema => (is => 'ro', isa => 'Object', required => 1);
+has id_edges => (is => 'rw', isa => 'Num', traits  => ['Counter'], default =>
+0, handles => {inc_edges => 'inc'});
 
 has graph => (
     is      => 'rw',
@@ -11,8 +14,8 @@ has graph => (
     default => sub {
         my $graph = {
             gexf => {
-                version => "1.0",
-                meta    => { creator => ['rtgi'] },
+                version => "1.1",
+                meta    => { creator => ['linkfluence'] },
                 graph   => {
                     type       => 'static',
                     attributes => {
@@ -21,19 +24,59 @@ has graph => (
                         attribute => [
                             {
                                 id    => 0,
-                                type  => 'string',
+                                type  => 'float',
                                 title => 'name'
                             },
                             {
-                                id    => 1,
-                                type  => 'string',
-                                title => 'followers_count'
+                                id => 1,
+                                type => 'string',
+                                title => 'type',
                             },
                             {
                                 id    => 2,
-                                type  => 'string',
+                                type  => 'float',
+                                title => 'followers_count'
+                            },
+                            {
+                                id    => 3,
+                                type  => 'float',
                                 title => 'following_count'
                             },
+                            {
+                                id => 4,
+                                type => 'float',
+                                title => 'forks',
+                            },
+                            {
+                                id => 5,
+                                type => 'string',
+                                title => 'location',
+                            },
+                            {
+                                id => 6,
+                                type => 'float',
+                                title => 'public_gist_count',
+                            },
+                            {
+                                id => 7,
+                                type => 'float',
+                                title => 'public_repo_count',
+                            },
+                            {
+                                id => 8,
+                                type => 'string',
+                                title => 'language',
+                            },
+                            {
+                                id => 9,
+                                type => 'string',
+                                title => 'description',
+                            },
+                            {
+                                id => 10,
+                                type => 'float',
+                                title => 'watchers',
+                            }
                         ]
                     }
                 }
@@ -42,8 +85,19 @@ has graph => (
     }
 );
 
+sub gen_gexf {
+    my $self = shift;
+    $self->profiles;
+    #$self->repositories;
+    say "total nodes : ".scalar (@{ $self->graph->{gexf}->{graph}->{nodes}->{node} });
+    say "total edges : ".scalar (@{ $self->graph->{gexf}->{graph}->{edges}->{edge} });
+    my $xml_out = XMLout( $self->graph, AttrIndent => 1, keepRoot => 1 );
+    return $xml_out;
+}
+
 sub profiles {
     my $self     = shift;
+    say "start profiles ...";
     my $profiles = $self->schema->resultset('Profiles')->search();
 
     while ( my $profile = $profiles->next ) {
@@ -52,9 +106,13 @@ sub profiles {
             label           => $profile->login,
             attvalues => {
                 attvalue => [
-                    {name            => $profile->name},
-                    {followers_count => $profile->followers_count},
-                    {following_count => $profile->following_count},
+                    { for => 0, value => $profile->name},
+                    { for => 1, value => "profile"},
+                    { for => 2, value => $profile->followers_count},
+                    { for => 3, value => $profile->following_count},
+                    { for => 5, value => $profile->location},
+                    { for => 6, value => $profile->public_gist_count},
+                    { for => 7, value => $profile->public_repo_count},
                 ]
             },
         };
@@ -65,17 +123,66 @@ sub profiles {
     my $id    = 0;
     while ( my $edge = $edges->next ) {
         my $e = {
-            cardinal => 1,
             source   => $edge->origin->id,
             target   => $edge->dest->id,
-            type     => 'dir',
-            id       => $id++,
+            id       => $self->inc_edges,
         };
         push @{ $self->graph->{gexf}->{graph}->{edges}->{edge} }, $e;
     }
+    say " done";
+}
 
-    my $xml_out = XMLout( $self->graph, AttrIndent => 1, keepRoot => 1 );
-    return $xml_out;
+sub repositories {
+    my $self = shift;
+
+    say "start repositories ...";
+    my $repositories = $self->schema->resultset('Repositories')->search({fork => 0});
+    while (my $repos = $repositories->next) {
+
+        next if $repos->name =~ /dotfiles/i;
+        # available in forks ?
+        my $check_fork = $self->schema->resultset('Fork')->search({repos => $repos->id});
+        next if $check_fork->count < 1;
+
+        if (!grep {$_->{id} eq "repos_".$repos->name} @{$self->graph->{gexf}->{graph}->{nodes}->{node}}) {
+            my $language = $self->schema->resultset('RepoLang')->search({repository => $repos->id}, {order_by => 'size'})->first;
+            my $lang = $language ? $language->language->name : 'none';
+            my $node = {
+                id => "repos_".$repos->name,
+                label => $repos->name,
+                attvalues => {
+                    attvalue => [
+                        { for => 0,  value => $repos->name},
+                        { for => 1,  value => "repository"},
+                        { for => 4,  value => $repos->forks},
+                        { for => 9,  value => $repos->description},
+                        { for => 10, value => $repos->watchers},
+                        { for => 8,  value => $lang},
+                    ],
+                },
+            };
+            push @{ $self->graph->{gexf}->{graph}->{nodes}->{node} }, $node;
+        }
+        my $e = {
+            source   => $repos->id_profile->id,
+            target   => "repos_".$repos->name,
+            id       => $self->inc_edges,
+        };
+        push @{ $self->graph->{gexf}->{graph}->{edges}->{edge} }, $e;
+    }
+
+    my $forks = $self->schema->resultset('Fork')->search();
+
+    while (my $fork = $forks->next) {
+        next if $fork->repos->name =~ /dotfiles/i;
+        my $e = {
+            source   => $fork->profile->id,
+            target   => "repos_".$fork->repos->name,
+            id       => $self->inc_edges,
+        };
+        push @{ $self->graph->{gexf}->{graph}->{edges}->{edge} }, $e;
+    }
+    say " done";
 }
 
 1;
diff --git a/lib/githubexplorer/Network.pm b/lib/githubexplorer/Network.pm
index eb6253f..dde08a4 100644
--- a/lib/githubexplorer/Network.pm
+++ b/lib/githubexplorer/Network.pm
@@ -7,6 +7,11 @@ use YAML::Syck;
 sub fetch_network {
     my ( $self, $repos ) = @_;
 
+    # check fork
+    my $check = $self->schema->resultset('Fork')->search({repos=>
+            $repos->id});
+    return if $check->count > 0;
+
     say ">> start on ".$repos->name;
     my $api_repos = Net::GitHub::V2::Repositories->new(
         owner => $repos->id_profile->login,
@@ -16,6 +21,10 @@ sub fetch_network {
     );
 
     my $edges = $api_repos->network();
+    if (ref $edges ne 'ARRAY') {
+        sleep 60;
+        return;
+    }
     sleep(1);
     foreach my $edge (@$edges) {
         next if $edge->{owner} eq $repos->id_profile->login;
diff --git a/lib/githubexplorer/Repository.pm b/lib/githubexplorer/Repository.pm
index 035450a..617e091 100644
--- a/lib/githubexplorer/Repository.pm
+++ b/lib/githubexplorer/Repository.pm
@@ -2,6 +2,7 @@ package githubexplorer::Repository;
 use 5.010;
 use Moose::Role;
 use Net::GitHub::V2::Repositories;
+use Try::Tiny;
 
 sub fetch_repositories {
     my ( $self, $profile ) = @_;
@@ -14,9 +15,9 @@ sub fetch_repositories {
 
     my $repo_list = $github_profile->list();
 
-    while ( ref $repo_list ne 'ARRAYREF' ) {
+    if ( ref $repo_list ne 'ARRAY' ) {
         sleep(60);
-        $repo_list = $github_profile->list();
+        return;
     }
 
     foreach my $repos (@$repo_list) {
@@ -40,9 +41,9 @@ sub fetch_repositories {
             token => $self->api_token,
         );
         my $langs = $api_repos->languages;
-        while ( ref $langs ne 'HASHREF' ) {
+        if ( ref $langs ne 'HASH' ) {
             sleep(60);
-            $langs = $api_repos->languages;
+            next;
         }
 
         foreach my $lang ( keys %$langs ) {