summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- .gitignore 4
-rw-r--r-- lib/githubexplorer/Gexf.pm 254
-rw-r--r-- lib/githubexplorer/Schema/Result/Profiles.pm 1
-rw-r--r-- lib/githubexplorer/Schema/Result/Repositories.pm 1
4 files changed, 138 insertions, 122 deletions
diff --git a/.gitignore b/.gitignore
index 19f1f1e..294fe6f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,2 @@
-*.sqlite
 *.conf
-*.sql
 *.yml
-*.gexf
-*.csv
\ No newline at end of file
diff --git a/lib/githubexplorer/Gexf.pm b/lib/githubexplorer/Gexf.pm
index 7e82f80..6832379 100644
--- a/lib/githubexplorer/Gexf.pm
+++ b/lib/githubexplorer/Gexf.pm
@@ -25,67 +25,85 @@ has graph => (
                 meta    => { creator => ['linkfluence'] },
                 graph   => {
                     type       => 'static',
-                    attributes => {
-                        class     => 'node',
-                        type      => 'static',
-                        attribute => [
-                            {
-                                id    => 0,
-                                type  => 'string',
-                                title => 'name'
-                            },
-                            {
-                                id    => 1,
-                                type  => 'string',
-                                title => 'type',
-                            },
-                            {
-                                id    => 2,
-                                type  => 'float',
-                                title => 'followers_count'
-                            },
-                            {
-                                id    => 3,
-                                type  => 'float',
-                                title => 'following_count'
-                            },
-                            {
-                                id    => 4,
-                                type  => 'float',
-                                title => 'forks',
-                            },
-                            {
-                                id    => 5,
-                                type  => 'string',
-                                title => 'location',
-                            },
-                            {
-                                id    => 6,
-                                type  => 'float',
-                                title => 'public_gist_count',
-                            },
-                            {
-                                id    => 7,
-                                type  => 'float',
-                                title => 'public_repo_count',
-                            },
-                            {
-                                id    => 8,
-                                type  => 'string',
-                                title => 'language',
-                            },
-                            {
-                                id    => 9,
-                                type  => 'string',
-                                title => 'description',
-                            },
-                            {
-                                id    => 10,
-                                type  => 'float',
-                                title => 'watchers',
-                            }
-                        ]
-                    }
+                    attributes => [
+                        {
+                            class     => 'edge',
+                            type      => 'static',
+                            attribute => [
+                                {
+                                    id    => 0,
+                                    type  => 'string',
+                                    title => 'language'
+                                },
+                                {
+                                    id    => 0,
+                                    type  => 'float',
+                                    title => 'collaborate'
+                                },
+                            ]
+                        },
+                        {
+                            class     => 'node',
+                            type      => 'static',
+                            attribute => [
+                                {
+                                    id    => 0,
+                                    type  => 'string',
+                                    title => 'name'
+                                },
+                                {
+                                    id    => 1,
+                                    type  => 'string',
+                                    title => 'type',
+                                },
+                                {
+                                    id    => 2,
+                                    type  => 'float',
+                                    title => 'followers_count'
+                                },
+                                {
+                                    id    => 3,
+                                    type  => 'float',
+                                    title => 'following_count'
+                                },
+                                {
+                                    id    => 4,
+                                    type  => 'float',
+                                    title => 'forks',
+                                },
+                                {
+                                    id    => 5,
+                                    type  => 'string',
+                                    title => 'location',
+                                },
+                                {
+                                    id    => 6,
+                                    type  => 'float',
+                                    title => 'public_gist_count',
+                                },
+                                {
+                                    id    => 7,
+                                    type  => 'float',
+                                    title => 'public_repo_count',
+                                },
+                                {
+                                    id    => 8,
+                                    type  => 'string',
+                                    title => 'language',
+                                },
+                                {
+                                    id    => 9,
+                                    type  => 'string',
+                                    title => 'description',
+                                },
+                                {
+                                    id    => 10,
+                                    type  => 'float',
+                                    title => 'watchers',
+                                }
+                            ]
+                        },
+                    ]
                 }
             }
         };
@@ -95,26 +113,26 @@ has graph => (
 sub gen_gexf {
     my $self = shift;
 
-    $self->_average_by_langage();
-#    $self->basic_profiles;
-#    my $basic_profiles = $self->dump_gexf;
-#    $basic_profiles > io('basic_profiles.gexf');
+    #$self->_average_by_langage();
 
+    $self->basic_profiles;
+    my $basic_profiles = $self->dump_gexf;
+    $basic_profiles > io('basic_profiles.gexf');
 
-    $self->profiles_from_repositories;
-    my $profiles_from_repositories = $self->dump_gexf;
-    $profiles_from_repositories > io('profiles_from_repositories.gexf');
+    #$self->profiles_from_repositories;
+    #my $profiles_from_repositories = $self->dump_gexf;
+    #$profiles_from_repositories > io('profiles_from_repositories.gexf');
 
-#    $self->repositories_from_profiles;
-#    my $repositories_from_profiles = $self->dump_gexf;
-#    $repositories_from_profiles > io('repositories_from_profiles.gexf');
+    #$self->repositories_from_profiles;
+    #my $repositories_from_profiles = $self->dump_gexf;
+    #$repositories_from_profiles > io('repositories_from_profiles.gexf');
 }
 
 sub dump_gexf {
     my $self = shift;
     my $xml_out = XMLout( $self->graph, AttrIndent => 1, keepRoot => 1 );
-    say "total edges => ".scalar @{$self->graph->{gexf}->{graph}->{nodes}->{node}};
-    say "total nodes => ".scalar @{$self->graph->{gexf}->{graph}->{edges}->{edge}};
+    say "total nodes => ".scalar @{$self->graph->{gexf}->{graph}->{nodes}->{node}};
+    say "total edges => ".scalar @{$self->graph->{gexf}->{graph}->{edges}->{edge}};
     $self->graph->{gexf}->{graph}->{nodes} = undef;
     $self->graph->{gexf}->{graph}->{edges} = undef;
     return $xml_out;
@@ -124,7 +142,8 @@ sub basic_profiles {
     my $self = shift;
     $self->id_edges(0);
     say "start basic_profiles ...";
-    my $profiles = $self->schema->resultset('Profiles')->search();
+    my $profiles =
+    $self->schema->resultset('Profiles')->search();
 
     while ( my $profile = $profiles->next ) {
         my $node = $self->_get_node_for_profile($profile);
@@ -134,10 +153,23 @@ sub basic_profiles {
     my $edges = $self->schema->resultset('Follow')->search();
     my $id    = 0;
     while ( my $edge = $edges->next ) {
+        my $collaborate = 1;
+#        my $forks_source = $self->schema->resultset('Fork')->search({profile =>
+#                $edge->origin->id});
+#        while (my $fork = $forks_source->next) {
+#            my $contrib = $self->schema->resultset('Fork')->search({repos =>
+#                    $fork->repos->id});
+#            while (my $c = $contrib->next) {
+#                $collaborate++ if ($c->profile->id == $edge->dest->id);
+#            }
+#        }
         my $e = {
             source => $edge->origin->id,
             target => $edge->dest->id,
             id     => $self->inc_edges,
+            weight => $collaborate,
+            collaborate => $collaborate,
+            language => $edge->origin->main_language,
         };
         push @{ $self->graph->{gexf}->{graph}->{edges}->{edge} }, $e;
     }
@@ -160,13 +192,9 @@ sub profiles_from_repositories {
     while ( my $repos = $repositories->next ) {
         my $forks = $self->schema->resultset('Fork')
             ->search( { repos => $repos->id } );
-        my $language;
-        my $lang_rs = $self->schema->resultset('RepoLang')->search({repository => $repos->id}, {order_by => 'size'})->first;
-        if ($lang_rs) {
-            $language = $lang_rs->language->name;
-        }
-        if ($language && exists $self->avg_contrib_by_lang->{$language}->{avg} && $forks <= $self->avg_contrib_by_lang->{$language}->{avg}){
-            warn ">>>>> on skip pour ".$repos->name."\n";
+        if ($repos->main_language && exists
+            $self->avg_contrib_by_lang->{$repos->main_language}->{avg} &&
+            $forks < $self->avg_contrib_by_lang->{$repos->main_language}->{avg}){
             next;
         }
         my @profiles;
@@ -188,6 +216,7 @@ sub profiles_from_repositories {
     }
     foreach my $e (keys %$edges) {
         foreach my $t (keys %{$edges->{$e}}) {
+            next unless $edges->{$e}->{$t}->{weight} > 5;
             my $edge = {
                 id     => $self->inc_edges,
                 source => $e,
@@ -211,11 +240,6 @@ sub repositories_from_profiles {
         next if $repos->name =~ /dotfiles/;
 
         if ( !exists $nodes->{ $repos->name } ) {
-            my $language
-                = $self->schema->resultset('RepoLang')
-                ->search( { repository => $repos->id },
-                { order_by => 'size' } )->first;
-            my $lang = $language ? $language->language->name : 'none';
             $nodes->{ $repos->name } = {
                 id        => $repos->name,
                 label     => $repos->name,
@@ -226,7 +250,7 @@ sub repositories_from_profiles {
                         { for => 4,  value => $repos->forks },
                         { for => 9,  value => $repos->description },
                         { for => 10, value => $repos->watchers },
-                        { for => 8,  value => $lang },
+                        { for => 8,  value => $repos->main_language },
                     ],
                 },
             };
@@ -261,7 +285,7 @@ sub repositories_from_profiles {
     }
     foreach my $e (keys %$edges) {
         foreach my $t (keys %{$edges->{$e}}) {
-            next if $edges->{$e}->{$t}->{weight} < 10;
+            next if $edges->{$e}->{$t}->{weight} < 5;
             my $edge = {
                 id     => $self->inc_edges,
                 source => $e,
@@ -271,15 +295,11 @@ sub repositories_from_profiles {
             push @{ $self->graph->{gexf}->{graph}->{edges}->{edge} }, $edge;
         }
     }
-    say "edges => ".scalar @{ $self->graph->{gexf}->{graph}->{edges}->{edge} };
     say "repositories_from_profiles done";
 }
 
 sub _get_node_for_profile {
     my ( $self, $profile ) = @_;
-    my ( $languages, $ordered_languages )
-        = $self->_get_languages_for_profile($profile);
-    my $main_lang = shift @$ordered_languages;
     my $node      = {
         id        => $profile->id,
         label     => $profile->login,
@@ -292,45 +312,43 @@ sub _get_node_for_profile {
                 { for => 5, value => $profile->country },
                 { for => 6, value => $profile->public_gist_count },
                 { for => 7, value => $profile->public_repo_count },
-                { for => 8, value => $main_lang },
+                { for => 8, value => $profile->main_language },
             ]
         },
     };
     return $node;
 }
 
-sub _get_languages_for_profile {
-    my ( $self, $profile ) = @_;
-
-    my $forks = $self->schema->resultset('Fork')
-        ->search( { profile => $profile->id } );
-
-    my %languages;
-    while ( my $fork = $forks->next ) {
-        my $languages = $self->schema->resultset('RepoLang')
-            ->search( { repository => $fork->repos->id } );
-        while ( my $lang = $languages->next ) {
-            $languages{ $lang->language->name } += $lang->size;
-        }
-    }
-    my @sorted_lang
-        = sort { $languages{$b} <=> $languages{$a} } keys %languages;
-    return ( \%languages, \@sorted_lang );
-}
+#sub _get_languages_for_profile {
+#    my ( $self, $profile ) = @_;
+#
+#    my $forks = $self->schema->resultset('Fork')
+#        ->search( { profile => $profile->id } );
+#
+#    my %languages;
+#    while ( my $fork = $forks->next ) {
+#        my $languages = $self->schema->resultset('RepoLang')
+#            ->search( { repository => $fork->repos->id } );
+#        while ( my $lang = $languages->next ) {
+#            $languages{ $lang->language->name } += $lang->size;
+#        }
+#    }
+#    my @sorted_lang
+#        = sort { $languages{$b} <=> $languages{$a} } keys %languages;
+#    return ( \%languages, \@sorted_lang );
+#}
 
 sub _average_by_langage {
     my $self = shift;
     my $hash_lang;
     my $repositories = $self->schema->resultset('Repositories')->search();
+    say "gather stats ...";
     while ( my $repos = $repositories->next ) {
-        my $lang = $self->schema->resultset('RepoLang')->search(
-            { repository => $repos->id }, { order_by => 'size' }
-        )->first;
-        next unless $lang;
-        $hash_lang->{ $lang->language->name }->{repositories}++;
+        next unless $repos->main_language;
+        $hash_lang->{ $repos->main_language }->{repositories}++;
         my $forks = $self->schema->resultset('Fork')->search( { repos => $repos->id } )->count;
-        $hash_lang->{ $lang->language->name }->{contributors} += $forks;
-        $hash_lang->{$lang->language->name}->{avg} = int ($hash_lang->{$lang->language->name}->{contributors} / $hash_lang->{$lang->language->name}->{repositories});
+        $hash_lang->{ $repos->main_language }->{contributors} += $forks;
+        $hash_lang->{$repos->main_language}->{avg} = int ($hash_lang->{$repos->main_language}->{contributors} / $hash_lang->{$repos->main_language}->{repositories});
     };
     $self->avg_contrib_by_lang($hash_lang);
 }
diff --git a/lib/githubexplorer/Schema/Result/Profiles.pm b/lib/githubexplorer/Schema/Result/Profiles.pm
index b43211e..df8f147 100644
--- a/lib/githubexplorer/Schema/Result/Profiles.pm
+++ b/lib/githubexplorer/Schema/Result/Profiles.pm
@@ -11,6 +11,7 @@ __PACKAGE__->add_columns(
     company           => { data_type => 'varchar', is_nullable => 1 },
     created_at        => { data_type => 'timestamp' },
     email             => { data_type => 'varchar', is_nullable => 1 },
+    main_language             => { data_type => 'varchar', is_nullable => 1 },
     followers_count   => { data_type => 'int' },
     following_count   => { data_type => 'int' },
     gravatar_id       => { data_type => 'varchar', is_nullable => 1 },
diff --git a/lib/githubexplorer/Schema/Result/Repositories.pm b/lib/githubexplorer/Schema/Result/Repositories.pm
index 58c0e51..ab82fef 100644
--- a/lib/githubexplorer/Schema/Result/Repositories.pm
+++ b/lib/githubexplorer/Schema/Result/Repositories.pm
@@ -10,6 +10,7 @@ __PACKAGE__->add_columns(
     name        => { data_type => 'varchar' },
     homepage    => { data_type => 'varchar', is_nullable       => 1 },
     url         => { data_type => 'varchar', is_nullable       => 1 },
+    main_language         => { data_type => 'varchar', is_nullable       => 1 },
     watchers    => { data_type => 'int' },
     forks       => { data_type => 'int' },
     fork        => { data_type => 'bool' },