summary refs log tree commit diff
path: root/lib
diff options
context:
space:
mode:
author franck cuny <franck@lumberjaph.net> 2010-02-15 13:02:59 +0100
committer franck cuny <franck@lumberjaph.net> 2010-02-15 13:02:59 +0100
commit d55ac1c0b26161f5c6d2604a8f192ff2bf2e7d3a (patch)
tree d9f83707aeb4a90b494f74b56da5af2fe2c281a9 /lib
parent fix (diff)
download github-explorer-d55ac1c0b26161f5c6d2604a8f192ff2bf2e7d3a.tar.gz
update gexf
Diffstat (limited to '')
-rw-r--r-- lib/githubexplorer.pm 41
-rw-r--r-- lib/githubexplorer/Gexf.pm 155
2 files changed, 107 insertions, 89 deletions
diff --git a/lib/githubexplorer.pm b/lib/githubexplorer.pm
index 7b9c252..aef66ec 100644
--- a/lib/githubexplorer.pm
+++ b/lib/githubexplorer.pm
@@ -105,14 +105,47 @@ sub gen_seed {
         my $main_lang = shift @sorted_lang;
         my $other_lang = join( '|', @sorted_lang );
         my $str
-            = $profiles->blog
+            = $pr->blog
             . ";;;github;"
-            . $main_lang . ";"
-            . $other_lang . ";"
-            . $profile->country . "\n";
+            . ($main_lang || '') . ";"
+            . ($other_lang || '') . ";"
+            . ($pr->country || ''). "\n";
         print $fh $str;
     }
     close $fh;
 }
 
+sub stats_by_country {
+    my $self = shift;
+    $self->_connect unless $self->has_schema;
+    my $repositories = $self->schema->resultset('Repositories')->search();
+
+    my $countries;
+    while (my $repos = $repositories->next) {
+        next if !$repos->id_profile->country;
+        my $languages = $self->schema->resultset('RepoLang')
+            ->search( { repository => $repos->id } );
+        while ( my $lang = $languages->next ) {
+            $countries->{ $repos->id_profile->country }->{$lang->language->name} += $lang->size;
+        }
+    }
+    foreach my $country (keys %$countries) {
+        my $total = $self->schema->resultset('Profiles')->search({country => $country})->count;
+        $countries->{$country}->{total_dev} = $total;
+        my $total_bytes;
+        map {$total_bytes += $countries->{$country}->{$_}} keys %{$countries->{$country}};
+        foreach my $lang (keys %{$countries->{$country}}) {
+            $countries->{$country}->{"pct_".$lang} = ($countries->{$country}->{$lang} / $total_bytes) * 100;
+        }
+    }
+    my @sorted_countries = sort {$countries->{$b}->{total_dev} <=> $countries->{$a}->{total_dev}} keys %$countries;
+
+    my $final;
+    for ( 0 .. 19) {
+        push @$final, {$sorted_countries[$_] => $countries->{$sorted_countries[$_]} };
+    }
+    warn Dump $final;
+    DumpFile('countries.yaml', $final);
+}
+
 1;
diff --git a/lib/githubexplorer/Gexf.pm b/lib/githubexplorer/Gexf.pm
index de0da49..98f3d38 100644
--- a/lib/githubexplorer/Gexf.pm
+++ b/lib/githubexplorer/Gexf.pm
@@ -2,6 +2,7 @@ package githubexplorer::Gexf;
 
 use Moose;
 use XML::Simple;
+use IO::All;
 use 5.010;
 
 has schema => ( is => 'ro', isa => 'Object', required => 1 );
@@ -29,7 +30,7 @@ has graph => (
                         attribute => [
                             {
                                 id    => 0,
-                                type  => 'float',
+                                type  => 'string',
                                 title => 'name'
                             },
                             {
@@ -93,20 +94,20 @@ has graph => (
 sub gen_gexf {
     my $self = shift;
 
-    $self->basic_profiles;
-    my $basic_profiles = $self->dump_gexf;
-    $basic_profiles > io('basic_profiles.gexf');
+#    $self->basic_profiles;
+#    my $basic_profiles = $self->dump_gexf;
+#    $basic_profiles > io('basic_profiles.gexf');
 
     $self->profiles_from_repositories;
     my $profiles_from_repositories = $self->dump_gexf;
     $profiles_from_repositories > io('profiles_from_repositories.gexf');
 
-    $self->repositories_from_profiles;
-    my $repositories_from_profiles = $self->dump_gexf;
-    $profiles_from_repositories > io('repositories_from_profiles.gexf');
+#    $self->repositories_from_profiles;
+#    my $repositories_from_profiles = $self->dump_gexf;
+#    $repositories_from_profiles > io('repositories_from_profiles.gexf');
 }
 
-sub dump_gefx {
+sub dump_gexf {
     my $self = shift;
     my $xml_out = XMLout( $self->graph, AttrIndent => 1, keepRoot => 1 );
     $self->graph->{gexf}->{graph}->{nodes} = undef;
@@ -149,6 +150,7 @@ sub profiles_from_repositories {
         my $node = $self->_get_node_for_profile($profile);
         push @{ $self->graph->{gexf}->{graph}->{nodes}->{node} }, $node;
     }
+    my $edges;
     my $repositories = $self->schema->resultset('Repositories')->search();
     while ( my $repos = $repositories->next ) {
         my $forks = $self->schema->resultset('Fork')
@@ -158,17 +160,31 @@ sub profiles_from_repositories {
             push @profiles, $fork->profile->id;
         }
         foreach my $p (@profiles) {
-            map {
-                next if $_ eq $p;
-                my $e = {
-                    source => $p,
-                    target => $_,
-                    id     => $self->inc_edges,
-                };
-                push @{ $self->graph->{gexf}->{graph}->{edges}->{edge} }, $e;
-            } @profiles;
+            foreach my $t (@profiles) {
+                next if $t eq $p;
+                if (exists $edges->{$p}->{$t}) {
+                    $edges->{$p}->{$t}->{weight}++;
+                }elsif(exists $edges->{$t}->{$p}) {
+                    $edges->{$t}->{$p}->{weight}++;
+                }else{
+                    $edges->{$p}->{$t}->{weight}++;
+                }
+            }
+        }
+    }
+    foreach my $e (keys %$edges) {
+        foreach my $t (keys %{$edges->{$e}}) {
+            next if $edges->{$e}->{$t}->{weight} < 4;
+            my $edge = {
+                id     => $self->inc_edges,
+                source => $e,
+                target => $t,
+                weight => $edges->{$e}->{$t}->{weight},
+            };
+            push @{ $self->graph->{gexf}->{graph}->{edges}->{edge} }, $edge;
         }
     }
+    say "edges => ".scalar @{ $self->graph->{gexf}->{graph}->{edges}->{edge} };
     say "profiles_from_repositories done";
 }
 
@@ -203,26 +219,48 @@ sub repositories_from_profiles {
                 },
             };
         }
-        my $forks = $self->schema->resultset('Fork')
-            ->search( { repos => $repos->id } );
-        while ( my $fork = $forks->next ) {
-            my $e = {
-                source => $fork->profile->id,
-                target => $fork->repos->name,
-                id     => $self->inc_edges,
-            };
-            push @{ $self->graph->{gexf}->{graph}->{edges}->{edge} }, $e;
-        }
     }
     map {
         push @{ $self->graph->{gexf}->{graph}->{nodes}->{node} },
             $nodes->{$_}
     } keys %$nodes;
-    say "repositories_from_profiles done";
-}
 
-sub stats_languages_by_country {
-    my $self = shift;
+    my $edges;
+    my $profiles = $self->schema->resultset('Profiles');
+    while ( my $profile = $profiles->next ) {
+        my $forks = $self->schema->resultset('Fork')->search({profile =>
+                $profile->id});
+        my @repos;
+        while (my $fork = $forks->next) {
+            push @repos, $fork->repos->name;
+        }
+        foreach my $r (@repos) {
+            foreach my $t (@repos) {
+                next if $t eq $r;
+                if (exists $edges->{$r}->{$t}) {
+                    $edges->{$r}->{$t}->{weight}++;
+                }elsif(exists $edges->{$t}->{$r}){
+                    $edges->{$t}->{$r}->{weight}++;
+                }else{
+                    $edges->{$r}->{$t}->{weight}++;
+                }
+            }
+        }
+    }
+    foreach my $e (keys %$edges) {
+        foreach my $t (keys %{$edges->{$e}}) {
+            next if $edges->{$e}->{$t}->{weight} < 10;
+            my $edge = {
+                id     => $self->inc_edges,
+                source => $e,
+                target => $t,
+                weight => $edges->{$e}->{$t}->{weight},
+            };
+            push @{ $self->graph->{gexf}->{graph}->{edges}->{edge} }, $edge;
+        }
+    }
+    say "edges => ".scalar @{ $self->graph->{gexf}->{graph}->{edges}->{edge} };
+    say "repositories_from_profiles done";
 }
 
 sub _get_node_for_profile {
@@ -250,7 +288,7 @@ sub _get_node_for_profile {
 }
 
 sub _get_languages_for_profile {
-    my ( $self, $profile ) = shift;
+    my ( $self, $profile ) = @_;
 
     my $forks = $self->schema->resultset('Fork')
         ->search( { profile => $profile->id } );
@@ -268,57 +306,4 @@ sub _get_languages_for_profile {
     return ( \%languages, \@sorted_lang );
 }
 
-#sub repositories {
-#    my $self = shift;
-#
-#    say "start repositories ...";
-#    my $repositories = $self->schema->resultset('Repositories')->search({fork => 0});
-#    while (my $repos = $repositories->next) {
-#
-#        next if $repos->name =~ /dotfiles/i;
-#        # available in forks ?
-#        my $check_fork = $self->schema->resultset('Fork')->search({repos => $repos->id});
-#        next if $check_fork->count < 1;
-#
-#        if (!grep {$_->{id} eq "repos_".$repos->name} @{$self->graph->{gexf}->{graph}->{nodes}->{node}}) {
-#            my $language = $self->schema->resultset('RepoLang')->search({repository => $repos->id}, {order_by => 'size'})->first;
-#            my $lang = $language ? $language->language->name : 'none';
-#            my $node = {
-#                id => "repos_".$repos->name,
-#                label => $repos->name,
-#                attvalues => {
-#                    attvalue => [
-#                        { for => 0,  value => $repos->name},
-#                        { for => 1,  value => "repository"},
-#                        { for => 4,  value => $repos->forks},
-#                        { for => 9,  value => $repos->description},
-#                        { for => 10, value => $repos->watchers},
-#                        { for => 8,  value => $lang},
-#                    ],
-#                },
-#            };
-#            push @{ $self->graph->{gexf}->{graph}->{nodes}->{node} }, $node;
-#        }
-#        my $e = {
-#            source   => $repos->id_profile->id,
-#            target   => "repos_".$repos->name,
-#            id       => $self->inc_edges,
-#        };
-#        push @{ $self->graph->{gexf}->{graph}->{edges}->{edge} }, $e;
-#    }
-#
-#    my $forks = $self->schema->resultset('Fork')->search();
-#
-#    while (my $fork = $forks->next) {
-#        next if $fork->repos->name =~ /dotfiles/i;
-#        my $e = {
-#            source   => $fork->profile->id,
-#            target   => "repos_".$fork->repos->name,
-#            id       => $self->inc_edges,
-#        };
-#        push @{ $self->graph->{gexf}->{graph}->{edges}->{edge} }, $e;
-#    }
-#    say " done";
-#}
-
 1;