summary refs log tree commit diff
diff options
context:
space:
mode:
author franck cuny <franck@lumberjaph.net> 2010-02-15 13:14:40 +0100
committer franck cuny <franck@lumberjaph.net> 2010-02-15 13:14:40 +0100
commit 1337fef864e9b1493355bd0928c23dc71b4ceea4 (patch)
tree 3ec19bb66ef870b3400e949076ffbabe3ad362fe
parent get_average (diff)
parent update gexf (diff)
download github-explorer-1337fef864e9b1493355bd0928c23dc71b4ceea4.tar.gz
merge
-rw-r--r-- clean-country.pl 5
-rw-r--r-- crawl.pl 2
-rw-r--r-- lib/githubexplorer.pm 41
3 files changed, 43 insertions, 5 deletions
diff --git a/clean-country.pl b/clean-country.pl
index 8d26d12..460b29f 100644
--- a/clean-country.pl
+++ b/clean-country.pl
@@ -36,5 +36,8 @@ while ( my $pr = $profiles->next ) {
         next if $@;
         say "** fix with " . $pr->city . " in " . $pr->country;
     }
-    sleep(1);
+    if (++$i == 10) {
+        sleep(2);
+        $i = 0;
+    }
 }
\ No newline at end of file
diff --git a/crawl.pl b/crawl.pl
index 300cfd5..c1822fb 100644
--- a/crawl.pl
+++ b/crawl.pl
@@ -13,6 +13,7 @@ GetOptions(
     'graph'    => \my $graph,
     'network'  => \my $network,
     'seed'     => \my $seed,
+    'stats'     => \my $stats,
     'conf=s'   => \my $conf,
 );
 
@@ -32,3 +33,4 @@ $gh->harvest_repo     if $repo;
 $gh->graph_repo       if $network;
 $gh->gen_graph        if $graph;
 $gh->gen_seed         if $seed;
+$gh->stats_by_country         if $stats;
diff --git a/lib/githubexplorer.pm b/lib/githubexplorer.pm
index 7b9c252..aef66ec 100644
--- a/lib/githubexplorer.pm
+++ b/lib/githubexplorer.pm
@@ -105,14 +105,47 @@ sub gen_seed {
         my $main_lang = shift @sorted_lang;
         my $other_lang = join( '|', @sorted_lang );
         my $str
-            = $profiles->blog
+            = $pr->blog
             . ";;;github;"
-            . $main_lang . ";"
-            . $other_lang . ";"
-            . $profile->country . "\n";
+            . ($main_lang || '') . ";"
+            . ($other_lang || '') . ";"
+            . ($pr->country || ''). "\n";
         print $fh $str;
     }
     close $fh;
 }
 
+# Dump language statistics for the 20 countries with the most profiles, both to
+# STDERR (via Dump) and to countries.yaml. Each country maps language name =>
+# summed size, "pct_<language>" => percentage of that sum, total_dev => profiles.
+sub stats_by_country {
+    my $self = shift;
+    $self->_connect unless $self->has_schema;
+    my $repositories = $self->schema->resultset('Repositories')->search();
+
+    my $countries = {};
+    while ( my $repos = $repositories->next ) {
+        my $country = $repos->id_profile->country or next;    # profile has no country
+        my $languages = $self->schema->resultset('RepoLang')
+            ->search( { repository => $repos->id } );
+        while ( my $lang = $languages->next ) {
+            $countries->{$country}->{ $lang->language->name } += $lang->size;
+        }
+    }
+    foreach my $country ( keys %$countries ) {
+        my $stats = $countries->{$country};
+        my @langs = keys %$stats;    # languages only: taken before total_dev is stored
+        $stats->{total_dev} = $self->schema->resultset('Profiles')->search( { country => $country } )->count;
+        my $total_bytes = 0;
+        $total_bytes += $stats->{$_} for @langs;
+        next unless $total_bytes;    # no percentages when there is nothing to divide by
+        $stats->{"pct_$_"} = ( $stats->{$_} / $total_bytes ) * 100 for @langs;
+    }
+    my @sorted_countries = sort { $countries->{$b}->{total_dev} <=> $countries->{$a}->{total_dev} } keys %$countries;
+    splice @sorted_countries, 20 if @sorted_countries > 20;    # keep at most the top 20
+    my $final = [ map +{ $_ => $countries->{$_} }, @sorted_countries ];
+    warn Dump $final;
+    DumpFile('countries.yaml', $final);
+}
+
 1;