diff options
-rw-r--r-- | clean-country.pl | 5 | ||||
-rw-r--r-- | crawl.pl | 2 | ||||
-rw-r--r-- | lib/githubexplorer.pm | 41 |
3 files changed, 43 insertions, 5 deletions
diff --git a/clean-country.pl b/clean-country.pl index 8d26d12..460b29f 100644 --- a/clean-country.pl +++ b/clean-country.pl @@ -36,5 +36,8 @@ while ( my $pr = $profiles->next ) { next if $@; say "** fix with " . $pr->city . " in " . $pr->country; } - sleep(1); + if (++$i == 10) { + sleep(2); + $i = 0; + } } \ No newline at end of file diff --git a/crawl.pl b/crawl.pl index 300cfd5..c1822fb 100644 --- a/crawl.pl +++ b/crawl.pl @@ -13,6 +13,7 @@ GetOptions( 'graph' => \my $graph, 'network' => \my $network, 'seed' => \my $seed, + 'stats' => \my $stats, 'conf=s' => \my $conf, ); @@ -32,3 +33,4 @@ $gh->harvest_repo if $repo; $gh->graph_repo if $network; $gh->gen_graph if $graph; $gh->gen_seed if $seed; +$gh->stats_by_country if $stats; diff --git a/lib/githubexplorer.pm b/lib/githubexplorer.pm index 7b9c252..aef66ec 100644 --- a/lib/githubexplorer.pm +++ b/lib/githubexplorer.pm @@ -105,14 +105,47 @@ sub gen_seed { my $main_lang = shift @sorted_lang; my $other_lang = join( '|', @sorted_lang ); my $str - = $profiles->blog + = $pr->blog . ";;;github;" - . $main_lang . ";" - . $other_lang . ";" - . $profile->country . "\n"; + . ($main_lang || '') . ";" + . ($other_lang || '') . ";" + . ($pr->country || ''). 
"\n"; print $fh $str; } close $fh; }
+# stats_by_country
+#
+# Builds per-country language statistics and dumps the countries with the
+# most developers.
+#
+# For every Repositories row whose owner profile has a country, the sizes of
+# the repository's RepoLang rows are accumulated per country and per language
+# name. Each country that ends up with at least one language then gets a hash
+# with these keys:
+#
+#   <language name>      accumulated size for that language
+#   pct_<language name>  share of that language in the country's summed
+#                        language sizes, as a percentage (0 when the sum is 0)
+#   total_dev            number of Profiles rows recorded for that country
+#
+# The countries are ordered by total_dev, highest first (ties broken by
+# country name so the output is stable), and at most 20 of them are kept.
+# The result, a list of single-key hashes ({ country => stats }), is emitted
+# through warn() and written to 'countries.yaml'.
+#
+# Takes no argument besides the invocant. The return value is whatever
+# DumpFile returns; the caller visible in crawl.pl ignores it.
+#
+# NOTE(review): Dump/DumpFile are presumably the YAML functions imported at
+# the top of this module -- confirm.
+sub stats_by_country {
+    my $self = shift;
+    $self->_connect unless $self->has_schema;
+
+    my $max_countries = 20;
+    my $schema        = $self->schema;
+    my $repositories  = $schema->resultset('Repositories')->search();
+
+    # country => { language name => accumulated size }
+    my %size_of;
+    while ( my $repo = $repositories->next ) {
+
+        # Skip repositories without an owner profile or without a country
+        # instead of dying on a method call on undef.
+        my $profile = $repo->id_profile or next;
+        my $country = $profile->country or next;
+
+        my $languages = $schema->resultset('RepoLang')
+            ->search( { repository => $repo->id } );
+        while ( my $lang = $languages->next ) {
+            $size_of{$country}{ $lang->language->name } += $lang->size || 0;
+        }
+    }
+
+    # country => { sizes, percentages, total_dev }
+    my %stats_of;
+    for my $country ( keys %size_of ) {
+        my $sizes = $size_of{$country};
+
+        # Sum the language sizes only, before total_dev is added to the
+        # record, so the developer count cannot leak into the percentages.
+        my $total_size = 0;
+        $total_size += $_ for values %$sizes;
+
+        my %stats = %$sizes;
+        for my $name ( keys %$sizes ) {
+            $stats{ "pct_" . $name }
+                = $total_size ? ( $sizes->{$name} / $total_size ) * 100 : 0;
+        }
+        $stats{total_dev} = $schema->resultset('Profiles')
+            ->search( { country => $country } )->count;
+
+        $stats_of{$country} = \%stats;
+    }
+
+    my @sorted_countries = sort {
+        $stats_of{$b}{total_dev} <=> $stats_of{$a}{total_dev}
+            || $a cmp $b
+    } keys %stats_of;
+
+    # Keep only as many entries as there are countries: never pad the list
+    # with undefined rows when fewer than $max_countries are known.
+    my @final;
+    for my $country (@sorted_countries) {
+        last if @final >= $max_countries;
+        push @final, { $country => $stats_of{$country} };
+    }
+
+    warn Dump( \@final );
+    DumpFile( 'countries.yaml', \@final );
+}
+
 1; |