From d55ac1c0b26161f5c6d2604a8f192ff2bf2e7d3a Mon Sep 17 00:00:00 2001 From: franck cuny Date: Mon, 15 Feb 2010 13:02:59 +0100 Subject: update gexf --- lib/githubexplorer.pm | 41 ++++++++++-- lib/githubexplorer/Gexf.pm | 155 ++++++++++++++++++++------------------------- 2 files changed, 107 insertions(+), 89 deletions(-) (limited to 'lib') diff --git a/lib/githubexplorer.pm b/lib/githubexplorer.pm index 7b9c252..aef66ec 100644 --- a/lib/githubexplorer.pm +++ b/lib/githubexplorer.pm @@ -105,14 +105,47 @@ sub gen_seed { my $main_lang = shift @sorted_lang; my $other_lang = join( '|', @sorted_lang ); my $str - = $profiles->blog + = $pr->blog . ";;;github;" - . $main_lang . ";" - . $other_lang . ";" - . $profile->country . "\n"; + . ($main_lang || '') . ";" + . ($other_lang || '') . ";" + . ($pr->country || ''). "\n"; print $fh $str; } close $fh; } +sub stats_by_country { + my $self = shift; + $self->_connect unless $self->has_schema; + my $repositories = $self->schema->resultset('Repositories')->search(); + + my $countries; + while (my $repos = $repositories->next) { + next if !$repos->id_profile->country; + my $languages = $self->schema->resultset('RepoLang') + ->search( { repository => $repos->id } ); + while ( my $lang = $languages->next ) { + $countries->{ $repos->id_profile->country }->{$lang->language->name} += $lang->size; + } + } + foreach my $country (keys %$countries) { + my $total = $self->schema->resultset('Profiles')->search({country => $country})->count; + $countries->{$country}->{total_dev} = $total; + my $total_bytes; + map {$total_bytes += $countries->{$country}->{$_}} keys %{$countries->{$country}}; + foreach my $lang (keys %{$countries->{$country}}) { + $countries->{$country}->{"pct_".$lang} = ($countries->{$country}->{$lang} / $total_bytes) * 100; + } + } + my @sorted_countries = sort {$countries->{$b}->{total_dev} <=> $countries->{$a}->{total_dev}} keys %$countries; + + my $final; + for ( 0 .. 19) { + push @$final, {$sorted_countries[$_] => $countries->{$sorted_countries[$_]} }; + } + warn Dump $final; + DumpFile('countries.yaml', $final); +} + 1; diff --git a/lib/githubexplorer/Gexf.pm b/lib/githubexplorer/Gexf.pm index de0da49..98f3d38 100644 --- a/lib/githubexplorer/Gexf.pm +++ b/lib/githubexplorer/Gexf.pm @@ -2,6 +2,7 @@ package githubexplorer::Gexf; use Moose; use XML::Simple; +use IO::All; use 5.010; has schema => ( is => 'ro', isa => 'Object', required => 1 ); @@ -29,7 +30,7 @@ has graph => ( attribute => [ { id => 0, - type => 'float', + type => 'string', title => 'name' }, { @@ -93,20 +94,20 @@ has graph => ( sub gen_gexf { my $self = shift; - $self->basic_profiles; - my $basic_profiles = $self->dump_gexf; - $basic_profiles > io('basic_profiles.gexf'); +# $self->basic_profiles; +# my $basic_profiles = $self->dump_gexf; +# $basic_profiles > io('basic_profiles.gexf'); $self->profiles_from_repositories; my $profiles_from_repositories = $self->dump_gexf; $profiles_from_repositories > io('profiles_from_repositories.gexf'); - $self->repositories_from_profiles; - my $repositories_from_profiles = $self->dump_gexf; - $profiles_from_repositories > io('repositories_from_profiles.gexf'); +# $self->repositories_from_profiles; +# my $repositories_from_profiles = $self->dump_gexf; +# $repositories_from_profiles > io('repositories_from_profiles.gexf'); } -sub dump_gefx { +sub dump_gexf { my $self = shift; my $xml_out = XMLout( $self->graph, AttrIndent => 1, keepRoot => 1 ); $self->graph->{gexf}->{graph}->{nodes} = undef; @@ -149,6 +150,7 @@ sub profiles_from_repositories { my $node = $self->_get_node_for_profile($profile); push @{ $self->graph->{gexf}->{graph}->{nodes}->{node} }, $node; } + my $edges; my $repositories = $self->schema->resultset('Repositories')->search(); while ( my $repos = $repositories->next ) { my $forks = $self->schema->resultset('Fork') @@ -158,17 +160,31 @@ sub profiles_from_repositories { push @profiles, $fork->profile->id; } foreach my $p (@profiles) { - map { - next if $_ eq $p; - my $e = { - source => $p, - target => $_, - id => $self->inc_edges, - }; - push @{ $self->graph->{gexf}->{graph}->{edges}->{edge} }, $e; - } @profiles; + foreach my $t (@profiles) { + next if $t eq $p; + if (exists $edges->{$p}->{$t}) { + $edges->{$p}->{$t}->{weight}++; + }elsif(exists $edges->{$t}->{$p}) { + $edges->{$t}->{$p}->{weight}++; + }else{ + $edges->{$p}->{$t}->{weight}++; + } + } + } + } + foreach my $e (keys %$edges) { + foreach my $t (keys %{$edges->{$e}}) { + next if $edges->{$e}->{$t}->{weight} < 4; + my $edge = { + id => $self->inc_edges, + source => $e, + target => $t, + weight => $edges->{$e}->{$t}->{weight}, + }; + push @{ $self->graph->{gexf}->{graph}->{edges}->{edge} }, $edge; } } + say "edges => ".scalar @{ $self->graph->{gexf}->{graph}->{edges}->{edge} }; say "profiles_from_repositories done"; } @@ -203,26 +219,48 @@ sub repositories_from_profiles { }, }; } - my $forks = $self->schema->resultset('Fork') - ->search( { repos => $repos->id } ); - while ( my $fork = $forks->next ) { - my $e = { - source => $fork->profile->id, - target => $fork->repos->name, - id => $self->inc_edges, - }; - push @{ $self->graph->{gexf}->{graph}->{edges}->{edge} }, $e; - } } map { push @{ $self->graph->{gexf}->{graph}->{nodes}->{node} }, $nodes->{$_} } keys %$nodes; - say "repositories_from_profiles done"; -} -sub stats_languages_by_country { - my $self = shift; + my $edges; + my $profiles = $self->schema->resultset('Profiles'); + while ( my $profile = $profiles->next ) { + my $forks = $self->schema->resultset('Fork')->search({profile => + $profile->id}); + my @repos; + while (my $fork = $forks->next) { + push @repos, $fork->repos->name; + } + foreach my $r (@repos) { + foreach my $t (@repos) { + next if $t eq $r; + if (exists $edges->{$r}->{$t}) { + $edges->{$r}->{$t}->{weight}++; + }elsif(exists $edges->{$t}->{$r}){ + $edges->{$t}->{$r}->{weight}++; + }else{ + $edges->{$r}->{$t}->{weight}++; + } + } + } + } + foreach my $e (keys %$edges) { + foreach my $t (keys %{$edges->{$e}}) { + next if $edges->{$e}->{$t}->{weight} < 10; + my $edge = { + id => $self->inc_edges, + source => $e, + target => $t, + weight => $edges->{$e}->{$t}->{weight}, + }; + push @{ $self->graph->{gexf}->{graph}->{edges}->{edge} }, $edge; + } + } + say "edges => ".scalar @{ $self->graph->{gexf}->{graph}->{edges}->{edge} }; + say "repositories_from_profiles done"; } sub _get_node_for_profile { @@ -250,7 +288,7 @@ sub _get_node_for_profile { } sub _get_languages_for_profile { - my ( $self, $profile ) = shift; + my ( $self, $profile ) = @_; my $forks = $self->schema->resultset('Fork') ->search( { profile => $profile->id } ); @@ -268,57 +306,4 @@ sub _get_languages_for_profile { return ( \%languages, \@sorted_lang ); } -#sub repositories { -# my $self = shift; -# -# say "start repositories ..."; -# my $repositories = $self->schema->resultset('Repositories')->search({fork => 0}); -# while (my $repos = $repositories->next) { -# -# next if $repos->name =~ /dotfiles/i; -# # available in forks ? -# my $check_fork = $self->schema->resultset('Fork')->search({repos => $repos->id}); -# next if $check_fork->count < 1; -# -# if (!grep {$_->{id} eq "repos_".$repos->name} @{$self->graph->{gexf}->{graph}->{nodes}->{node}}) { -# my $language = $self->schema->resultset('RepoLang')->search({repository => $repos->id}, {order_by => 'size'})->first; -# my $lang = $language ? $language->language->name : 'none'; -# my $node = { -# id => "repos_".$repos->name, -# label => $repos->name, -# attvalues => { -# attvalue => [ -# { for => 0, value => $repos->name}, -# { for => 1, value => "repository"}, -# { for => 4, value => $repos->forks}, -# { for => 9, value => $repos->description}, -# { for => 10, value => $repos->watchers}, -# { for => 8, value => $lang}, -# ], -# }, -# }; -# push @{ $self->graph->{gexf}->{graph}->{nodes}->{node} }, $node; -# } -# my $e = { -# source => $repos->id_profile->id, -# target => "repos_".$repos->name, -# id => $self->inc_edges, -# }; -# push @{ $self->graph->{gexf}->{graph}->{edges}->{edge} }, $e; -# } -# -# my $forks = $self->schema->resultset('Fork')->search(); -# -# while (my $fork = $forks->next) { -# next if $fork->repos->name =~ /dotfiles/i; -# my $e = { -# source => $fork->profile->id, -# target => "repos_".$fork->repos->name, -# id => $self->inc_edges, -# }; -# push @{ $self->graph->{gexf}->{graph}->{edges}->{edge} }, $e; -# } -# say " done"; -#} - 1; -- cgit 1.4.1