From 7eb24ee8cb01b7bd7163d63edf9cf16ff8993112 Mon Sep 17 00:00:00 2001 From: Franck Cuny Date: Wed, 1 Jan 2014 16:31:16 -0800 Subject: no idea --- lib/githubexplorer/Gexf.pm | 254 ++++++++++++----------- lib/githubexplorer/Schema/Result/Profiles.pm | 1 + lib/githubexplorer/Schema/Result/Repositories.pm | 1 + 3 files changed, 138 insertions(+), 118 deletions(-) (limited to 'lib') diff --git a/lib/githubexplorer/Gexf.pm b/lib/githubexplorer/Gexf.pm index 7e82f80..6832379 100644 --- a/lib/githubexplorer/Gexf.pm +++ b/lib/githubexplorer/Gexf.pm @@ -25,67 +25,85 @@ has graph => ( meta => { creator => ['linkfluence'] }, graph => { type => 'static', - attributes => { - class => 'node', - type => 'static', - attribute => [ - { - id => 0, - type => 'string', - title => 'name' - }, - { - id => 1, - type => 'string', - title => 'type', - }, - { - id => 2, - type => 'float', - title => 'followers_count' - }, - { - id => 3, - type => 'float', - title => 'following_count' - }, - { - id => 4, - type => 'float', - title => 'forks', - }, - { - id => 5, - type => 'string', - title => 'location', - }, - { - id => 6, - type => 'float', - title => 'public_gist_count', - }, - { - id => 7, - type => 'float', - title => 'public_repo_count', - }, - { - id => 8, - type => 'string', - title => 'language', - }, - { - id => 9, - type => 'string', - title => 'description', - }, - { - id => 10, - type => 'float', - title => 'watchers', - } - ] - } + attributes => [ + { + class => 'edge', + type => 'static', + attribute => [ + { + id => 0, + type => 'string', + title => 'language' + }, + { + id => 0, + type => 'float', + title => 'collaborate' + }, + ] + }, + { + class => 'node', + type => 'static', + attribute => [ + { + id => 0, + type => 'string', + title => 'name' + }, + { + id => 1, + type => 'string', + title => 'type', + }, + { + id => 2, + type => 'float', + title => 'followers_count' + }, + { + id => 3, + type => 'float', + title => 'following_count' + }, + { + id => 4, + type => 'float', + title => 'forks', + }, + { + id => 5, + type => 'string', + title => 'location', + }, + { + id => 6, + type => 'float', + title => 'public_gist_count', + }, + { + id => 7, + type => 'float', + title => 'public_repo_count', + }, + { + id => 8, + type => 'string', + title => 'language', + }, + { + id => 9, + type => 'string', + title => 'description', + }, + { + id => 10, + type => 'float', + title => 'watchers', + } + ] + }, + ] } } }; @@ -95,26 +113,26 @@ has graph => ( sub gen_gexf { my $self = shift; - $self->_average_by_langage(); -# $self->basic_profiles; -# my $basic_profiles = $self->dump_gexf; -# $basic_profiles > io('basic_profiles.gexf'); + #$self->_average_by_langage(); + $self->basic_profiles; + my $basic_profiles = $self->dump_gexf; + $basic_profiles > io('basic_profiles.gexf'); - $self->profiles_from_repositories; - my $profiles_from_repositories = $self->dump_gexf; - $profiles_from_repositories > io('profiles_from_repositories.gexf'); + #$self->profiles_from_repositories; + #my $profiles_from_repositories = $self->dump_gexf; + #$profiles_from_repositories > io('profiles_from_repositories.gexf'); -# $self->repositories_from_profiles; -# my $repositories_from_profiles = $self->dump_gexf; -# $repositories_from_profiles > io('repositories_from_profiles.gexf'); + #$self->repositories_from_profiles; + #my $repositories_from_profiles = $self->dump_gexf; + #$repositories_from_profiles > io('repositories_from_profiles.gexf'); } sub dump_gexf { my $self = shift; my $xml_out = XMLout( $self->graph, AttrIndent => 1, keepRoot => 1 ); - say "total edges => ".scalar @{$self->graph->{gexf}->{graph}->{nodes}->{node}}; - say "total nodes => ".scalar @{$self->graph->{gexf}->{graph}->{edges}->{edge}}; + say "total nodes => ".scalar @{$self->graph->{gexf}->{graph}->{nodes}->{node}}; + say "total edges => ".scalar @{$self->graph->{gexf}->{graph}->{edges}->{edge}}; $self->graph->{gexf}->{graph}->{nodes} = undef; $self->graph->{gexf}->{graph}->{edges} = undef; return $xml_out; @@ -124,7 +142,8 @@ sub basic_profiles { my $self = shift; $self->id_edges(0); say "start basic_profiles ..."; - my $profiles = $self->schema->resultset('Profiles')->search(); + my $profiles = + $self->schema->resultset('Profiles')->search(); while ( my $profile = $profiles->next ) { my $node = $self->_get_node_for_profile($profile); @@ -134,10 +153,23 @@ sub basic_profiles { my $edges = $self->schema->resultset('Follow')->search(); my $id = 0; while ( my $edge = $edges->next ) { + my $collaborate = 1; +# my $forks_source = $self->schema->resultset('Fork')->search({profile => +# $edge->origin->id}); +# while (my $fork = $forks_source->next) { +# my $contrib = $self->schema->resultset('Fork')->search({repos => +# $fork->repos->id}); +# while (my $c = $contrib->next) { +# $collaborate++ if ($c->profile->id == $edge->dest->id); +# } +# } my $e = { source => $edge->origin->id, target => $edge->dest->id, id => $self->inc_edges, + weight => $collaborate, + collaborate => $collaborate, + language => $edge->origin->main_language, }; push @{ $self->graph->{gexf}->{graph}->{edges}->{edge} }, $e; } @@ -160,13 +192,9 @@ sub profiles_from_repositories { while ( my $repos = $repositories->next ) { my $forks = $self->schema->resultset('Fork') ->search( { repos => $repos->id } ); - my $language; - my $lang_rs = $self->schema->resultset('RepoLang')->search({repository => $repos->id}, {order_by => 'size'})->first; - if ($lang_rs) { - $language = $lang_rs->language->name; - } - if ($language && exists $self->avg_contrib_by_lang->{$language}->{avg} && $forks <= $self->avg_contrib_by_lang->{$language}->{avg}){ - warn ">>>>> on skip pour ".$repos->name."\n"; + if ($repos->main_language && exists + $self->avg_contrib_by_lang->{$repos->main_language}->{avg} && + $forks < $self->avg_contrib_by_lang->{$repos->main_language}->{avg}){ next; } my @profiles; @@ -188,6 +216,7 @@ sub profiles_from_repositories { } foreach my $e (keys %$edges) { foreach my $t (keys %{$edges->{$e}}) { + next unless $edges->{$e}->{$t}->{weight} > 5; my $edge = { id => $self->inc_edges, source => $e, @@ -211,11 +240,6 @@ sub repositories_from_profiles { next if $repos->name =~ /dotfiles/; if ( !exists $nodes->{ $repos->name } ) { - my $language - = $self->schema->resultset('RepoLang') - ->search( { repository => $repos->id }, - { order_by => 'size' } )->first; - my $lang = $language ? $language->language->name : 'none'; $nodes->{ $repos->name } = { id => $repos->name, label => $repos->name, @@ -226,7 +250,7 @@ sub repositories_from_profiles { { for => 4, value => $repos->forks }, { for => 9, value => $repos->description }, { for => 10, value => $repos->watchers }, - { for => 8, value => $lang }, + { for => 8, value => $repos->main_language }, ], }, }; @@ -261,7 +285,7 @@ sub repositories_from_profiles { } foreach my $e (keys %$edges) { foreach my $t (keys %{$edges->{$e}}) { - next if $edges->{$e}->{$t}->{weight} < 10; + next if $edges->{$e}->{$t}->{weight} < 5; my $edge = { id => $self->inc_edges, source => $e, @@ -271,15 +295,11 @@ sub repositories_from_profiles { push @{ $self->graph->{gexf}->{graph}->{edges}->{edge} }, $edge; } } - say "edges => ".scalar @{ $self->graph->{gexf}->{graph}->{edges}->{edge} }; say "repositories_from_profiles done"; } sub _get_node_for_profile { my ( $self, $profile ) = @_; - my ( $languages, $ordered_languages ) - = $self->_get_languages_for_profile($profile); - my $main_lang = shift @$ordered_languages; my $node = { id => $profile->id, label => $profile->login, @@ -292,45 +312,43 @@ sub _get_node_for_profile { { for => 5, value => $profile->country }, { for => 6, value => $profile->public_gist_count }, { for => 7, value => $profile->public_repo_count }, - { for => 8, value => $main_lang }, + { for => 8, value => $profile->main_language }, ] }, }; return $node; } -sub _get_languages_for_profile { - my ( $self, $profile ) = @_; - - my $forks = $self->schema->resultset('Fork') - ->search( { profile => $profile->id } ); - - my %languages; - while ( my $fork = $forks->next ) { - my $languages = $self->schema->resultset('RepoLang') - ->search( { repository => $fork->repos->id } ); - while ( my $lang = $languages->next ) { - $languages{ $lang->language->name } += $lang->size; - } - } - my @sorted_lang - = sort { $languages{$b} <=> $languages{$a} } keys %languages; - return ( \%languages, \@sorted_lang ); -} +#sub _get_languages_for_profile { +# my ( $self, $profile ) = @_; +# +# my $forks = $self->schema->resultset('Fork') +# ->search( { profile => $profile->id } ); +# +# my %languages; +# while ( my $fork = $forks->next ) { +# my $languages = $self->schema->resultset('RepoLang') +# ->search( { repository => $fork->repos->id } ); +# while ( my $lang = $languages->next ) { +# $languages{ $lang->language->name } += $lang->size; +# } +# } +# my @sorted_lang +# = sort { $languages{$b} <=> $languages{$a} } keys %languages; +# return ( \%languages, \@sorted_lang ); +#} sub _average_by_langage { my $self = shift; my $hash_lang; my $repositories = $self->schema->resultset('Repositories')->search(); + say "gather stats ..."; while ( my $repos = $repositories->next ) { - my $lang = $self->schema->resultset('RepoLang')->search( - { repository => $repos->id }, { order_by => 'size' } - )->first; - next unless $lang; - $hash_lang->{ $lang->language->name }->{repositories}++; + next unless $repos->main_language; + $hash_lang->{ $repos->main_language }->{repositories}++; my $forks = $self->schema->resultset('Fork')->search( { repos => $repos->id } )->count; - $hash_lang->{ $lang->language->name }->{contributors} += $forks; - $hash_lang->{$lang->language->name}->{avg} = int ($hash_lang->{$lang->language->name}->{contributors} / $hash_lang->{$lang->language->name}->{repositories}); + $hash_lang->{ $repos->main_language }->{contributors} += $forks; + $hash_lang->{$repos->main_language}->{avg} = int ($hash_lang->{$repos->main_language}->{contributors} / $hash_lang->{$repos->main_language}->{repositories}); }; $self->avg_contrib_by_lang($hash_lang); } diff --git a/lib/githubexplorer/Schema/Result/Profiles.pm b/lib/githubexplorer/Schema/Result/Profiles.pm index b43211e..df8f147 100644 --- a/lib/githubexplorer/Schema/Result/Profiles.pm +++ b/lib/githubexplorer/Schema/Result/Profiles.pm @@ -11,6 +11,7 @@ __PACKAGE__->add_columns( company => { data_type => 'varchar', is_nullable => 1 }, created_at => { data_type => 'timestamp' }, email => { data_type => 'varchar', is_nullable => 1 }, + main_language => { data_type => 'varchar', is_nullable => 1 }, followers_count => { data_type => 'int' }, following_count => { data_type => 'int' }, gravatar_id => { data_type => 'varchar', is_nullable => 1 }, diff --git a/lib/githubexplorer/Schema/Result/Repositories.pm b/lib/githubexplorer/Schema/Result/Repositories.pm index 58c0e51..ab82fef 100644 --- a/lib/githubexplorer/Schema/Result/Repositories.pm +++ b/lib/githubexplorer/Schema/Result/Repositories.pm @@ -10,6 +10,7 @@ __PACKAGE__->add_columns( name => { data_type => 'varchar' }, homepage => { data_type => 'varchar', is_nullable => 1 }, url => { data_type => 'varchar', is_nullable => 1 }, + main_language => { data_type => 'varchar', is_nullable => 1 }, watchers => { data_type => 'int' }, forks => { data_type => 'int' }, fork => { data_type => 'bool' }, -- cgit 1.4.1