package Gsg::Gather;

use strict;
use warnings;

use Log::Log4perl qw(:easy);
use Shellex::Shellex qw(shellex findBin);
use File::Temp qw(tempfile);
use Exporter qw(import);

our @EXPORT_OK = qw(get_file_tree get_projects trim_project_paths get_diff_stat);

# NOTE(review): shellex() and findBin() come from Shellex::Shellex, which is
# not visible here. This module uses them as "run a shell command string and
# return its output" and "resolve a program name to something runnable";
# confirm against that module.
#
# The sub prototypes are kept on purpose: they are part of how existing call
# sites are parsed.

# Return the `git diff --stat` output between two commits of a repository.
#
#   $project_dir     path handed to git's --git-dir
#   $newest_commit   first commit id given to `git diff`
#   $compare_commit  second commit id given to `git diff`
#   $logger          logger object passed through to findBin/shellex
#
# Example of the command that is run:
#   git --git-dir="/home/git/git-site-gen.git/" diff --stat 37f54811d49d41a4d794594e5bbaaee2271d82ad 1afd193eda9a6bc703011a72afa273e560355713
sub get_diff_stat($$$$) {
    my ( $project_dir, $newest_commit, $compare_commit, $logger ) = @_;

    my $gitCmd = findBin( "git", $logger );

    # The `diff --stat` subcommand is required; without it git is handed two
    # bare commit ids and no command to run.
    my $diff_stat = shellex(
        "$gitCmd --git-dir=\"$project_dir\" diff --stat $newest_commit $compare_commit",
        $logger
    );

    return $diff_stat;
}

# List the git project directories found directly under $git_dir.
#
#   $git_dir               directory whose immediate sub-directories are listed
#   $ignored_projects_ref  array ref of directory paths to leave out
#   $logger                logger object (info() is called for skipped entries)
#
# Returns an array ref of the directories, as printed by `ls -d $git_dir/*/`,
# that contain ".git" in their path and are not in the ignore list.
sub get_projects($$$) {
    my ( $git_dir, $ignored_projects_ref, $logger ) = @_;

    my $ls_cmd = findBin( "ls", $logger );

    my @git_project_dirs;
    foreach my $dir ( split( "\n", shellex( "$ls_cmd -d $git_dir/*/", $logger ) ) ) {

        # Only directories that look like git repositories are of interest
        next if $dir !~ m/\.git/;

        # \Q..\E: the path must be compared literally. Unquoted, the "." in
        # "foo.git" is a regex wildcard and other metacharacters in a path
        # could even make the pattern invalid.
        if ( grep { /^\Q$dir\E$/ } @$ignored_projects_ref ) {
            $logger->info("Found $dir in ignore list, skipping...");
            next;
        }

        push( @git_project_dirs, $dir );
    }

    return \@git_project_dirs;
}

# Reduce full project paths to their last path component.
#
#   $projects_ref  array ref of project paths
#   $logger        logger object (info() is called once before returning)
#
# Returns an array ref of the trimmed paths. Paths that do not match the
# "<name>.<suffix>" shape are left out of the result.
sub trim_project_paths($$) {
    my ( $projects_ref, $logger ) = @_;

    my @trimmed_projects;
    foreach my $project_path (@$projects_ref) {

        # Chop parts of the path we dont need for the web root
        # /some/path/project.git/ -> project.git/
        if ( $project_path =~ m/\/?([^\/]+\.[^\.]+$)/ ) {
            push( @trimmed_projects, $1 );
        }
    }

    $logger->info("Returning trimmed project paths");
    return \@trimmed_projects;
}

# Collect the files, file contents and commits of a repository.
#
#   $projectDir  path handed to git's --git-dir
#   $logger      logger object passed through to findBin/shellex
#
# Returns a four element list:
#   \%file_tree     file name => object id, from `git ls-tree --full-tree -r HEAD`
#   \%file_content  file name => `git show` output, or the string "Binary file"
#                   when file(1) does not report a text MIME type
#   \%commits       commit id => `git show <commit id>` output
#   \@commit_ids    commit ids in the order `git log` printed them
#
# File::Temp::tempfile croaks if the scratch file cannot be created.
sub get_file_tree($$) {
    my ( $projectDir, $logger ) = @_;

    my $gitCmd = findBin( "git", $logger );

    # Get files
    my %file_tree;
    foreach my $file ( split( "\n", shellex( "$gitCmd --git-dir=\"$projectDir\" ls-tree --full-tree -r HEAD", $logger ) ) ) {
        chomp $file;

        # Only trust $1/$2 when the match succeeded; after a failed match they
        # still hold the captures of the previous line.
        if ( $file =~ /([a-z0-9]{40})\t(.*)$/ ) {

            # Name - object id
            $file_tree{$2} = $1;
        }
        else {
            $logger->info("Skipping unparsable ls-tree line: $file");
        }
    }

    # Get file content
    my %file_content;
    if (%file_tree) {

        # - TODO -
        # A hack -- interested in a better way to detect if git files are binary
        # Also increases run time, since every blob is written out and probed
        # with file(1) (will likely balloon on bigger git repos)
        my $file_cmd = findBin( "file", $logger );

        # One private scratch file, reused for every blob, instead of a fixed
        # and predictable /tmp path that concurrent runs would fight over.
        my ( $scratch_fh, $scratch_path ) = tempfile( "gsg_blob_XXXXXX", TMPDIR => 1, UNLINK => 1 );
        close($scratch_fh);

        foreach my $filename ( keys %file_tree ) {
            my $show_blob = "$gitCmd --git-dir=\"$projectDir\" show $file_tree{$filename}";

            # -b keeps the (random) scratch file name out of the output, so
            # the /text/ test below only ever sees the MIME description.
            my $bin_test = shellex( "$show_blob > \"$scratch_path\" && $file_cmd -b -i \"$scratch_path\"", $logger );

            # Probe first: the content is only fetched when it will be kept
            my $content = "Binary file";
            if ( defined $bin_test && $bin_test =~ m/text/ ) {
                $content = shellex( $show_blob, $logger );
                chomp $content if defined $content;
            }

            # Name - file content
            $file_content{$filename} = $content;
        }

        # UNLINK => 1 only cleans up at program exit; remove it right away
        unlink($scratch_path);
    }

    # Get logs
    my @commit_ids;
    foreach my $log_line ( split( "\n", shellex( "$gitCmd --git-dir=\"$projectDir\" log", $logger ) ) ) {

        # Anchored at line start: commit messages are indented in `git log`
        # output, so text such as "This reverts commit <id>." is not mistaken
        # for a commit header.
        if ( $log_line =~ m/^commit\ ([a-z0-9]{40})/ ) {
            push( @commit_ids, $1 );
        }
    }

    my %commits;
    foreach my $commit_id (@commit_ids) {
        my $commit_info = shellex( "$gitCmd --git-dir=\"$projectDir\" show $commit_id", $logger );
        chomp $commit_info if defined $commit_info;
        $commits{$commit_id} = $commit_info;
    }

    # We return commit_ids as well to preserve ordering
    return ( \%file_tree, \%file_content, \%commits, \@commit_ids );
}

1;