Gather.pm 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122
  1. package Gsg::Gather;
  2. use strict;
  3. use warnings;
  4. use Log::Log4perl qw(:easy);
  5. use Shellex::Shellex qw(shellex findBin);
  6. use Exporter qw(import);
  7. our @EXPORT_OK = qw(get_file_tree get_projects trim_project_paths get_diff_stat);
  8. sub get_diff_stat($$$$) {
  9. my $project_dir = shift;
  10. my $newest_commit = shift;
  11. my $compare_commit = shift;
  12. my $logger = shift;
  13. # git --git-dir=/home/git/git-site-gen.git/ diff --stat 37f54811d49d41a4d794594e5bbaaee2271d82ad 1afd193eda9a6bc703011a72afa273e560355713
  14. my $gitCmd = findBin("git",$logger);
  15. my $diff_stat = shellex("$gitCmd --git-dir=$project_dir $newest_commit $compare_commit",$logger);
  16. return $diff_stat;
  17. }
  18. sub get_projects($$$) {
  19. my $git_dir = shift;
  20. my $ignored_projects_ref = shift;
  21. my $logger = shift;
  22. my $ls_cmd = findBin("ls",$logger);
  23. my @git_project_dirs;
  24. foreach my $dir ( split("\n", shellex("$ls_cmd -d $git_dir/*/",$logger)) ) {
  25. if ( $dir !~ m/\.git/ ) {
  26. next;
  27. }
  28. if ( grep( /^$dir$/, @$ignored_projects_ref ) ) {
  29. $logger->info("Found $dir in ignore list, skipping...");
  30. next;
  31. } else {
  32. push(@git_project_dirs,$dir);
  33. }
  34. }
  35. return \@git_project_dirs;
  36. }
  37. sub trim_project_paths($$) {
  38. my $projects_ref = shift;
  39. my $logger = shift;
  40. my @trimmed_projects;
  41. foreach my $project_path ( @$projects_ref ) {
  42. # Chop parts of the path we dont need for the web root
  43. # /some/path/project.git/ -> project.git/
  44. if ( $project_path =~ m/\/?([^\/]+\.[^\.]+$)/ ) {
  45. push(@trimmed_projects, $1);
  46. }
  47. }
  48. $logger->info("Returning trimmed project paths");
  49. return \@trimmed_projects;
  50. }
  51. sub get_file_tree($$) {
  52. my $projectDir = shift;
  53. my $logger = shift;
  54. my $gitCmd = findBin("git",$logger);
  55. # Get files
  56. my %file_tree;
  57. foreach my $file ( split("\n", shellex("$gitCmd --git-dir=\"$projectDir\" ls-tree --full-tree -r HEAD",$logger)) ) {
  58. chomp $file;
  59. $file =~ /([a-z0-9]{40})\t(.*)$/;
  60. # Name - object id
  61. $file_tree{$2} = $1;
  62. }
  63. # Get file content
  64. my %file_content;
  65. foreach my $filename ( keys %file_tree ) {
  66. my $content = shellex("$gitCmd --git-dir=\"$projectDir\" show $file_tree{$filename}",$logger);
  67. # - TODO -
  68. # A hack -- interested in a better way to detect if git files are binary
  69. # Also dramatically increases run time (~3 seconds additional run time, will likely ballon on bigger git repos)
  70. my $file_cmd = findBin("file",$logger);
  71. my $rm_cmd = findBin("rm",$logger);
  72. my $test_write_path = "/tmp/test";
  73. my $bin_test = shellex("$gitCmd --git-dir=\"$projectDir\" show $file_tree{$filename} > $test_write_path && $file_cmd -i $test_write_path && $rm_cmd $test_write_path",$logger);
  74. if ( $bin_test !~ m/text/ ) {
  75. $content = "Binary file";
  76. }
  77. chomp $content;
  78. # Name - file content
  79. $file_content{$filename} = $content;
  80. }
  81. # Get logs
  82. my @commit_ids;
  83. foreach my $log_line ( split("\n",shellex("$gitCmd --git-dir=\"$projectDir\" log",$logger)) ) {
  84. if ( $log_line =~ m/commit\ ([a-z0-9]{40})/ ) {
  85. push(@commit_ids,$1);
  86. }
  87. }
  88. my %commits;
  89. foreach my $commit_id ( @commit_ids ) {
  90. my $commit_info = shellex("git --git-dir=\"$projectDir\" show $commit_id",$logger);
  91. chomp $commit_info;
  92. $commits{$commit_id} = $commit_info;
  93. }
  94. # We return commit_ids as well to preserve ordering
  95. return ( \%file_tree, \%file_content, \%commits, \@commit_ids );
  96. }
  97. 1;