download_archive_dot_org.pl 1.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354
  1. #!/usr/bin/perl
  2. use strict;
  3. use warnings;
  4. # Try and find .mp3 links in archive.org pages
  5. if ( ! defined $ARGV[0] ) {
  6. print "Need to pass archive.org URL\n";
  7. exit 1;
  8. }
  9. my $page_url = shift(@ARGV);
  10. my $page_content = `curl -s $page_url`;
  11. my @names;
  12. my @links;
  13. my $counter = 1;
  14. foreach my $line ( split("\n",$page_content) ) {
  15. chomp $line;
  16. if ( $line =~ m/<meta itemprop="name" content="(.*)"/ ) {
  17. my $name = $counter . "_" . $1;
  18. push(@names,$name);
  19. $counter++;
  20. } elsif ( $line =~ m/<link itemprop="associatedMedia" href="(.*\.mp3)"/ ) {
  21. push(@links,$1);
  22. }
  23. }
  24. my %link_map;
  25. @link_map{@names} = @links;
  26. sub make_filename($) {
  27. my $name = shift;
  28. my $filename;
  29. open(my $fh, ">>", \$filename);
  30. foreach my $char ( split("",$name) ) {
  31. if ( $char =~ m/[\s->']/ ) {
  32. print $fh "";
  33. } else {
  34. print $fh "$char";
  35. }
  36. }
  37. return $filename;
  38. }
  39. foreach my $key ( keys %link_map ) {
  40. #print "$key : $link_map{$key}\n";
  41. my $filename = make_filename($key) . ".mp3";
  42. #print "wget -q $link_map{$key} -O $filename\n";
  43. print "Downloading $key\n";
  44. system("wget -q $link_map{$key} -O $filename");
  45. }