Browse Source

Download tracks from archive.org streaming pages (mp3 only)

spesk1 4 years ago
parent
commit
0e13287500
1 changed files with 54 additions and 0 deletions
  1. 54 0
      download_archive_dot_org.pl

+ 54 - 0
download_archive_dot_org.pl

@@ -0,0 +1,54 @@
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+
+# Find the .mp3 links on an archive.org page and download each track
+
+# Require the page URL as the first command-line argument.
+if ( ! defined $ARGV[0] ) {
+	print STDERR "Need to pass archive.org URL\n";
+	exit 1;
+}
+
+my $page_url = shift(@ARGV);
+
+# Fetch the page through a list-form pipe open: the URL is handed to curl as
+# one argument and is never parsed by a shell, so characters such as "&", "?"
+# or ";" in the URL cannot split or alter the command.
+open(my $curl_fh, "-|", "curl", "-s", $page_url)
+	or die "Cannot run curl: $!\n";
+my $page_content = do { local $/; <$curl_fh> };
+$page_content = "" unless defined $page_content;
+# close() on a pipe reports a non-zero exit status of the child.
+close($curl_fh)
+	or die "curl failed for $page_url (exit status " . ( $? >> 8 ) . ")\n";
+
+my @names;
+my @links;
+
+# Collect track names and .mp3 links in the order they appear on the page.
+# [^"]* keeps each capture inside its own attribute value even when further
+# quoted attributes follow on the same line.
+my $counter = 1;
+foreach my $line ( split("\n",$page_content) ) {
+	if ( $line =~ m/<meta itemprop="name" content="([^"]*)"/ ) {
+		# Prefix a running number so names stay unique and keep page order.
+		push(@names, $counter . "_" . $1);
+		$counter++;
+	} elsif ( $line =~ m/<link itemprop="associatedMedia" href="([^"]*\.mp3)"/ ) {
+		push(@links,$1);
+	}
+}
+
+# Names and links are paired purely by position. If the counts differ, pair
+# only the common prefix so no name ends up mapped to an undefined URL.
+my $pair_count = @names < @links ? scalar(@names) : scalar(@links);
+if ( @names != @links ) {
+	warn "Found " . scalar(@names) . " names but " . scalar(@links)
+		. " mp3 links; pairing the first $pair_count of each\n";
+}
+
+my %link_map;
+if ( $pair_count > 0 ) {
+	@link_map{ @names[ 0 .. $pair_count - 1 ] } = @links[ 0 .. $pair_count - 1 ];
+}
+
+# Build a file base name from a track name by deleting every whitespace
+# character, hyphen, ">" and single quote.
+#
+#   $name   - track name (string)
+#   returns - the name with those characters removed; "" when nothing is left
+#
+# The ($) prototype is retained so existing call sites parse exactly as before.
+sub make_filename($) {
+	my $name = shift;
+	$name = "" unless defined $name;
+
+	# The hyphen is escaped so it is a literal character; written unescaped
+	# right after \s it forms a malformed range ("False [] range" warning).
+	( my $filename = $name ) =~ s/[\s\->']//g;
+
+	return $filename;
+}
+
+# Download every track with wget. Keys start with the running number assigned
+# while parsing, so sort on that number to fetch tracks in page order instead
+# of in arbitrary hash order.
+my %track_number = map { $_ => ( m/\A(\d+)_/ ? $1 : 0 ) } keys %link_map;
+foreach my $key ( sort { $track_number{$a} <=> $track_number{$b} or $a cmp $b } keys %link_map ) {
+	my $filename = make_filename($key) . ".mp3";
+	print "Downloading $key\n";
+	# List-form system() bypasses the shell, so quotes, "&", ";" and other
+	# metacharacters in the page-derived URL or file name are passed to wget
+	# verbatim instead of being interpreted as shell syntax.
+	system("wget", "-q", $link_map{$key}, "-O", $filename) == 0
+		or warn "Download failed for $key (wget status $?)\n";
+}