So yeah, I wrote this stuff. If you run it, give it a while to load the links because it has to go to 51 web pages and parse through them, so it takes a few seconds.
Copy and paste from comments:
I created this steaming pile to rip mp3 URLs from Newgrounds
because it is a pain in the ASS to download them manually.
Usage:
Pass the URL of an "index" page of some music when you run it. For example,
http://www.newgrounds.com/audio/list/10/3
This program will then list all the mp3s associated with all the links on that listing. You will need to
then copy and paste those URLs in a text file and then use Internet Download Manager or something else
to batch download all the links.
'''
Created on Apr 7, 2009
@author: Justin (Ihaveworms)
version 1
I created this steaming pile to rip mp3 URLs from Newgrounds
because it is a pain in the ASS to download them manually.
usage: Pass the URL of an "index" page of some music when you run it. For example, http://www.newgrounds.com/audio/list/10/3
This program will then list all the mp3s associated with all the links on that listing. You will need to
then copy and paste those URLs in a text file and then use Internet Download Manager or something else
to batch download all the links.
Written in Python
'''
import BaseHTTPServer
import re
import sre
import sys
import urllib2
import urlparse
def parseAddress(input):
    """Normalise a user-supplied address into an ``http://`` URL.

    input -- address as typed on the command line, with or without a
             scheme.  (The name shadows the ``input`` builtin; it is kept
             so existing callers are unaffected.)

    Returns the address unchanged when it already starts with ``http://``
    (compared case-insensitively), or with ``http://`` prepended when it
    carries no scheme at all.  An address with any other scheme prints an
    error message and exits the program with status 1.
    """
    # URL schemes are case-insensitive, so "HTTP://host" is a valid HTTP
    # address and must not be rejected as a foreign scheme.
    if input[:7].lower() == "http://":
        return input
    if "://" in input:
        print("Error: Cannot retrieve URL, address must be HTTP")
        sys.exit(1)
    # No scheme given at all: assume plain HTTP.
    return "http://" + input
def retrieveWebPage(address):
    """Open *address* with ``urllib2.urlopen`` and return the response handle.

    address -- URL to fetch.

    On any failure a one-line message is printed and the program exits with
    status 1, so a caller that gets a return value always has a handle it
    can read from.  KeyboardInterrupt and SystemExit are not intercepted.

    Uses the ``except ... as`` form, which requires Python 2.6 or newer.
    """
    try:
        web_handle = urllib2.urlopen(address)
    except urllib2.HTTPError as e:
        # HTTPError is a subclass of URLError, so it has to be caught first.
        print("Cannot retrieve URL: HTTP Error Code %s" % (e.code,))
        sys.exit(1)
    except urllib2.URLError as e:
        # reason can be a plain string or a socket error object; indexing it
        # yields a single character for a string and fails for a one-argument
        # error.  Prefer the OS error text when present, else the whole reason.
        reason = e.reason
        detail = getattr(reason, "strerror", None) or str(reason)
        print("Cannot retrieve URL: " + detail)
        sys.exit(1)
    except Exception:
        # Deliberately not a bare except: Ctrl-C must still interrupt the run.
        print("Cannot retrieve URL: unknown error")
        sys.exit(1)
    return web_handle
# href of every link inside a "listtitle" table cell on the index page.
_LISTING_LINK_RE = re.compile('<td class="listtitle"><a href="(.*?)"')
# Value of every "filename=" parameter found on a linked page.
_MP3_LINK_RE = re.compile('filename=(.*?)&')


def _collect_links(page_url, pattern):
    """Fetch *page_url* and return the set of absolute URLs captured by *pattern*.

    page_url -- address of the page to download.
    pattern  -- compiled regular expression whose single group captures a link.

    Captured links are resolved against the URL actually retrieved (as
    reported by ``geturl()``), so relative, root-relative and absolute links
    all come out as full URLs.  Empty captures are skipped.
    """
    handle = retrieveWebPage(page_url)
    try:
        base_url = handle.geturl()
        page_text = handle.read()
    finally:
        # Release the connection even if reading the body fails.
        handle.close()

    links = set()
    for link in pattern.findall(page_text):
        if link:
            links.add(urlparse.urljoin(base_url, link))
    return links


def main():
    """Print, sorted, every mp3 URL found via the index page given in argv[1].

    Exits with status 1 after printing a usage message when no URL argument
    was supplied.
    """
    if len(sys.argv) < 2:
        print("Usage:")
        print("%s url" % (sys.argv[0]))
        sys.exit(1)

    index_url = parseAddress(sys.argv[1])

    mp3_urls = set()
    # Visit the linked pages in sorted order so runs are reproducible.
    for linked_page in sorted(_collect_links(index_url, _LISTING_LINK_RE)):
        mp3_urls.update(_collect_links(linked_page, _MP3_LINK_RE))

    for mp3_url in sorted(mp3_urls):
        print(mp3_url)


if __name__ == "__main__":
    main()