/ Published in: Python
                    
                                        
This script downloads all mp3 links from given RSS/Atom feeds.
I wrote this to learn Python, so I'm sure there are better ways to do this.
Depends on wget.
                            
                                Expand |
                                Embed | Plain Text
                            
                        
                        Copy this code and paste it in your HTML
#!/usr/bin/python
"""
@name Music Downloader
@description This script downloads all mp3 links from given rss/atom feeds.
I wrote this to learn python so I'm sure there are better ways to do this.
It works in Mac OSX Leopard WITH wget.
__Add your feed's uris at the bottom__
@author [email protected]
------------------------------------------------------------------------------
Usage: $ python downloader.py [-t]
Options:
-t Shows the links retrieved from all feeds. No download takes place.
TODO:
- logging
- file size restriction support
- multithread support
- links as swf player param
"""
import os
import re
import sqlite3
import subprocess
import sys
import urllib
import urlparse

import feedparser
class Downloader:
def __init__(self, feeds):
if type(feeds) is not list:
raise Exception('A list of blogs is needed')
self.userAgent = 'Mozilla/5.0 (X11; U; Linux i686; en-US;rv:1.8.1.3) Gecko/20061201 Firefox/2.0.0.3 (Ubuntu-feisty)'
self.conn = sqlite3.connect('downloader.db')
self.downloadDir = os.getcwd()
c = self.conn.cursor()
sql = "SELECT name FROM sqlite_master WHERE type='table' AND name='downloaded'"
if c.execute(sql).fetchone() == None:
sql = 'CREATE TABLE downloaded(id INTEGER PRIMARY KEY, remoteFile TEXT, datetime DATE_TIME)'
c.execute(sql)
self.conn.commit()
self.links = []
for feed in feeds:
self.feedData = feedparser.parse(feed)
self.addLinks(feed)
try:
if sys.argv[1] == '-t':
for i in self.links:
print i
except IndexError, e:
print '%i tracks will be downloaded' % len(self.links)
self.downloadFiles()
def addLinks(self, feed = ''):
re_atom = re.compile('^atom.*')
re_rss = re.compile('^rss.*')
print 'Adding links in ' + self.feedData.version + ' feed in ' + feed
if re_atom.search(self.feedData.version):
self.__addAtomLinks()
elif re_rss.search(self.feedData.version):
self.__addRssLinks()
def __addAtomLinks(self):
re_links = re.compile('<a[^>]+href="(http:[^"]+)"[^>]*>')
re_mp3 = re.compile('mp3$')
html = ''
for entry in self.feedData.entries:
""" atom03 """
html += entry.content[0]['value']
links = re_links.findall(html)
for link in links:
if re_mp3.search(link.lower()):
self.links.append(link)
def __addRssLinks(self):
for entry in self.feedData.entries:
if hasattr(entry, 'enclosures'):
for link in entry.enclosures:
if link.type == 'audio/mpeg':
self.links.append(link.href)
def isDownloaded(self, remoteFile):
c = self.conn.cursor()
sql = "SELECT id FROM downloaded WHERE remoteFile = ?"
return c.execute(sql, (remoteFile, )).fetchone() != None
def downloadFiles(self):
i = 0
c = self.conn.cursor()
for link in self.links:
#if i > 5 : break
remoteFile = urlparse.urlsplit(link)[2].split('/')[-1]
localFileName = urllib.unquote(remoteFile)
localFile = self.downloadDir + "/" + localFileName
if self.isDownloaded(remoteFile) == False:
if os.path.exists(localFile):
self.skippingMessage(localFileName)
continue;
print 'Downloading ' + localFileName + ' ... '
os.system('wget --tries=2 --timeout=10 --user-agent="%s" "%s"' % (self.userAgent, link))
sql = 'INSERT INTO downloaded (remoteFile, datetime) VALUES(?, DATETIME("NOW"))'
c.execute(sql, (remoteFile, ))
self.conn.commit()
i += 1
print '------------------------[ O K %i ]--------------------------' % i
else:
self.skippingMessage(localFileName)
def skippingMessage(self, remoteFile):
print 'File ' + remoteFile + ' alredy exists'
print '------------------------[ S K I P ]--------------------------'
if __name__ == '__main__':
    # Feed URLs to scan for mp3 links -- add your own here.
    blog_feeds = [
        'http://oldbluebus.blogspot.com/atom.xml',
        'http://ravensingstheblues.blogspot.com/atom.xml',
    ]
    Downloader(blog_feeds)
Comments
Subscribe to comments