Posted By

panquetofobia on 07/29/08


Tagged

rss python mp3 feeds atom wget SQLite downloader


Versions (?)

Who likes this?

2 people have marked this snippet as a favorite

panquetofobia
gartenstuhl


Mp3 downloader from feeds


 / Published in: Python
 

This script downloads all MP3 files linked from the given RSS/Atom feeds. I wrote this to learn Python, so I'm sure there are better ways to do this. Depends on wget.

  1. #!/usr/bin/python
  2. """
  3. @name Music Downloader
  4. @description This script downloads all MP3 files linked from the given RSS/Atom feeds.
  5. I wrote this to learn Python, so I'm sure there are better ways to do this.
  6. It works on Mac OS X Leopard and requires wget.
  7. __Add your feeds' URIs at the bottom__
  8.  
  9.  
  10.  
  11.  
  12. ------------------------------------------------------------------------------
  13.  
  14. Usage: $ python downloader.py [-t]
  15. Options:
  16. -t Shows the links retrieved from all feeds. No download takes place.
  17.  
  18. TODO:
  19. - logging
  20. - file size restriction support
  21. - multithread support
  22. - links as swf player param
  23. """
  24. import sqlite3
  25. import feedparser
  26. import urllib
  27. import urlparse
  28. import os
  29. import sys
  30. import re
  31.  
  32. class Downloader:
  33.  
  34. def __init__(self, feeds):
  35. if type(feeds) is not list:
  36. raise Exception('A list of blogs is needed')
  37. self.userAgent = 'Mozilla/5.0 (X11; U; Linux i686; en-US;rv:1.8.1.3) Gecko/20061201 Firefox/2.0.0.3 (Ubuntu-feisty)'
  38. self.conn = sqlite3.connect('downloader.db')
  39. self.downloadDir = os.getcwd()
  40. c = self.conn.cursor()
  41. sql = "SELECT name FROM sqlite_master WHERE type='table' AND name='downloaded'"
  42. if c.execute(sql).fetchone() == None:
  43. sql = 'CREATE TABLE downloaded(id INTEGER PRIMARY KEY, remoteFile TEXT, datetime DATE_TIME)'
  44. c.execute(sql)
  45. self.conn.commit()
  46. self.links = []
  47. for feed in feeds:
  48. self.feedData = feedparser.parse(feed)
  49. self.addLinks(feed)
  50. try:
  51. if sys.argv[1] == '-t':
  52. for i in self.links:
  53. print i
  54. except IndexError, e:
  55. print '%i tracks will be downloaded' % len(self.links)
  56. print
  57. self.downloadFiles()
  58.  
  59. def addLinks(self, feed = ''):
  60. re_atom = re.compile('^atom.*')
  61. re_rss = re.compile('^rss.*')
  62. print 'Adding links in ' + self.feedData.version + ' feed in ' + feed
  63. if re_atom.search(self.feedData.version):
  64. self.__addAtomLinks()
  65. elif re_rss.search(self.feedData.version):
  66. self.__addRssLinks()
  67.  
  68. def __addAtomLinks(self):
  69. re_links = re.compile('<a[^>]+href="(http:[^"]+)"[^>]*>')
  70. re_mp3 = re.compile('mp3$')
  71. html = ''
  72. for entry in self.feedData.entries:
  73. """ atom03 """
  74. html += entry.content[0]['value']
  75. links = re_links.findall(html)
  76. for link in links:
  77. if re_mp3.search(link.lower()):
  78. self.links.append(link)
  79.  
  80. def __addRssLinks(self):
  81. for entry in self.feedData.entries:
  82. if hasattr(entry, 'enclosures'):
  83. for link in entry.enclosures:
  84. if link.type == 'audio/mpeg':
  85. self.links.append(link.href)
  86.  
  87. def isDownloaded(self, remoteFile):
  88. c = self.conn.cursor()
  89. sql = "SELECT id FROM downloaded WHERE remoteFile = ?"
  90. return c.execute(sql, (remoteFile, )).fetchone() != None
  91.  
  92.  
  93. def downloadFiles(self):
  94. i = 0
  95. c = self.conn.cursor()
  96. for link in self.links:
  97. #if i > 5 : break
  98. remoteFile = urlparse.urlsplit(link)[2].split('/')[-1]
  99. localFileName = urllib.unquote(remoteFile)
  100. localFile = self.downloadDir + "/" + localFileName
  101. if self.isDownloaded(remoteFile) == False:
  102. if os.path.exists(localFile):
  103. self.skippingMessage(localFileName)
  104. continue;
  105. print
  106. print 'Downloading ' + localFileName + ' ... '
  107. os.system('wget --tries=2 --timeout=10 --user-agent="%s" "%s"' % (self.userAgent, link))
  108. sql = 'INSERT INTO downloaded (remoteFile, datetime) VALUES(?, DATETIME("NOW"))'
  109. c.execute(sql, (remoteFile, ))
  110. self.conn.commit()
  111. i += 1
  112. print '------------------------[ O K %i ]--------------------------' % i
  113. else:
  114. self.skippingMessage(localFileName)
  115.  
  116. def skippingMessage(self, remoteFile):
  117. print 'File ' + remoteFile + ' alredy exists'
  118. print '------------------------[ S K I P ]--------------------------'
  119.  
  120. if __name__ == '__main__':
  121.  
  122. feeds = [
  123. 'http://oldbluebus.blogspot.com/atom.xml',
  124. 'http://ravensingstheblues.blogspot.com/atom.xml'
  125. ]
  126.  
  127. Downloader(feeds)

Report this snippet  

You need to login to post a comment.