Posted By

panquetofobia on 07/29/08


Tagged

rss python mp3 feeds atom wget SQLite downloader


Versions (?)

Who likes this?

2 people have marked this snippet as a favorite

panquetofobia
gartenstuhl


Mp3 downloader from feeds


 / Published in: Python
 

This script downloads every MP3 file linked from the given RSS/Atom feeds. I wrote it to learn Python, so I'm sure there are better ways to do this. It depends on wget.

  1. #!/usr/bin/python
  2. """
  3. @name Music Downloader
  4. @description This script downloads every MP3 file linked from the given RSS/Atom feeds.
  5. I wrote this to learn Python, so I'm sure there are better ways to do this.
  6. It works on Mac OS X Leopard and requires wget.
  7. __Add your feeds' URIs at the bottom__
  8.  
  9. @author [email protected]
  10.  
  11.  
  12.  
  13. ------------------------------------------------------------------------------
  14.  
  15. Usage: $ python downloader.py [-t]
  16. Options:
  17. -t Shows the links retrieved from all feeds. No download takes place.
  18.  
  19. TODO:
  20. - logging
  21. - file size restriction support
  22. - multithread support
  23. - links as swf player param
  24. """
  25. import sqlite3
  26. import feedparser
  27. import urllib
  28. import urlparse
  29. import os
  30. import sys
  31. import re
  32.  
  33. class Downloader:
  34.  
  35. def __init__(self, feeds):
  36. if type(feeds) is not list:
  37. raise Exception('A list of blogs is needed')
  38. self.userAgent = 'Mozilla/5.0 (X11; U; Linux i686; en-US;rv:1.8.1.3) Gecko/20061201 Firefox/2.0.0.3 (Ubuntu-feisty)'
  39. self.conn = sqlite3.connect('downloader.db')
  40. self.downloadDir = os.getcwd()
  41. c = self.conn.cursor()
  42. sql = "SELECT name FROM sqlite_master WHERE type='table' AND name='downloaded'"
  43. if c.execute(sql).fetchone() == None:
  44. sql = 'CREATE TABLE downloaded(id INTEGER PRIMARY KEY, remoteFile TEXT, datetime DATE_TIME)'
  45. c.execute(sql)
  46. self.conn.commit()
  47. self.links = []
  48. for feed in feeds:
  49. self.feedData = feedparser.parse(feed)
  50. self.addLinks(feed)
  51. try:
  52. if sys.argv[1] == '-t':
  53. for i in self.links:
  54. print i
  55. except IndexError, e:
  56. print '%i tracks will be downloaded' % len(self.links)
  57. print
  58. self.downloadFiles()
  59.  
  60. def addLinks(self, feed = ''):
  61. re_atom = re.compile('^atom.*')
  62. re_rss = re.compile('^rss.*')
  63. print 'Adding links in ' + self.feedData.version + ' feed in ' + feed
  64. if re_atom.search(self.feedData.version):
  65. self.__addAtomLinks()
  66. elif re_rss.search(self.feedData.version):
  67. self.__addRssLinks()
  68.  
  69. def __addAtomLinks(self):
  70. re_links = re.compile('<a[^>]+href="(http:[^"]+)"[^>]*>')
  71. re_mp3 = re.compile('mp3$')
  72. html = ''
  73. for entry in self.feedData.entries:
  74. """ atom03 """
  75. html += entry.content[0]['value']
  76. links = re_links.findall(html)
  77. for link in links:
  78. if re_mp3.search(link.lower()):
  79. self.links.append(link)
  80.  
  81. def __addRssLinks(self):
  82. for entry in self.feedData.entries:
  83. if hasattr(entry, 'enclosures'):
  84. for link in entry.enclosures:
  85. if link.type == 'audio/mpeg':
  86. self.links.append(link.href)
  87.  
  88. def isDownloaded(self, remoteFile):
  89. c = self.conn.cursor()
  90. sql = "SELECT id FROM downloaded WHERE remoteFile = ?"
  91. return c.execute(sql, (remoteFile, )).fetchone() != None
  92.  
  93.  
  94. def downloadFiles(self):
  95. i = 0
  96. c = self.conn.cursor()
  97. for link in self.links:
  98. #if i > 5 : break
  99. remoteFile = urlparse.urlsplit(link)[2].split('/')[-1]
  100. localFileName = urllib.unquote(remoteFile)
  101. localFile = self.downloadDir + "/" + localFileName
  102. if self.isDownloaded(remoteFile) == False:
  103. if os.path.exists(localFile):
  104. self.skippingMessage(localFileName)
  105. continue;
  106. print
  107. print 'Downloading ' + localFileName + ' ... '
  108. os.system('wget --tries=2 --timeout=10 --user-agent="%s" "%s"' % (self.userAgent, link))
  109. sql = 'INSERT INTO downloaded (remoteFile, datetime) VALUES(?, DATETIME("NOW"))'
  110. c.execute(sql, (remoteFile, ))
  111. self.conn.commit()
  112. i += 1
  113. print '------------------------[ O K %i ]--------------------------' % i
  114. else:
  115. self.skippingMessage(localFileName)
  116.  
  117. def skippingMessage(self, remoteFile):
  118. print 'File ' + remoteFile + ' alredy exists'
  119. print '------------------------[ S K I P ]--------------------------'
  120.  
  121. if __name__ == '__main__':
  122.  
  123. feeds = [
  124. 'http://oldbluebus.blogspot.com/atom.xml',
  125. 'http://ravensingstheblues.blogspot.com/atom.xml'
  126. ]
  127.  
  128. Downloader(feeds)

Report this snippet  

You need to login to post a comment.