Python - Stampa i links di una pagina HTML


/ Published in: Python
Save to your folder(s)



Copy this code and paste it in your HTML
  1. import re,sgmllib,sys,urllib
  2.  
  3. class Parser(sgmllib.SGMLParser):
  4.  
  5. def start_a(self, attr):
  6.  
  7. regx = re.compile('[Hh][Tt][Tt][Pp].*\....$')
  8. href = [v for a, v in attr if a == 'href']
  9. try:
  10. if regx.match(href[0]): print href[0]
  11. except:
  12. pass
  13.  
  14. if __name__ == '__main__':
  15.  
  16. try:
  17.  
  18. fd = urllib.urlopen(sys.argv[1])
  19.  
  20. parser = Parser()
  21. parser.feed(fd.read())
  22. parser.close()
  23.  
  24. fd.close()
  25.  
  26. except Exception, error:
  27.  
  28. print 'Errore: ' + str(error)

Report this snippet


Comments

RSS Icon Subscribe to comments

You need to login to post a comment.