/ Published in: Python
data:image/s3,"s3://crabby-images/90b0e/90b0e6a2677ca35d5348c94828b6331437fd25ab" alt=""
Expand |
Embed | Plain Text
Copy this code and paste it in your HTML
# Link extractor: fetch the page whose URL is given as the first command-line
# argument and print every <a href> value that starts with "http" (in any
# letter case) and ends with a dot followed by exactly three characters.
#
# Runs on Python 2 (using sgmllib, as before) and on Python 3, where sgmllib
# no longer exists and a small html.parser-based stand-in is used instead.
import contextlib
import re
import sys
import urllib

try:
    import sgmllib
except ImportError:  # Python 3: sgmllib was removed in 3.0
    sgmllib = None

try:
    from urllib.request import urlopen  # Python 3 location
except ImportError:
    urlopen = urllib.urlopen  # Python 2 location

# Compiled once at import time instead of once per <a> tag.  The pattern text
# is the original one, now spelled as a raw string so "\." is not treated as
# a (deprecated) string escape.  Used with .match(), so it is anchored at the
# start of the value.  NOTE: a bare host such as "http://example.com" also
# matches, because ".com" is a dot followed by three characters.
_LINK_RE = re.compile(r'[Hh][Tt][Tt][Pp].*\....$')


if sgmllib is not None:
    _BaseParser = sgmllib.SGMLParser
else:
    from html.parser import HTMLParser

    class _BaseParser(HTMLParser):
        """Stand-in for sgmllib.SGMLParser's ``start_<tag>`` dispatch."""

        def handle_starttag(self, tag, attrs):
            # Forward to start_<tag>(attrs) when the subclass defines it;
            # tags without a handler are ignored.
            handler = getattr(self, 'start_' + tag, None)
            if handler is not None:
                handler(attrs)


class Parser(_BaseParser):
    """HTML parser that prints the matching ``href`` of each ``<a>`` tag."""

    def start_a(self, attr):
        """Handle an opening ``<a>`` tag.

        ``attr`` is the list of ``(name, value)`` attribute pairs supplied by
        the base parser.  Only the first ``href`` attribute is considered; it
        is printed on its own line when it matches ``_LINK_RE``.
        """
        href = next((value for name, value in attr if name == 'href'), None)
        # A missing href yields None here, and html.parser also reports None
        # for a value-less attribute; both are skipped explicitly instead of
        # relying on a blanket ``except: pass``.
        if href and _LINK_RE.match(href):
            print(href)


def main(argv):
    """Download ``argv[1]``, parse it and print the matching links.

    Any failure (including a missing URL argument) is reported on standard
    output as ``Errore: <message>`` rather than raised.
    """
    try:
        # closing() guarantees the connection is released even if reading
        # fails; the Python 2 urlopen result is not a context manager itself.
        with contextlib.closing(urlopen(argv[1])) as fd:
            data = fd.read()
            if not isinstance(data, str):
                # Python 3 returns bytes, but html.parser needs text.  Use
                # the charset declared in the response headers when present.
                charset = fd.headers.get_content_charset() or 'utf-8'
                data = data.decode(charset, 'replace')
        parser = Parser()
        parser.feed(data)
        parser.close()
    except Exception as error:
        print('Errore: ' + str(error))


if __name__ == '__main__':
    main(sys.argv)
Comments
data:image/s3,"s3://crabby-images/ddb6f/ddb6f8242eb268e53b908f42b5afcd1004e6fcb1" alt="RSS Feed for Comments RSS Icon"