Revision: 1835
                            
                                                            
                                    
                                        
Updated Code
                                    
                                    
                                                    
                        at November 9, 2006 23:57 by whitetiger
                            
                            Updated Code
import re,sgmllib,sys,urllib
class Parser(sgmllib.SGMLParser):
    def start_a(self, attr):
        regx = re.compile('[Hh][Tt][Tt][Pp].*\....$')
        href = [v for a, v in attr if a == 'href']
        try:
            if regx.match(href[0]): print href[0]
        except:
            pass
if __name__ == '__main__':
    # Fetch the page whose URL is the first command-line argument and
    # feed it to Parser, which prints the matching links.
    try:
        fd = urllib.urlopen(sys.argv[1])
        try:
            parser = Parser()
            parser.feed(fd.read())
            parser.close()
        finally:
            # Release the connection even when reading or parsing fails.
            fd.close()
    except Exception as error:
        # Top-level boundary: report any failure instead of a traceback.
        print('Errore: ' + str(error))
                                
                            Revision: 1834
                            
                                                            
                                    
                                        
Initial Code
                                    
                                    
                                                            
                                    
                                        
Initial URL
                                    
                                    
                                
                                                            
                                    
                                        
Initial Description
                                    
                                    
                                
                                                            
                                    
                                        
Initial Title
                                    
                                    
                                                            
                                    
                                        
Initial Tags
                                    
                                    
                                                            
                                    
                                        
Initial Language
                                    
                                    
                                                    
                        at November 9, 2006 23:55 by whitetiger
                            
                            Initial Code
import re,sgmllib,sys,urllib
class Parser(sgmllib.SGMLParser):
    def start_a(self, attr):
        regx = re.compile('[Hh][Tt][Tt][Pp].*\....$')
        href = [v for a, v in attr if a == 'href']
        try:
            if regx.match(href[0]): print href[0]
        except:
            pass
if __name__ == '__main__':
    # Fetch the page whose URL is the first command-line argument and
    # feed it to Parser, which prints the matching links.
    try:
        fd = urllib.urlopen(sys.argv[1])
        try:
            parser = Parser()
            parser.feed(fd.read())
            parser.close()
        finally:
            # Release the connection even when reading or parsing fails.
            fd.close()
    except Exception as error:
        # Top-level boundary: report any failure instead of a traceback.
        print('Errore: ' + str(error))
                                Initial URL
Initial Description
Initial Title
Python - Stampa i links di una pagina HTML
Initial Tags
regex, python
Initial Language
Python