Return to Snippet

Revision: 1837
at November 9, 2006 23:57 by whitetiger


Updated Code
import os,re,sys

# Usage: python script.py file.html
#
# Prints every http(s) URL ending in ".zip" that appears inside an
# <a ... href=...> tag of the given HTML file, then prints 'FATTO'.

# Deliberately loose, line-based match for an anchor tag with an href
# attribute. It is greedy, so it spans from the first "<a" to the last ">"
# on the line.
ANCHOR_RE = re.compile('[<].?[Aa].*[Hh][Rr][Ee][Ff].*=.*[\"\']?.*[\"\']?.?[>]')

# An http/https URL ending in ".zip". The character class stops the match
# at quotes, angle brackets and whitespace so that one URL cannot swallow
# the rest of the tag (or a following link on the same line).
ZIP_URL_RE = re.compile(r'https?:[^\s"\'<>]*\.zip', re.IGNORECASE)


def find_zip_links(lines):
    """Yield the .zip URLs found inside anchor tags of *lines*.

    lines: any iterable of text lines (an open file works).
    Lines without an anchor/href match are skipped. All matching URLs on a
    line are yielded, in order of appearance.
    """
    for line in lines:
        anchor = ANCHOR_RE.search(line)
        if anchor is None:
            continue
        for url in ZIP_URL_RE.findall(anchor.group(0)):
            yield url


def main(argv):
    """Print the .zip links of the HTML file named in argv[1]."""
    if len(argv) < 2:
        sys.exit('usage: python script.py file.html')

    # Iterate the file directly instead of counting down its byte size:
    # the size reported by the OS may not equal the sum of the line lengths
    # (newline translation, decoding), which would make a size-driven loop
    # spin forever at end of file. 'with' guarantees the file is closed.
    with open(argv[1], 'r') as f:
        for url in find_zip_links(f):
            print(url)

    print('FATTO')


if __name__ == '__main__':
    main(sys.argv)

Revision: 1836
at November 9, 2006 23:55 by whitetiger


Initial Code
import os,re,sys

# Usage: python script.py file.html
#
# Prints every http(s) URL ending in ".zip" that appears inside an
# <a ... href=...> tag of the given HTML file, then prints 'FATTO'.

# Deliberately loose, line-based match for an anchor tag with an href
# attribute. It is greedy, so it spans from the first "<a" to the last ">"
# on the line.
ANCHOR_RE = re.compile('[<].?[Aa].*[Hh][Rr][Ee][Ff].*=.*[\"\']?.*[\"\']?.?[>]')

# An http/https URL ending in ".zip". The character class stops the match
# at quotes, angle brackets and whitespace so that one URL cannot swallow
# the rest of the tag (or a following link on the same line).
ZIP_URL_RE = re.compile(r'https?:[^\s"\'<>]*\.zip', re.IGNORECASE)


def find_zip_links(lines):
    """Yield the .zip URLs found inside anchor tags of *lines*.

    lines: any iterable of text lines (an open file works).
    Lines without an anchor/href match are skipped. All matching URLs on a
    line are yielded, in order of appearance.
    """
    for line in lines:
        anchor = ANCHOR_RE.search(line)
        if anchor is None:
            continue
        for url in ZIP_URL_RE.findall(anchor.group(0)):
            yield url


def main(argv):
    """Print the .zip links of the HTML file named in argv[1]."""
    if len(argv) < 2:
        sys.exit('usage: python script.py file.html')

    # Iterate the file directly instead of counting down its byte size:
    # the size reported by the OS may not equal the sum of the line lengths
    # (newline translation, decoding), which would make a size-driven loop
    # spin forever at end of file. 'with' guarantees the file is closed.
    with open(argv[1], 'r') as f:
        for url in find_zip_links(f):
            print(url)

    print('FATTO')


if __name__ == '__main__':
    main(sys.argv)


Initial URL


Initial Description


Initial Title
Python - Cattura tutti i links <a href=

Initial Tags
regex, python

Initial Language
Python