Revision: 50947
Initial Code
Initial URL
Initial Description
Initial Title
Initial Tags
Initial Language
at September 9, 2011 19:52 by mail2yogs
Initial Code
#-------------------------------------------------------------------------------
# Name: findduplicates
# Purpose: Find duplicate files, ignoring comment-only and blank lines
#
# Author: ysharma
#
# Created: 09-09-2011
# Copyright: (c) ysharma 2011
# Licence: <your licence>
#-------------------------------------------------------------------------------
#!/usr/bin/env python
import glob
import hashlib
import md5
import re
def get_duplicates(filelist):
    """Group files whose significant lines are byte-for-byte identical.

    Each file is reduced to an MD5 digest of its lines, skipping lines that
    are comments (optional whitespace followed by ``#``) or that contain only
    whitespace.  Files that end up with the same digest are reported together.

    MD5 is used purely as a content fingerprint here, not for security.

    Args:
        filelist: Iterable of file paths to compare.

    Returns:
        A list of lists; each inner list holds the paths (two or more) that
        share one digest.  Files with a unique digest are omitted.  The order
        of the groups follows the iteration order of the internal dictionary.

    Raises:
        IOError / OSError: If one of the files cannot be opened or read.
    """
    # Bytes patterns: files are read in binary mode so that the hash works on
    # raw bytes and never fails on undecodable content.
    exclude = [re.compile(pattern) for pattern in (br'^\s*#', br'^\s*$')]
    dup = {}
    for path in filelist:
        digest = hashlib.md5()
        # ``with`` guarantees the handle is closed even if reading fails;
        # iterating the handle streams lines instead of loading the whole file.
        with open(path, 'rb') as handle:
            for line in handle:
                if any(pattern.search(line) for pattern in exclude):
                    continue
                digest.update(line)
        dup.setdefault(digest.hexdigest(), []).append(path)
    return [paths for paths in dup.values() if len(paths) > 1]
def main(pattern="omx_proj/impl_1/jobs/job_*/*arun.tcl"):
    """Report groups of duplicate files among those matching *pattern*.

    Expands the glob *pattern*, passes the matching paths to
    ``get_duplicates`` and, when at least one group of duplicates is found,
    prints a header line followed by one line per group (the list of paths
    in that group).  Nothing is printed when there are no duplicates.

    Args:
        pattern: Glob pattern selecting the files to compare.  Defaults to
            the job run scripts under ``omx_proj/impl_1/jobs``.
    """
    duplicate_files = get_duplicates(glob.glob(pattern))
    if not duplicate_files:
        return
    # Single-argument print calls produce the same output under the
    # Python 2 print statement and the Python 3 print function.
    print("Following files are duplicate:")
    for files in duplicate_files:
        print(files)
# Run the duplicate scan only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Initial URL
Initial Description
Initial Title
Find duplicate files with regex exclude
Initial Tags
python
Initial Language
Python