Find files with identical names recursively (bash4, assoc. arrays)

Revision: 22661

at January 18, 2010 20:38 by tm

Updated Code

# Find files with duplicate names below the current directory and write a
# file ("dfn") of "mv" commands that move every duplicate into a
# subdirectory ("DUPS") under a mangled name (e.g. further files named
# "foo.txt" become foo_1.txt, foo_2.txt and so on; the first one found is
# left where it is). Names without an extension get a bare counter suffix
# ("README" becomes README_1).
# Usage: run the scriptlet, review dfn, create DUPS (mkdir DUPS), source dfn.
# NOTE: Even after sourcing dfn (and thus moving the files) there may be
# duplicate names, since there may already have been a "foo_1.txt" in the
# first place (and so now you have 2). So the scriptlet has to be called
# multiple times (i.e. until dfn is empty).
# Requires bash 4+ (associative arrays) and a find that supports -print0.

# Print one "mv" command per duplicate-named file under "." to stdout.
find_dup_names() {
    local -A seen=()            # "/<basename>" -> duplicates reported so far
    local path name key stem ext n
    # NUL-delimited paths survive any filename, including embedded newlines.
    while IFS= read -r -d '' path; do
        name=${path##*/}
        # A basename never contains "/", so the prefix keeps the key from
        # being a bare "@" or "*", which some bash versions treat specially
        # as an array subscript.
        key=/$name
        if [[ -z ${seen[$key]+set} ]]; then
            seen[$key]=0        # first occurrence: remember it, emit nothing
            continue
        fi
        # Expand the counter's value before doing arithmetic; writing
        # (( seen[$key]++ )) would evaluate the filename as shell code.
        n=$(( ${seen[$key]} + 1 ))
        seen[$key]=$n
        if [[ $name == ?*.* ]]; then
            # Real extension: put the counter in front of the last dot.
            stem=${name%.*}
            ext=.${name##*.}
        else
            # No extension (or only a leading dot): append the counter.
            stem=$name
            ext=
        fi
        # %q shell-quotes each name so that sourcing dfn is safe even for
        # names containing quotes, "$", backticks or whitespace.
        printf 'mv -- %q DUPS/%q # Duplicate Filename: %q (%i)\n' \
               "$path" "${stem}_${n}${ext}" "$name" "$n"
    done < <(find . -type f -print0)
}
find_dup_names >dfn

Revision: 22660

at January 18, 2010 20:37 by tm

Updated Code

# Find files with duplicate names below the current directory and write a
# file ("dfn") of "mv" commands that move every duplicate into a
# subdirectory ("DUPS") under a mangled name (e.g. further files named
# "foo.txt" become foo_1.txt, foo_2.txt and so on; the first one found is
# left where it is). Names without an extension get a bare counter suffix
# ("README" becomes README_1).
# Usage: run the scriptlet, review dfn, create DUPS (mkdir DUPS), source dfn.
# NOTE: Even after sourcing dfn (and thus moving the files) there may be
# duplicate names, since there may already have been a "foo_1.txt" in the
# first place (and so now you have 2). So the scriptlet has to be called
# multiple times (i.e. until dfn is empty).
# Requires bash 4+ (associative arrays) and a find that supports -print0.

# Print one "mv" command per duplicate-named file under "." to stdout.
find_dup_names() {
    local -A seen=()            # "/<basename>" -> duplicates reported so far
    local path name key stem ext n
    # NUL-delimited paths survive any filename, including embedded newlines.
    while IFS= read -r -d '' path; do
        name=${path##*/}
        # A basename never contains "/", so the prefix keeps the key from
        # being a bare "@" or "*", which some bash versions treat specially
        # as an array subscript.
        key=/$name
        if [[ -z ${seen[$key]+set} ]]; then
            seen[$key]=0        # first occurrence: remember it, emit nothing
            continue
        fi
        # Expand the counter's value before doing arithmetic; writing
        # (( seen[$key]++ )) would evaluate the filename as shell code.
        n=$(( ${seen[$key]} + 1 ))
        seen[$key]=$n
        if [[ $name == ?*.* ]]; then
            # Real extension: put the counter in front of the last dot.
            stem=${name%.*}
            ext=.${name##*.}
        else
            # No extension (or only a leading dot): append the counter.
            stem=$name
            ext=
        fi
        # %q shell-quotes each name so that sourcing dfn is safe even for
        # names containing quotes, "$", backticks or whitespace.
        printf 'mv -- %q DUPS/%q # Duplicate Filename: %q (%i)\n' \
               "$path" "${stem}_${n}${ext}" "$name" "$n"
    done < <(find . -type f -print0)
}
find_dup_names >dfn

Revision: 22659

at January 18, 2010 20:35 by tm

Initial Code

# Find files with duplicate names below the current directory and write a
# file ("dfn") of "mv" commands that move every duplicate into a
# subdirectory ("DUPS") under a mangled name (e.g. further files named
# "foo.txt" become foo_1.txt, foo_2.txt and so on; the first one found is
# left where it is). Names without an extension get a bare counter suffix
# ("README" becomes README_1).
# Usage: run the scriptlet, review dfn, create DUPS (mkdir DUPS), source dfn.
# NOTE: Even after sourcing dfn (and thus moving the files) there may be
# duplicate names, since there may already have been a "foo_1.txt" in the
# first place (and so now you have 2). So the scriptlet has to be called
# multiple times (i.e. until dfn is empty).
# Requires bash 4+ (associative arrays) and a find that supports -print0.

# Print one "mv" command per duplicate-named file under "." to stdout.
find_dup_names() {
    local -A seen=()            # "/<basename>" -> duplicates reported so far
    local path name key stem ext n
    # NUL-delimited paths survive any filename, including embedded newlines.
    while IFS= read -r -d '' path; do
        name=${path##*/}
        # A basename never contains "/", so the prefix keeps the key from
        # being a bare "@" or "*", which some bash versions treat specially
        # as an array subscript.
        key=/$name
        if [[ -z ${seen[$key]+set} ]]; then
            seen[$key]=0        # first occurrence: remember it, emit nothing
            continue
        fi
        # Expand the counter's value before doing arithmetic; writing
        # (( seen[$key]++ )) would evaluate the filename as shell code.
        n=$(( ${seen[$key]} + 1 ))
        seen[$key]=$n
        if [[ $name == ?*.* ]]; then
            # Real extension: put the counter in front of the last dot.
            stem=${name%.*}
            ext=.${name##*.}
        else
            # No extension (or only a leading dot): append the counter.
            stem=$name
            ext=
        fi
        # %q shell-quotes each name so that sourcing dfn is safe even for
        # names containing quotes, "$", backticks or whitespace.
        printf 'mv -- %q DUPS/%q # Duplicate Filename: %q (%i)\n' \
               "$path" "${stem}_${n}${ext}" "$name" "$n"
    done < <(find . -type f -print0)
}
find_dup_names >dfn

Initial URL

Initial Description

Initial Title

Find files with identical names recursively (bash4, assoc. arrays)

Initial Tags

Bash, find

Initial Language

Bash

Choose a language for easy browsing: