Clayton Kirkwood
2015-08-02 21:44:15 UTC
Hey, been awhile, but I ran into os.walk and it fit what I needed to do for
an issue I've had for a long time: I have tons of pictures in my top
directory of pictures which are duplicated into properly named
subdirectories. Please see issues above my questions with large gaps below.
TIA,
Clayton
#Program to find duplicated pictures in my picture directory tree
#Presumably, if the file exists in a subdirectory I can remove if from the
parent picture directory
#
#Clayton Kirkwood
#01Aug15
import os
from os.path import join, getsize
main_dir = "/users/Clayton/Pictures"
directory_file_list = {}
duplicate_files = 0
top_directory_file_list = 0
for dir_path, directories, files in os.walk(main_dir):
for file in files:
# print( " file = ", file)
# if( ("(\.jpg|\.png|\.avi|\.mp4)$") not in file.lower() ):
# if( (".jpg" or ".png" or ".avi" or ".mp4" ) not in file.lower()
):
#
#why don't these work?, especially the last one. How am I to capture all
camera and video types
#except by the drudgery below. I should be able to just have a list, maybe
from a file, that lists all
#off the types and do something like if master_list not in file.lower()
if( ".jpg" not in file.lower() and
".png" not in file.lower() and
".avi" not in file.lower() and
".mp4" not in file.lower() ):
print( "file ", file, "doesn't contain .jpg or .png or .avi or
.mp4" )
# del files[file]
#
#I get an error on int expected here. If I'm able to access by string, why
wouldn't I be able to
#acess in the del?
directory_file_list[dir_path] = files #this is a list
# print(dir_path, directory_file_list[dir_path])
#print( main_dir )
for directory_path in directory_file_list.keys():
if( directory_path == main_dir ):
top_directory_file_list = directory_file_list[directory_path]
continue
# print( directory_path, ":", directory_file_list[directory_path])
file_list = directory_file_list[directory_path]
# print(file_list)
for file in file_list:
# pass
print( "looking at file ", file, " in top_directory_file_list ",
top_directory_file_list )
if file in top_directory_file_list:
#error: arg of type int not iterable
#yet it works for the for loops
print( "file ", file, " found in both directory_path ",
directory_path, " and ", main_dir)
duplicate_files =+ 1
pass
break
_______________________________________________
Tutor maillist - ***@python.org
To unsubscribe or change subscription options:
https://mail.python.org/mailman/listinfo/tutor
an issue I've had for a long time: I have tons of pictures in my top
directory of pictures which are duplicated into properly named
subdirectories. Please see issues above my questions with large gaps below.
TIA,
Clayton
#Program to find duplicated pictures in my picture directory tree
#Presumably, if the file exists in a subdirectory I can remove if from the
parent picture directory
#
#Clayton Kirkwood
#01Aug15
import os
from os.path import join, getsize
main_dir = "/users/Clayton/Pictures"
directory_file_list = {}
duplicate_files = 0
top_directory_file_list = 0
for dir_path, directories, files in os.walk(main_dir):
for file in files:
# print( " file = ", file)
# if( ("(\.jpg|\.png|\.avi|\.mp4)$") not in file.lower() ):
# if( (".jpg" or ".png" or ".avi" or ".mp4" ) not in file.lower()
):
#
#why don't these work?, especially the last one. How am I to capture all
camera and video types
#except by the drudgery below. I should be able to just have a list, maybe
from a file, that lists all
#off the types and do something like if master_list not in file.lower()
if( ".jpg" not in file.lower() and
".png" not in file.lower() and
".avi" not in file.lower() and
".mp4" not in file.lower() ):
print( "file ", file, "doesn't contain .jpg or .png or .avi or
.mp4" )
# del files[file]
#
#I get an error on int expected here. If I'm able to access by string, why
wouldn't I be able to
#acess in the del?
directory_file_list[dir_path] = files #this is a list
# print(dir_path, directory_file_list[dir_path])
#print( main_dir )
for directory_path in directory_file_list.keys():
if( directory_path == main_dir ):
top_directory_file_list = directory_file_list[directory_path]
continue
# print( directory_path, ":", directory_file_list[directory_path])
file_list = directory_file_list[directory_path]
# print(file_list)
for file in file_list:
# pass
print( "looking at file ", file, " in top_directory_file_list ",
top_directory_file_list )
if file in top_directory_file_list:
#error: arg of type int not iterable
#yet it works for the for loops
print( "file ", file, " found in both directory_path ",
directory_path, " and ", main_dir)
duplicate_files =+ 1
pass
break
_______________________________________________
Tutor maillist - ***@python.org
To unsubscribe or change subscription options:
https://mail.python.org/mailman/listinfo/tutor