31 lines
1.0 KiB
Python
31 lines
1.0 KiB
Python
# Iterate over the files in the subdirectories of `00_store`, record each image file's hash in a hash table, and delete any file whose hash has already been seen.
|
|
|
|
import os
|
|
import hashlib
|
|
|
|
|
|
def check_same_image(file_path, chunk_size: int = 8192) -> str:
    """Return the SHA-256 hex digest of the file at *file_path*.

    Despite its name, this function does not compare anything: it only
    fingerprints one file. Callers decide whether two files are the same
    by comparing the digests returned for each of them.

    The file is read in binary mode, *chunk_size* bytes at a time, so the
    whole file is never held in memory at once.

    Args:
        file_path: Path of the file to hash (anything ``open`` accepts).
        chunk_size: Number of bytes to read per iteration; must be >= 1.

    Returns:
        The digest as a 64-character lowercase hexadecimal string.

    Raises:
        ValueError: If *chunk_size* is smaller than 1.
        OSError: If the file cannot be opened or read.
    """
    # read(0) returns b"" immediately, which would end the loop at once and
    # silently yield the digest of an empty file; reject such sizes up front.
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")

    digest = hashlib.sha256()
    with open(file_path, "rb") as stream:
        # iter() with a sentinel keeps calling read() until it returns b"" (EOF).
        for chunk in iter(lambda: stream.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()
|
|
|
|
|
|
def action(in_path):
    """Delete duplicate files under *in_path*, keeping the first copy seen.

    The directory tree rooted at *in_path* is walked with ``os.walk``. Every
    file found (not only images; no extension filter is applied) is hashed
    with ``check_same_image``. The first file seen for a given digest is
    kept; every later file with the same digest is removed from disk and
    reported on stdout as ``<path> found``.

    Directories and file names are visited in sorted order, so which copy
    survives is deterministic rather than dependent on the order in which
    the filesystem happens to list entries.

    A file that cannot be hashed or removed (``OSError``) is reported on
    stdout and skipped, so one bad file does not abort the whole run.

    Args:
        in_path: Root directory to de-duplicate. Each call uses its own
            digest table, so duplicates are detected only within this tree.

    Returns:
        A list of the paths that were removed, in removal order.
    """
    first_seen = {}  # digest -> path of the copy that is kept
    removed = []

    for root, dirs, files in os.walk(in_path):
        # Sorting dirs in place steers os.walk's (top-down) traversal order.
        dirs.sort()
        for name in sorted(files):
            file_path = os.path.join(root, name)

            try:
                digest = check_same_image(file_path)
            except OSError as exc:
                print(file_path, "skipped:", exc)
                continue

            if digest not in first_seen:
                first_seen[digest] = file_path
                continue

            try:
                os.remove(file_path)
            except OSError as exc:
                print(file_path, "not removed:", exc)
                continue

            removed.append(file_path)
            print(file_path, "found")

    return removed
|
|
|
|
|
|
# Script driver: de-duplicate each crawler output directory in turn.
# Every directory is processed by its own action() call, so duplicates are
# only detected within a single directory tree, never across the two.
for _target_dir in (
    "/home/logic/_workspace/task-list/servers/logic-NUC8i5BEH/opencv-workdesk/001_monitor/src/003-crawler-mountain/output",
    "/home/logic/_workspace/task-list/servers/logic-NUC8i5BEH/opencv-workdesk/001_monitor/src/003-crawler-mountain/output_mountain",
):
    action(_target_dir)
|