import hashlib
import os
import sys
from collections import defaultdict

# Directory scanned when the script is run without a command-line argument.
_DEFAULT_DIR = "/home/logic/_wsl_workspace/comission-playlist/vinniesniper-54816/task1/_lab/003-crawler-bus/p/flower"

# Files are hashed in pieces of this size so large files never have to fit
# in memory at once.
_CHUNK_SIZE = 1 << 20


def _hash_file(path, chunk_size=_CHUNK_SIZE):
    """Return the hexadecimal MD5 digest of the file at *path*."""
    digest = hashlib.md5()
    with open(path, "rb") as stream:
        for chunk in iter(lambda: stream.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()


def find_dup(dir):
    """Delete redundant copies of identical files found under *dir*.

    The tree rooted at *dir* is walked recursively and every regular file is
    grouped by the MD5 digest of its content. For each group holding more
    than one file, ``dup: <digest>`` is printed, the first file encountered
    is kept, and every other file of the group is removed.

    Directories and file names are visited in sorted order, top-down, so the
    surviving copy is deterministic: the one that comes first in that order.

    Args:
        dir: Path of the directory to scan. (The name shadows the builtin
            ``dir``; it is kept so keyword callers continue to work.)

    Raises:
        OSError: If a redundant copy cannot be removed.
    """
    paths_by_hash = defaultdict(list)

    for root, dirnames, filenames in os.walk(dir):
        # Sorting in place makes the traversal (and therefore the choice of
        # which copy survives) reproducible across runs and file systems.
        dirnames.sort()
        for name in sorted(filenames):
            file_path = os.path.join(root, name)

            # A symlink hashes like its target; treating it as a duplicate
            # could delete the real file and leave a dangling link behind.
            if os.path.islink(file_path):
                continue

            try:
                file_hash = _hash_file(file_path)
            except OSError as err:
                # One unreadable file should not abort the whole scan.
                print("skip:", file_path, err, file=sys.stderr)
                continue

            paths_by_hash[file_hash].append(file_path)

    # NOTE: files are considered identical purely on their MD5 digest.
    for file_hash, file_paths in paths_by_hash.items():
        if len(file_paths) < 2:
            continue
        print("dup:", file_hash)
        # Keep the first copy; only the extra ones are removed. Removing
        # every path would destroy the content entirely.
        for redundant_path in file_paths[1:]:
            os.remove(redundant_path)


if __name__ == "__main__":
    # An optional first argument overrides the default directory.
    find_dup(sys.argv[1] if len(sys.argv) > 1 else _DEFAULT_DIR)