26 lines
785 B
Python
26 lines
785 B
Python
import os
|
|
import hashlib
|
|
|
|
|
|
def _md5_of_file(path, chunk_size=65536):
    """Return the hexadecimal MD5 digest of the file at *path*."""
    digest = hashlib.md5()
    with open(path, "rb") as file:
        # Read fixed-size chunks so a large file is never held in memory whole.
        for chunk in iter(lambda: file.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()


def find_dup(dir):
    """Delete duplicate files under *dir*, keeping one copy of each content.

    The tree rooted at *dir* is walked recursively and files are grouped by
    the MD5 digest of their bytes. For every group holding more than one
    path, the digest is printed as ``dup: <hash>``, the lexicographically
    smallest path is kept, and every other path in the group is removed.

    Args:
        dir: Root directory to scan. (The name shadows the builtin ``dir``;
            it is kept so existing keyword callers continue to work.)

    Returns:
        None.

    Raises:
        OSError: If a file cannot be read or removed.
    """
    files = {}
    for root, _, filenames in os.walk(dir):
        for f in filenames:
            file_path = os.path.join(root, f)
            # A symlink hashes the same as its target; treating the pair as
            # duplicates could delete the real file and leave a dangling
            # link, so links are never candidates.
            if os.path.islink(file_path):
                continue
            files.setdefault(_md5_of_file(file_path), []).append(file_path)

    for file_hash, file_paths in files.items():
        if len(file_paths) < 2:
            continue
        print("dup:", file_hash)
        # Sorting makes the surviving copy deterministic; the first path is
        # kept so the content is never lost entirely.
        for file_path in sorted(file_paths)[1:]:
            os.remove(file_path)
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run find_dup against the hard-coded "flower" directory.
    _target_dir = "/home/logic/_wsl_workspace/comission-playlist/vinniesniper-54816/task1/_lab/003-crawler-bus/p/flower"
    find_dup(_target_dir)
|