"""Collect crawled JPEG images into per-category folders, then clean them up.

For each selected category the script:

1. copies every ``*.jpg`` found under ``<base_dir>/<category>`` into a flat
   ``output_<category>`` directory (relative to the current working
   directory), renaming the copies to sequential ``c_XXXXXXXXXX.jpg`` names;
2. deletes copies that are not recognised as images or are not complete
   JPEG streams;
3. shrinks, in place, every remaining image whose longer side exceeds the
   size limit.
"""

import hashlib
import imghdr  # NOTE: deprecated since Python 3.11 and removed in 3.13 (PEP 594).
import os
import shutil
import sys
from multiprocessing import Pool

from PIL import Image

base_dir = (
    "/home/logic/_workspace/task-list/servers/logic-NUC8i5BEH/opencv-workdesk/001_monitor/src/003-crawler-mountain/p"
)

# Markers that open and close a JPEG byte stream.
_JPEG_START = b"\xff\xd8"
_JPEG_END = b"\xff\xd9"


def check_same_image(file_path):
    """Return the SHA-256 hex digest of the file at *file_path*.

    Despite its name this function compares nothing; it only produces a
    content hash. It is not called anywhere in this file.
    """
    sha256_hash = hashlib.sha256()
    with open(file_path, "rb") as f:
        # Hash in 4 KiB chunks so large files are never fully held in memory.
        for byte_block in iter(lambda: f.read(4096), b""):
            sha256_hash.update(byte_block)
    return sha256_hash.hexdigest()


def _is_complete_jpeg(file_path):
    """Return True if the file starts with FF D8 and ends with FF D9."""
    with open(file_path, "rb") as f:
        if f.read(2) != _JPEG_START:
            return False
        # The head check guarantees at least two bytes, so seeking two bytes
        # back from the end cannot move before the start of the file.
        f.seek(-2, os.SEEK_END)
        return f.read(2) == _JPEG_END


def p_check_same_image(in_path):
    """Delete every file under *in_path* that is not a complete JPEG.

    A file is removed when ``imghdr`` does not recognise it as an image
    (this case is reported on stdout), or when it is recognised but its
    byte stream does not start with FF D8 and end with FF D9 (removed
    silently).

    Despite the name, no duplicate detection is performed here.
    """
    for root, _, files in os.walk(in_path):
        for file in files:
            file_path = os.path.join(root, file)
            if imghdr.what(file_path) is None:
                print(file_path, "is not image")
                os.remove(file_path)
                continue
            # Only the first and last two bytes are inspected; the file
            # handle is closed again before the file is removed.
            if not _is_complete_jpeg(file_path):
                os.remove(file_path)


def resize_image(file_path, max_size=1920):
    """Shrink the image at *file_path* in place so neither side exceeds *max_size*.

    Zero-byte files are deleted instead. Images that already fit are left
    untouched. The aspect ratio is preserved, and the result overwrites the
    original file.

    Args:
        file_path: Path of the image file to process.
        max_size: Maximum allowed width and height, in pixels.
    """
    if os.path.getsize(file_path) == 0:
        os.remove(file_path)
        print(f"Deleted empty file: {file_path}")
        return

    with Image.open(file_path) as img:
        if img.width <= max_size and img.height <= max_size:
            return

        # The longer side is the one exceeding the limit, so it becomes
        # exactly max_size and the other side is scaled by the aspect ratio.
        aspect_ratio = img.width / img.height
        if img.width > img.height:
            new_width = max_size
            new_height = int(new_width / aspect_ratio)
        else:
            new_height = max_size
            new_width = int(new_height * aspect_ratio)

        # Extremely elongated images could truncate to a zero-pixel side,
        # which would make the resize fail; keep at least one pixel.
        new_size = (max(1, new_width), max(1, new_height))

        resized_img = img.resize(new_size)
        resized_img.save(file_path)
        print("resize done " + file_path)


def resize_image_worker(file_path):
    """Run :func:`resize_image` on *file_path*, reporting errors instead of raising.

    Deliberately best-effort: a failure on one file is printed (path first,
    then the error) and must not abort processing of the remaining files.
    """
    try:
        resize_image(file_path)
    except Exception as e:
        print(file_path)
        print(e)


def p_resize_image(in_path):
    """Resize every file found under *in_path*, one after another."""
    for root, _, files in os.walk(in_path):
        for file in files:
            resize_image_worker(os.path.join(root, file))


def merge_to_output(input_folder, output_folder):
    """Copy every ``*.jpg`` under *input_folder* into *output_folder*.

    The directory tree is flattened: copies are named ``c_0000000001.jpg``,
    ``c_0000000002.jpg``, ... in the order ``os.walk`` yields the files.
    *output_folder* is created if it does not exist yet.
    """
    # exist_ok avoids the check-then-create race and also handles nested paths.
    os.makedirs(output_folder, exist_ok=True)

    count = 0
    for root, _, filenames in os.walk(input_folder):
        for fn in filenames:
            if not fn.endswith(".jpg"):
                continue
            count += 1
            shutil.copy(
                os.path.join(root, fn),
                os.path.join(output_folder, "c_{:010d}.jpg".format(count)),
            )


def process_dir(input_folder, output_dir):
    """Rebuild *output_dir* from *input_folder* and clean up the result.

    Existing ``*.jpg`` files in *output_dir* are removed first so stale
    copies from an earlier run cannot survive. Fresh copies are then merged
    in, validated, and resized.
    """
    os.makedirs(output_dir, exist_ok=True)

    for f in os.listdir(output_dir):
        if f.endswith(".jpg"):
            os.remove(os.path.join(output_dir, f))

    merge_to_output(input_folder, output_dir)
    p_check_same_image(output_dir)
    p_resize_image(output_dir)


def process_dir_mp(args):
    """Pool-friendly wrapper: unpack ``(input_folder, output_dir)`` and process it."""
    print("process_dir", *args)
    process_dir(*args)


if __name__ == "__main__":
    # Categories to process; uncomment an entry to include it in the run.
    target_dirs = [
        # "building",
        # "african",
        # "beach",
        # "bus",
        # "dinosaur",
        # "dish",
        "elephant",
        # "horse",
        # "flower",
        # "mountain",
    ]

    with Pool(processes=2) as p:
        p.map(
            process_dir_mp,
            [(f"{base_dir}/{t_dir}", f"output_{t_dir}") for t_dir in target_dirs],
        )
    print("done")