update,
This commit is contained in:
140
vinniesniper-54816/task1/_tools/crawer/merge.py
Normal file
140
vinniesniper-54816/task1/_tools/crawer/merge.py
Normal file
@@ -0,0 +1,140 @@
|
||||
import os
|
||||
import shutil
|
||||
import sys
|
||||
from multiprocessing import Pool
|
||||
import imghdr
|
||||
import hashlib
|
||||
from PIL import Image
|
||||
|
||||
# Root folder of the input images; the entry point appends one sub-directory
# name per category to this path.
base_dir = "/home/logic/_workspace/task-list/servers/logic-NUC8i5BEH/opencv-workdesk/001_monitor/src/003-crawler-mountain/p"
|
||||
|
||||
|
||||
def check_same_image(file_path):
    """Return the SHA-256 hex digest of the file at ``file_path``.

    The file is consumed in 4096-byte chunks, so it is never loaded into
    memory as a whole.
    """
    digest = hashlib.sha256()
    with open(file_path, "rb") as stream:
        while True:
            chunk = stream.read(4096)
            if not chunk:
                break
            digest.update(chunk)
    return digest.hexdigest()
|
||||
|
||||
|
||||
def p_check_same_image(in_path):
    """Delete every file under ``in_path`` that is not a complete JPEG.

    Despite its name, this function does not compare images with each
    other. It walks ``in_path`` recursively and keeps a file only when:

    * ``imghdr.what`` recognises it as some image type (otherwise the path
      is printed with "is not image" and the file is removed), and
    * its first two bytes are FF D8 and its last two bytes are FF D9
      (otherwise the file is removed without a message).

    Args:
        in_path: Directory to scan. Files are deleted in place.
    """
    for root, _, files in os.walk(in_path):
        for name in files:
            file_path = os.path.join(root, name)

            if imghdr.what(file_path) is None:
                print(file_path, "is not image")
                os.remove(file_path)
                continue

            # Only the leading and trailing two bytes are inspected, so read
            # just those instead of loading the whole image into memory.
            with open(file_path, "rb") as f:
                head = f.read(2)
                size = f.seek(0, os.SEEK_END)
                f.seek(max(size - 2, 0))
                tail = f.read(2)

            if head != b"\xff\xd8" or tail != b"\xff\xd9":
                # Another image format, or the FF D9 end bytes are missing.
                os.remove(file_path)
|
||||
|
||||
|
||||
def resize_image(file_path, max_size=1920):
    """Shrink the image at ``file_path`` in place so no side exceeds ``max_size``.

    A zero-byte file is deleted instead of being opened. An image whose
    width and height are both within ``max_size`` is left untouched.
    Otherwise the longer side is set to ``max_size``, the shorter side is
    scaled by the same factor, and the result overwrites ``file_path``.

    Args:
        file_path: Path of the image file to process.
        max_size: Largest allowed width or height in pixels.

    Returns:
        None.
    """
    if os.path.getsize(file_path) == 0:
        os.remove(file_path)
        print(f"Deleted empty file: {file_path}")
        return

    with Image.open(file_path) as img:
        # Nothing to do when the image already fits.
        if img.width <= max_size and img.height <= max_size:
            return

        # At this point the longer side is guaranteed to exceed max_size, so
        # it is pinned to max_size and the other side follows the ratio.
        aspect_ratio = img.width / img.height
        if img.width > img.height:
            new_width = max_size
            # Clamp to 1: int() truncates to 0 for very wide images, and a
            # zero-sized target cannot be resized to.
            new_height = max(1, int(new_width / aspect_ratio))
        else:
            new_height = max_size
            # Same clamp for very tall images.
            new_width = max(1, int(new_height * aspect_ratio))

        resized_img = img.resize((new_width, new_height))

        # Overwrite the original file with the resized version.
        resized_img.save(file_path)
        print("resize done " + file_path)
|
||||
|
||||
|
||||
def resize_image_worker(file_path):
    """Run ``resize_image`` on ``file_path`` without propagating failures.

    If resizing raises, the path and then the exception are printed and the
    error is discarded, so a caller looping over many files keeps going.
    """
    try:
        resize_image(file_path)
    except Exception as error:
        print(file_path)
        print(error)
|
||||
|
||||
|
||||
def p_resize_image(in_path):
    """Resize every file found under ``in_path``, recursively and one at a time.

    Each file path is handed to ``resize_image_worker``. The files are
    processed sequentially in the calling process; no worker pool is used
    here.

    Args:
        in_path: Directory whose files are resized in place.
    """
    for root, _, files in os.walk(in_path):
        for name in files:
            resize_image_worker(os.path.join(root, name))
|
||||
|
||||
|
||||
def merge_to_output(input_folder, output_folder):
    """Copy every ``.jpg`` under ``input_folder`` into one flat ``output_folder``.

    ``input_folder`` is walked recursively. Each file whose name ends with
    ``.jpg`` is copied to ``output_folder`` as ``c_<n>.jpg``, where ``<n>``
    is a 1-based running counter zero-padded to ten digits. Directories and
    file names are visited in sorted order so the numbering is the same on
    every run.

    Args:
        input_folder: Root of the tree to collect images from.
        output_folder: Destination directory; created (including missing
            parents) when it does not exist.
    """
    # makedirs also creates missing parents and tolerates an existing folder.
    os.makedirs(output_folder, exist_ok=True)

    count = 0
    for root, dirs, filenames in os.walk(input_folder):
        # Sorting in place fixes the order in which os.walk descends.
        dirs.sort()
        for fn in sorted(filenames):
            if not fn.endswith(".jpg"):
                continue
            count += 1
            shutil.copy(
                os.path.join(root, fn),
                os.path.join(output_folder, "c_{:010d}.jpg".format(count)),
            )
|
||||
|
||||
|
||||
def process_dir(input_folder, output_dir):
    """Rebuild ``output_dir`` from the images found under ``input_folder``.

    Steps, in order:
      1. create ``output_dir`` if it does not exist;
      2. delete the ``.jpg`` files directly inside it;
      3. ``merge_to_output`` - copy the input images in;
      4. ``p_check_same_image`` - filter the copied files;
      5. ``p_resize_image`` - resize the copied files.
    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # Clear out the results of a previous run.
    stale_names = [name for name in os.listdir(output_dir) if name.endswith(".jpg")]
    for name in stale_names:
        os.remove(os.path.join(output_dir, name))

    merge_to_output(input_folder, output_dir)
    p_check_same_image(output_dir)
    p_resize_image(output_dir)
|
||||
|
||||
|
||||
def process_dir_mp(args):
    """Single-argument adapter around ``process_dir`` for use with ``Pool.map``.

    ``args`` holds the positional arguments for ``process_dir``; they are
    printed after the label "process_dir" and then forwarded unchanged.
    """
    print("process_dir", *args)
    process_dir(*args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Category sub-directories of base_dir to process; comment entries in or
    # out to choose what a run covers.
    categories = [
        # "building",
        # "african",
        # "beach",
        # "bus",
        # "dinosaur",
        # "dish",
        "elephant",
        # "horse",
        # "flower",
        # "mountain",
    ]

    # One (input folder, output folder) pair per selected category.
    jobs = [(f"{base_dir}/{t_dir}", f"output_{t_dir}") for t_dir in categories]

    with Pool(processes=2) as p:
        p.map(process_dir_mp, jobs)

    print("done")
|
Reference in New Issue
Block a user