louiscklaw
2025-01-31 22:36:21 +08:00
parent f4fc0b8f71
commit c7fb335275
1399 changed files with 5714 additions and 0 deletions

View File

@@ -0,0 +1,41 @@
# Walk the given directories and delete files that are not images or are truncated/invalid JPEGs.
import os
import hashlib
import imghdr  # note: imghdr is deprecated since Python 3.11 and removed in 3.13

deleted = 0


def check_same_image(file_path):
    # SHA-256 digest of a file; kept for the hash-based duplicate cleaner, unused in this script
    sha256_hash = hashlib.sha256()
    with open(file_path, "rb") as f:
        for byte_block in iter(lambda: f.read(4096), b""):
            sha256_hash.update(byte_block)
    return sha256_hash.hexdigest()


def action(in_path):
    global deleted
    for root, _, files in os.walk(in_path):
        for file in files:
            file_path = os.path.join(root, file)
            if imghdr.what(file_path) is None:
                print(file_path, "is not image")
                os.remove(file_path)
                deleted += 1
                continue
            with open(file_path, "rb") as f:
                contents = f.read()
            if contents.startswith(b"\xff\xd8") and contents.endswith(b"\xff\xd9"):
                # complete JPEG (SOI and EOI markers present)
                pass
            else:
                # print(file_path, "is not valid jpeg")
                os.remove(file_path)
                deleted += 1
action("/home/logic/_workspace/task-list/servers/logic-NUC8i5BEH/opencv-workdesk/001_monitor/src/003-crawler-mountain/output")
action("/home/logic/_workspace/task-list/servers/logic-NUC8i5BEH/opencv-workdesk/001_monitor/src/003-crawler-mountain/output_mountain")
print("deleted:" + str(deleted))

View File

@@ -0,0 +1,89 @@
from icrawler.builtin import BingImageCrawler
from icrawler.builtin import FlickrImageCrawler
from icrawler.builtin import BaiduImageCrawler
from icrawler.builtin import GoogleImageCrawler
from icrawler.builtin import GreedyImageCrawler
google = "google"
bing = "bing"
baidu = "baidu"
greedy = "greedy"
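# NOTE: FlickrImageCrawler and GreedyImageCrawler are imported but never used in this script;
# GreedyImageCrawler crawls whole domains rather than keyword searches, so crawler() below has no branch for it.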
# Search keywords for the "african people" image class.
# To crawl a different subject, edit the entries in the classes list.
classes = [
"african people",
"african people over the world",
"african people in hong kong",
"african people in china",
"african people in england",
"african people in us",
"african people in australia",
"african people in brazil",
"african people in india",
"african people in japan",
"african people in russia",
"african people in south africa",
"african people in argentina",
"african people in mexico",
"african people in italy",
"african people in france",
"african people in spain",
"african people in germany",
"african people in thailand",
"african people in vietnam",
"african people in indonesia",
"african people in philippines",
"african people in malaysia",
"african people in singapore",
"african people in egypt",
"african people in turkey",
"african people in greece",
"african people in portugal",
"african people in netherlands",
"african people in belgium",
"african people in sweden",
"african people in norway",
"african people in denmark",
"african people in finland",
"african people in poland",
"african people in ukraine",
]
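# The per-country phrases are presumably there to broaden the search; duplicate downloads are
# removed later by the hash-based cleanup script included in this commit.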
number = 99999
# Images for each (engine, keyword) pair are saved under p/african/<engine>_<keyword_with_underscores>/.
import random
from multiprocessing import Pool


def crawler(s_c):
    search_engine = s_c[0]
    c = s_c[1]
    c_dir = c.replace(" ", "_")
    if search_engine == bing:
        bing_crawler = BingImageCrawler(storage={"root_dir": f"p/african/bing_{c_dir}"})
        bing_crawler.crawl(keyword=c, filters=None, max_num=number, offset=0)
    elif search_engine == google:
        google_crawler = GoogleImageCrawler(storage={"root_dir": f"p/african/google_{c_dir}"})
        google_crawler.crawl(keyword=c, filters=None, max_num=number, offset=0)
    elif search_engine == baidu:
        baidu_crawler = BaiduImageCrawler(storage={"root_dir": f"p/african/baidu_{c_dir}"})
        baidu_crawler.crawl(keyword=c, filters=None, max_num=number, offset=0)


# build one task per (engine, keyword) combination and crawl them in parallel
process_list = []
for search_engine in [google, bing, baidu]:
    for c in classes:
        process_list.append([search_engine, c])

random.shuffle(process_list)
with Pool() as pool:
    pool.map(crawler, process_list)

View File

@@ -0,0 +1,85 @@
from icrawler.builtin import BingImageCrawler
from icrawler.builtin import FlickrImageCrawler
from icrawler.builtin import BaiduImageCrawler
from icrawler.builtin import GoogleImageCrawler
from icrawler.builtin import GreedyImageCrawler
google = "google"
bing = "bing"
baidu = "baidu"
greedy = "greedy"
# Search keywords for the "beach" image class.
# To crawl a different subject, edit the entries in the classes list.
classes = [
"beach",
"beach over the world",
"beach in hong kong",
"beach in china",
"beach in england",
"beach in us",
"beach in australia",
"beach in brazil",
"beach in india",
"beach in japan",
"beach in russia",
"beach in south africa",
"beach in argentina",
"beach in mexico",
"beach in italy",
"beach in france",
"beach in spain",
"beach in germany",
"beach in thailand",
"beach in vietnam",
"beach in indonesia",
"beach in philippines",
"beach in malaysia",
"beach in singapore",
"beach in egypt",
"beach in turkey",
"beach in greece",
"beach in portugal",
"beach in netherlands",
"beach in belgium",
"beach in sweden",
"beach in norway",
"beach in denmark",
"beach in finland",
"beach in poland",
"beach in ukraine",
]
number = 99999
# Images for each (engine, keyword) pair are saved under p/beach/<engine>_<keyword_with_underscores>/.
import random
from multiprocessing import Pool


def crawler(s_c):
    search_engine = s_c[0]
    c = s_c[1]
    c_dir = c.replace(" ", "_")
    if search_engine == bing:
        bing_crawler = BingImageCrawler(storage={"root_dir": f"p/beach/bing_{c_dir}"})
        bing_crawler.crawl(keyword=c, filters=None, max_num=number, offset=0)
    elif search_engine == google:
        google_crawler = GoogleImageCrawler(storage={"root_dir": f"p/beach/google_{c_dir}"})
        google_crawler.crawl(keyword=c, filters=None, max_num=number, offset=0)
    elif search_engine == baidu:
        baidu_crawler = BaiduImageCrawler(storage={"root_dir": f"p/beach/baidu_{c_dir}"})
        baidu_crawler.crawl(keyword=c, filters=None, max_num=number, offset=0)


# build one task per (engine, keyword) combination and crawl them in parallel
process_list = []
for search_engine in [google, bing, baidu]:  # greedy has no branch in crawler(), so it is left out
    for c in classes:
        process_list.append([search_engine, c])

random.shuffle(process_list)
with Pool() as pool:
    pool.map(crawler, process_list)

View File

@@ -0,0 +1,84 @@
from icrawler.builtin import BingImageCrawler
from icrawler.builtin import FlickrImageCrawler
from icrawler.builtin import BaiduImageCrawler
from icrawler.builtin import GoogleImageCrawler
from icrawler.builtin import GreedyImageCrawler
google = "google"
bing = "bing"
baidu = "baidu"
greedy = "greedy"
# Search keywords for the "building" image class.
# To crawl a different subject, edit the entries in the classes list.
classes = [
"building",
"building over the world",
"building in hong kong",
"building in china",
"building in england",
"building in us",
"building in australia",
"building in brazil",
"building in india",
"building in japan",
"building in russia",
"building in south africa",
"building in argentina",
"building in mexico",
"building in italy",
"building in france",
"building in spain",
"building in germany",
"building in thailand",
"building in vietnam",
"building in indonesia",
"building in philippines",
"building in malaysia",
"building in singapore",
"building in egypt",
"building in turkey",
"building in greece",
"building in portugal",
"building in netherlands",
"building in belgium",
"building in sweden",
"building in norway",
"building in denmark",
"building in finland",
"building in poland",
"building in ukraine",
]
number = 99999
# Images for each (engine, keyword) pair are saved under p/building/<engine>_<keyword_with_underscores>/.
import random
from multiprocessing import Pool


def crawler(s_c):
    search_engine = s_c[0]
    c = s_c[1]
    c_dir = c.replace(" ", "_")
    if search_engine == bing:
        bing_crawler = BingImageCrawler(storage={"root_dir": f"p/building/bing_{c_dir}"})
        bing_crawler.crawl(keyword=c, filters=None, max_num=number, offset=0)
    elif search_engine == google:
        google_crawler = GoogleImageCrawler(storage={"root_dir": f"p/building/google_{c_dir}"})
        google_crawler.crawl(keyword=c, filters=None, max_num=number, offset=0)
    elif search_engine == baidu:
        baidu_crawler = BaiduImageCrawler(storage={"root_dir": f"p/building/baidu_{c_dir}"})
        baidu_crawler.crawl(keyword=c, filters=None, max_num=number, offset=0)


# build one task per (engine, keyword) combination and crawl them in parallel
process_list = []
for search_engine in [google, bing, baidu]:  # greedy has no branch in crawler(), so it is left out
    for c in classes:
        process_list.append([search_engine, c])

random.shuffle(process_list)
with Pool() as pool:
    pool.map(crawler, process_list)

View File

@@ -0,0 +1,85 @@
from icrawler.builtin import BingImageCrawler
from icrawler.builtin import FlickrImageCrawler
from icrawler.builtin import BaiduImageCrawler
from icrawler.builtin import GoogleImageCrawler
from icrawler.builtin import GreedyImageCrawler
google = "google"
bing = "bing"
baidu = "baidu"
greedy = "greedy"
# Search keywords for the "bus" image class.
# To crawl a different subject, edit the entries in the classes list.
classes = [
"bus",
"bus over the world",
"bus in hong kong",
"bus in china",
"bus in england",
"bus in us",
"bus in australia",
"bus in brazil",
"bus in india",
"bus in japan",
"bus in russia",
"bus in south africa",
"bus in argentina",
"bus in mexico",
"bus in italy",
"bus in france",
"bus in spain",
"bus in germany",
"bus in thailand",
"bus in vietnam",
"bus in indonesia",
"bus in philippines",
"bus in malaysia",
"bus in singapore",
"bus in egypt",
"bus in turkey",
"bus in greece",
"bus in portugal",
"bus in netherlands",
"bus in belgium",
"bus in sweden",
"bus in norway",
"bus in denmark",
"bus in finland",
"bus in poland",
"bus in ukraine",
]
number = 99999
# Images for each (engine, keyword) pair are saved under p/bus/<engine>_<keyword_with_underscores>/.
import random
from multiprocessing import Pool


def crawler(s_c):
    search_engine = s_c[0]
    c = s_c[1]
    c_dir = c.replace(" ", "_")
    if search_engine == bing:
        bing_crawler = BingImageCrawler(storage={"root_dir": f"p/bus/bing_{c_dir}"})
        bing_crawler.crawl(keyword=c, filters=None, max_num=number, offset=0)
    elif search_engine == google:
        google_crawler = GoogleImageCrawler(storage={"root_dir": f"p/bus/google_{c_dir}"})
        google_crawler.crawl(keyword=c, filters=None, max_num=number, offset=0)
    elif search_engine == baidu:
        baidu_crawler = BaiduImageCrawler(storage={"root_dir": f"p/bus/baidu_{c_dir}"})
        baidu_crawler.crawl(keyword=c, filters=None, max_num=number, offset=0)


# build one task per (engine, keyword) combination and crawl them in parallel
process_list = []
for search_engine in [google, bing, baidu]:  # greedy has no branch in crawler(), so it is left out
    for c in classes:
        process_list.append([search_engine, c])

random.shuffle(process_list)
with Pool() as pool:
    pool.map(crawler, process_list)

View File

@@ -0,0 +1,85 @@
from icrawler.builtin import BingImageCrawler
from icrawler.builtin import FlickrImageCrawler
from icrawler.builtin import BaiduImageCrawler
from icrawler.builtin import GoogleImageCrawler
from icrawler.builtin import GreedyImageCrawler
google = "google"
bing = "bing"
baidu = "baidu"
greedy = "greedy"
# Search keywords for the "dinosaur" image class.
# To crawl a different subject, edit the entries in the classes list.
classes = [
"dinosaur",
"dinosaur over the world",
"dinosaur in hong kong",
"dinosaur in china",
"dinosaur in england",
"dinosaur in us",
"dinosaur in australia",
"dinosaur in brazil",
"dinosaur in india",
"dinosaur in japan",
"dinosaur in russia",
"dinosaur in south africa",
"dinosaur in argentina",
"dinosaur in mexico",
"dinosaur in italy",
"dinosaur in france",
"dinosaur in spain",
"dinosaur in germany",
"dinosaur in thailand",
"dinosaur in vietnam",
"dinosaur in indonesia",
"dinosaur in philippines",
"dinosaur in malaysia",
"dinosaur in singapore",
"dinosaur in egypt",
"dinosaur in turkey",
"dinosaur in greece",
"dinosaur in portugal",
"dinosaur in netherlands",
"dinosaur in belgium",
"dinosaur in sweden",
"dinosaur in norway",
"dinosaur in denmark",
"dinosaur in finland",
"dinosaur in poland",
"dinosaur in ukraine",
]
number = 99999
# Images for each (engine, keyword) pair are saved under p/dinosaur/<engine>_<keyword_with_underscores>/.
import random
from multiprocessing import Pool


def crawler(s_c):
    search_engine = s_c[0]
    c = s_c[1]
    c_dir = c.replace(" ", "_")
    if search_engine == bing:
        bing_crawler = BingImageCrawler(storage={"root_dir": f"p/dinosaur/bing_{c_dir}"})
        bing_crawler.crawl(keyword=c, filters=None, max_num=number, offset=0)
    elif search_engine == google:
        google_crawler = GoogleImageCrawler(storage={"root_dir": f"p/dinosaur/google_{c_dir}"})
        google_crawler.crawl(keyword=c, filters=None, max_num=number, offset=0)
    elif search_engine == baidu:
        baidu_crawler = BaiduImageCrawler(storage={"root_dir": f"p/dinosaur/baidu_{c_dir}"})
        baidu_crawler.crawl(keyword=c, filters=None, max_num=number, offset=0)


# build one task per (engine, keyword) combination and crawl them in parallel
process_list = []
for search_engine in [google, bing, baidu]:  # greedy has no branch in crawler(), so it is left out
    for c in classes:
        process_list.append([search_engine, c])

random.shuffle(process_list)
with Pool() as pool:
    pool.map(crawler, process_list)

View File

@@ -0,0 +1,84 @@
from icrawler.builtin import BingImageCrawler
from icrawler.builtin import FlickrImageCrawler
from icrawler.builtin import BaiduImageCrawler
from icrawler.builtin import GoogleImageCrawler
from icrawler.builtin import GreedyImageCrawler
google = "google"
bing = "bing"
baidu = "baidu"
greedy = "greedy"
# Search keywords for the "dish" image class.
# To crawl a different subject, edit the entries in the classes list.
classes = [
"dish",
"dish over the world",
"dish in hong kong",
"dish in china",
"dish in england",
"dish in us",
"dish in australia",
"dish in brazil",
"dish in india",
"dish in japan",
"dish in russia",
"dish in south africa",
"dish in argentina",
"dish in mexico",
"dish in italy",
"dish in france",
"dish in spain",
"dish in germany",
"dish in thailand",
"dish in vietnam",
"dish in indonesia",
"dish in philippines",
"dish in malaysia",
"dish in singapore",
"dish in egypt",
"dish in turkey",
"dish in greece",
"dish in portugal",
"dish in netherlands",
"dish in belgium",
"dish in sweden",
"dish in norway",
"dish in denmark",
"dish in finland",
"dish in poland",
"dish in ukraine",
]
number = 99999
# Images for each (engine, keyword) pair are saved under p/dish/<engine>_<keyword_with_underscores>/.
import random
from multiprocessing import Pool


def crawler(s_c):
    search_engine = s_c[0]
    c = s_c[1]
    c_dir = c.replace(" ", "_")
    if search_engine == bing:
        bing_crawler = BingImageCrawler(storage={"root_dir": f"p/dish/bing_{c_dir}"})
        bing_crawler.crawl(keyword=c, filters=None, max_num=number, offset=0)
    elif search_engine == google:
        google_crawler = GoogleImageCrawler(storage={"root_dir": f"p/dish/google_{c_dir}"})
        google_crawler.crawl(keyword=c, filters=None, max_num=number, offset=0)
    elif search_engine == baidu:
        baidu_crawler = BaiduImageCrawler(storage={"root_dir": f"p/dish/baidu_{c_dir}"})
        baidu_crawler.crawl(keyword=c, filters=None, max_num=number, offset=0)


# build one task per (engine, keyword) combination and crawl them in parallel
process_list = []
for search_engine in [google, bing, baidu]:  # greedy has no branch in crawler(), so it is left out
    for c in classes:
        process_list.append([search_engine, c])

random.shuffle(process_list)
with Pool() as pool:
    pool.map(crawler, process_list)

View File

@@ -0,0 +1,52 @@
from icrawler.builtin import BingImageCrawler
from icrawler.builtin import FlickrImageCrawler
# Search keywords for the "elephant" image class.
# To crawl a different subject, edit the entries in the classes list.
classes = [
"elephant",
"elephant over the world",
"elephant in hong kong",
"elephant in china",
"elephant in england",
"elephant in us",
"elephant in australia",
"elephant in brazil",
"elephant in india",
"elephant in japan",
"elephant in russia",
"elephant in south africa",
"elephant in argentina",
"elephant in mexico",
"elephant in italy",
"elephant in france",
"elephant in spain",
"elephant in germany",
"elephant in thailand",
"elephant in vietnam",
"elephant in indonesia",
"elephant in philippines",
"elephant in malaysia",
"elephant in singapore",
"elephant in egypt",
"elephant in turkey",
"elephant in greece",
"elephant in portugal",
"elephant in netherlands",
"elephant in belgium",
"elephant in sweden",
"elephant in norway",
"elephant in denmark",
"elephant in finland",
"elephant in poland",
"elephant in ukraine",
]
number = 99999
# Each keyword is crawled into p/elephant/<keyword_with_underscores>/ using Bing only.
# The outer loop just re-runs the same crawl 99 times.
for i in range(99):
    for c in classes:
        bing_crawler = BingImageCrawler(storage={"root_dir": f'p/elephant/{c.replace(" ","_")}'})
        bing_crawler.crawl(keyword=c, filters=None, max_num=number, offset=0)

View File

@@ -0,0 +1,65 @@
from icrawler.builtin import BingImageCrawler
from icrawler.builtin import FlickrImageCrawler
from icrawler.builtin import BaiduImageCrawler
from icrawler.builtin import GoogleImageCrawler
# Search keywords for the "elephant" image class.
# To crawl a different subject, edit the entries in the classes list.
classes = [
"elephant",
"elephant over the world",
"elephant in hong kong",
"elephant in china",
"elephant in england",
"elephant in us",
"elephant in australia",
"elephant in brazil",
"elephant in india",
"elephant in japan",
"elephant in russia",
"elephant in south africa",
"elephant in argentina",
"elephant in mexico",
"elephant in italy",
"elephant in france",
"elephant in spain",
"elephant in germany",
"elephant in thailand",
"elephant in vietnam",
"elephant in indonesia",
"elephant in philippines",
"elephant in malaysia",
"elephant in singapore",
"elephant in egypt",
"elephant in turkey",
"elephant in greece",
"elephant in portugal",
"elephant in netherlands",
"elephant in belgium",
"elephant in sweden",
"elephant in norway",
"elephant in denmark",
"elephant in finland",
"elephant in poland",
"elephant in ukraine",
]
number = 99999
# Crawl with Baidu first, then Google; `number` above is only referenced by the commented-out Bing crawl.
# All keywords share one root_dir per engine, so files from later keywords may overwrite earlier downloads.
for c in classes:
    # bing_crawler = BingImageCrawler(storage={"root_dir": f'p/elephant/{c.replace(" ","_")}'})
    # bing_crawler.crawl(keyword=c, filters=None, max_num=number, offset=0)
    # flickr_crawler = FlickrImageCrawler(storage={"root_dir": f"p/elephant/flickr"})
    # flickr_crawler.crawl(keyword=c, max_num=10000, min_size=(200, 200), max_size=None)
    baidu_crawler = BaiduImageCrawler(storage={"root_dir": "p/elephant/baidu"})
    baidu_crawler.crawl(keyword=c, max_num=10000, min_size=(200, 200), max_size=None)

for c in classes:
    google_crawler = GoogleImageCrawler(storage={"root_dir": "p/elephant/google"})
    google_crawler.crawl(keyword=c, max_num=10000, min_size=(200, 200), max_size=None)

View File

@@ -0,0 +1,85 @@
from icrawler.builtin import BingImageCrawler
from icrawler.builtin import FlickrImageCrawler
from icrawler.builtin import BaiduImageCrawler
from icrawler.builtin import GoogleImageCrawler
from icrawler.builtin import GreedyImageCrawler
google = "google"
bing = "bing"
baidu = "baidu"
greedy = "greedy"
# Search keywords for the "elephant" image class.
# To crawl a different subject, edit the entries in the classes list.
classes = [
"elephant",
"elephant over the world",
"elephant in hong kong",
"elephant in china",
"elephant in england",
"elephant in us",
"elephant in australia",
"elephant in brazil",
"elephant in india",
"elephant in japan",
"elephant in russia",
"elephant in south africa",
"elephant in argentina",
"elephant in mexico",
"elephant in italy",
"elephant in france",
"elephant in spain",
"elephant in germany",
"elephant in thailand",
"elephant in vietnam",
"elephant in indonesia",
"elephant in philippines",
"elephant in malaysia",
"elephant in singapore",
"elephant in egypt",
"elephant in turkey",
"elephant in greece",
"elephant in portugal",
"elephant in netherlands",
"elephant in belgium",
"elephant in sweden",
"elephant in norway",
"elephant in denmark",
"elephant in finland",
"elephant in poland",
"elephant in ukraine",
]
number = 99999
# Images for each (engine, keyword) pair are saved under p/elephant/<engine>_<keyword_with_underscores>/.
import random
from multiprocessing import Pool


def crawler(s_c):
    search_engine = s_c[0]
    c = s_c[1]
    c_dir = c.replace(" ", "_")
    if search_engine == bing:
        bing_crawler = BingImageCrawler(storage={"root_dir": f"p/elephant/bing_{c_dir}"})
        bing_crawler.crawl(keyword=c, filters=None, max_num=number, offset=0)
    elif search_engine == google:
        google_crawler = GoogleImageCrawler(storage={"root_dir": f"p/elephant/google_{c_dir}"})
        google_crawler.crawl(keyword=c, filters=None, max_num=number, offset=0)
    elif search_engine == baidu:
        baidu_crawler = BaiduImageCrawler(storage={"root_dir": f"p/elephant/baidu_{c_dir}"})
        baidu_crawler.crawl(keyword=c, filters=None, max_num=number, offset=0)


# build one task per (engine, keyword) combination and crawl them in parallel
process_list = []
for search_engine in [google, bing, baidu]:  # greedy has no branch in crawler(), so it is left out
    for c in classes:
        process_list.append([search_engine, c])

random.shuffle(process_list)
with Pool() as pool:
    pool.map(crawler, process_list)

View File

@@ -0,0 +1,85 @@
from icrawler.builtin import BingImageCrawler
from icrawler.builtin import FlickrImageCrawler
from icrawler.builtin import BaiduImageCrawler
from icrawler.builtin import GoogleImageCrawler
from icrawler.builtin import GreedyImageCrawler
google = "google"
bing = "bing"
baidu = "baidu"
greedy = "greedy"
# Search keywords for the "flower" image class.
# To crawl a different subject, edit the entries in the classes list.
classes = [
"flower",
"flower over the world",
"flower in hong kong",
"flower in china",
"flower in england",
"flower in us",
"flower in australia",
"flower in brazil",
"flower in india",
"flower in japan",
"flower in russia",
"flower in south africa",
"flower in argentina",
"flower in mexico",
"flower in italy",
"flower in france",
"flower in spain",
"flower in germany",
"flower in thailand",
"flower in vietnam",
"flower in indonesia",
"flower in philippines",
"flower in malaysia",
"flower in singapore",
"flower in egypt",
"flower in turkey",
"flower in greece",
"flower in portugal",
"flower in netherlands",
"flower in belgium",
"flower in sweden",
"flower in norway",
"flower in denmark",
"flower in finland",
"flower in poland",
"flower in ukraine",
]
number = 99999
# Images for each (engine, keyword) pair are saved under p/flower/<engine>_<keyword_with_underscores>/.
import random
from multiprocessing import Pool


def crawler(s_c):
    search_engine = s_c[0]
    c = s_c[1]
    c_dir = c.replace(" ", "_")
    if search_engine == bing:
        bing_crawler = BingImageCrawler(storage={"root_dir": f"p/flower/bing_{c_dir}"})
        bing_crawler.crawl(keyword=c, filters=None, max_num=number, offset=0)
    elif search_engine == google:
        google_crawler = GoogleImageCrawler(storage={"root_dir": f"p/flower/google_{c_dir}"})
        google_crawler.crawl(keyword=c, filters=None, max_num=number, offset=0)
    elif search_engine == baidu:
        baidu_crawler = BaiduImageCrawler(storage={"root_dir": f"p/flower/baidu_{c_dir}"})
        baidu_crawler.crawl(keyword=c, filters=None, max_num=number, offset=0)


# build one task per (engine, keyword) combination and crawl them in parallel
process_list = []
for search_engine in [google, bing, baidu]:  # greedy has no branch in crawler(), so it is left out
    for c in classes:
        process_list.append([search_engine, c])

random.shuffle(process_list)
with Pool() as pool:
    pool.map(crawler, process_list)

View File

@@ -0,0 +1,86 @@
from icrawler.builtin import BingImageCrawler
from icrawler.builtin import FlickrImageCrawler
from icrawler.builtin import BaiduImageCrawler
from icrawler.builtin import GoogleImageCrawler
from icrawler.builtin import GreedyImageCrawler
google = "google"
bing = "bing"
baidu = "baidu"
greedy = "greedy"
# Search keywords for the "horse" image class.
# To crawl a different subject, edit the entries in the classes list.
classes = [
"horse",
"horse over the world",
"horse in hong kong",
"horse in china",
"horse in england",
"horse in us",
"horse in australia",
"horse in brazil",
"horse in india",
"horse in japan",
"horse in russia",
"horse in south africa",
"horse in argentina",
"horse in mexico",
"horse in italy",
"horse in france",
"horse in spain",
"horse in germany",
"horse in thailand",
"horse in vietnam",
"horse in indonesia",
"horse in philippines",
"horse in malaysia",
"horse in singapore",
"horse in egypt",
"horse in turkey",
"horse in greece",
"horse in portugal",
"horse in netherlands",
"horse in belgium",
"horse in sweden",
"horse in norway",
"horse in denmark",
"horse in finland",
"horse in poland",
"horse in ukraine",
]
number = 99999
# Images for each (engine, keyword) pair are saved under p/horse/<engine>_<keyword_with_underscores>/.
import random
from multiprocessing import Pool


def crawler(s_c):
    search_engine = s_c[0]
    c = s_c[1]
    c_dir = c.replace(" ", "_")
    if search_engine == bing:
        bing_crawler = BingImageCrawler(storage={"root_dir": f"p/horse/bing_{c_dir}"})
        bing_crawler.crawl(keyword=c, filters=None, max_num=number, offset=0)
    elif search_engine == google:
        google_crawler = GoogleImageCrawler(storage={"root_dir": f"p/horse/google_{c_dir}"})
        google_crawler.crawl(keyword=c, filters=None, max_num=number, offset=0)
    elif search_engine == baidu:
        baidu_crawler = BaiduImageCrawler(storage={"root_dir": f"p/horse/baidu_{c_dir}"})
        baidu_crawler.crawl(keyword=c, filters=None, max_num=number, offset=0)


# build one task per (engine, keyword) combination and crawl them in parallel
process_list = []
for search_engine in [google, bing, baidu]:  # greedy has no branch in crawler(), so it is left out
    for c in classes:
        process_list.append([search_engine, c])

random.shuffle(process_list)
with Pool() as pool:
    pool.map(crawler, process_list)

View File

@@ -0,0 +1,83 @@
from icrawler.builtin import BingImageCrawler
from icrawler.builtin import FlickrImageCrawler
from icrawler.builtin import BaiduImageCrawler
from icrawler.builtin import GoogleImageCrawler
from icrawler.builtin import GreedyImageCrawler
google = "google"
bing = "bing"
baidu = "baidu"
greedy = "greedy"
# Search keywords for the "mountain" image class.
# To crawl a different subject, edit the entries in the classes list.
classes = [
"mountain",
"mountain over the world",
"mountain in hong kong",
"mountain in china",
"mountain in england",
"mountain in us",
"mountain in australia",
"mountain in brazil",
"mountain in india",
"mountain in japan",
"mountain in russia",
"mountain in south africa",
"mountain in argentina",
"mountain in mexico",
"mountain in italy",
"mountain in france",
"mountain in spain",
"mountain in germany",
"mountain in thailand",
"mountain in vietnam",
"mountain in indonesia",
"mountain in philippines",
"mountain in malaysia",
"mountain in singapore",
"mountain in egypt",
"mountain in turkey",
"mountain in greece",
"mountain in portugal",
"mountain in netherlands",
"mountain in belgium",
"mountain in sweden",
"mountain in norway",
"mountain in denmark",
"mountain in finland",
]
number = 99999
# Images for each (engine, keyword) pair are saved under p/mountain/<engine>_<keyword_with_underscores>/.
import random
from multiprocessing import Pool


def crawler(s_c):
    search_engine = s_c[0]
    c = s_c[1]
    c_dir = c.replace(" ", "_")
    if search_engine == bing:
        bing_crawler = BingImageCrawler(storage={"root_dir": f"p/mountain/bing_{c_dir}"})
        bing_crawler.crawl(keyword=c, filters=None, max_num=number, offset=0)
    elif search_engine == google:
        google_crawler = GoogleImageCrawler(storage={"root_dir": f"p/mountain/google_{c_dir}"})
        google_crawler.crawl(keyword=c, filters=None, max_num=number, offset=0)
    elif search_engine == baidu:
        baidu_crawler = BaiduImageCrawler(storage={"root_dir": f"p/mountain/baidu_{c_dir}"})
        baidu_crawler.crawl(keyword=c, filters=None, max_num=number, offset=0)


# build one task per (engine, keyword) combination and crawl them in parallel
process_list = []
for search_engine in [google, bing, baidu]:  # greedy has no branch in crawler(), so it is left out
    for c in classes:
        process_list.append([search_engine, c])

random.shuffle(process_list)
with Pool() as pool:
    pool.map(crawler, process_list)

View File

@@ -0,0 +1,9 @@
from icrawler.builtin import BingImageCrawler
classes = ["trees", "roads", "Human faces"]
number = 100
for c in classes:
    # n/ holds the negative (non-target) training images
    bing_crawler = BingImageCrawler(storage={"root_dir": f'n/{c.replace(" ","_")}'})
    bing_crawler.crawl(keyword=c, filters=None, max_num=number, offset=0)

View File

@@ -0,0 +1,54 @@
from icrawler.builtin import BingImageCrawler
from icrawler.builtin import FlickrImageCrawler
from icrawler.builtin import BaiduImageCrawler
from icrawler.builtin import GoogleImageCrawler
from icrawler.builtin import GreedyImageCrawler
google = "google"
bing = "bing"
baidu = "baidu"
greedy = "greedy"
# Search keywords for the "patient bed" image class.
# To crawl a different subject, edit the entries in the classes list.
classes = [
"patient bed",
"patient bed in hong kong",
"patient bed in china",
"patient bed in japan",
"patient bed in taiwan",
]
number = 99999
# Images for each (engine, keyword) pair are saved under p/bus/<engine>_<keyword_with_underscores>/.
# NOTE: the root_dir still points at p/bus (apparently copied from the bus crawler); change it
# if the patient-bed images should land in their own class folder.
import random
from multiprocessing import Pool


def crawler(s_c):
    search_engine = s_c[0]
    c = s_c[1]
    c_dir = c.replace(" ", "_")
    if search_engine == bing:
        bing_crawler = BingImageCrawler(storage={"root_dir": f"p/bus/bing_{c_dir}"})
        bing_crawler.crawl(keyword=c, filters=None, max_num=number, offset=0)
    elif search_engine == google:
        google_crawler = GoogleImageCrawler(storage={"root_dir": f"p/bus/google_{c_dir}"})
        google_crawler.crawl(keyword=c, filters=None, max_num=number, offset=0)
    elif search_engine == baidu:
        baidu_crawler = BaiduImageCrawler(storage={"root_dir": f"p/bus/baidu_{c_dir}"})
        baidu_crawler.crawl(keyword=c, filters=None, max_num=number, offset=0)


# build one task per (engine, keyword) combination and crawl them in parallel
process_list = []
for search_engine in [google, bing, baidu]:  # greedy has no branch in crawler(), so it is left out
    for c in classes:
        process_list.append([search_engine, c])

random.shuffle(process_list)
with Pool() as pool:
    pool.map(crawler, process_list)

View File

@@ -0,0 +1,30 @@
# Walk the given directories, hash every file, and delete any file whose hash was already seen.
import os
import hashlib


def check_same_image(file_path):
    # SHA-256 digest of the file contents, read in 8 KiB blocks
    sha256_hash = hashlib.sha256()
    with open(file_path, "rb") as f:
        for byte_block in iter(lambda: f.read(8192), b""):
            sha256_hash.update(byte_block)
    return sha256_hash.hexdigest()


def action(in_path):
    img_hash = {}
    for root, _, files in os.walk(in_path):
        for file in files:
            file_path = os.path.join(root, file)
            digest = check_same_image(file_path)
            if digest in img_hash:
                os.remove(file_path)
                print(file_path, "duplicate removed, first seen at", img_hash[digest])
            else:
                img_hash[digest] = file_path
action("/home/logic/_workspace/task-list/servers/logic-NUC8i5BEH/opencv-workdesk/001_monitor/src/003-crawler-mountain/output")
action("/home/logic/_workspace/task-list/servers/logic-NUC8i5BEH/opencv-workdesk/001_monitor/src/003-crawler-mountain/output_mountain")

View File

@@ -0,0 +1,105 @@
import glob
import os
import shutil
import cv2
def merge_images(input_folder, output_folder):
    if not os.path.exists(output_folder):
        os.mkdir(output_folder)
    count = 0
    for root, _, filenames in os.walk(input_folder):
        for fn in filenames:
            if fn.endswith('.jpg'):
                count += 1
                shutil.copy(os.path.join(root, fn), os.path.join(output_folder, 'c_{:010d}.jpg'.format(count)))


if __name__ == '__main__':
    merge_images('p/beach', 'output')
    for f in glob.glob(os.path.join('/home/logic/test/data/1xx_Beach', 'c_*.jpg')):
        os.remove(f)
    for f in glob.glob('output/*.jpg'):
        img = cv2.imread(f)
        if img is None:
            continue
        shutil.copy(f, '/home/logic/test/data/1xx_Beach')
    for f in glob.glob('output/*.jpg'):
        os.remove(f)
    print('beach done')

    merge_images('p/building', 'output')
    for f in glob.glob('output/*.jpg'):
        img = cv2.imread(f)
        if img is None:
            continue
        shutil.copy(f, '/home/logic/test/data/2xx_Building')
    for f in glob.glob('output/*.jpg'):
        os.remove(f)
    print('building done')

    merge_images('p/bus', 'output')
    for f in glob.glob('output/*.jpg'):
        img = cv2.imread(f)
        if img is None:
            continue
        shutil.copy(f, '/home/logic/test/data/3xx_Bus')
    for f in glob.glob('output/*.jpg'):
        os.remove(f)
    print('bus done')

    merge_images('p/dinosaur', 'output')
    for f in glob.glob('output/*.jpg'):
        img = cv2.imread(f)
        if img is None:
            continue
        shutil.copy(f, '/home/logic/test/data/4xx_Dinosaur')
    for f in glob.glob('output/*.jpg'):
        os.remove(f)
    print('dinosaur done')

    merge_images('p/elephant', 'output')
    for f in glob.glob('output/*.jpg'):
        img = cv2.imread(f)
        if img is None:
            continue
        shutil.copy(f, '/home/logic/test/data/5xx_Elephant')
    for f in glob.glob('output/*.jpg'):
        os.remove(f)
    print('elephant done')

    merge_images('p/horse', 'output')
    for f in glob.glob('output/*.jpg'):
        img = cv2.imread(f)
        if img is None:
            continue
        shutil.copy(f, '/home/logic/test/data/7xx_Horse')
    for f in glob.glob('output/*.jpg'):
        os.remove(f)
    print('horse done')

    merge_images('p/mountain', 'output')
    for f in glob.glob('output/*.jpg'):
        img = cv2.imread(f)
        if img is None:
            continue
        shutil.copy(f, '/home/logic/test/data/8xx_Mountain')
    for f in glob.glob('output/*.jpg'):
        os.remove(f)
    print('mountain done')

    merge_images('p/dish', 'output')
    for f in glob.glob('output/*.jpg'):
        img = cv2.imread(f)
        if img is None:
            continue
        shutil.copy(f, '/home/logic/test/data/9xx_Dish')
    for f in glob.glob('output/*.jpg'):
        os.remove(f)
    print('dish done')

    print('done')

View File

@@ -0,0 +1,114 @@
import glob
import os
import shutil
import cv2
import random
import string
beach_src_path = "p/beach"
beach_target_path = "/home/logic/_wsl_workspace/comission-playlist-2024/vinniesniper-54816/src/data/1xx_Beach"
building_src_path = "p/building"
building_target_path = "/home/logic/_wsl_workspace/comission-playlist-2024/vinniesniper-54816/src/data/2xx_Building"
bus_src_path = "p/bus"
bus_target_path = "/home/logic/_wsl_workspace/comission-playlist-2024/vinniesniper-54816/src/data/3xx_Bus"
dinosaur_src_path = "p/dinosaur"
dinosaur_target_path = "/home/logic/_wsl_workspace/comission-playlist-2024/vinniesniper-54816/src/data/4xx_Dinosaur"
elephant_src_path = "p/elephant"
elephant_target_path = "/home/logic/_wsl_workspace/comission-playlist-2024/vinniesniper-54816/src/data/5xx_Elephant"
flower_src_path = "p/flower"
flower_target_path = "/home/logic/_wsl_workspace/comission-playlist-2024/vinniesniper-54816/src/data/6xx_Flower"
horse_src_path = "p/horse"
horse_target_path = "/home/logic/_wsl_workspace/comission-playlist-2024/vinniesniper-54816/src/data/7xx_Horse"
mountain_src_path = "p/mountain"
mountain_target_path = "/home/logic/_wsl_workspace/comission-playlist-2024/vinniesniper-54816/src/data/8xx_Mountain"
dish_src_path = "p/dish"
dish_target_path = "/home/logic/_wsl_workspace/comission-playlist-2024/vinniesniper-54816/src/data/9xx_Dish"
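# copy_img() below stages each class into its own temp folder and then refreshes the matching
# nxx_* data folder; one ThreadPoolExecutor worker handles each class in parallel.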
def merge_images(input_folder, output_folder):
    # copy up to 10000 readable, non-empty .jpg files into output_folder with sequential names
    if not os.path.exists(output_folder):
        os.mkdir(output_folder)
    count = 0
    for root, _, filenames in os.walk(input_folder):
        for fn in list(sorted(filenames)):
            if count < 10000:
                if os.path.getsize(os.path.join(root, fn)) == 0:
                    os.remove(os.path.join(root, fn))
                    continue
                img = cv2.imread(os.path.join(root, fn))
                if img is None:
                    os.remove(os.path.join(root, fn))
                    continue
                if fn.endswith('.jpg'):
                    count += 1
                    shutil.copy(os.path.join(root, fn), os.path.join(output_folder, 'c_{:010d}.jpg'.format(count)))


def copy_img(src_path, target_path):
    # stage src_path into a private temp dir, clear old c_*.jpg files in the target, then copy over
    random_str = ''.join(random.choices(string.ascii_letters + string.digits, k=5))
    tmp_dir = '_tmp/_tmp_' + random_str
    os.makedirs(tmp_dir)  # also creates the parent _tmp/ directory if it is missing
    for f in glob.glob(os.path.join(target_path, 'c_*.jpg')):
        os.remove(f)
    merge_images(src_path, tmp_dir)
    for f in sorted(glob.glob(tmp_dir + '/*.jpg')):
        shutil.copy(f, target_path)
    shutil.rmtree(tmp_dir)
    print(' img add done')


from concurrent.futures import ThreadPoolExecutor


def add_img(src_path, target_path):
    print('add to ' + target_path, end="")
    copy_img(src_path, target_path)


# one thread per class; the with-block waits for all submitted copies to finish
with ThreadPoolExecutor() as executor:
    executor.submit(add_img, beach_src_path, beach_target_path)
    executor.submit(add_img, building_src_path, building_target_path)
    executor.submit(add_img, bus_src_path, bus_target_path)
    executor.submit(add_img, dinosaur_src_path, dinosaur_target_path)
    executor.submit(add_img, elephant_src_path, elephant_target_path)
    executor.submit(add_img, flower_src_path, flower_target_path)
    executor.submit(add_img, horse_src_path, horse_target_path)
    executor.submit(add_img, mountain_src_path, mountain_target_path)
    executor.submit(add_img, dish_src_path, dish_target_path)
# print('add to bus', end="")
# copy_img(bus_src_path, bus_target_path)
# print('add to dinosaur', end="")
# copy_img(dinosaur_src_path, dinosaur_target_path)
# print('add to elephant', end="")
# copy_img(elephant_src_path, elephant_target_path)
# print('add to flower', end="")
# copy_img(flower_src_path, flower_target_path)
# print('add to horse', end="")
# copy_img(horse_src_path, horse_target_path)
# print('add to mountain', end="")
# copy_img(mountain_src_path, mountain_target_path)
# print('add to dish', end="")
# copy_img(dish_src_path, dish_target_path)
print('done')

View File

@@ -0,0 +1,10 @@
#!/usr/bin/env bash
set -e
python ./merge.py p/mountain
cp /home/logic/_wsl_workspace/comission-playlist-2024/vinniesniper-54816-src/task1/_lab/003-crawler-mountain/output/*.jpg \
/home/logic/test/data/8xx_Mountain
echo "done"

View File

@@ -0,0 +1,5 @@
#!/usr/bin/env bash
set -ex
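# force-kill every running process whose command line mentions "crawler" (case-insensitive)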
ps -ef|grep -i crawler|grep -v grep|awk '{print $2}'|xargs kill -9

View File

@@ -0,0 +1,19 @@
import os
import shutil
import sys
def merge_images(input_folder, output_folder):
    # copy every .jpg under input_folder into output_folder with sequential names
    if not os.path.exists(output_folder):
        os.mkdir(output_folder)
    count = 0
    for root, _, filenames in os.walk(input_folder):
        for fn in filenames:
            if fn.endswith('.jpg'):
                count += 1
                shutil.copy(os.path.join(root, fn), os.path.join(output_folder, 'c_{:010d}.jpg'.format(count)))


if __name__ == '__main__':
    # start from a clean output_mountain folder, then merge the crawled mountain images into it
    for f in os.listdir('output_mountain'):
        if f.endswith('.jpg'):
            os.remove(os.path.join('output_mountain', f))
    merge_images("/home/logic/_workspace/task-list/servers/logic-NUC8i5BEH/opencv-workdesk/001_monitor/src/003-crawler-mountain/p/mountain", 'output_mountain')

View File

@@ -0,0 +1,140 @@
import os
import shutil
import sys
from multiprocessing import Pool
import imghdr
import hashlib
from PIL import Image
base_dir = (
"/home/logic/_workspace/task-list/servers/logic-NUC8i5BEH/opencv-workdesk/001_monitor/src/003-crawler-mountain/p"
)
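# base_dir points at the crawler output tree: one sub-folder per class (african, beach, ..., mountain)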
def check_same_image(file_path):
    # SHA-256 digest of the file contents (used to spot duplicate downloads)
    sha256_hash = hashlib.sha256()
    with open(file_path, "rb") as f:
        for byte_block in iter(lambda: f.read(4096), b""):
            sha256_hash.update(byte_block)
    return sha256_hash.hexdigest()


def p_check_same_image(in_path):
    # delete anything that is not an image or is a truncated/invalid JPEG
    for root, _, files in os.walk(in_path):
        for file in files:
            file_path = os.path.join(root, file)
            if imghdr.what(file_path) is None:
                print(file_path, "is not image")
                os.remove(file_path)
                continue
            with open(file_path, "rb") as f:
                contents = f.read()
            if contents.startswith(b"\xff\xd8") and contents.endswith(b"\xff\xd9"):
                # complete JPEG (SOI and EOI markers present)
                pass
            else:
                # print(file_path, "is not valid jpeg")
                os.remove(file_path)


def resize_image(file_path, max_size=1920):
    if os.path.getsize(file_path) == 0:
        os.remove(file_path)
        print(f"Deleted empty file: {file_path}")
        return
    with Image.open(file_path) as img:
        # resize the image if its width or height exceeds max_size
        if img.width > max_size or img.height > max_size:
            # calculate the new size while keeping the aspect ratio
            aspect_ratio = img.width / img.height
            if img.width > img.height:
                new_width = min(img.width, max_size)
                new_height = int(new_width / aspect_ratio)
            else:
                new_height = min(img.height, max_size)
                new_width = int(new_height * aspect_ratio)
            resized_img = img.resize((new_width, new_height))
            resized_img.save(file_path)
            print("resize done " + file_path)
        else:
            # already small enough, skip
            # print("skipped " + file_path)
            return


def resize_image_worker(file_path):
    try:
        resize_image(file_path)
    except Exception as e:
        print(file_path)
        print(e)


def p_resize_image(in_path):
    # note: walks the files sequentially despite the name; no multiprocessing is used here
    for root, _, files in os.walk(in_path):
        for file in files:
            resize_image_worker(os.path.join(root, file))


def merge_to_output(input_folder, output_folder):
    # copy every .jpg under input_folder into output_folder with sequential names
    if not os.path.exists(output_folder):
        os.mkdir(output_folder)
    count = 0
    for root, _, filenames in os.walk(input_folder):
        for fn in filenames:
            if fn.endswith(".jpg"):
                count += 1
                shutil.copy(os.path.join(root, fn), os.path.join(output_folder, "c_{:010d}.jpg".format(count)))


def process_dir(input_folder, output_dir):
    # rebuild output_dir from input_folder, then validate and resize the result
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    for f in os.listdir(output_dir):
        if f.endswith(".jpg"):
            os.remove(os.path.join(output_dir, f))
    merge_to_output(input_folder, output_dir)
    p_check_same_image(output_dir)
    p_resize_image(output_dir)


def process_dir_mp(args):
    print("process_dir", *args)
    process_dir(*args)


if __name__ == "__main__":
    # two worker processes; uncomment more class folders below to process them as well
    with Pool(processes=2) as p:
        p.map(
            process_dir_mp,
            [
                (f"{base_dir}/{t_dir}", f"output_{t_dir}")
                for t_dir in [
                    # "building",
                    # "african",
                    # "beach",
                    # "bus",
                    # "dinosaur",
                    # "dish",
                    "elephant",
                    # "horse",
                    # "flower",
                    # "mountain",
                ]
            ],
        )
    print("done")

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Some files were not shown because too many files have changed in this diff.