louiscklaw
2025-01-31 22:36:21 +08:00
parent f4fc0b8f71
commit c7fb335275
1399 changed files with 5714 additions and 0 deletions


@@ -0,0 +1,85 @@
from multiprocessing import Pool
import random

from icrawler.builtin import BaiduImageCrawler
from icrawler.builtin import BingImageCrawler
from icrawler.builtin import GoogleImageCrawler

# Engine names used to dispatch crawl jobs in crawler() below.
# GreedyImageCrawler is not part of the keyword loop: it crawls entire
# sites by domain rather than by keyword (see the sketch at the end of
# this file).
google = "google"
bing = "bing"
baidu = "baidu"
# Keywords to crawl: dinosaur images plus per-country variants.
# To collect a different subject, replace the entries in the classes
# list (a compact way to generate the variants is sketched after it).
classes = [
"dinosaur",
"dinosaur over the world",
"dinosaur in hong kong",
"dinosaur in china",
"dinosaur in england",
"dinosaur in us",
"dinosaur in australia",
"dinosaur in brazil",
"dinosaur in india",
"dinosaur in japan",
"dinosaur in russia",
"dinosaur in south africa",
"dinosaur in argentina",
"dinosaur in mexico",
"dinosaur in italy",
"dinosaur in france",
"dinosaur in spain",
"dinosaur in germany",
"dinosaur in thailand",
"dinosaur in vietnam",
"dinosaur in indonesia",
"dinosaur in philippines",
"dinosaur in malaysia",
"dinosaur in singapore",
"dinosaur in egypt",
"dinosaur in turkey",
"dinosaur in greece",
"dinosaur in portugal",
"dinosaur in netherlands",
"dinosaur in belgium",
"dinosaur in sweden",
"dinosaur in norway",
"dinosaur in denmark",
"dinosaur in finland",
"dinosaur in poland",
"dinosaur in ukraine",
]
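
# The variants above all follow the pattern "dinosaur in <place>"; an
# equivalent, more compact way to build the same list (a sketch, with a
# hand-maintained places list) would be:
#
#   places = ["hong kong", "china", "england"]  # extend as needed
#   classes = ["dinosaur", "dinosaur over the world"] + [
#       f"dinosaur in {p}" for p in places
#   ]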
# Upper bound on images per (engine, keyword) job; each crawler stops
# early once its engine runs out of results.
number = 99999
# Images are saved under the "root_dir" passed to each crawler's storage,
# e.g. p/dinosaur/bing_<keyword>/ relative to the working directory.
def crawler(s_c):
    # s_c is an [engine_name, keyword] pair built in the main block.
    search_engine, c = s_c
    c_dir = c.replace(" ", "_")
    if search_engine == bing:
        bing_crawler = BingImageCrawler(storage={"root_dir": f"p/dinosaur/bing_{c_dir}"})
        bing_crawler.crawl(keyword=c, filters=None, max_num=number, offset=0)
    elif search_engine == google:
        google_crawler = GoogleImageCrawler(storage={"root_dir": f"p/dinosaur/google_{c_dir}"})
        google_crawler.crawl(keyword=c, filters=None, max_num=number, offset=0)
    elif search_engine == baidu:
        baidu_crawler = BaiduImageCrawler(storage={"root_dir": f"p/dinosaur/baidu_{c_dir}"})
        baidu_crawler.crawl(keyword=c, filters=None, max_num=number, offset=0)
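
# filters=None above requests unfiltered results. The builtin crawlers
# also accept a filters dict; a minimal sketch for Google (keys from the
# icrawler docs, values illustrative):
#
#   filters = dict(size="large", license="commercial,modify")
#   google_crawler.crawl(keyword=c, filters=filters, max_num=number, offset=0)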
if __name__ == "__main__":  # guard required: Pool re-imports this module in workers
    # One job per (engine, keyword) combination.
    process_list = []
    for search_engine in [google, bing, baidu]:
        for c in classes:
            process_list.append([search_engine, c])
    # Shuffle so concurrent workers spread requests across engines instead
    # of hitting one engine with every request at once.
    random.shuffle(process_list)
    with Pool() as pool:
        pool.map(crawler, process_list)
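
# GreedyImageCrawler has a different entry point: it walks whole sites, so
# crawl() takes domains rather than a keyword. A minimal sketch, with a
# placeholder domain (replace with a real target site):
#
#   from icrawler.builtin import GreedyImageCrawler
#   greedy_crawler = GreedyImageCrawler(storage={"root_dir": "p/dinosaur/greedy"})
#   greedy_crawler.crawl(domains="http://example.com", max_num=number)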