Files
004_comission/vinniesniper-54816/task1/_tools/crawer/crawler_mountain.py
louiscklaw c7fb335275 update,
2025-01-31 22:36:21 +08:00

84 lines
2.4 KiB
Python

from icrawler.builtin import BingImageCrawler
from icrawler.builtin import FlickrImageCrawler
from icrawler.builtin import BaiduImageCrawler
from icrawler.builtin import GoogleImageCrawler
from icrawler.builtin import GreedyImageCrawler
# Engine tags used to label crawl jobs.  NOTE(review): "greedy" is scheduled
# by the driver at the bottom of the file, but crawler() has no branch for
# it, so those jobs are silently skipped.
google = "google"
bing = "bing"
baidu = "baidu"
greedy = "greedy"
# Search keywords: one crawl job is created per (engine, keyword) pair.
# Replace or extend this list to collect images for other subjects.
# (The original comment mentioned "cats image detection", but every keyword
# below is mountain-themed.)
classes = [
"mountain",
"mountain over the world",
"mountain in hong kong",
"mountain in china",
"mountain in england",
"mountain in us",
"mountain in australia",
"mountain in brazil",
"mountain in india",
"mountain in japan",
"mountain in russia",
"mountain in south africa",
"mountain in argentina",
"mountain in mexico",
"mountain in italy",
"mountain in france",
"mountain in spain",
"mountain in germany",
"mountain in thailand",
"mountain in vietnam",
"mountain in indonesia",
"mountain in philippines",
"mountain in malaysia",
"mountain in singapore",
"mountain in egypt",
"mountain in turkey",
"mountain in greece",
"mountain in portugal",
"mountain in netherlands",
"mountain in belgium",
"mountain in sweden",
"mountain in norway",
"mountain in denmark",
"mountain in finland",
]
# Per-keyword cap passed as max_num; the crawlers stop earlier when the
# engine runs out of results.
number = 99999
# Downloaded images are stored under ./p/mountain/<engine>_<keyword>/
# relative to the working directory (see crawler() below).
from multiprocessing import Pool
def crawler(s_c):
    """Run one crawl job.

    s_c is a two-item sequence ``[search_engine, keyword]`` where
    ``search_engine`` is one of the module-level engine tags ("bing",
    "google", "baidu").  Images for ``keyword`` are downloaded into
    ``p/mountain/<engine>_<keyword-with-underscores>/``.

    Engines without a branch here (e.g. "greedy") are ignored: the
    function returns None without any side effects.
    """
    search_engine, keyword = s_c
    # The keyword doubles as the storage-directory suffix; spaces are not
    # path-friendly, so replace them.
    folder_tag = keyword.replace(" ", "_")
    # Pick the crawler class; the string literals equal the module-level
    # constants bing/google/baidu, so behavior matches the original chain.
    if search_engine == "bing":
        engine_cls = BingImageCrawler
    elif search_engine == "google":
        engine_cls = GoogleImageCrawler
    elif search_engine == "baidu":
        engine_cls = BaiduImageCrawler
    else:
        # BUG(original): "greedy" jobs are scheduled by the driver below,
        # but GreedyImageCrawler crawls domains rather than keywords, so the
        # original code silently did nothing for them.  Kept as an explicit
        # no-op to preserve behavior.
        return None
    # Same root_dir string as the original per-branch f-strings, since
    # search_engine is exactly the engine name at this point.
    image_crawler = engine_cls(
        storage={"root_dir": f"p/mountain/{search_engine}_{folder_tag}"}
    )
    # `number` is the module-level per-keyword cap; engines stop earlier
    # when results run out.
    image_crawler.crawl(keyword=keyword, filters=None, max_num=number, offset=0)
import random

# One job per (engine, keyword) pair.  NOTE: "greedy" jobs are no-ops —
# crawler() has no branch for that engine (preserved from the original).
process_list = [
    [search_engine, c]
    for search_engine in [google, bing, baidu, greedy]
    for c in classes
]

# The __main__ guard is required for multiprocessing on spawn-start
# platforms (Windows, macOS): without it every worker re-imports this
# module and would re-submit the entire job list.
if __name__ == "__main__":
    # Shuffle so concurrent workers spread across engines instead of
    # hammering one engine at a time.
    random.shuffle(process_list)
    with Pool() as pool:
        pool.map(crawler, process_list)