# 004_comission/vinniesniper-54816/task1/_tools/crawer/crawler_flower.py

from multiprocessing import Pool
import random

from icrawler.builtin import BaiduImageCrawler
from icrawler.builtin import BingImageCrawler
from icrawler.builtin import GoogleImageCrawler

# Engine tags used to pick a crawler backend in crawler() below.
google = "google"
bing = "bing"
baidu = "baidu"
# Search keywords to crawl. This script collects flower images; the
# location-qualified variants below diversify the result sets. To crawl
# another subject, replace the entries in the classes list.
classes = [
    "flower",
    "flower over the world",
    "flower in hong kong",
    "flower in china",
    "flower in england",
    "flower in us",
    "flower in australia",
    "flower in brazil",
    "flower in india",
    "flower in japan",
    "flower in russia",
    "flower in south africa",
    "flower in argentina",
    "flower in mexico",
    "flower in italy",
    "flower in france",
    "flower in spain",
    "flower in germany",
    "flower in thailand",
    "flower in vietnam",
    "flower in indonesia",
    "flower in philippines",
    "flower in malaysia",
    "flower in singapore",
    "flower in egypt",
    "flower in turkey",
    "flower in greece",
    "flower in portugal",
    "flower in netherlands",
    "flower in belgium",
    "flower in sweden",
    "flower in norway",
    "flower in denmark",
    "flower in finland",
    "flower in poland",
    "flower in ukraine",
]
# Upper bound on images per keyword; each engine stops earlier once it
# runs out of results.
number = 99999

# Each crawler saves its images under the root_dir passed via the storage
# option: p/flower/<engine>_<keyword with spaces replaced by underscores>.
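# For example, after a run the output tree would look something like this
# (icrawler names downloaded files with zero-padded sequence numbers):
#
#   p/flower/google_flower/000001.jpg
#   p/flower/bing_flower_in_hong_kong/000001.jpg
#   p/flower/baidu_flower_in_china/000001.jpg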

def crawler(s_c):
    """Download images for one (search engine, keyword) pair."""
    search_engine, c = s_c
    c_dir = c.replace(" ", "_")
    if search_engine == bing:
        bing_crawler = BingImageCrawler(storage={"root_dir": f"p/flower/bing_{c_dir}"})
        bing_crawler.crawl(keyword=c, filters=None, max_num=number, offset=0)
    elif search_engine == google:
        google_crawler = GoogleImageCrawler(storage={"root_dir": f"p/flower/google_{c_dir}"})
        google_crawler.crawl(keyword=c, filters=None, max_num=number, offset=0)
    elif search_engine == baidu:
        baidu_crawler = BaiduImageCrawler(storage={"root_dir": f"p/flower/baidu_{c_dir}"})
        baidu_crawler.crawl(keyword=c, filters=None, max_num=number, offset=0)
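
# Note: icrawler.builtin also provides GreedyImageCrawler, but it scrapes
# images from given pages rather than running keyword searches, so it does
# not fit the keyword dispatch above. A minimal sketch of a greedy pass,
# with SEED_PAGE as a hypothetical starting page (not part of this script):
#
# from icrawler.builtin import GreedyImageCrawler
#
# SEED_PAGE = "https://example.com/flowers"  # hypothetical seed URL
# greedy_crawler = GreedyImageCrawler(storage={"root_dir": "p/flower/greedy"})
# greedy_crawler.crawl(domains=SEED_PAGE, max_num=number)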

if __name__ == "__main__":  # guard required for multiprocessing on spawn platforms
    process_list = []
    for search_engine in [google, bing, baidu]:
        for c in classes:
            process_list.append([search_engine, c])
    # Shuffle so no single engine is hit with every keyword back to back.
    random.shuffle(process_list)
    with Pool() as pool:
        pool.map(crawler, process_list)
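
# Usage sketch: run the script directly, e.g. `python crawler_flower.py`.
# Each (engine, keyword) pair runs in its own worker process; expect the
# engines to stop well before the max_num ceiling of 99999, so folder
# sizes vary with how many results each engine actually returns.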