# Image-scraping script: downloads images for every keyword in `classes`
# from Google, Bing and Baidu (via icrawler), one worker process per
# (engine, keyword) pair.
from icrawler.builtin import BingImageCrawler
|
|
from icrawler.builtin import FlickrImageCrawler
|
|
from icrawler.builtin import BaiduImageCrawler
|
|
from icrawler.builtin import GoogleImageCrawler
|
|
from icrawler.builtin import GreedyImageCrawler
|
|
|
|
# Search-engine identifiers; each value doubles as the storage-directory
# prefix and is matched against the first element of a job in crawler().
google = "google"
bing = "bing"
baidu = "baidu"
# NOTE(review): "greedy" (and the GreedyImageCrawler / FlickrImageCrawler
# imports above) are never used by crawler() below — dead unless a matching
# branch is added.
greedy = "greedy"
|
|
# Search keywords to crawl, one storage directory per keyword per engine.
# To crawl different subjects, replace the entries in this list.
classes = [
    "african people",
    "african people over the world",
    "african people in hong kong",
    "african people in china",
    "african people in england",
    "african people in us",
    "african people in australia",
    "african people in brazil",
    "african people in india",
    "african people in japan",
    "african people in russia",
    "african people in south africa",
    "african people in argentina",
    "african people in mexico",
    "african people in italy",
    "african people in france",
    "african people in spain",
    "african people in germany",
    "african people in thailand",
    "african people in vietnam",
    "african people in indonesia",
    "african people in philippines",
    "african people in malaysia",
    "african people in singapore",
    "african people in egypt",
    "african people in turkey",
    "african people in greece",
    "african people in portugal",
    "african people in netherlands",
    "african people in belgium",
    "african people in sweden",
    "african people in norway",
    "african people in denmark",
    "african people in finland",
    "african people in poland",
    "african people in ukraine",
]

# Per-engine cap on downloaded images, passed to crawl() as max_num.
number = 99999

# Downloaded images are written under p/african/<engine>_<keyword>/
# relative to the working directory (see the storage root_dir in crawler()).
|
from multiprocessing import Pool
|
|
|
|
|
|
def crawler(s_c):
    """Download images for one (search_engine, keyword) job.

    Args:
        s_c: Two-item sequence ``[search_engine, keyword]`` where
            ``search_engine`` is one of the module constants ``google``,
            ``bing`` or ``baidu``, and ``keyword`` is the search phrase.

    Side effects:
        Saves images under ``p/african/<engine>_<keyword_with_underscores>/``.
        Unknown engine names are silently ignored (same as the original
        if/elif chain falling through).
    """
    search_engine, keyword = s_c
    # Spaces are not filesystem-friendly; use underscores in directory names.
    keyword_dir = keyword.replace(" ", "_")

    # Dispatch table replaces three copy-pasted branches that differed only
    # in the crawler class and directory prefix.
    engines = {
        bing: BingImageCrawler,
        google: GoogleImageCrawler,
        baidu: BaiduImageCrawler,
    }
    crawler_cls = engines.get(search_engine)
    if crawler_cls is None:
        return  # unknown engine: deliberate no-op, matching original behavior

    image_crawler = crawler_cls(
        storage={"root_dir": f"p/african/{search_engine}_{keyword_dir}"}
    )
    image_crawler.crawl(keyword=keyword, filters=None, max_num=number, offset=0)
|
|
|
|
|
|
import random

# Build every (engine, keyword) job up front: 3 engines x len(classes) jobs.
process_list = [
    [search_engine, c]
    for search_engine in (google, bing, baidu)
    for c in classes
]

# The __main__ guard is REQUIRED for multiprocessing: under the "spawn"
# start method (Windows, and the macOS default), worker processes re-import
# this module; without the guard each worker would recursively create its
# own Pool and re-launch the crawl.
if __name__ == "__main__":
    # Shuffle so engines/keywords are interleaved across workers rather
    # than hammering one search engine with all its queries first.
    random.shuffle(process_list)
    with Pool() as pool:
        pool.map(crawler, process_list)