from multiprocessing import Pool
import random

from icrawler.builtin import BaiduImageCrawler, BingImageCrawler, GoogleImageCrawler

google = "google"
bing = "bing"
baidu = "baidu"

# We are collecting horse images for detection. To crawl a different subject,
# replace the entries in the `classes` list with your own search keywords.
classes = [
    "horse",
    "horse over the world",
    "horse in hong kong",
    "horse in china",
    "horse in england",
    "horse in us",
    "horse in australia",
    "horse in brazil",
    "horse in india",
    "horse in japan",
    "horse in russia",
    "horse in south africa",
    "horse in argentina",
    "horse in mexico",
    "horse in italy",
    "horse in france",
    "horse in spain",
    "horse in germany",
    "horse in thailand",
    "horse in vietnam",
    "horse in indonesia",
    "horse in philippines",
    "horse in malaysia",
    "horse in singapore",
    "horse in egypt",
    "horse in turkey",
    "horse in greece",
    "horse in portugal",
    "horse in netherlands",
    "horse in belgium",
    "horse in sweden",
    "horse in norway",
    "horse in denmark",
    "horse in finland",
    "horse in poland",
    "horse in ukraine",
]

# Upper bound on images requested per keyword; each search engine returns far
# fewer results in practice, so this simply asks for as many as it will give.
number = 99999

# Downloaded images are saved under the `root_dir` passed to each crawler,
# e.g. p/horse/bing_horse_in_japan, relative to the directory the script is
# run from.


def crawler(s_c):
    """Download images for one (search_engine, keyword) pair."""
    search_engine, c = s_c
    c_dir = c.replace(" ", "_")
    if search_engine == bing:
        bing_crawler = BingImageCrawler(storage={"root_dir": f"p/horse/bing_{c_dir}"})
        bing_crawler.crawl(keyword=c, filters=None, max_num=number, offset=0)
    elif search_engine == google:
        google_crawler = GoogleImageCrawler(storage={"root_dir": f"p/horse/google_{c_dir}"})
        google_crawler.crawl(keyword=c, filters=None, max_num=number, offset=0)
    elif search_engine == baidu:
        baidu_crawler = BaiduImageCrawler(storage={"root_dir": f"p/horse/baidu_{c_dir}"})
        baidu_crawler.crawl(keyword=c, filters=None, max_num=number, offset=0)


if __name__ == "__main__":
    # Build one job per (search engine, keyword) combination and shuffle the
    # list so requests are spread across engines rather than sent in bursts.
    process_list = [[search_engine, c] for search_engine in [google, bing, baidu] for c in classes]
    random.shuffle(process_list)
    with Pool() as pool:
        pool.map(crawler, process_list)