# Bulk-download bus images from several search engines with icrawler,
# one crawl job per (engine, query) pair, run in parallel with a process pool.
from multiprocessing import Pool
import random

from icrawler.builtin import BaiduImageCrawler, BingImageCrawler, GoogleImageCrawler

google = "google"
bing = "bing"
baidu = "baidu"

# Search queries to download. This script collects bus images; to collect a
# different class, replace these strings with your own queries.
classes = [
    "bus",
    "bus over the world",
    "bus in hong kong",
    "bus in china",
    "bus in england",
    "bus in us",
    "bus in australia",
    "bus in brazil",
    "bus in india",
    "bus in japan",
    "bus in russia",
    "bus in south africa",
    "bus in argentina",
    "bus in mexico",
    "bus in italy",
    "bus in france",
    "bus in spain",
    "bus in germany",
    "bus in thailand",
    "bus in vietnam",
    "bus in indonesia",
    "bus in philippines",
    "bus in malaysia",
    "bus in singapore",
    "bus in egypt",
    "bus in turkey",
    "bus in greece",
    "bus in portugal",
    "bus in netherlands",
    "bus in belgium",
    "bus in sweden",
    "bus in norway",
    "bus in denmark",
    "bus in finland",
    "bus in poland",
    "bus in ukraine",
]

# Upper bound on images per query; the engines return far fewer results in
# practice, so this effectively means "as many as possible".
number = 99999

# Images are saved under the root_dir passed to each crawler below,
# e.g. p/bus/bing_bus_in_japan/ for the Bing crawl of "bus in japan".


def crawler(s_c):
    """Run one crawl job: s_c is a [search_engine, query] pair."""
    search_engine, c = s_c
    c_dir = c.replace(" ", "_")
    if search_engine == bing:
        bing_crawler = BingImageCrawler(storage={"root_dir": f"p/bus/bing_{c_dir}"})
        bing_crawler.crawl(keyword=c, filters=None, max_num=number, offset=0)
    elif search_engine == google:
        google_crawler = GoogleImageCrawler(storage={"root_dir": f"p/bus/google_{c_dir}"})
        google_crawler.crawl(keyword=c, filters=None, max_num=number, offset=0)
    elif search_engine == baidu:
        baidu_crawler = BaiduImageCrawler(storage={"root_dir": f"p/bus/baidu_{c_dir}"})
        baidu_crawler.crawl(keyword=c, filters=None, max_num=number, offset=0)


if __name__ == "__main__":
    # Build the full list of (engine, query) jobs and shuffle them so that
    # requests to any single engine are spread out over time.
    process_list = []
    for search_engine in [google, bing, baidu]:
        for c in classes:
            process_list.append([search_engine, c])
    random.shuffle(process_list)

    with Pool() as pool:
        pool.map(crawler, process_list)
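
# Optional: icrawler also ships a GreedyImageCrawler, which the keyword-based
# loop above cannot drive because it harvests images from whole web pages
# given by URL rather than from a search query. A minimal sketch follows;
# the URL and the output directory are hypothetical placeholders.
#
#     from icrawler.builtin import GreedyImageCrawler
#
#     greedy_crawler = GreedyImageCrawler(storage={"root_dir": "p/bus/greedy_example"})
#     # domains accepts a single URL or a list of URLs to pull images from.
#     greedy_crawler.crawl(domains="https://example.com/buses", max_num=100)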