update,
This commit is contained in:
86
vinniesniper-54816/task1/_tools/crawer/crawler_horse.py
Normal file
86
vinniesniper-54816/task1/_tools/crawer/crawler_horse.py
Normal file
@@ -0,0 +1,86 @@
|
||||
from icrawler.builtin import BingImageCrawler
|
||||
from icrawler.builtin import FlickrImageCrawler
|
||||
from icrawler.builtin import BaiduImageCrawler
|
||||
from icrawler.builtin import GoogleImageCrawler
|
||||
from icrawler.builtin import GreedyImageCrawler
|
||||
|
||||
# Search-engine name constants.  crawler() dispatches on these values and
# they double as the prefix of each engine's storage directory.
google, bing, baidu, greedy = "google", "bing", "baidu", "greedy"
|
||||
|
||||
|
||||
# Search queries for horse image crawling: two generic phrases plus one
# "horse in <place>" query per region below.  To crawl another subject,
# swap the phrases here.  (The original comment said "beach cats"; this
# file actually crawls horses.)
_regions = [
    "hong kong", "china", "england", "us", "australia", "brazil",
    "india", "japan", "russia", "south africa", "argentina", "mexico",
    "italy", "france", "spain", "germany", "thailand", "vietnam",
    "indonesia", "philippines", "malaysia", "singapore", "egypt",
    "turkey", "greece", "portugal", "netherlands", "belgium", "sweden",
    "norway", "denmark", "finland", "poland", "ukraine",
]
classes = ["horse", "horse over the world"] + [
    f"horse in {region}" for region in _regions
]
|
||||
|
||||
# Per-task download cap.  Effectively "no limit": crawl as many results as
# the engine will return.  Downloaded images are written under the
# per-engine root directories ("p/horse/...") chosen in crawler().
number = 99_999
|
||||
|
||||
from multiprocessing import Pool
|
||||
|
||||
|
||||
def crawler(s_c):
    """Run one image-crawl task.

    Args:
        s_c: two-item sequence ``[search_engine, query]``, where
            ``search_engine`` is one of the engine-name constants defined
            above and ``query`` is the search phrase.

    Side effects: downloads up to ``number`` images into
    ``p/horse/<engine>_<query_with_underscores>/``.
    """
    search_engine = s_c[0]
    c = s_c[1]
    # Use the query text (spaces -> underscores) as the directory suffix.
    c_dir = c.replace(" ", "_")

    # Engine name -> keyword-capable crawler class.  NOTE(review):
    # GreedyImageCrawler is imported and "greedy" tasks are queued by the
    # driver loop, but the greedy crawler crawls domains rather than
    # keywords, so it cannot share this code path.  Previously such tasks
    # fell off the end of the if/elif chain and were dropped silently;
    # now we report them explicitly.
    engines = {
        bing: BingImageCrawler,
        google: GoogleImageCrawler,
        baidu: BaiduImageCrawler,
    }
    crawler_cls = engines.get(search_engine)
    if crawler_cls is None:
        print(f"skipping unsupported search engine: {search_engine}")
        return

    # Constant values equal their names ("bing" etc.), so this reproduces
    # the original per-engine directories bing_*/google_*/baidu_*.
    image_crawler = crawler_cls(
        storage={"root_dir": f"p/horse/{search_engine}_{c_dir}"}
    )
    image_crawler.crawl(keyword=c, filters=None, max_num=number, offset=0)
|
||||
|
||||
|
||||
import random

# One crawl task per (search engine, query) pair.
process_list = [
    [engine, query]
    for engine in [google, bing, baidu, greedy]
    for query in classes
]

# The __main__ guard is required with multiprocessing: worker processes
# re-import this module, and under the spawn start method an unguarded
# Pool would recursively spawn pools of its own.
if __name__ == "__main__":
    # Shuffle so no single engine receives a long run of back-to-back
    # requests from the pool.
    random.shuffle(process_list)
    with Pool() as pool:
        pool.map(crawler, process_list)
|
Reference in New Issue
Block a user