Commit message: update
vinniesniper-54816/task1/_lab/003-crawler-bus/crawler.py (normal file, 14 lines)
@@ -0,0 +1,14 @@
from icrawler.builtin import BingImageCrawler
from icrawler.builtin import FlickrImageCrawler  # imported but unused in this script

# We are building a bus image detector, so the keywords below are all
# bus-related. To collect other images, put those names in the classes list.
classes = ['buses', 'bus', 'bus over the world', 'bus in hong kong',
           'bus in china', 'bus in england', 'bus in us', 'electric bus']
number = 999

# Images are saved under the root directory given to the crawler below,
# i.e. one sub-folder per keyword under 'p/'.
for i in range(99):
    for c in classes:
        bing_crawler = BingImageCrawler(storage={"root_dir": f'p/{c.replace(" ", ".")}'})
        bing_crawler.crawl(keyword=c, filters=None, max_num=number, offset=0)
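Note: the outer loop (for i in range(99)) re-issues the same queries with
offset=0 on every pass, so each pass largely fetches the same result set
again; the duplicate remover further down appears to exist to clean that up.
A minimal alternative sketch that pages with the offset parameter the script
already uses; the keyword subset and page size here are illustrative only,
and it assumes the Bing engine keeps honouring offsets across passes:

    from icrawler.builtin import BingImageCrawler

    classes = ['buses', 'bus', 'electric bus']  # illustrative subset
    page = 100                                  # hypothetical page size

    for i in range(5):
        for c in classes:
            crawler = BingImageCrawler(storage={"root_dir": f'p/{c.replace(" ", ".")}'})
            # advance the offset so each pass requests a different result slice
            crawler.crawl(keyword=c, filters=None, max_num=page, offset=i * page)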
@@ -0,0 +1,9 @@ (negative-image crawler; presumably the crawler_n.py referenced by run.sh below — the file header was lost in the capture)
from icrawler.builtin import BingImageCrawler

classes = ["trees", "roads", "Human faces"]
number = 100

for c in classes:
    # 'n' is the root directory for negative images
    bing_crawler = BingImageCrawler(storage={"root_dir": f'n/{c.replace(" ", ".")}'})
    bing_crawler.crawl(keyword=c, filters=None, max_num=number, offset=0)
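A quick sanity check after crawling, to confirm each keyword folder actually
received images; a hedged sketch that assumes only the folder layout created
by the two scripts above (p/<keyword> and n/<keyword>):

    import os

    # count .jpg files in each keyword folder created by the crawlers
    for base in ('p', 'n'):
        for entry in sorted(os.listdir(base)):
            folder = os.path.join(base, entry)
            if os.path.isdir(folder):
                n_jpg = sum(f.endswith('.jpg') for f in os.listdir(folder))
                print(f'{folder}: {n_jpg} images')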
vinniesniper-54816/task1/_lab/003-crawler-bus/import_bus.sh (executable file, 6 lines)
@@ -0,0 +1,6 @@
#!/usr/bin/env bash

set -ex

cp /home/logic/_wsl_workspace/comission-playlist-2024/vinniesniper-54816-src/task1/_lab/003-crawler/output/*.jpg \
   /home/logic/.keras/datasets/flower_photos/3xx_Bus
vinniesniper-54816/task1/_lab/003-crawler-bus/merge.py (normal file, 16 lines)
@@ -0,0 +1,16 @@
import os
import shutil


def merge_images(input_folder, output_folder):
    # Flatten every .jpg under input_folder into output_folder,
    # renaming each file to a zero-padded running counter.
    if not os.path.exists(output_folder):
        os.mkdir(output_folder)

    count = 0
    for root, _, filenames in os.walk(input_folder):
        for fn in filenames:
            if fn.endswith('.jpg'):
                count += 1
                shutil.copy(os.path.join(root, fn),
                            os.path.join(output_folder, 'c_{:010d}.jpg'.format(count)))


if __name__ == '__main__':
    merge_images('p', 'output')
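For reference, the same flatten can be written with pathlib; a sketch under
the same assumption as merge.py above (only .jpg files matter). sorted()
makes the numbering deterministic across runs:

    import shutil
    from pathlib import Path

    def merge_images(input_folder, output_folder):
        out = Path(output_folder)
        out.mkdir(exist_ok=True)
        # rglob('*.jpg') walks the tree recursively, like os.walk above
        for count, src in enumerate(sorted(Path(input_folder).rglob('*.jpg')), start=1):
            shutil.copy(src, out / f'c_{count:010d}.jpg')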
@@ -0,0 +1,25 @@
import os
import hashlib


def find_dup(directory):
    # Map MD5 hash -> list of file paths with that content.
    files = {}
    for root, _, filenames in os.walk(directory):
        for f in filenames:
            file_path = os.path.join(root, f)
            with open(file_path, "rb") as fh:
                file_hash = hashlib.md5(fh.read()).hexdigest()
            if file_hash in files:
                files[file_hash].append(file_path)
            else:
                files[file_hash] = [file_path]

    for file_hash, file_paths in files.items():
        if len(file_paths) > 1:
            print("dup:", file_hash)
            # Keep the first copy. The original removed every path in the
            # group, which would also delete the last remaining copy.
            for file_path in file_paths[1:]:
                os.remove(file_path)


if __name__ == "__main__":
    find_dup("/home/logic/_wsl_workspace/comission-playlist/vinniesniper-54816/task1/_lab/003-crawler-bus/p/flower")
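Hashing every file reads every byte even when most files are unique. A hedged
optimisation sketch: group by file size first and hash only groups that
collide (same behaviour as find_dup above, keeping the first copy; the name
find_dup_fast is hypothetical):

    import os
    import hashlib
    from collections import defaultdict

    def find_dup_fast(directory):
        by_size = defaultdict(list)
        for root, _, filenames in os.walk(directory):
            for f in filenames:
                path = os.path.join(root, f)
                by_size[os.path.getsize(path)].append(path)
        for paths in by_size.values():
            if len(paths) < 2:
                continue  # a unique size cannot be a duplicate
            by_hash = defaultdict(list)
            for path in paths:
                with open(path, "rb") as fh:
                    by_hash[hashlib.md5(fh.read()).hexdigest()].append(path)
            for dup_paths in by_hash.values():
                for path in dup_paths[1:]:  # keep the first copy
                    os.remove(path)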
vinniesniper-54816/task1/_lab/003-crawler-bus/run.sh (executable file, 9 lines)
@@ -0,0 +1,9 @@
#!/usr/bin/env bash

set -ex

# Start clean: wipe the positive ('p') and negative ('n') image folders.
rm -rf p/*
rm -rf n/*

python ./crawler.py
# python ./crawler_n.py