This commit is contained in:
louiscklaw
2025-01-31 22:57:47 +08:00
parent b1cd1d4662
commit b3cc8e8323
764 changed files with 722101 additions and 0 deletions

View File

@@ -0,0 +1,14 @@
from icrawler.builtin import BingImageCrawler
from icrawler.builtin import FlickrImageCrawler  # NOTE(review): unused import — confirm before removing

# Positive ("p") image set: bus photos for the bus-detection model.
# To crawl other subjects, change the keywords in `classes`.
classes = ['buses','bus','bus over the world','bus in hong kong','bus in china','bus in england', 'bus in us','electric bus']
number = 999  # max images requested per keyword per pass

# Downloads are stored under p/<keyword-with-dots>/ relative to the
# current working directory.
for i in range(99):
    for c in classes:
        # One crawler per keyword so each class gets its own storage dir.
        bing_crawler = BingImageCrawler(storage={"root_dir": f'p/{c.replace(" ",".")}'})
        # BUG FIX: the original passed offset=0 on every pass, so all 99
        # iterations re-requested the same first `number` results and only
        # produced duplicates. Advance the offset each pass to ask for
        # fresh results instead. (Bing may cap how deep the offset can go —
        # later passes can legitimately return nothing.)
        bing_crawler.crawl(keyword=c, filters=None, max_num=number, offset=i * number)

View File

@@ -0,0 +1,9 @@
from icrawler.builtin import BingImageCrawler

# Negative ("n") sample set: images that do NOT contain buses,
# used as counter-examples for the detector.
classes = ["trees", "roads", "Human faces"]
number = 100

for keyword in classes:
    # Each keyword gets its own folder under n/, with spaces replaced by dots.
    storage_dir = f'n/{keyword.replace(" ",".")}'
    crawler = BingImageCrawler(storage={"root_dir": storage_dir})
    crawler.crawl(keyword=keyword, filters=None, max_num=number, offset=0)

View File

@@ -0,0 +1,6 @@
#!/usr/bin/env bash
set -ex

# Copy every crawled bus image into the Keras flower_photos dataset
# directory as the extra "3xx_Bus" class.
SRC='/home/logic/_wsl_workspace/comission-playlist-2024/vinniesniper-54816-src/task1/_lab/003-crawler/output'
DEST='/home/logic/.keras/datasets/flower_photos/3xx_Bus'

cp "${SRC}"/*.jpg "${DEST}"

View File

@@ -0,0 +1,16 @@
import os
import shutil
def merge_images(input_folder, output_folder):
    """Collect every .jpg under *input_folder* (recursively) into
    *output_folder*, renaming them c_0000000001.jpg, c_0000000002.jpg, ...

    The output directory (including missing parents) is created if needed.
    """
    # FIX: os.makedirs(..., exist_ok=True) replaces the original
    # exists-check + os.mkdir, which raised when the parent directory was
    # missing and was racy between the check and the create.
    os.makedirs(output_folder, exist_ok=True)
    count = 0
    for root, _, filenames in os.walk(input_folder):
        for fn in filenames:
            # Case-insensitive match so files like FOO.JPG are not skipped.
            if fn.lower().endswith('.jpg'):
                count += 1
                shutil.copy(os.path.join(root, fn),
                            os.path.join(output_folder, 'c_{:010d}.jpg'.format(count)))


if __name__ == '__main__':
    # Merge the positive crawl results ("p") into a flat "output" folder.
    merge_images('p', 'output')

View File

@@ -0,0 +1,25 @@
import os
import hashlib
def find_dup(dir):
    """Find files under *dir* with identical content (MD5 of the bytes)
    and delete the duplicates, keeping the first path seen in each group.

    Prints the hash of every duplicate group before removing.
    """
    # content-hash -> list of paths whose bytes hash to it
    files = {}
    for root, _, filenames in os.walk(dir):
        for f in filenames:
            file_path = os.path.join(root, f)
            with open(file_path, "rb") as file:
                file_hash = hashlib.md5(file.read()).hexdigest()
            files.setdefault(file_hash, []).append(file_path)
    for file_hash, file_paths in files.items():
        if len(file_paths) > 1:
            print("dup:", file_hash)
            # BUG FIX: the original removed *every* path in the group,
            # deleting the last remaining copy as well and losing the file
            # entirely. Keep the first copy; delete only the extras.
            for file_path in file_paths[1:]:
                os.remove(file_path)


if __name__ == "__main__":
    find_dup("/home/logic/_wsl_workspace/comission-playlist/vinniesniper-54816/task1/_lab/003-crawler-bus/p/flower")

View File

@@ -0,0 +1,25 @@
import os
import hashlib
def find_dup(dir):
    """Find files under *dir* with identical content (MD5 of the bytes)
    and delete the duplicates, keeping the first path seen in each group.

    Prints the hash of every duplicate group before removing.
    """
    # content-hash -> list of paths whose bytes hash to it
    files = {}
    for root, _, filenames in os.walk(dir):
        for f in filenames:
            file_path = os.path.join(root, f)
            with open(file_path, "rb") as file:
                file_hash = hashlib.md5(file.read()).hexdigest()
            files.setdefault(file_hash, []).append(file_path)
    for file_hash, file_paths in files.items():
        if len(file_paths) > 1:
            print("dup:", file_hash)
            # BUG FIX: the original removed *every* path in the group,
            # deleting the last remaining copy as well and losing the file
            # entirely. Keep the first copy; delete only the extras.
            for file_path in file_paths[1:]:
                os.remove(file_path)


if __name__ == "__main__":
    find_dup("/home/logic/_wsl_workspace/comission-playlist/vinniesniper-54816/task1/_lab/003-crawler-bus/p/flower")

View File

@@ -0,0 +1,25 @@
import os
import hashlib
def find_dup(dir):
    """Find files under *dir* with identical content (MD5 of the bytes)
    and delete the duplicates, keeping the first path seen in each group.

    Prints the hash of every duplicate group before removing.
    """
    # content-hash -> list of paths whose bytes hash to it
    files = {}
    for root, _, filenames in os.walk(dir):
        for f in filenames:
            file_path = os.path.join(root, f)
            with open(file_path, "rb") as file:
                file_hash = hashlib.md5(file.read()).hexdigest()
            files.setdefault(file_hash, []).append(file_path)
    for file_hash, file_paths in files.items():
        if len(file_paths) > 1:
            print("dup:", file_hash)
            # BUG FIX: the original removed *every* path in the group,
            # deleting the last remaining copy as well and losing the file
            # entirely. Keep the first copy; delete only the extras.
            for file_path in file_paths[1:]:
                os.remove(file_path)


if __name__ == "__main__":
    find_dup("/home/logic/_wsl_workspace/comission-playlist/vinniesniper-54816/task1/_lab/003-crawler-bus/p/flower")

View File

@@ -0,0 +1,25 @@
import os
import hashlib
def find_dup(dir):
    """Find files under *dir* with identical content (MD5 of the bytes)
    and delete the duplicates, keeping the first path seen in each group.

    Prints the hash of every duplicate group before removing.
    """
    # content-hash -> list of paths whose bytes hash to it
    files = {}
    for root, _, filenames in os.walk(dir):
        for f in filenames:
            file_path = os.path.join(root, f)
            with open(file_path, "rb") as file:
                file_hash = hashlib.md5(file.read()).hexdigest()
            files.setdefault(file_hash, []).append(file_path)
    for file_hash, file_paths in files.items():
        if len(file_paths) > 1:
            print("dup:", file_hash)
            # BUG FIX: the original removed *every* path in the group,
            # deleting the last remaining copy as well and losing the file
            # entirely. Keep the first copy; delete only the extras.
            for file_path in file_paths[1:]:
                os.remove(file_path)


if __name__ == "__main__":
    find_dup("/home/logic/_wsl_workspace/comission-playlist/vinniesniper-54816/task1/_lab/003-crawler-bus/p/flower")

View File

@@ -0,0 +1,9 @@
#!/usr/bin/env bash
set -ex

# Wipe the previous crawl output before a fresh run.
rm -rf p/* n/*

# Re-crawl the positive set; the negative crawler stays disabled for now.
python ./crawler.py
# python ./crawler_n.py