"""Simple image retrieval demo: scores a query image against a small database
using a color layout descriptor (CLD) and SIFT feature matching."""

import cv2 as cv
import numpy as np
from glob import glob
import os, sys
from pprint import pprint

# the directory of the image database
database_dir = "image.orig"


# Compute pixel-by-pixel difference and return the sum
def compareImgs(img1, img2):
    # resize img2 to img1
    img2 = cv.resize(img2, (img1.shape[1], img1.shape[0]))
    diff = cv.absdiff(img1, img2)
    return diff.sum()


def compareImgs_hist(img1, img2):
    width, height = img1.shape[1], img1.shape[0]
    img2 = cv.resize(img2, (width, height))
    num_bins = 10
    hist1 = [0] * num_bins
    hist2 = [0] * num_bins
    # slightly widened so that pixel value 255 falls into the last bin
    bin_width = 255.0 / num_bins + 1e-4

    # compute histogram from scratch
    # for w in range(width):
    #     for h in range(height):
    #         hist1[int(img1[h, w] / bin_width)] += 1
    #         hist2[int(img2[h, w] / bin_width)] += 1

    # compute histogram by using the OpenCV function
    # https://docs.opencv.org/4.x/d6/dc7/group__imgproc__hist.html#ga4b2b5fd75503ff9e6844cc4dcdaed35d
    # the upper bound of the range is exclusive, so [0, 256] covers the full 8-bit range
    hist1 = cv.calcHist([img1], [0], None, [num_bins], [0, 256]).flatten()
    hist2 = cv.calcHist([img2], [0], None, [num_bins], [0, 256]).flatten()

    total = 0.0
    for i in range(num_bins):
        total += abs(hist1[i] - hist2[i])
    return total / float(width * height)


def color_layout_descriptor(image, num_blocks=8, resize_to_px=256):
    resized_image = cv.resize(image, (resize_to_px, resize_to_px))
    ycrcb_image = cv.cvtColor(resized_image, cv.COLOR_BGR2YCrCb)

    # Step 5: Divide the image into sub-blocks
    block_size = resize_to_px // num_blocks
    blocks = []
    for i in range(num_blocks):
        for j in range(num_blocks):
            block = ycrcb_image[
                i * block_size : (i + 1) * block_size,
                j * block_size : (j + 1) * block_size,
            ]
            blocks.append(block)

    # indices of the centre 2x2 blocks of the grid (27, 28, 35, 36 for 8x8)
    mid = num_blocks // 2
    center_idx = {r * num_blocks + c for r in (mid - 1, mid) for c in (mid - 1, mid)}

    # Step 6: Extract features from each sub-block
    features = []
    center_features = []
    for i, block in enumerate(blocks):
        # Compute the mean and standard deviation of each color channel
        mean_y, mean_cr, mean_cb = np.mean(block, axis=(0, 1))
        std_y, std_cr, std_cb = np.std(block, axis=(0, 1))
        features.extend([mean_y, mean_cr, mean_cb, std_y, std_cr, std_cb])
        if i in center_idx:
            center_features.extend([mean_y, mean_cr, mean_cb, std_y, std_cr, std_cb])

    # Step 7: Concatenate features into a single vector
    cld_full_picture = np.array(features)
    cld_center = np.array(center_features)
    return (cld_full_picture, cld_center)


def colorlayoutCompare(cld1, cld2):
    # Euclidean distance between descriptors, mapped into a similarity in (0, 1]
    distance = np.linalg.norm(cld1 - cld2)
    similarity = 1.0 / (1.0 + distance)
    return (distance, similarity)

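# Optional sanity check for the two functions above. This is a sketch:
# `_cld_sanity_check` is a hypothetical helper added for illustration and is
# not called anywhere in the pipeline. It assumes the query images used by
# retrieval() below ("beach.jpg", "bus.jpg") sit next to this script.
# An image compared with itself should give distance 0.0 and similarity 1.0;
# a different image should score strictly lower.
def _cld_sanity_check():
    img_a = cv.imread("beach.jpg")
    img_b = cv.imread("bus.jpg")
    cld_a, _ = color_layout_descriptor(img_a)
    cld_b, _ = color_layout_descriptor(img_b)
    print("self :", colorlayoutCompare(cld_a, cld_a))  # expect (0.0, 1.0)
    print("cross:", colorlayoutCompare(cld_a, cld_b))  # expect similarity < 1.0
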
def retrieval(choice="3"):
    print("testing retrieval ...")
    # choice = input("Type in the number to choose a category and type enter to confirm\n")
    categories = {"1": "beach", "2": "building", "3": "bus", "4": "dinosaur",
                  "5": "flower", "6": "horse", "7": "man"}
    if choice not in categories:
        print("Invalid choice: %s" % choice)
        return None
    category_name = categories[choice]
    img_input = cv.imread(category_name + ".jpg")
    print("test: %s - %s" % (choice, category_name))

    # src_input = cv.imread("man.jpg")
    # cv.imshow("Input", img_input)

    # change the image to gray scale
    # src_gray = cv.cvtColor(img_input, cv.COLOR_BGR2GRAY)

    # read image database
    database = sorted(glob(database_dir + "/*.jpg"))

    # score every database image against the query
    (cld1, cld1_center) = color_layout_descriptor(img_input)
    descriptors = []
    for img in database:
        print(f"processing {img}", end="\r")
        # read image
        img_rgb = cv.imread(img)
        (cld2, cld2_center) = color_layout_descriptor(img_rgb)
        # CLD similarity over the whole picture and over the centre blocks
        # diff = compareImgs(img_input, img_rgb)  # pixel-by-pixel alternative
        (diff_full, s_full) = colorlayoutCompare(cld1, cld2)
        (diff_center, s_center) = colorlayoutCompare(cld1_center, cld2_center)
        # compare the two images by histogram, uncomment the following line to use histogram
        # diff = compareImgs_hist(src_gray, img_gray)
        len_good_matches = SIFT_compare(img_input, img_rgb)
        descriptors.append([img, img_rgb, s_full, s_center, len_good_matches])
    print("\nprocess done")

    # min-max normalize each score column so the scores are comparable
    normalized_descriptors = []
    max_s_full = max(descriptors, key=lambda x: x[2])[2]
    min_s_full = min(descriptors, key=lambda x: x[2])[2]
    max_s_center = max(descriptors, key=lambda x: x[3])[3]
    min_s_center = min(descriptors, key=lambda x: x[3])[3]
    max_good_matches = max(descriptors, key=lambda x: x[4])[4]
    min_good_matches = min(descriptors, key=lambda x: x[4])[4]
    for descriptor in descriptors:
        normalized_s_full = (descriptor[2] - min_s_full) / (max_s_full - min_s_full)
        normalized_s_center = (descriptor[3] - min_s_center) / (max_s_center - min_s_center)
        normalized_good_matches = (descriptor[4] - min_good_matches) / (max_good_matches - min_good_matches)
        normalized_descriptors.append([descriptor[0], descriptor[1], normalized_s_full,
                                       normalized_s_center, normalized_good_matches])
    print("\nnormalized descriptors done")
    for descriptor in normalized_descriptors:
        print(descriptor[0], descriptor[2], descriptor[3], descriptor[4])

    # dump the normalized scores for offline analysis
    import csv
    with open("descriptor.csv", "w", newline="") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["image", "similarity_full", "similarity_center", "good_matches"])
        for descriptor in normalized_descriptors:
            writer.writerow([descriptor[0], descriptor[2], descriptor[3], descriptor[4]])

    import xlsxwriter
    workbook = xlsxwriter.Workbook("descriptor.xlsx")
    worksheet = workbook.add_worksheet()
    worksheet.write(0, 0, "image")
    worksheet.write(0, 1, "similarity_full")
    worksheet.write(0, 2, "similarity_center")
    worksheet.write(0, 3, "good_matches")
    for row, descriptor in enumerate(normalized_descriptors, start=1):
        worksheet.write(row, 0, descriptor[0])
        worksheet.write(row, 1, descriptor[2])
        worksheet.write(row, 2, descriptor[3])
        worksheet.write(row, 3, descriptor[4])
    workbook.close()

    # sort by s_full, largest (most similar) first
    descriptors.sort(key=lambda x: x[2], reverse=True)

    # collect the scores and SIFT matches of the top five candidates
    diff_with_matches = []
    for descriptor in descriptors[0:5]:
        # SIFT_debug(img_input, descriptor[1])  # uncomment to visualize matches
        full_file_name = descriptor[0]
        img = descriptor[1]
        s_full = descriptor[2]
        s_center = descriptor[3]
        len_good_matches = descriptor[4]  # already computed in the main loop
        # the 0 is a placeholder column kept for the unpack below
        diff_with_matches.append([full_file_name, img, len_good_matches, 0, s_full, s_center])

    matches = sorted(diff_with_matches, key=lambda x: x[4], reverse=True)
    for match in matches:
        pprint((match[0], match[2], match[4], match[5]))
    [full_file_name, closest_img, len_good_matches, _, s_full, s_center] = matches[0]

    cv.imshow("Result", closest_img)
    # cv.waitKey(0)
    cv.destroyAllWindows()

    # the first character of the file name encodes the category
    filename_only = os.path.basename(full_file_name)
    category = filename_only[0]
    print("f:" + filename_only + ": c:" + category)
    return category

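# The min-max normalization inside retrieval() is written out once per score
# column; the sketch below shows the same computation as a reusable helper
# (`_min_max_normalize` is hypothetical, added only for illustration). It also
# guards against the degenerate case where every score is equal, which the
# inline version would turn into a division by zero.
def _min_max_normalize(values):
    lo, hi = min(values), max(values)
    if hi == lo:
        return [0.0 for _ in values]
    return [(v - lo) / (hi - lo) for v in values]
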
def SIFT():
    # fixed demo pair; delegates to SIFT_debug, which does the actual work
    img1 = cv.imread("flower.jpg")
    img2 = cv.imread("image.orig/685.jpg")
    SIFT_debug(img1, img2)


def SIFT_debug(img1, img2):
    if img1 is None or img2 is None:
        print("Error loading images!")
        sys.exit(0)

    # -- Step 1: Detect the keypoints using the SIFT detector, compute the descriptors
    detector = cv.SIFT_create()
    keypoints1, descriptors1 = detector.detectAndCompute(img1, None)
    keypoints2, descriptors2 = detector.detectAndCompute(img2, None)

    # -- Step 2: Match descriptor vectors with a brute force matcher
    matcher = cv.DescriptorMatcher_create(cv.DescriptorMatcher_BRUTEFORCE)
    matches = matcher.match(descriptors1, descriptors2)

    # -- Draw all matches
    img_matches = np.empty(
        (max(img1.shape[0], img2.shape[0]), img1.shape[1] + img2.shape[1], 3),
        dtype=np.uint8,
    )
    cv.drawMatches(img1, keypoints1, img2, keypoints2, matches, img_matches)
    # -- Show detected matches
    cv.imshow("Matches: SIFT (Python)", img_matches)
    cv.waitKey()

    # draw good matches: distance within twice the best distance
    matches = sorted(matches, key=lambda x: x.distance)
    min_dist = matches[0].distance
    good_matches = tuple(filter(lambda x: x.distance <= 2 * min_dist, matches))
    img_matches = np.empty(
        (max(img1.shape[0], img2.shape[0]), img1.shape[1] + img2.shape[1], 3),
        dtype=np.uint8,
    )
    cv.drawMatches(img1, keypoints1, img2, keypoints2, good_matches, img_matches,
                   flags=cv.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS)
    # -- Show detected matches
    cv.imshow("Good Matches: SIFT (Python)", img_matches)
    cv.waitKey()

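# SIFT_compare below keeps every match whose distance is within twice the best
# distance. A common alternative is Lowe's ratio test on the two nearest
# neighbours; the sketch below is a hypothetical drop-in variant
# (`SIFT_compare_ratio` is not called anywhere) built on the standard
# cv.BFMatcher().knnMatch API.
def SIFT_compare_ratio(img1, img2, ratio=0.75):
    detector = cv.SIFT_create()
    _, des1 = detector.detectAndCompute(img1, None)
    _, des2 = detector.detectAndCompute(img2, None)
    if des1 is None or des2 is None or len(des2) < 2:
        return 0
    pairs = cv.BFMatcher().knnMatch(des1, des2, k=2)
    # keep a match only when it is clearly better than the runner-up
    good = [p[0] for p in pairs if len(p) == 2 and p[0].distance < ratio * p[1].distance]
    return len(good)
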
def SIFT_compare(img1, img2):
    """Return the number of 'good' SIFT matches between two images."""
    if img1 is None or img2 is None:
        print("Error loading images!")
        sys.exit(0)

    # -- Step 1: Detect the keypoints using the SIFT detector, compute the descriptors
    detector = cv.SIFT_create()
    _, descriptors1 = detector.detectAndCompute(img1, None)
    _, descriptors2 = detector.detectAndCompute(img2, None)
    if descriptors1 is None or descriptors2 is None:
        return 0  # no features detected in at least one image

    # -- Step 2: Match descriptor vectors with a brute force matcher
    matcher = cv.DescriptorMatcher_create(cv.DescriptorMatcher_BRUTEFORCE)
    matches = matcher.match(descriptors1, descriptors2)
    if not matches:
        return 0

    # count the good matches: distance within twice the best distance
    # (see SIFT_debug above for a version that also visualizes them)
    matches = sorted(matches, key=lambda x: x.distance)
    min_dist = matches[0].distance
    good_matches = tuple(filter(lambda x: x.distance <= 2 * min_dist, matches))
    return len(good_matches)


def test():
    test_result = True
    # categories to test; 1, 2, 4, 7 are also available
    for i in [6]:
        if str(i) == retrieval(str(i)):
            print("test ok")
        else:
            test_result = False
    if test_result:
        print("all test ok")
    else:
        print("some test failed")


def main():
    # run the regression test and stop; comment out the next two lines
    # to reach the interactive demo menu below
    test()
    sys.exit()

    print("1: Image retrieval demo")
    print("2: SIFT demo")
    number = int(input("Type in the number to choose a demo and type enter to confirm\n"))
    if number == 1:
        retrieval()
    elif number == 2:
        SIFT()
    else:
        print("Invalid input")
        sys.exit()


if __name__ == "__main__":
    main()