How to detect a rectangular box in an image using Python OpenCV

I just started with Python and OpenCV very recently, so I am not very knowledgeable in them yet.

I am trying to detect the greyish rectangular box in a list of images.

The current version of the code works quite well, but sometimes the greyish box is still not detected correctly.

I would appreciate it if someone with more experience could help me detect the greyish rectangular box consistently.

Is my approach even a good one? Is there a better approach that I am just not aware of?

(I can only use the CPU currently.)

This is the current version of the code:

import cv2
import matplotlib.pyplot as plt
import os

def process_and_save_images(input_dir, save_subdir="processed_7"):
    # Ensure the output directory exists
    save_dir = os.path.join(input_dir, save_subdir)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    # List all files in the input directory
    files = [f for f in os.listdir(input_dir) if os.path.isfile(os.path.join(input_dir, f))]

    # Filter out only the PNG files (you can add other formats if needed)
    image_paths = [os.path.join(input_dir, f) for f in files if f.lower().endswith('.png')]

    for image_path in image_paths:
        # Load image, grayscale, adaptive threshold
        image = cv2.imread(image_path)
        
        # Crop the bottom third of the image
        height, width, _ = image.shape
        cropped = image[int(2*height/3):, :]
        result_cropped = cropped.copy()

        gray = cv2.cvtColor(cropped, cv2.COLOR_BGR2GRAY)
        thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 51, 9)

        # Fill rectangular contours
        cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        cnts = cnts[0] if len(cnts) == 2 else cnts[1]
        for c in cnts:
            cv2.drawContours(thresh, [c], -1, (255,255,255), -1)

        # Morph open
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9,9))
        opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=2)

        # Closing operation
        closing = cv2.morphologyEx(opening, cv2.MORPH_CLOSE, kernel, iterations=2)

        # Find the contour with the largest area
        cnts = cv2.findContours(closing, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        cnts = cnts[0] if len(cnts) == 2 else cnts[1]

        
        if cnts:
            largest_contour = max(cnts, key=cv2.contourArea)
            
            # Draw the bounding rectangle of the largest contour
            x, y, w, h = cv2.boundingRect(largest_contour)
            cv2.rectangle(result_cropped, (x, y), (x + w, y + h), (36, 255, 12), 3)
        else:
            print(f"No contours found in {image_path}; nothing drawn.")

        # Use matplotlib to display the images and save them
        plt.figure(figsize=(20, 10))

        plt.subplot(1, 4, 1)
        plt.imshow(thresh, cmap='gray')
        plt.title('Thresholded Image')

        plt.subplot(1, 4, 2)
        plt.imshow(opening, cmap='gray')
        plt.title('Morphological Opening')

        plt.subplot(1, 4, 3)
        plt.imshow(closing, cmap='gray')
        plt.title('Morphological Closing')        

        plt.subplot(1, 4, 4)
        plt.imshow(cv2.cvtColor(result_cropped, cv2.COLOR_BGR2RGB))
        plt.title('Image with Largest Rectangle')

        plt.tight_layout()

        # Save the figure
        filename = os.path.basename(image_path).replace('.png', '_processed.png')
        output_path = os.path.join(save_dir, filename)
        plt.savefig(output_path)
        plt.close()  # Close the current figure to release memory

    print("Processing and saving completed.")

# Example usage:
process_and_save_images('./temporal')
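
One variation that might help with the failure cases (the area, aspect-ratio and extent thresholds below are only guesses and would need tuning, they are not from the code above) is to score candidate contours by how box-like they are instead of simply taking the largest one, roughly like this:

import cv2

def pick_box_contour(contours, crop_shape, min_area_ratio=0.01):
    # Prefer a wide, mostly-solid contour over simply the largest one.
    # All thresholds here are guesses and would need tuning per video.
    crop_h, crop_w = crop_shape[:2]
    best, best_area = None, 0.0
    for c in contours:
        area = cv2.contourArea(c)
        if area < min_area_ratio * crop_h * crop_w:
            continue                              # ignore tiny blobs
        x, y, w, h = cv2.boundingRect(c)
        aspect = w / float(h)
        extent = area / float(w * h)              # fraction of the bounding box that is filled
        if aspect < 2.0 or extent < 0.6:
            continue                              # a subtitle box is wide and nearly solid
        if area > best_area:
            best, best_area = c, area
    return best

# inside the loop, instead of max(cnts, key=cv2.contourArea):
# largest_contour = pick_box_contour(cnts, cropped.shape)  (with a None check before drawing)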

Here are the results for the set of images I am testing the code with.
https://imgur.com/a/7C78U9l

Here are the original images:
https://imgur.com/a/XKB6KHR

Here are some examples where the current approach fails:
(three screenshots of failure cases)

  • It might not be super related, but since the box is for subtitles, perhaps you can try OpenCV along with pytesseract's OCR to detect the bounding boxes of any text, and combine that with the binary mask you obtained? (A rough sketch of this idea follows below.)
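
For illustration, here is a rough sketch of that OCR idea (it assumes Tesseract and the pytesseract package are installed; 'frame.png', the crop, and the confidence cut-off are placeholders, not something from the original post):

import cv2
import pytesseract

img = cv2.imread('frame.png')                  # placeholder path to one input frame
height = img.shape[0]
bottom = img[int(2 * height / 3):, :]          # same bottom-third crop as in the question

# Per-word bounding boxes from Tesseract
data = pytesseract.image_to_data(bottom, output_type=pytesseract.Output.DICT)

word_boxes = []
for i, text in enumerate(data['text']):
    if text.strip() and float(data['conf'][i]) > 50:   # keep confident, non-empty words
        word_boxes.append((data['left'][i], data['top'][i],
                           data['width'][i], data['height'][i]))

if word_boxes:
    # The union of the word boxes approximates the text region; the grey box
    # should enclose it, so it can be used to validate or seed the OpenCV mask.
    xs = [x for x, y, w, h in word_boxes] + [x + w for x, y, w, h in word_boxes]
    ys = [y for x, y, w, h in word_boxes] + [y + h for x, y, w, h in word_boxes]
    x0, y0, x1, y1 = min(xs), min(ys), max(xs), max(ys)
    cv2.rectangle(bottom, (x0, y0), (x1, y1), (0, 0, 255), 2)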

I think you can use cv.grabCut, which is more or less designed for tasks like this.
And a note on your code: at the final stage you can use cv.HoughLines to find straight lines instead of contours, which are sensitive to the extra parts you segment.

Anyway, here is my code. I think it works fine, but I have only tested it on a handful of your images.

import cv2 as cv
import numpy as np


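# IMAGE stands in for the path to one of the input frames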
img = cv.imread(IMAGE)

height, width, _ = img.shape

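# Corner points of a region covering roughly the bottom third of the frame (where the grey box sits)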
p0 = (5, int(2*height/3))
p1 = (width - 5, height - 5)

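# Keep only dark greyish pixels; these BGR bounds roughly target the grey subtitle box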
color_mask = cv.inRange(img, (15, 15, 15), (100, 100, 100))
color_mask = color_mask // 255

color_masked = img * color_mask[:, :, None]

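# grabCut working buffers: the label mask and the background/foreground model arrays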
mask = np.zeros(img.shape[:2],np.uint8)
bgdModel = np.zeros((1,65),np.float64)
fgdModel = np.zeros((1,65),np.float64)

# cv.grabCut expects the ROI as (x, y, width, height), not two corner points
rect = (p0[0], p0[1], p1[0] - p0[0], p1[1] - p0[1])
cv.grabCut(color_masked, mask, rect, bgdModel, fgdModel, 10, cv.GC_INIT_WITH_RECT)

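# Pixels labelled (probable) background become 0, (probable) foreground become 1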
grab_mask = np.where((mask == 2) | (mask == 0), 0, 1).astype(np.uint8)
masked = color_masked * grab_mask[:,:, np.newaxis]

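# Clean up the segmented region: closing to fill gaps, then opening to drop small specks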
kernel = cv.getStructuringElement(cv.MORPH_RECT, (5, 5))
closing = cv.morphologyEx(masked, cv.MORPH_CLOSE, kernel, iterations=2)
opening = cv.morphologyEx(closing, cv.MORPH_OPEN, kernel, iterations=2)



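# Edge detection plus a probabilistic Hough transform to recover the box's straight edges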
canny = cv.Canny(opening, 50, 150)
lines = cv.HoughLinesP(canny, 0.5, np.pi/360, 50)

if lines is not None:
    # Draw every detected line segment, then box them all in
    for line in lines:
        x0, y0, x1, y1 = line[0]
        cv.line(img, (x0, y0), (x1, y1), (255, 0, 0))

    box = cv.boundingRect(lines.reshape(-1, 2))
    cv.rectangle(img, box, (0, 0, 255))

cv.imshow('Result', img)
cv.waitKey(0)
cv.destroyAllWindows()

It's mostly your code, but I used cv.grabCut to segment part of the caption area (instead of thresholding) and used cv.HoughLinesP at the end.

You may have to adjust some parameters and keep testing and tweaking the code to make it work for every image. You might also consider using OCR, as M Ciel suggested; I think it is a more standard approach.
