Computer vision involves extracting information from visual data and allows us to perform complex tasks such as classification, prediction, recognition, and much more! In this Answer, we'll look at how to detect text in media using Tesseract, a classic application of optical character recognition.
Optical character recognition (OCR) is a technology that enables machines to interpret images of text and convert them into machine-readable formats, unlocking the information held in printed or handwritten text.
Simply put, the goal of OCR is to take characters as humans perceive them and convert them into machine-encoded text.
The concept of optical character recognition is used in text detection, where we aim to identify and recognize the text found within an image or a video. We will look into its implementation and applications shortly.
In this Answer, we will perform text detection using a Python library named Tesseract. Tesseract is an open-source OCR engine developed by Google that allows the conversion of text in media to machine-encoded text and is known to be efficient and accurate.
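Note that `pytesseract` is only a wrapper: the Tesseract engine itself must be installed separately (for example, via `apt install tesseract-ocr` on Debian/Ubuntu or `brew install tesseract` on macOS; these package names are assumptions for those platforms). A quick sketch for checking that the engine is available:

```python
import shutil

# pytesseract shells out to the `tesseract` binary, so it must be on your PATH.
# If it lives elsewhere, you can point pytesseract at it explicitly via
# pytesseract.pytesseract.tesseract_cmd = "/path/to/tesseract" (hypothetical path).
path = shutil.which("tesseract")
if path:
    print(f"Tesseract engine found at: {path}")
else:
    print("Tesseract engine not found; install it before using pytesseract.")
```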
We'll learn how to load an image, detect its text, and visualize it.
```python
from pytesseract import *
import cv2
```
Let's start by importing the necessary modules.
- `cv2` is used for image processing.
- `pytesseract` is used for text detection.
```python
def process_image(image_path):
    img = cv2.imread(image_path)
    rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    save_text = image_to_data(rgb, output_type=Output.DICT)
```
The `process_image` function takes an image path as input and performs text detection on the image. It reads the image using `cv2.imread`, converts it to RGB format using `cv2.cvtColor`, and then uses `image_to_data` from `pytesseract` to extract the text data as a dictionary. The dictionary format lets us handle multiple words, each with its own position and confidence.
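To make the dictionary layout concrete, here is a small hand-made stand-in for what `image_to_data` returns with `Output.DICT` (the values are illustrative, not real Tesseract output): parallel lists, where index `i` describes the `i`-th detected text block.

```python
# Illustrative stand-in for image_to_data(..., output_type=Output.DICT):
# parallel lists, one entry per detected block (all values are made up).
save_text = {
    "left":   [10, 120],
    "top":    [40, 40],
    "width":  [100, 80],
    "height": [20, 20],
    "text":   ["Hello", "world"],
    "conf":   ["96", "88"],  # confidences may arrive as strings, hence int(...)
}

for i in range(len(save_text["text"])):
    x, y = save_text["left"][i], save_text["top"][i]
    w, h = save_text["width"][i], save_text["height"][i]
    print(f'"{save_text["text"][i]}" at ({x}, {y}) size {w}x{h}, conf {int(save_text["conf"][i])}')
```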
```python
    for i in range(0, len(save_text["text"])):
        x = save_text["left"][i]
        y = save_text["top"][i]
        w = save_text["width"][i]
        h = save_text["height"][i]
        text = save_text["text"][i]
        confidence_level = int(save_text["conf"][i])
```
We then create a loop to iterate over each detected text block in the image.
Next, we extract the bounding box coordinates, i.e., (x, y), and dimensions, i.e., (w, h), from the `save_text` dictionary. Along with that, we get the detected text and its confidence level for each text block.
```python
        if confidence_level > 75:
            cv2.rectangle(img, (x, y), (x + w, y + h), (0, 0, 0), 2)
            (text_width, text_height), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 1)
            cv2.rectangle(img, (x, y - text_height - 5), (x + text_width, y), (255, 255, 255), -1)
            cv2.putText(img, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 1)
```
Using the `if` statement, the code filters out low-confidence text. If `confidence_level` is greater than 75 (a threshold you can adjust), we draw a rectangle around the detected text using `cv2.rectangle` and write the text above it with `cv2.putText`, in black on a white background.
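The thresholding step can be sketched on its own with illustrative values. As an aside, Tesseract typically reports a confidence of -1 for non-text rows (such as block or line separators), which a positive threshold also discards:

```python
# Confidence filtering sketch; the detections below are made-up examples.
detections = [("Hello", 96), ("w0rld", 60), ("", -1)]
threshold = 75  # raise for fewer false positives, lower to keep more text

kept = [text for text, conf in detections if conf > threshold]
print(kept)  # → ['Hello']
```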
```python
    return img
```
Finally, we return the image with the detected text and bounding boxes drawn on it.
```python
if __name__ == "__main__":
    input_image_path = 'text7.png'
    processed_image = process_image(input_image_path)
    cv2.imshow("Image", processed_image)
    cv2.waitKey(0)
```
Finally, we create our `main` block. The `process_image` function is called with the image path of our choice, and the processed image is displayed using `cv2.imshow`. The window displaying the image is kept open until any key is pressed, i.e., `cv2.waitKey(0)`.
Putting all the code together now, we can detect texts in images effectively.
```python
from pytesseract import *
import cv2

def process_image(image_path):
    img = cv2.imread(image_path)
    rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    save_text = image_to_data(rgb, output_type=Output.DICT)

    for i in range(0, len(save_text["text"])):
        x = save_text["left"][i]
        y = save_text["top"][i]
        w = save_text["width"][i]
        h = save_text["height"][i]
        text = save_text["text"][i]
        confidence_level = int(save_text["conf"][i])

        if confidence_level > 75:
            cv2.rectangle(img, (x, y), (x + w, y + h), (0, 0, 0), 2)
            (text_width, text_height), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 1)
            cv2.rectangle(img, (x, y - text_height - 5), (x + text_width, y), (255, 255, 255), -1)
            cv2.putText(img, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 1)

    return img

if __name__ == "__main__":
    input_image_path = 'sample_img.png'
    processed_image = process_image(input_image_path)
    cv2.imshow("Image", processed_image)
    cv2.waitKey(0)
```
Let's take a look at the output of the above code below. We can see how a box is drawn around the text, and the detected text is written above it.
If you want to copy the detected text, you can also print it to the terminal.
```python
from pytesseract import *
import cv2

def process_image(image_path):
    img = cv2.imread(image_path)
    rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    save_text = image_to_data(rgb, output_type=Output.DICT)

    for i in range(0, len(save_text["text"])):
        x = save_text["left"][i]
        y = save_text["top"][i]
        w = save_text["width"][i]
        h = save_text["height"][i]
        text = save_text["text"][i]
        confidence_level = int(save_text["conf"][i])

        if confidence_level > 75:
            cv2.rectangle(img, (x, y), (x + w, y + h), (0, 0, 0), 2)
            (text_width, text_height), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 1)
            cv2.rectangle(img, (x, y - text_height - 5), (x + text_width, y), (255, 255, 255), -1)
            cv2.putText(img, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 1)
            print(f"Confidence: {confidence_level}")
            print(f"Text: {text}\n")

    return img

if __name__ == "__main__":
    input_image_path = 'sample_img.png'
    processed_image = process_image(input_image_path)
    cv2.imshow("Image", processed_image)
    cv2.waitKey(0)
```
This is how the text is shown on the terminal, along with the confidence levels.
Using the same logic, we can even detect text in videos. This is achieved by breaking the video down frame by frame and applying Tesseract detection to each frame. Due to motion between frames, this may be less accurate than detecting text in still images.
```python
from pytesseract import *
import cv2

def process_image(image):
    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    save_text = image_to_data(rgb, output_type=Output.DICT)

    for i in range(0, len(save_text["text"])):
        x = save_text["left"][i]
        y = save_text["top"][i]
        w = save_text["width"][i]
        h = save_text["height"][i]
        text = save_text["text"][i]
        confidence_level = int(save_text["conf"][i])

        if confidence_level > 75:
            cv2.rectangle(image, (x, y), (x + w, y + h), (0, 0, 0), 2)
            (text_width, text_height), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 1)
            cv2.rectangle(image, (x, y - text_height - 5), (x + text_width, y), (255, 255, 255), -1)
            cv2.putText(image, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 1)

    return image

def process_video(video_path):
    video = cv2.VideoCapture(video_path)
    while video.isOpened():
        ret, frame = video.read()
        if not ret:
            break
        processed_frame = process_image(frame)
        cv2.imshow('Video', processed_frame)
        if cv2.waitKey(1) == 27:
            break
    video.release()
    cv2.destroyAllWindows()

if __name__ == "__main__":
    input_video_url = 'https://player.vimeo.com/external/581763177.sd.mp4?s=7c0e1dbf0a173ca1c9c3ac37a05c2498f905ad11&profile_id=165&oauth2_token_id=57447761'
    process_video(input_video_url)
```
Let's see how the text is detected frame by frame for our video. You can replace the URL and try it out on your videos!
Test your knowledge of text detection!
What does the `image_to_data` function do in Tesseract?