Quantcast
Channel: Active questions tagged python - Stack Overflow
Viewing all articles
Browse latest Browse all 16478

Digit OCR using Tesseract

$
0
0

I'm trying to OCR some numbers:

[enter image description here] [enter image description here] [enter image description here] [enter image description here] [enter image description here] [enter image description here]

I wrote this code to test different `--psm` arguments (6, 7, 8, 13), but I don't see much difference between them.

import os

import pytesseract
import matplotlib.pyplot as plt
import cv2
import numpy as np

# Location of the Tesseract executable used by pytesseract.
pytesseract.pytesseract.tesseract_cmd = (
    r"path/to/tesseract"
)


def apply_tesseract(image_path, psm):
    """Read an image from disk and OCR it with the Tesseract "digits" config.

    Args:
        image_path: Path of the image file to read with ``cv2.imread``.
        psm: Tesseract page segmentation mode, passed as ``--psm``.

    Returns:
        A ``(image, text)`` tuple: the array returned by ``cv2.imread``
        (BGR channel order) and the string recognised by Tesseract.

    Raises:
        FileNotFoundError: If OpenCV could not read the file.
    """
    image = cv2.imread(image_path)
    # cv2.imread signals failure by returning None instead of raising.
    if image is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")
    text = pytesseract.image_to_string(image, config=f"--psm {psm} digits")
    return image, text


def _to_rgb(image):
    """Return a 3-channel BGR image in RGB order; leave other shapes as-is."""
    if image.ndim == 3 and image.shape[2] == 3:
        return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    return image


def display_images_with_text(images, texts):
    """Show every image in a grid, using its OCR text as the subplot title.

    The grid has at most three rows; columns are added as needed. Nothing
    is displayed when ``images`` is empty.

    Args:
        images: Image arrays as returned by ``cv2.imread`` (BGR order).
        texts: Titles, one per image, in the same order as ``images``.
    """
    num_images = len(images)
    if num_images == 0:
        # Avoids a division by zero below and an empty figure window.
        return

    num_rows = min(3, num_images)
    num_cols = (num_images + num_rows - 1) // num_rows

    # squeeze=False always yields a 2-D array of axes, so grids with a
    # single row, a single column or a single cell are all handled alike.
    _, axes = plt.subplots(
        num_rows,
        num_cols,
        figsize=(12, 8),
        squeeze=False,
        subplot_kw={'xticks': [], 'yticks': []},
    )
    flat_axes = axes.ravel()  # row-major: same order as [i // cols, i % cols]

    # Turn every cell off first so unused trailing cells stay blank.
    for ax in flat_axes:
        ax.axis("off")

    for ax, image, text in zip(flat_axes, images, texts):
        # OpenCV loads colour images as BGR; imshow expects RGB.
        ax.imshow(_to_rgb(image))
        ax.set_title(text)
    plt.show()


def main(folder_path, psms=(6,)):
    """OCR every PNG in ``folder_path`` and display one figure per psm value.

    Args:
        folder_path: Directory that is scanned (non-recursively) for
            ``.png`` files.
        psms: Page segmentation modes to try; defaults to ``(6,)``.
    """
    for psm in psms:
        images = []
        texts = []
        # Sorted so the grid order is stable between runs.
        for filename in sorted(os.listdir(folder_path)):
            if filename.lower().endswith(".png"):
                image_path = os.path.join(folder_path, filename)
                image, text = apply_tesseract(image_path, psm)
                images.append(image)
                texts.append(text)
        display_images_with_text(images, texts)


if __name__ == "__main__":
    folder_path = r"./digitImages"
    main(folder_path)

This is the output of `--psm 6`:

enter image description here

As you can see, it's not that good.

How can I improve this? The number images are already black and white and quite small. I've tried some preprocessing, but I end up with the same black-and-white image.

# Read the original image (cv2.imread returns None if the file can't be read).
original_image = cv2.imread(image_path)
if original_image is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")

# Upscale the image to twice its original size.
new_width = original_image.shape[1] * 2  # Double the width
new_height = original_image.shape[0] * 2  # Double the height
resized_image = cv2.resize(original_image, (new_width, new_height))

# Convert the resized image to grayscale.
gray = cv2.cvtColor(resized_image, cv2.COLOR_BGR2GRAY)

# Sharpen the grayscale image with a 3x3 kernel whose weights sum to 1.
sharpen_kernel = np.array([[-1, -1, -1], [-1, 9, -1], [-1, -1, -1]])
sharpen = cv2.filter2D(gray, -1, sharpen_kernel)

# Apply Otsu's thresholding to the sharpened image; cv2.threshold returns
# (threshold_value, image), so keep only the image.
thresh = cv2.threshold(sharpen, 0, 255, cv2.THRESH_OTSU)[1]

Viewing all articles
Browse latest Browse all 16478

Trending Articles



<script src="https://jsc.adskeeper.com/r/s/rssing.com.1596347.js" async> </script>