I'm trying to OCR some numbers:
I wrote this code to test different `--psm` arguments (6, 7, 8, 13), but I don't see much difference between them.
import os
import pytesseract
import matplotlib.pyplot as plt
import cv2
import numpy as np

pytesseract.pytesseract.tesseract_cmd = (
    r"path/to/tesseract"
)


def apply_tesseract(image_path, psm):
    """Run Tesseract in digits mode on one image file.

    Args:
        image_path: Path of the image to read with ``cv2.imread``.
        psm: Tesseract page-segmentation mode, passed as ``--psm``.

    Returns:
        A ``(image, text)`` tuple: the array loaded by OpenCV and the
        string returned by ``pytesseract.image_to_string``.

    Raises:
        OSError: If OpenCV could not load the file.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than deep inside pytesseract.
        raise OSError(f"cv2.imread could not read image: {image_path}")
    text = pytesseract.image_to_string(image, config=f"--psm {psm} digits")
    return image, text


def display_images_with_text(images, texts):
    """Show the images in a grid of at most three rows, titled with their text.

    Args:
        images: Sequence of arrays accepted by ``Axes.imshow``.
        texts: Sequence of title strings, paired with ``images`` by position.

    Nothing is drawn when ``images`` is empty.
    """
    num_images = len(images)
    if num_images == 0:
        # An empty grid would make the column computation divide by zero.
        return

    num_rows = min(3, num_images)
    num_cols = (num_images + num_rows - 1) // num_rows  # ceiling division

    # squeeze=False keeps `axes` a 2-D array for every grid shape. With the
    # default squeezing, a single cell or a single column collapses to an
    # Axes object or a 1-D array, and the grid indexing fails.
    _fig, axes = plt.subplots(
        num_rows,
        num_cols,
        figsize=(12, 8),
        squeeze=False,
        subplot_kw={'xticks': [], 'yticks': []},
    )
    flat_axes = axes.ravel()  # row-major: cell i is (i // num_cols, i % num_cols)

    for ax, image, text in zip(flat_axes, images, texts):
        ax.imshow(image)
        ax.set_title(text)

    # Hide the frame of every cell, including cells left without an image.
    for ax in flat_axes:
        ax.axis("off")

    plt.show()


def main(folder_path):
    """OCR every ``.png`` file in ``folder_path`` and display the results.

    Args:
        folder_path: Directory scanned (non-recursively) for PNG images.
    """
    for psm in [6]:
        images = []
        texts = []
        # Sort so the figure layout is reproducible; os.listdir order is arbitrary.
        for filename in sorted(os.listdir(folder_path)):
            if not filename.lower().endswith(".png"):
                continue
            image_path = os.path.join(folder_path, filename)
            image, text = apply_tesseract(image_path, psm)
            # OpenCV loads colour images as BGR while matplotlib expects RGB.
            images.append(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
            texts.append(text)
        display_images_with_text(images, texts)


if __name__ == "__main__":
    folder_path = r"./digitImages"
    main(folder_path)
This is the output of --psm 6
As you can see, it's not that good.
How can I improve this? The number images are already black and white and quite small. I've tried some preprocessing, but I end up with the same black-and-white image.
# Load the source image from disk (treated as BGR by the conversion below).
original_image = cv2.imread(image_path)

# Upscale to twice the size; the first two shape entries are (height, width).
new_height, new_width = (2 * side for side in original_image.shape[:2])
resized_image = cv2.resize(original_image, (new_width, new_height))

# Collapse the enlarged image to a single grayscale channel.
gray = cv2.cvtColor(resized_image, cv2.COLOR_BGR2GRAY)

# 3x3 sharpening kernel: -1 everywhere with 9 in the centre (weights sum to 1).
sharpen_kernel = np.full((3, 3), -1)
sharpen_kernel[1, 1] = 9
sharpen = cv2.filter2D(gray, -1, sharpen_kernel)

# Binarise the sharpened image; Otsu picks the threshold, so the 0 is ignored.
_otsu_level, thresh = cv2.threshold(sharpen, 0, 255, cv2.THRESH_OTSU)