Binarize low contrast images

Question

I have a bunch of image-snippets with low contrast which I'd like to binarize using python.

I tried various thresholding methods such as Otsu and Huang, but none of them works for all of my image snippets.

Following instructions like this one, I pieced together the code below:

import cv2
import numpy as np
from matplotlib import pyplot as plt
import math
import glob
import os.path
import os


def permissions(targetfile):
    """Set mode 0o755 on *targetfile* and assign it to uid 1000 / gid 1000."""
    access_mode = 0o755
    owner_uid = owner_gid = 1000
    os.chmod(targetfile, access_mode)
    os.chown(targetfile, owner_uid, owner_gid)

 #resize snippet
def resize(image):
    """Return *image* scaled by a factor of 12 along both axes.

    No explicit output size is given to cv2.resize; only the scale factors
    fx and fy are passed.
    """
    scale = 12
    return cv2.resize(image, None, fx=scale, fy=scale)

#Apply clahe    
def clahe(image, clip_limit=6, tile_grid_size=(9, 9)):
    """Convert a BGR image to grayscale and apply CLAHE to it.

    Args:
        image: Input image; it is passed through cv2.COLOR_BGR2GRAY, so it
            is expected to be a 3-channel BGR image.
        clip_limit: Value forwarded as ``clipLimit`` to cv2.createCLAHE.
        tile_grid_size: Pair forwarded as ``tileGridSize`` to
            cv2.createCLAHE.

    Returns:
        The grayscale image after the CLAHE object has been applied.

    The defaults reproduce the values that used to be hard-coded here, so
    existing ``clahe(image)`` calls behave as before.
    """
    image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Named "equalizer" so the local does not shadow this function's own name.
    equalizer = cv2.createCLAHE(clipLimit=clip_limit, tileGridSize=tile_grid_size)
    return equalizer.apply(image_gray)

# Binarize image using Huang's method (https://github.com/dnhkng/Huang-Thresholding)
def binarize(image):
    """Return a 0/1 uint8 mask of *image* thresholded with Huang's method.

    A histogram with bin edges 0, 1, ..., 256 is computed from the image and
    handed to Huang(); pixels strictly greater than the returned threshold
    become 1, all other pixels become 0.
    """
    level_counts, _ = np.histogram(image, bins=range(257))
    cutoff = Huang(level_counts)
    return (image > cutoff).astype(np.uint8)


def Huang(data):
    """Return the gray level selected by Huang's fuzzy thresholding method.

    Uses Shannon's entropy function (one can also use Yager's entropy function).
    Huang L.-K. and Wang M.-J.J. (1995) "Image Thresholding by Minimizing
    the Measures of Fuzziness" Pattern Recognition, 28(1): 41-51

    Args:
        data: Histogram as a one-dimensional sequence; ``data[i]`` is the
            number of pixels with gray level ``i``. Every bin of the
            sequence is evaluated.

    Returns:
        The level with the smallest fuzziness (the lowest such level on
        ties). If only one level is occupied, that level is returned; if
        the histogram holds no pixels at all, -1 is returned.
    """
    counts = [float(count) for count in data]
    n_bins = len(counts)

    # Levels that actually occur; empty bins add nothing to the entropy sum.
    occupied = [level for level, count in enumerate(counts) if count != 0]
    if not occupied:
        return -1
    first_bin, last_bin = occupied[0], occupied[-1]
    if first_bin == last_bin:
        # A single gray level has zero spread; 1 / (last - first) would
        # divide by zero, and there is nothing to separate anyway.
        return first_bin

    term = 1.0 / (last_bin - first_bin)

    # mu_0[t]: mean gray level of all pixels with level <= t.
    mu_0 = [0.0] * n_bins
    num_pix = 0.0
    sum_pix = 0.0
    for ih in range(first_bin, n_bins):
        sum_pix += ih * counts[ih]
        num_pix += counts[ih]
        mu_0[ih] = sum_pix / num_pix  # num_pix > 0 from first_bin onwards

    # mu_1[t]: mean gray level of all pixels with level > t.
    mu_1 = [0.0] * n_bins
    num_pix = 0.0
    sum_pix = 0.0
    for ih in range(last_bin, 0, -1):
        sum_pix += ih * counts[ih]
        num_pix += counts[ih]
        mu_1[ih - 1] = sum_pix / num_pix  # num_pix > 0 from last_bin downwards

    threshold = -1
    min_ent = float("inf")
    for it in range(n_bins):
        ent = 0.0
        for ih in occupied:
            # Levels up to and including the candidate threshold belong to
            # the lower class, the remaining levels to the upper class.
            center = mu_0[it] if ih <= it else mu_1[it]
            # Equation (4) in Reference
            mu_x = 1.0 / (1.0 + term * abs(ih - center))
            if 1e-06 <= mu_x <= 0.999999:
                # Equation (6) & (8) in Reference
                ent += counts[ih] * (
                    -mu_x * math.log(mu_x)
                    - (1.0 - mu_x) * math.log(1.0 - mu_x)
                )
        if ent < min_ent:
            min_ent = ent
            threshold = it
    return threshold


#Inputfiles:
path = glob.glob("./" + "*.JPG")
path.extend(glob.glob("./" + "*.jpg"))

#Output directory
targetdir = "./output/"
os.makedirs( targetdir, exist_ok=True)
permissions(targetdir)

for img in path:

    # Output name: text before the first dot of the basename, an optional
    # suffix (currently empty), then the source file's extension.
    poststring = ""
    targetfile = targetdir + os.path.basename(img).split('.')[0] + poststring + \
                    os.path.splitext(img)[1]

    if os.path.exists(targetfile):
        print("Skipping, because already existing: ", targetfile)
        permissions(targetfile)
        print('')
        continue

    print("Processing targetfile: ", targetfile)

    # read image and resize
    image = cv2.imread(img)
    if image is None:
        # cv2.imread returned nothing usable; skip the file rather than
        # crash inside resize().
        print("Skipping, because unreadable: ", img)
        print('')
        continue
    resized_image = resize(image)

    #clahe
    clahe_image = clahe(resized_image)
    denoised_image = cv2.fastNlMeansDenoising(clahe_image, h = 21, templateWindowSize = 9, searchWindowSize = 21)

    #huang thresholding
    binarized_image = binarize( denoised_image)
    binarized_image *= 255  # 0/1 mask -> 0/255 mask

    # dilate
    kernel = np.ones((12,12),np.uint8)
    dilate = cv2.dilate(binarized_image,kernel,iterations=3)

    # Flood fill: clear every white region that touches the image border.
    # The dimensions must come from the mask being scanned (the resized
    # image), not from the original snippet, otherwise interior pixels are
    # tested instead of the border.
    h, w = dilate.shape[:2]

    for row in range(h):
        if dilate[row, 0] == 255:
            cv2.floodFill(dilate, None, (0, row), 0)
        if dilate[row, w-1] == 255:
            cv2.floodFill(dilate, None, (w-1, row), 0)

    for col in range(w):
        if dilate[0, col] == 255:
            cv2.floodFill(dilate, None, (col, 0), 0)
        if dilate[h-1, col] == 255:
            cv2.floodFill(dilate, None, (col, h-1), 0)

    # Morphological open followed by close with a 7x7 elliptical kernel.
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (7,7))
    foreground = cv2.morphologyEx(dilate, cv2.MORPH_OPEN, kernel)
    foreground = cv2.morphologyEx(foreground, cv2.MORPH_CLOSE, kernel)

    # Creating background
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (17, 17))
    background = cv2.dilate(foreground, kernel, iterations=3)

    cv2.imwrite(targetfile, background)
    permissions(targetfile)

    print('')

The result is still not satisfying:

Could you please advise on how to lose the noise, maintain the desired features, and obtain straight/ellipse-like contour lines?

Adding the original snippets here for testing purposes: download snippets

Since you know the number of shapes that you want to find, you can clean up some of the noise by computing the connected components, then keep only the 5 largest. You could then start a flood-fill from the center of each component with some tighter thresholds to find its boundaries. — Homer512, Oct 06 '22 at 09:55
learn about signal to noise ratio. your data has nearly no signal left, even if noise is fairly low. this data is junk. you can throw it away. 11 and 14 at least, and a few of the others. — Christoph Rackwitz, Oct 06 '22 at 09:58
Please share some actual images, rather than screenshots of them - one decent one and a couple of problematic ones maybe. Thank you. — Mark Setchell, Oct 06 '22 at 10:12
Why do I know the number of shapes within a snippet? I know that there is always the center + at least 2 shapes as part of the outer ring. But since the ring has 20 bits, I could end up with 1 + 10 = 11 shapes. The snippets.zip which are ready to download are no screenshots. They are cropped from the original image. I will add some snippets with good contrast later today. — dejhost, Oct 06 '22 at 11:08
how did you even make those crops? they surely come from a larger image, and the crops were made by hand, right? so your problem isn't actually treating those crops, but FINDING the patterns in the first place — Christoph Rackwitz, Oct 06 '22 at 13:52
I bet you’d get better results on these examples if you avoid CLAHE and such nonsense. Just threshold each image as it is! — Cris Luengo, Oct 06 '22 at 14:03
The snippets are cropped from the original image, using a ML-Model to detect these patterns/markers. So they are not handmade. I have a second ML-model that finds the center-feature within the snippets. Here is a link to the snippets, now comprising a few examples with better contrast - and some with even worse: [extended snippets](https://nextcloud.subseascanning.com/index.php/s/kdXzgPXRFNRL8b3) I used a plugin in the tool "ImageJ" to compare 17 different threshold for binarization. In my opinion, resizing, clahe a.s.o. improve the results... — dejhost, Oct 06 '22 at 15:22
Increase the contrast dramatically then threshold then use morphology to close up small gaps — fmw42, Oct 06 '22 at 15:27
if your ML models can find these things, you can make your ML models emit a segmentation too, or even decode the markers. — Christoph Rackwitz, Oct 06 '22 at 20:57

score 3 · Answer 1 · answered Oct 06 '22 at 20:04

Here is one approach by stretching the contrast first. It may need tuning for other images.

Read the input
Stretch the contrast
Apply a Gaussian blur
Convert to gray
Adaptive threshold
Get contours and filter on area larger than some threshold
Draw white filled contours on black background for those contours that pass the filter
Save the results

Input:

import cv2
import numpy as np
import skimage.exposure

# Read the input picture.
img = cv2.imread('low_contrast.png')

# Contrast stretch: map input range 95..115 onto 0..255, then cast to uint8.
stretch = skimage.exposure.rescale_intensity(
    img, in_range=(95, 115), out_range=(0, 255)
).astype(np.uint8)

# Smooth with a Gaussian of sigma 5 in x and y.
blur = cv2.GaussianBlur(stretch, (0, 0), sigmaX=5, sigmaY=5)

# Collapse to a single gray channel.
gray = cv2.cvtColor(blur, cv2.COLOR_BGR2GRAY)

# Mean-based adaptive threshold (block size 101, constant -9).
thresh = cv2.adaptiveThreshold(
    gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 101, -9
)

# Find external contours; keep those with area above 2000 and draw them as
# an outline on a copy of the input and as a filled shape on a black canvas.
contour_img = img.copy()
result = np.zeros_like(thresh)
found = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# findContours returns either a 2-tuple or a 3-tuple; pick the contour list.
contours = found[1] if len(found) != 2 else found[0]
for outline in contours:
    if cv2.contourArea(outline) > 2000:
        cv2.drawContours(contour_img, [outline], 0, (255, 255, 255), 1)
        cv2.drawContours(result, [outline], 0, (255), -1)

# (window title, output file, image) for every intermediate stage.
stages = [
    ('stretch', 'low_contrast_stretched.png', stretch),
    ('blur', 'low_contrast_blur.png', blur),
    ('gray', 'low_contrast_gray.png', gray),
    ('thresh', 'low_contrast_thresh.png', thresh),
    ('contours', 'low_contrast_contours.png', contour_img),
    ('result', 'low_contrast_contours_filled.png', result),
]

# Save everything first, then display everything.
for _, filename, picture in stages:
    cv2.imwrite(filename, picture)

for window, _, picture in stages:
    cv2.imshow(window, picture)
cv2.waitKey(0)
cv2.destroyAllWindows()

Contrast stretched image: