autocropper update #22

Merged
ewellenr merged 2 commits from autocropper into main 2023-11-13 23:24:04 -05:00
2 changed files with 255 additions and 32 deletions

View File

@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 136,
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
@ -15,7 +15,7 @@
},
{
"cell_type": "code",
"execution_count": 137,
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
@ -24,7 +24,7 @@
},
{
"cell_type": "code",
"execution_count": 138,
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
@ -99,7 +99,7 @@
},
{
"cell_type": "code",
"execution_count": 139,
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
@ -228,7 +228,7 @@
},
{
"cell_type": "code",
"execution_count": 140,
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
@ -250,7 +250,7 @@
},
{
"cell_type": "code",
"execution_count": 141,
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
@ -270,6 +270,7 @@
" kernel5 = cv2.getStructuringElement(cv2.MORPH_RECT, (8, 8))\n",
" kernel6 = cv2.getStructuringElement(cv2.MORPH_RECT, (20, 2))\n",
" kernel7 = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 8))\n",
" adaptivekernel = None\n",
" \n",
" # return lab[:,:,2]\n",
"\n",
@ -283,11 +284,32 @@
" # imglist = []\n",
"\n",
" Bthresh = cv2.adaptiveThreshold(currentimgofatype, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 201, 35)\n",
" \n",
" # return Bthresh\n",
"\n",
" contours, heirarchy = cv2.findContours(255-Bthresh,cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)\n",
" # imgcopy = cv2.drawContours(imgcopy, contours, -1, color=(0,255,0), thickness=1)\n",
" # return imgcopy\n",
" \n",
" boundingboxes = np.empty((len(contours), 4), dtype=int)\n",
"\n",
" for i, contour in enumerate(contours):\n",
" b = cv2.boundingRect(contour)\n",
" boundingboxes[i] = b\n",
" # imgcopy = cv2.rectangle(imgcopy, (b[0],b[1]), (b[0]+b[2], b[1]+b[3]), 128, thickness=3)\n",
" # return imgcopy\n",
" \n",
" epsilonvalue = np.median(boundingboxes, axis=0)[3]\n",
" \n",
" adaptivekernel = cv2.getStructuringElement(cv2.MORPH_RECT, (int(epsilonvalue/15), int(epsilonvalue/15)))\n",
" \n",
" # imglist.append(Bthresh)\n",
" # imglist.append(255-Bthresh)\n",
" \n",
" morphedBthresh = cv2.morphologyEx(Bthresh, cv2.MORPH_DILATE, kernel3, iterations=2)\n",
" # morphedBthresh = cv2.morphologyEx(Bthresh, cv2.MORPH_DILATE, adaptivekernel, iterations=2)\n",
" goodmorphBthresh = cv2.morphologyEx(Bthresh, cv2.MORPH_ERODE, kernel4, iterations=2)\n",
" # goodmorphBthresh = cv2.morphologyEx(Bthresh, cv2.MORPH_ERODE, adaptivekernel, iterations=3)\n",
" # morphedBthresh = cv2.morphologyEx(morphedBthresh, cv2.MORPH_DILATE, kernel7)\n",
" # imglist.append(morphedBthresh)\n",
" # imglist.append(goodmorphBthresh)\n",
@ -319,7 +341,9 @@
" mask = cv2.rectangle(mask, (b[0],b[1]), (b[0]+b[2], b[1]+b[3]), 0, thickness=cv2.FILLED)\n",
" \n",
" bingus = cv2.bitwise_or(goodmorphBthresh, mask)\n",
" # bingus = cv2.morphologyEx(bingus, cv2.MORPH_CLOSE, adaptivekernel)\n",
" # imglist.append(bingus)\n",
" # return imglist\n",
" return bingus\n",
" \n",
" # imglist.append(image)\n",
@ -427,7 +451,103 @@
},
{
"cell_type": "code",
"execution_count": 142,
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"# funtion to correct the median-angle to give it to the cv2.warpaffine() function\n",
"def anglecorrector(angle):\n",
" if 0 <= angle <= 90:\n",
" corrected_angle = angle - 90\n",
" elif -45 <= angle < 0:\n",
" corrected_angle = angle - 90\n",
" elif -90 <= angle < -45:\n",
" corrected_angle = 90 + angle\n",
" return corrected_angle"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"def padWithColour(img, hpadding=0, vpadding=0, fill=(0,0,0)):\n",
" borderType = cv2.BORDER_CONSTANT\n",
" out = cv2.copyMakeBorder(img, vpadding, vpadding, hpadding, hpadding, borderType, None, fill)\n",
" return out\n",
"\n",
"def mergecontours(contours):\n",
" cont = np.vstack(contours)\n",
" finalcontour = cv2.convexHull(cont)\n",
" return finalcontour"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
"def getSkewAngle(cvImage) -> float:\n",
" # Prep image, copy, convert to gray scale, blur, and threshold\n",
" newImage = padWithColour(cvImage, hpadding=50, vpadding=50, fill=(255,255,255))\n",
" # return newImage\n",
" gray = cv2.cvtColor(newImage, cv2.COLOR_BGR2GRAY)\n",
" blur = cv2.GaussianBlur(gray, (9, 9), 0)\n",
" thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]\n",
"\n",
" # Apply dilate to merge text into meaningful lines/paragraphs.\n",
" # Use larger kernel on X axis to merge characters into single line, cancelling out any spaces.\n",
" # But use smaller kernel on Y axis to separate between different blocks of text\n",
" kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (10, 5))\n",
" dilate = cv2.dilate(thresh, kernel, iterations=5)\n",
" # return dilate\n",
"\n",
" # Find all contours\n",
" contours, hierarchy = cv2.findContours(dilate, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)\n",
" contours = sorted(contours, key = cv2.contourArea, reverse = True)\n",
"\n",
" # Find largest contour and surround in min area box\n",
" largestContour = contours[0]\n",
"\n",
" mergedcontour = mergecontours(contours)\n",
"\n",
" # return cv2.drawContours(newImage, [mergedcontour], -1, (0,255,0), thickness=3)\n",
" minAreaRect = cv2.minAreaRect(mergedcontour)\n",
" minAreaRect = list(minAreaRect)\n",
" minAreaRect[1] = list(minAreaRect[1])\n",
" if (minAreaRect[1][0] > minAreaRect[1][1]):\n",
" temp = minAreaRect[1][0]\n",
" minAreaRect[1][0] = minAreaRect[1][1]\n",
" minAreaRect[1][1] = temp\n",
" minAreaRect[2] -= 90\n",
" # return cv2.drawContours(newImage, [largestContour], -1, (0,255,0), thickness=3)\n",
" # minAreaRect = cv2.minAreaRect(largestContour)\n",
"\n",
" box = cv2.boxPoints(minAreaRect)\n",
" box = np.intp(box) \n",
" newImage = cv2.drawContours(newImage, [box], -1, (0,255,0), thickness=3)\n",
" # return newImage\n",
"\n",
" # Determine the angle. Convert it to the value that was originally used to obtain skewed image\n",
" angle = minAreaRect[-1]\n",
" # print(angle)\n",
" angle = anglecorrector(angle)+90\n",
" # print(angle)\n",
" return angle\n",
"\n",
"def minboxdeskew(img, fill=(0,0,0)):\n",
" colourimg = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)\n",
" angle = getSkewAngle(colourimg)\n",
" padimg = padWithColour(img, hpadding=50, vpadding=50, fill=fill)\n",
" rotated = mf.rotate(padimg, angle, fill=fill)\n",
" return rotated"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
@ -435,11 +555,11 @@
" whitedbackground = mf.whiteoutbackground(image)\n",
" # return whitedbackground\n",
" \n",
" textrefined = mf.textClarifying(whitedbackground)\n",
" textrefined = textClarifying(whitedbackground)\n",
" # return textrefined\n",
" #maybe now is when I put in the line removing function\n",
" \n",
" lineout = removeLinesFromText(textrefined)\n",
" lineout = mf.removeLinesFromText(textrefined)\n",
" \n",
" return lineout\n",
" # implement a function that's called refine text\n",
@ -453,9 +573,13 @@
" # return postprocessed\n",
" postprocessed = mf.croptoblack(postprocessed)\n",
" \n",
" postprocessed = cv2.cvtColor(postprocessed, cv2.COLOR_GRAY2BGR)\n",
" # postprocessed = cv2.cvtColor(postprocessed, cv2.COLOR_GRAY2BGR)\n",
" # return postprocessed\n",
" \n",
" final = mf.externaldeskew(postprocessed, fill=(255,255,255))\n",
" # final = mf.externaldeskew(postprocessed, fill=(255,255,255))\n",
" final = minboxdeskew(postprocessed, fill=(255,255,255))\n",
" \n",
" # final = mf.croptoblack(final)\n",
" \n",
" # cv2.imshow(\"postprocessed\", mf.ResizeWithAspectRatio(postprocessed, 1000))\n",
" # cv2.imshow(\"final\", mf.ResizeWithAspectRatio(final, 1000))\n",
@ -467,19 +591,19 @@
},
{
"cell_type": "code",
"execution_count": 143,
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
"# prepped, scaler, hp, vp = mf.squareandthenresize(img, fill=255, width=1000, returnscalerinfo=True)\n",
"outs = houghlineprocessing(img)\n",
"outs = mf.houghlineprocessing(img)\n",
"# print(croprect)\n",
"#need to fix premorphCrop. it removes too much"
]
},
{
"cell_type": "code",
"execution_count": 144,
"execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
@ -493,7 +617,7 @@
},
{
"cell_type": "code",
"execution_count": 145,
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
@ -505,7 +629,7 @@
},
{
"cell_type": "code",
"execution_count": 146,
"execution_count": 35,
"metadata": {},
"outputs": [],
"source": [

View File

@ -81,6 +81,28 @@ def colourscaler(n, min, max):
diff = abs(max - min)
return clip((temp/diff)*255, 0, 255)
def padWithColour(img, hpadding=0, vpadding=0, fill=(0,0,0)):
    """Surround ``img`` with a constant-colour border.

    Args:
        img: Image array handed to ``cv2.copyMakeBorder``.
        hpadding: Pixels added to both the left and the right side.
        vpadding: Pixels added to both the top and the bottom side.
        fill: Colour used for the border pixels.

    Returns:
        The bordered image produced by ``cv2.copyMakeBorder``.
    """
    return cv2.copyMakeBorder(
        img,
        vpadding,  # top
        vpadding,  # bottom
        hpadding,  # left
        hpadding,  # right
        cv2.BORDER_CONSTANT,
        value=fill,
    )
def mergecontours(contours):
    """Merge several contours into one.

    All contour point arrays are stacked into a single point set and the
    convex hull of that set is returned as the merged contour.
    """
    return cv2.convexHull(np.vstack(contours))
def anglecorrector(angle):
    """Correct the median-angle so it can be given to ``cv2.warpAffine()``.

    Specifically meant for angles obtained from a ``cv2.minAreaRect`` rectangle.

    Args:
        angle: Angle in degrees; must lie within [-90, 90].

    Returns:
        ``angle - 90`` when ``-45 <= angle <= 90`` and ``angle + 90`` when
        ``-90 <= angle < -45``.

    Raises:
        ValueError: If ``angle`` is outside [-90, 90] (this includes NaN).
            Such inputs used to fall through every branch and fail with an
            ``UnboundLocalError`` on the return statement.
    """
    # The former [0, 90] and [-45, 0) branches applied the same correction,
    # so they are handled together here.
    if -45 <= angle <= 90:
        return angle - 90
    if -90 <= angle < -45:
        return 90 + angle
    raise ValueError(f"angle must be within [-90, 90] degrees, got {angle!r}")
# Composed transform for converting an image (usually a PIL image) to a PyTorch tensor.
tensorize = v2.Compose([v2.ToImageTensor(), v2.ConvertImageDtype()])
## ------------------------------for selective segmentation search crop------------------------------
@ -649,23 +671,19 @@ def textClarifying(image):
## Try using the LAB colour space???
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# autothreshold = np.clip(np.mean(gray)/1.2, 0, 255)
autothreshold = np.clip(np.mean(gray)/1.2, 0, 255)
lab = cv2.cvtColor(image, cv2.COLOR_BGR2LAB)
# hls = cv2.cvtColor(image, cv2.COLOR_BGR2HLS)
hls = cv2.cvtColor(image, cv2.COLOR_BGR2HLS)
# kernel1 = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
# kernel2 = cv2.getStructuringElement(cv2.MORPH_RECT, (4, 4))
kernel1 = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
kernel2 = cv2.getStructuringElement(cv2.MORPH_RECT, (4, 4))
kernel3 = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
kernel4 = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
# kernel5 = cv2.getStructuringElement(cv2.MORPH_RECT, (8, 8))
kernel5 = cv2.getStructuringElement(cv2.MORPH_RECT, (8, 8))
kernel6 = cv2.getStructuringElement(cv2.MORPH_RECT, (20, 2))
kernel7 = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 8))
#This will probably need fixing later because it should be in proportion to the character/word height vs actual picture dimension
kdim = 5-(max(gray.shape[0], gray.shape[1])//1000)
print(kdim)
kernel8 = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (kdim, kdim))
adaptivekernel = None
# return lab[:,:,2]
@ -679,11 +697,32 @@ def textClarifying(image):
# imglist = []
Bthresh = cv2.adaptiveThreshold(currentimgofatype, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 201, 35)
# return Bthresh
contours, heirarchy = cv2.findContours(255-Bthresh,cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# imgcopy = cv2.drawContours(imgcopy, contours, -1, color=(0,255,0), thickness=1)
# return imgcopy
boundingboxes = np.empty((len(contours), 4), dtype=int)
for i, contour in enumerate(contours):
b = cv2.boundingRect(contour)
boundingboxes[i] = b
# imgcopy = cv2.rectangle(imgcopy, (b[0],b[1]), (b[0]+b[2], b[1]+b[3]), 128, thickness=3)
# return imgcopy
epsilonvalue = np.median(boundingboxes, axis=0)[3]
adaptivekernel = cv2.getStructuringElement(cv2.MORPH_RECT, (int(epsilonvalue/15), int(epsilonvalue/15)))
# imglist.append(Bthresh)
# imglist.append(255-Bthresh)
morphedBthresh = cv2.morphologyEx(Bthresh, cv2.MORPH_DILATE, kernel3, iterations=2)
# morphedBthresh = cv2.morphologyEx(Bthresh, cv2.MORPH_DILATE, adaptivekernel, iterations=2)
goodmorphBthresh = cv2.morphologyEx(Bthresh, cv2.MORPH_ERODE, kernel4, iterations=2)
# goodmorphBthresh = cv2.morphologyEx(Bthresh, cv2.MORPH_ERODE, adaptivekernel, iterations=3)
# morphedBthresh = cv2.morphologyEx(morphedBthresh, cv2.MORPH_DILATE, kernel7)
# imglist.append(morphedBthresh)
# imglist.append(goodmorphBthresh)
@ -698,7 +737,7 @@ def textClarifying(image):
# imglist.append(morphedthresh)
# anded1 = cv2.bitwise_and(255-Bthresh, morphedthresh)
anded1 = cv2.bitwise_and(255-Bthresh, morphedthresh)
anded2 = cv2.bitwise_and(morphedBthresh, 255-morphedthresh)
# imglist.append(anded1)
# imglist.append(anded2)
@ -715,8 +754,9 @@ def textClarifying(image):
mask = cv2.rectangle(mask, (b[0],b[1]), (b[0]+b[2], b[1]+b[3]), 0, thickness=cv2.FILLED)
bingus = cv2.bitwise_or(goodmorphBthresh, mask)
bingus = cv2.morphologyEx(bingus, cv2.MORPH_ERODE, kernel8)
# bingus = cv2.morphologyEx(bingus, cv2.MORPH_CLOSE, adaptivekernel)
# imglist.append(bingus)
# return imglist
return bingus
@ -942,7 +982,60 @@ def externaldeskew(image, fill=(0,0,0), alreadygray=False):
rotated = rotate(image, angle, fill=fill)
return rotated
def getreceipttextAngle(cvImage) -> float:
    """Estimate the skew angle of the text in a BGR image.

    The image is padded with white, converted to grayscale, blurred and
    binarised (inverted Otsu threshold), then dilated so the text fuses into
    blobs. The contours of all blobs are merged into one convex hull, and the
    minimum-area rectangle around that hull supplies the angle.

    Args:
        cvImage: BGR image (it is converted with ``cv2.COLOR_BGR2GRAY``).

    Returns:
        The rectangle angle in degrees after ``anglecorrector(...) + 90``,
        or ``0.0`` when the thresholded image contains no contours at all
        (previously this case crashed with an ``IndexError``).
    """
    # Prep image: pad, convert to gray scale, blur, and threshold
    padded = padWithColour(cvImage, hpadding=50, vpadding=50, fill=(255,255,255))
    gray = cv2.cvtColor(padded, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (9, 9), 0)
    thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

    # Apply dilate to merge text into meaningful lines/paragraphs.
    # Use larger kernel on X axis to merge characters into single line, cancelling out any spaces.
    # But use smaller kernel on Y axis to separate between different blocks of text
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (10, 5))
    dilate = cv2.dilate(thresh, kernel, iterations=5)

    # Find all contours. They are not sorted by area any more: every contour
    # is merged into a single hull below, so their order is irrelevant.
    contours, _ = cv2.findContours(dilate, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        # Nothing detected (e.g. a blank image): report "no rotation".
        return 0.0

    # Surround the merged contour in a min area box
    _, (width, height), angle = cv2.minAreaRect(mergecontours(contours))

    # Treat the shorter side as the width: when the rectangle comes back with
    # width > height, the sides swap roles and the angle shifts by -90 degrees.
    if width > height:
        angle -= 90

    # Determine the angle. Convert it to the value that was originally used to obtain skewed image
    return anglecorrector(angle) + 90
def receipttextdeskew(img, fill=(0,0,0)):
    """Deskew a grayscale image using the angle of its text.

    The angle is measured by ``getreceipttextAngle`` on a BGR copy of ``img``.
    The original image is then padded by 50 pixels on every side with ``fill``
    and passed to ``rotate`` with that angle and the same ``fill``.

    Args:
        img: Grayscale image (it is converted with ``cv2.COLOR_GRAY2BGR``).
        fill: Colour used for the padding and handed on to ``rotate``.

    Returns:
        The result of ``rotate`` applied to the padded image.
    """
    angle = getreceipttextAngle(cv2.cvtColor(img, cv2.COLOR_GRAY2BGR))
    padded = padWithColour(img, hpadding=50, vpadding=50, fill=fill)
    return rotate(padded, angle, fill=fill)
## ------------------------------Full deskewing and cropping------------------------------
def houghlineprocessing(image):
@ -953,10 +1046,16 @@ def houghlineprocessing(image):
postprocessed = cropclarifying(croppedanddeskewed)
# return postprocessed
postprocessed = croptoblack(postprocessed)
# postprocessed = cv2.cvtColor(postprocessed, cv2.COLOR_GRAY2BGR)
# return postprocessed
# final = externaldeskew(postprocessed, fill=(255,255,255))
final = receipttextdeskew(postprocessed, fill=(255,255,255))
final = cv2.cvtColor(final, cv2.COLOR_GRAY2BGR)
postprocessed = cv2.cvtColor(postprocessed, cv2.COLOR_GRAY2BGR)
final = externaldeskew(postprocessed, fill=(255,255,255))
# final = mf.croptoblack(final)
# cv2.imshow("postprocessed", mf.ResizeWithAspectRatio(postprocessed, 1000))
# cv2.imshow("final", mf.ResizeWithAspectRatio(final, 1000))