Updating text refiner #11

Merged
ewellenr merged 15 commits from autocropper into main 2023-10-30 00:38:09 -04:00
5 changed files with 2133 additions and 412 deletions

View File

@ -62,8 +62,4 @@
"thread": "cpp",
"typeinfo": "cpp"
},
"[python]": {
"editor.defaultFormatter": "ms-python.black-formatter"
},
"python.formatting.provider": "none",
}

View File

@ -0,0 +1,553 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 2535,
"metadata": {},
"outputs": [],
"source": [
"import cv2\n",
"import myfunctions as mf\n",
"import numpy as np\n",
"import math\n",
"import scipy.stats as st"
]
},
{
"cell_type": "code",
"execution_count": 2536,
"metadata": {},
"outputs": [],
"source": [
"img = cv2.imread('./test_images/IMG_7605.jpg')"
]
},
{
"cell_type": "code",
"execution_count": 2537,
"metadata": {},
"outputs": [],
"source": [
"def whiteoutbackground(image):\n",
" # imagecpy = image.copy()\n",
" gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)\n",
" \n",
" # blur = cv2.blur(gray, (7,7))\n",
" \n",
" # window = 51\n",
" window = gray.shape[1]//8\n",
" if window % 2 == 0:\n",
" window += 1\n",
" thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, window, 2)\n",
" # return thresh\n",
" \n",
" \n",
" \n",
" contours, heirarchy = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)\n",
" \n",
" biggestcontour = max(contours, key=cv2.contourArea)\n",
" # imagecpy = cv2.drawContours(imagecpy, [biggestcontour], -1, (0,255,0), thickness=3)\n",
" # return imagecpy\n",
" \n",
" blank = np.full(thresh.shape, 255, dtype=np.uint8)\n",
" mask = blank.copy()\n",
" mask = cv2.drawContours(mask, [biggestcontour], -1, (0,0,0), thickness=cv2.FILLED)\n",
" \n",
" invertmask = 255 - mask\n",
" \n",
" \n",
" dim = int(min(invertmask.shape[0], invertmask.shape[1])/100)\n",
" # # dim = 21\n",
" # print(dim)\n",
" kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (dim, dim))\n",
" # invertmask = cv2.morphologyEx(invertmask, cv2.MORPH_DILATE, kernel)\n",
" mask = 255 - cv2.morphologyEx(invertmask, cv2.MORPH_ERODE, kernel, iterations=1)\n",
" # # return mask\n",
" \n",
" mask = cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR)\n",
" whitedbackground = cv2.bitwise_or(image, mask)\n",
" # return whitedbackground\n",
" \n",
" gray2 = cv2.cvtColor(whitedbackground, cv2.COLOR_BGR2GRAY)\n",
"\n",
" canny = cv2.Canny(gray2, 0, 500, None, 3)\n",
" \n",
" maskgray = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)\n",
" \n",
" \n",
" dim = int(min(maskgray.shape[0], maskgray.shape[1])/50)\n",
" invert = 255-maskgray\n",
" # dim = 21\n",
" # print(dim)\n",
" kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (dim, dim))\n",
" morphedmask = cv2.morphologyEx(invert, cv2.MORPH_OPEN, kernel, iterations=10)\n",
" # return 255 - morphedmask\n",
" morphedmask = 255 - morphedmask\n",
"\n",
" finalmask = cv2.bitwise_or(morphedmask, maskgray)\n",
" \n",
" # edgecontours, _ = cv2.findContours(255-morphedmask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)\n",
" finalmaskbgr = cv2.cvtColor(finalmask, cv2.COLOR_GRAY2BGR)\n",
" # return finalmaskbgr\n",
"\n",
" whitedbackground = cv2.bitwise_or(whitedbackground, finalmaskbgr)\n",
" # return whitedbackground\n",
" \n",
" test = cv2.inpaint(whitedbackground, finalmask, 3, cv2.INPAINT_TELEA)\n",
" return test"
]
},
{
"cell_type": "code",
"execution_count": 2538,
"metadata": {},
"outputs": [],
"source": [
"def fillLines(image):\n",
" edges = cv2.Canny(image, 0, 500, 3)\n",
" # return edges\n",
" maxgap = int(min(image.shape[0], image.shape[1])/30)\n",
" # print(maxgap)\n",
" minlength = int(min(image.shape[0], image.shape[1])/2)\n",
" linesP = cv2.HoughLinesP(edges, 1, np.pi / 180, 50, None, minlength, maxgap)\n",
" for line in linesP:\n",
" l = line[0]\n",
" image = cv2.line(image, (l[0], l[1]), (l[2], l[3]), 0, thickness=1)\n",
" # colourimage = cv2.line(colourimage, (l[0], l[1]), (l[2], l[3]), (0,255,0), thickness=3)\n",
" return image\n",
"\n",
"\n",
"def removeCardinalLines(image, horizontal=False):\n",
" axis = 0\n",
" if (horizontal):\n",
" cardinal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (25,1))\n",
" axis = 1\n",
" else:\n",
" cardinal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1,15))\n",
" lines = cv2.morphologyEx(image, cv2.MORPH_OPEN, cardinal_kernel, iterations=2)\n",
" # return lines\n",
"\n",
" mask = np.zeros(image.shape, dtype=np.uint8)\n",
" contours, _ = cv2.findContours(255-lines, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)\n",
" # mask = cv2.drawContours(mask, contours, -1, 255, thickness=3)\n",
" # return mask\n",
" \n",
" \n",
" boxes = []\n",
" dims = np.array([])\n",
" rects = []\n",
" for contour in contours:\n",
" rect = cv2.minAreaRect(contour)\n",
" rect = list(rect)\n",
" rect[1]=list(rect[1])\n",
" if (rect[1][axis] > rect[1][1-axis]):\n",
" rect[2] = rect[2] -90\n",
" temp = rect[1][1]\n",
" rect[1][1]=rect[1][0]\n",
" rect[1][0]=temp\n",
" # print(rect)\n",
" rects.append(rect)\n",
" dims = np.append(dims, rect[1][axis])\n",
" \n",
" # box = cv2.boxPoints(rect)\n",
" # box = np.intp(box)\n",
" # boxes.append(box) \n",
" # mask = cv2.drawContours(mask, [box], -1, 255, thickness=2)\n",
" # break\n",
" # return mask\n",
" # print(dims)\n",
" meddim = np.median(dims)\n",
" # print(meddim)\n",
" \n",
" for rect in rects:\n",
" # print(rect[1][axis])\n",
" # print(meddim/2)\n",
" # print(rect[1][1-axis])\n",
" # print(rect[1][axis])\n",
" if (rect[1][axis] < meddim/2 and rect[1][1-axis] > image.shape[axis]/5):\n",
" box = cv2.boxPoints(rect)\n",
" box = np.intp(box)\n",
" # boxes.append(box) \n",
" # mask = cv2.drawContours(mask, [box], -1, 255, thickness=2)\n",
" image = cv2.drawContours(image, [box], -1, 255, thickness=cv2.FILLED)\n",
" \n",
" # return mask\n",
" \n",
" return image\n",
"\n",
"\n",
"def removeLinesFromText(image):\n",
" image = removeCardinalLines(image)\n",
" image = removeCardinalLines(image, horizontal=True)\n",
" return image\n",
" \n",
" \n",
" \n",
" colourimage = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)\n",
" imgcopy = image.copy()\n",
" \n",
" \n",
" for i in range(0,7):\n",
" imgcopy = fillLines(imgcopy)\n",
" \n",
" return imgcopy\n",
" \n",
" # maxgap = int(min(image.shape[0], image.shape[1])/20)\n",
" edges = cv2.Canny(imgcopy, 0, 500, 3)\n",
" # return edges\n",
" maxgap = int(min(imgcopy.shape[0], imgcopy.shape[1])/30)\n",
" # print(maxgap)\n",
" minlength = int(min(imgcopy.shape[0], imgcopy.shape[1])/2)\n",
" linesP = cv2.HoughLinesP(edges, 1, np.pi / 180, 50, None, minlength, maxgap)\n",
" \n",
" # print(len(linesP))\n",
" # print(linesP)\n",
" # mask = np.zeros(colourimage.shape, dtype=np.uint8)\n",
"\n",
" \n",
" for line in linesP:\n",
" l = line[0]\n",
" # colourimage = cv2.line(colourimage, (l[0], l[1]), (l[2], l[3]), (0,255,0), thickness=20)\n",
" image = cv2.line(image, (l[0], l[1]), (l[2], l[3]), 255, thickness=20)\n",
" # mask = cv2.line(mask, (l[0], l[1]), (l[2], l[3]), (255,255,255), thickness=3)\n",
" \n",
" # kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (4, 4))\n",
" # image = 255-cv2.morphologyEx(255-image, cv2.MORPH_OPEN, kernel)\n",
" # kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))\n",
" # image = 255-cv2.morphologyEx(255-image, cv2.MORPH_DILATE, kernel)\n",
"\n",
" # return colourimage\n",
" return image\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 2539,
"metadata": {},
"outputs": [],
"source": [
"def reduceColours(x, centering=127):\n",
" a=0.00008\n",
" b=40\n",
" c=256\n",
" x = x.astype(int)\n",
" # value = np.cbrt((x-centering)/a)+centering\n",
" value = -((c+4)/(1+np.exp((x-centering)/b)))+c\n",
" value = np.clip(value, 0, 255)\n",
" return value.astype(np.uint8)\n",
"\n",
"def bwadjustment(image, center=127):\n",
" gray = reduceColours(image,center)\n",
" \n",
" return gray"
]
},
{
"cell_type": "code",
"execution_count": 2540,
"metadata": {},
"outputs": [],
"source": [
"def textClarifying(image):\n",
" \n",
" ## Try using the LAB colour space???\n",
" gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)\n",
" autothreshold = np.clip(np.mean(gray)/1.2, 0, 255)\n",
" \n",
" lab = cv2.cvtColor(image, cv2.COLOR_BGR2LAB)\n",
" hls = cv2.cvtColor(image, cv2.COLOR_BGR2HLS)\n",
" \n",
" kernel1 = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))\n",
" kernel2 = cv2.getStructuringElement(cv2.MORPH_RECT, (4, 4))\n",
" kernel3 = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))\n",
" kernel4 = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))\n",
" kernel5 = cv2.getStructuringElement(cv2.MORPH_RECT, (8, 8))\n",
" kernel6 = cv2.getStructuringElement(cv2.MORPH_RECT, (20, 2))\n",
" kernel7 = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 8))\n",
" \n",
" # return lab[:,:,2]\n",
"\n",
" currentimgofatype = lab[:,:,0] # L-channel: expresses the brightness in the image\n",
" # currentimgofatype = lab[:,:,1] # A-channel: expresses variation of color in the image between red and green\n",
" # currentimgofatype = lab[:,:,2] # B-channel: expresses variation of color in the image between yellow and blue\n",
" \n",
" # currentimgofatype = hls[:,:,0]\n",
" # currentimgofatype = hls[:,:,1]\n",
" # currentimgofatype = hls[:,:,2]\n",
" # imglist = []\n",
"\n",
" Bthresh = cv2.adaptiveThreshold(currentimgofatype, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 201, 35)\n",
" # imglist.append(Bthresh)\n",
" # imglist.append(255-Bthresh)\n",
" \n",
" morphedBthresh = cv2.morphologyEx(Bthresh, cv2.MORPH_DILATE, kernel3, iterations=2)\n",
" goodmorphBthresh = cv2.morphologyEx(Bthresh, cv2.MORPH_ERODE, kernel4, iterations=2)\n",
" # morphedBthresh = cv2.morphologyEx(morphedBthresh, cv2.MORPH_DILATE, kernel7)\n",
" # imglist.append(morphedBthresh)\n",
" # imglist.append(goodmorphBthresh)\n",
" \n",
" \n",
" thresh = cv2.threshold(currentimgofatype, 0, 255, cv2.THRESH_OTSU)[1]\n",
" # imglist.append(thresh)\n",
" \n",
" morphedthresh = cv2.morphologyEx(thresh, cv2.MORPH_ERODE, kernel6)\n",
" morphedthresh = cv2.morphologyEx(morphedthresh, cv2.MORPH_ERODE, kernel7)\n",
" \n",
" \n",
" \n",
" # imglist.append(morphedthresh)\n",
" anded1 = cv2.bitwise_and(255-Bthresh, morphedthresh)\n",
" anded2 = cv2.bitwise_and(morphedBthresh, 255-morphedthresh)\n",
" # imglist.append(anded1)\n",
" # imglist.append(anded2)\n",
" \n",
" contours, other = cv2.findContours(anded2, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)\n",
" # print(other)\n",
" \n",
" mask = np.full(gray.shape,fill_value=255, dtype=np.uint8)\n",
" \n",
" for i, contour in enumerate(contours):\n",
" if (other[0][i][2] != -1 and other[0][i][3] == -1):\n",
" b = cv2.boundingRect(contour)\n",
" # image = cv2.rectangle(image, (b[0],b[1]), (b[0]+b[2], b[1]+b[3]), (0,255,0), thickness=3)\n",
" mask = cv2.rectangle(mask, (b[0],b[1]), (b[0]+b[2], b[1]+b[3]), 0, thickness=cv2.FILLED)\n",
" \n",
" bingus = cv2.bitwise_or(goodmorphBthresh, mask)\n",
" # imglist.append(bingus)\n",
" return bingus\n",
" \n",
" # imglist.append(image)\n",
" \n",
" # reversedxor = 255-cv2.bitwise_and(255-Bthresh, 255-morphedthresh)\n",
" # # imglist.append(reversedxor)\n",
" \n",
" # morphedanded = cv2.morphologyEx(anded1, cv2.MORPH_DILATE, kernel5)\n",
" # # imglist.append(morphedanded)\n",
" \n",
" # testout = cv2.bitwise_or(Bthresh, morphedanded)\n",
" # # imglist.append(testout)\n",
" \n",
" # # # blurred = cv2.GaussianBlur(anded, (21,21), 0)\n",
" # # # imglist.append(blurred)\n",
" \n",
" # # # blurthresh = cv2.adaptiveThreshold(255-blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 201, 35)\n",
" # # # imglist.append(blurthresh)\n",
" \n",
" # # # Bthreshmasked = cv2.bitwise_and(gray, gray, mask=anded)\n",
" # # # imglist.append(Bthreshmasked)\n",
" \n",
" # # # thresh2 = cv2.threshold(Bthreshmasked, autothreshold, 255, cv2.THRESH_BINARY)[1]\n",
" # # # imglist.append(thresh2)\n",
" \n",
" \n",
" \n",
" # # xored = cv2.bitwise_xor(Bthresh, 255-thresh)\n",
" # # # imglist.append(xored)\n",
"\n",
" # # morphxor = cv2.morphologyEx(xored, cv2.MORPH_DILATE, kernel4, iterations=2)\n",
" # # morphxor = cv2.morphologyEx(morphxor, cv2.MORPH_ERODE, kernel5, iterations=2)\n",
" # # # morphxor = cv2.morphologyEx(morphxor, cv2.MORPH_ERODE, kernel2, iterations=2)\n",
" # # # imglist.append(morphxor)\n",
" \n",
" \n",
" # # comboed = cv2.bitwise_or(Bthresh, 255-morphxor)\n",
" # # # imglist.append(comboed)\n",
" # # # maybe SOMETHING CAN BE DONE SINCE THEY KIND OF GET A HALO OF WHITE AROUND THE TEXT\n",
" \n",
" # final = cv2.morphologyEx(testout, cv2.MORPH_OPEN,kernel3)\n",
" # final = cv2.morphologyEx(final, cv2.MORPH_OPEN,kernel3)\n",
" # final = cv2.morphologyEx(final, cv2.MORPH_ERODE,kernel3)\n",
" # # imglist.append(final)\n",
" # return imglist\n",
" # return final\n",
"\n",
" \n",
" autothreshold = np.clip(np.mean(gray)/1.5, 0, 255)\n",
" thresh1 = cv2.threshold(gray, autothreshold, 255, cv2.THRESH_TOZERO)[1]\n",
" # return thresh1\n",
" ### FLATTEN colours?\n",
" ## do a threshold gradient thing first?\n",
" centervalue = np.clip(np.mean(thresh1)/1.6, 0, 255)\n",
" grayflattened = bwadjustment(thresh1, centervalue)\n",
" # return grayflattened\n",
" thresh = cv2.adaptiveThreshold(grayflattened, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 41, 25)\n",
" # autothreshold = np.clip(np.mean(grayflattened)/1.25, 0, 255)\n",
" # thresh = cv2.threshold(grayflattened, autothreshold, 255, cv2.THRESH_BINARY)[1]\n",
" # thresh = cv2.threshold(grayflattened, 0, 255, cv2.THRESH_OTSU)[1]\n",
" # return thresh\n",
" \n",
" kernel1 = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))\n",
" kernel2 = cv2.getStructuringElement(cv2.MORPH_RECT, (4, 4))\n",
" kernel3 = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))\n",
" \n",
" \n",
" # final = thresh\n",
" final = cv2.morphologyEx(thresh, cv2.MORPH_OPEN,kernel3)\n",
" final = cv2.morphologyEx(final, cv2.MORPH_OPEN,kernel3)\n",
" final = cv2.morphologyEx(final, cv2.MORPH_ERODE,kernel3)\n",
" # final = cv2.morphologyEx(final, cv2.MORPH_DILATE,kernel2)\n",
" # final = cv2.morphologyEx(final, cv2.MORPH_ERODE,kernel3)\n",
" # final = cv2.morphologyEx(final, cv2.MORPH_DILATE,kernel2)\n",
" # final = cv2.morphologyEx(final, cv2.MORPH_ERODE,kernel2)\n",
" # final = cv2.morphologyEx(final, cv2.MORPH_DILATE,kernel2)\n",
" # final = cv2.morphologyEx(final, cv2.MORPH_ERODE,kernel1)\n",
" # final = cv2.morphologyEx(final, cv2.MORPH_DILATE,kernel2)\n",
" # final = 255-cv2.morphologyEx(255-final, cv2.MORPH_CLOSE,kernel)\n",
" \n",
" \n",
" reduced = cv2.morphologyEx(final, cv2.MORPH_DILATE, kernel1)\n",
" reduced = cv2.morphologyEx(reduced, cv2.MORPH_ERODE, kernel2)\n",
" reduced = cv2.morphologyEx(reduced, cv2.MORPH_DILATE, kernel2)\n",
" reduced = cv2.morphologyEx(reduced, cv2.MORPH_ERODE, kernel2)\n",
" reduced = 255 - cv2.morphologyEx(255-reduced, cv2.MORPH_OPEN, kernel3)\n",
" \n",
" # return reduced\n",
" contours, _ = cv2.findContours(255-reduced, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)\n",
" \n",
" \n",
" mask = np.zeros(final.shape, dtype=np.uint8)\n",
" rects = []\n",
" for contour in contours:\n",
" b = cv2.boundingRect(contour)\n",
" rects.append(b)\n",
" mask = cv2.rectangle(mask, (b[0],b[1]), (b[0]+b[2], b[1]+b[3]), 255, thickness=cv2.FILLED)\n",
" \n",
" # return mask\n",
" final = cv2.bitwise_or(final, 255-mask)\n",
" \n",
" return final\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 2541,
"metadata": {},
"outputs": [],
"source": [
"def cropclarifying(image):\n",
" whitedbackground = whiteoutbackground(image)\n",
" # return whitedbackground\n",
" \n",
" textrefined = textClarifying(whitedbackground)\n",
" # return textrefined\n",
" #maybe now is when I put in the line removing function\n",
" \n",
" lineout = removeLinesFromText(textrefined)\n",
" \n",
" return lineout\n",
" # implement a function that's called refine text\n",
"\n",
"\n",
"\n",
"def houghlineprocessing(image):\n",
" croppedanddeskewed, _ = mf.houghlinedeskewandcrop(image)\n",
" \n",
" postprocessed = cropclarifying(croppedanddeskewed)\n",
" # return postprocessed\n",
" postprocessed = mf.croptoblack(postprocessed)\n",
" \n",
" postprocessed = cv2.cvtColor(postprocessed, cv2.COLOR_GRAY2BGR)\n",
" \n",
" final = mf.externaldeskew(postprocessed, fill=(255,255,255))\n",
" \n",
" # cv2.imshow(\"postprocessed\", mf.ResizeWithAspectRatio(postprocessed, 1000))\n",
" # cv2.imshow(\"final\", mf.ResizeWithAspectRatio(final, 1000))\n",
" # cv2.waitKey(0)\n",
" # cv2.destroyAllWindows()\n",
" \n",
" return final"
]
},
{
"cell_type": "code",
"execution_count": 2542,
"metadata": {},
"outputs": [],
"source": [
"# prepped, scaler, hp, vp = mf.squareandthenresize(img, fill=255, width=1000, returnscalerinfo=True)\n",
"outs = houghlineprocessing(img)\n",
"# print(croprect)\n",
"#need to fix premorphCrop. it removes too much"
]
},
{
"cell_type": "code",
"execution_count": 2543,
"metadata": {},
"outputs": [],
"source": [
"# shrunk, scaler, hp, vp = mf.squareandthenresize(img, fill=255, width=1000, returnscalerinfo=True)\n",
"# shrunk1, croprect = mf.premorphCrop(shrunk)\n",
"# print(croprect)\n",
"# print(int(30*4.032 - 0))\n",
"# # temp = img[100:, :, :]\n",
"# temp = shrunk[croprect[1]:croprect[1]+croprect[3], croprect[0]:croprect[0]+croprect[2], :]\n"
]
},
{
"cell_type": "code",
"execution_count": 2544,
"metadata": {},
"outputs": [],
"source": [
"# cv2.imshow(\"temp\", mf.ResizeWithAspectRatio(out, height=1000))\n",
"# # cv2.imshow(\"shrunk1\", mf.ResizeWithAspectRatio(shrunk1, height=1000))\n",
"# cv2.waitKey(0)\n",
"# cv2.destroyAllWindows()"
]
},
{
"cell_type": "code",
"execution_count": 2546,
"metadata": {},
"outputs": [],
"source": [
"# for out in outs:\n",
"# if (out.shape[0] > out.shape[1]):\n",
"# cv2.imshow(\"test1\", mf.ResizeWithAspectRatio(out, height=1000))\n",
"# else:\\\n",
"# cv2.imshow(\"test1\", mf.ResizeWithAspectRatio(out, width=1000))\n",
"# key = cv2.waitKey(0)\n",
"# cv2.destroyAllWindows()\n",
"# if (key == 107):\n",
"# break\n",
"if (isinstance(outs, np.ndarray)):\n",
" if (outs.shape[0] > outs.shape[1]):\n",
" cv2.imshow(\"test\", mf.ResizeWithAspectRatio(outs, height=1350))\n",
" else:\n",
" cv2.imshow(\"test\", mf.ResizeWithAspectRatio(outs, width=1000))\n",
"else:\n",
" for i, out in enumerate(outs):\n",
" if (out.shape[0] > out.shape[1]):\n",
" cv2.imshow(\"test\"+str(i), mf.ResizeWithAspectRatio(out, height=1350))\n",
" else:\n",
" cv2.imshow(\"test\"+str(i), mf.ResizeWithAspectRatio(out, width=1000))\n",
"cv2.waitKey(0)\n",
"cv2.destroyAllWindows()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@ -41,12 +41,12 @@ def squareandthenresize(image, fill=0, width=None, height=None, inter=cv2.INTER_
else:
finalimage = out
return finalimage
# class SquarePad:
# def __init__(self, fill):
# self.fill = fill
# def __call__(self, image):
# w, h = image.shape[1], image.shape[0]
# max_wh = np.max([w, h])
@ -54,8 +54,8 @@ def squareandthenresize(image, fill=0, width=None, height=None, inter=cv2.INTER_
# vp = int((max_wh - h) / 2)
# padding = (hp, vp, hp, vp)
# return cv2.copyMakeBorder(image, vp, vp, hp, hp, cv2.BORDER_CONSTANT, self.fill)
def squarepad(image, fill=0, returnoffset=False):
w, h = image.shape[1], image.shape[0]
max_wh = np.max([w, h])
@ -65,7 +65,7 @@ def squarepad(image, fill=0, returnoffset=False):
if (returnoffset):
return cv2.copyMakeBorder(image, vp, vp, hp, hp, cv2.BORDER_CONSTANT, fill), hp, vp
return cv2.copyMakeBorder(image, vp, vp, hp, hp, cv2.BORDER_CONSTANT, fill)
def rotate(img, angle, fill=(0,0,0)):
rows,cols = img.shape[0], img.shape[1]
M = cv2.getRotationMatrix2D((cols/2,rows/2),angle,1)
@ -95,11 +95,11 @@ def biggestRects(n, rects):
dict[tuple(rect)] = rectArea(rect)
# maxh.heappush(rectArea(rect))
# print(maxh[0])
heap = [(-value, key) for key,value in dict.items()]
largest = hq.nsmallest(n, heap)
# hq.heapify(list(dict.items()))
# for i in range(0,n):
@ -123,7 +123,7 @@ def overlapRect(rects):
rightwall = min(rightwall, x+w)
topwall = max(topwall, y)
bottomwall = min(bottomwall, y+h)
if (topwall >= bottomwall or leftwall >= rightwall):
return (-1, -1, -1, -1)
return (leftwall, topwall, rightwall-leftwall, bottomwall-topwall)
@ -167,7 +167,7 @@ def mergerects(rects, xywhtype=True):
maxrect[2] = maxrect[2]-maxrect[0]
maxrect[3] = maxrect[3]-maxrect[1]
return maxrect
def rectscontaining(rect, outerrects):
temprects = set()
for i, outerrect in enumerate(outerrects):
@ -181,7 +181,7 @@ def lineAngle(line):
# print(line)
angle = (math.atan2(line[3] - line[1], line[2] - line[0]) % np.pi) - (np.pi/2)
return angle
def WithinXDegrees(lines, margin, baseangle=0):
# outlines = np.array([[]])
outlines = np.empty((0, 4))
@ -208,7 +208,7 @@ def lineBoundingRect(lines, asRect=False, returnint=False):
y1 = int(y1)
x2 = int(x2)
y2 = int(y2)
return (x1,y1,x2,y2)
# print(lines.max(0))
# print(type(lines))
@ -224,10 +224,67 @@ def lineswithinrange(lines, pt1, pt2, x=True, y=False):
maxy = max(pt1[1], pt2[1])
out_lines = [line for line in out_lines if ((min(line[1],line[3]) >= minx) and (max(line[1],line[3]) <= maxx))]
return out_lines
def premorphCrop(image):
    """Crop ``image`` by delegating to ``morphologyCrop``.

    Calls ``morphologyCrop(image, special=True, withRectangle=True)`` and
    returns its result unchanged.  ``prepimageforhoughline`` unpacks that
    result as ``(cropped_image, croprect)``.

    The previous in-function implementation (adaptive threshold, erode/close
    morphology, largest-contour mask and bounding-box crop) was placed after
    this unconditional ``return`` and could never run, so it was removed.
    """
    return morphologyCrop(image, special=True, withRectangle=True)
def rotatePoint(img, pt, angle, returnint=True):
rotateaxisx = img.shape[0]/2
rotateaxisy = img.shape[1]/2
@ -256,22 +313,26 @@ def rotateLine(img, line, angle, returnint=True):
pt1 = rotatePoint(img, (line[0],line[1]), angle, returnint)
pt2 = rotatePoint(img, (line[2],line[3]), angle, returnint)
return (pt1[0], pt1[1], pt2[0], pt2[1])
def prepimageforhoughline(image):
    """Build the edge map used for Hough-line detection and the matching crop.

    The image is squared/resized to width 1000, cropped with ``premorphCrop``,
    squared/resized again, and turned into an edge image
    (Canny -> dilate -> blur -> erode -> Canny).  The crop rectangle found on
    the resized image is mapped back onto ``image`` and that region is
    square-padded with white.

    Returns:
        ``(dst1, accompaniedimage)``: the edge image and the square-padded
        crop of the original ``image``.
    """
    prepped, scaler, hp, vp = squareandthenresize(image, fill=255, width=1000, returnscalerinfo=True)
    prepped, croprect = premorphCrop(prepped)
    prepped = squareandthenresize(prepped, fill=255, width=1000)

    # Map the crop rectangle (x, y, w, h) back to coordinates of `image`.
    # NOTE(review): presumably `scaler` is the resize factor and `hp`/`vp` the
    # horizontal/vertical padding added by squareandthenresize -- confirm.
    left = int(croprect[0]*scaler - hp)
    top = int(croprect[1]*scaler - vp)
    right = left + int(croprect[2]*scaler)
    bottom = top + int(croprect[3]*scaler)
    # A negative start index would wrap around to the far edge of the array
    # when slicing, so clamp every bound to the image origin.
    left, top, right, bottom = (max(bound, 0) for bound in (left, top, right, bottom))

    gray1 = cv2.cvtColor(prepped, cv2.COLOR_BGR2GRAY)
    dst1 = cv2.Canny(gray1, 0, 500, None, 3)

    # Dilate, blur, then erode the first edge map before re-running Canny.
    kernel = np.ones((5,5), np.uint8)
    out = cv2.morphologyEx(dst1, cv2.MORPH_DILATE, kernel)
    out = cv2.blur(out, (5,5))
    kernel = np.ones((6,6), np.uint8)
    dst1 = cv2.morphologyEx(out, cv2.MORPH_ERODE, kernel)
    dst1 = cv2.Canny(dst1, 0, 500, None, 3)

    accompaniedimage = image[top:bottom, left:right, :]
    accompaniedimage = squarepad(accompaniedimage, fill=255)
    return dst1, accompaniedimage
@ -298,7 +359,7 @@ def houghlinedeskewangle(image):
# angles[i] = math.atan2(unroundedpt2[1] - unroundedpt1[1], unroundedpt2[0] - unroundedpt1[0]) % np.pi
angles[i] = lineAngle((unroundedpt1[0], unroundedpt1[1], unroundedpt2[0], unroundedpt2[1]))
# cv2.line(cdstP, pt1, pt2, (0,0,255), 3, cv2.LINE_AA)
mode = st.mode(np.around(angles, decimals=3))[0]
rotationangle = np.rad2deg(mode)
return rotationangle
@ -308,14 +369,14 @@ def houghlinedeskewthencrop(baseimage, preppedimage, rotationangle):
rotatedbaseimage = rotate(baseimage, rotationangle)
sizemultiplier = rotatedbaseimage.shape[0]/rotateddst1.shape[0]
linesP = cv2.HoughLinesP(preppedimage, 1, np.pi / 180, 30, None, 90, 30)
rotatedlines = [rotateLine(rotateddst1, line[0], rotationangle) for line in linesP]
rotatedlines = np.reshape(rotatedlines, (len(rotatedlines),1,4))
vmarginlines = WithinXDegrees(rotatedlines, 7)
hmarginlines = WithinXDegrees(rotatedlines, 7, baseangle=90)
vrect = lineBoundingRect(vmarginlines,asRect=False, returnint=True)
@ -325,15 +386,15 @@ def houghlinedeskewthencrop(baseimage, preppedimage, rotationangle):
marginlines = np.append(vmarginlines, hmarginlines, axis=0)
else:
marginlines = vmarginlines
rect = lineBoundingRect(marginlines,asRect=False, returnint=True)
scaledrect = (int(rect[0]*sizemultiplier), int(rect[1]*sizemultiplier), int(rect[2]*sizemultiplier), int(rect[3]*sizemultiplier))
croppedbaseimage = rotatedbaseimage[scaledrect[1]:scaledrect[3], scaledrect[0]:scaledrect[2], :]
shrunkencbi, sizemultiplier = ResizeWithAspectRatio(croppedbaseimage, width=1000, retscale=True)
gray = cv2.cvtColor(shrunkencbi, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)[1]
@ -348,40 +409,40 @@ def houghlinedeskewthencrop(baseimage, preppedimage, rotationangle):
mx = rect
mx_area = area
scaledmx = (int(mx[0]*sizemultiplier), int(mx[1]*sizemultiplier), int(mx[2]*sizemultiplier), int(mx[3]*sizemultiplier))
finalbaseimage = croppedbaseimage[scaledmx[1]:scaledmx[1]+scaledmx[3], scaledmx[0]:scaledmx[0]+scaledmx[2], :]
return finalbaseimage, rotationangle
def houghlinedeskewandcrop(image):
    """Deskew and crop ``image`` using Hough-line analysis.

    Runs the three pipeline stages in order: ``prepimageforhoughline`` (edge
    map plus cropped original), ``houghlinedeskewangle`` (rotation angle from
    the edge map), and ``houghlinedeskewthencrop`` (rotate and crop).

    Returns:
        Whatever ``houghlinedeskewthencrop`` returns for the cropped original,
        the edge map and the rotation angle.
    """
    # Scaling and cropping occur here.
    # TODO (from the original author): also return the changes that were made.
    dst1, croppedogimage = prepimageforhoughline(image)

    # Find the angle needed to deskew.
    rotationangle = houghlinedeskewangle(dst1)

    # Deskew, then crop.
    return houghlinedeskewthencrop(croppedogimage, dst1, rotationangle)
def bruteforceprocessrects(greaterrects, lesserrects):
# squaredgrects = np.array([mf.xywhrectto2prect(rect) for rect in greaterrects])
# squaredlrects = np.array([mf.xywhrectto2prect(rect) for rect in lesserrects])
# print(squaredgrects)
# print(type(squaredgrects))
greatersortedbylowerx = (greaterrects[:,0]).argsort()
greatersortedbylowery = (greaterrects[:,1]).argsort()
greatersortedbyupperx = (greaterrects[:,0]+greaterrects[:,2]).argsort()
greatersortedbyuppery = (greaterrects[:,1]+greaterrects[:,3]).argsort()
# greatersortedbylowerx = (greaterrects[:,0]).argsort()
# greatersortedbylowery = (greaterrects[:,1]).argsort()
# greatersortedbyupperx = (greaterrects[:,0]+greaterrects[:,2]).argsort()
# greatersortedbyuppery = (greaterrects[:,1]+greaterrects[:,3]).argsort()
outerboxes = []
for innerrect in lesserrects:
outerboxes.append(rectscontaining(innerrect, greaterrects))
actingrects = lesserrects
actingrects = lesserrects.copy()
##IMPLEMENT BRUTEFORCE MERGE/RECHECKCONTAINS HERE
i = 0
while (i < len(actingrects)):
@ -401,103 +462,157 @@ def bruteforceprocessrects(greaterrects, lesserrects):
i = i+1
# print(actingrects)
return actingrects
def processrects(greaterrects, lesserrects):
    """Process the two rectangle collections via ``bruteforceprocessrects``.

    Both arguments are forwarded unchanged and the helper's result is
    returned as-is.
    """
    processed = bruteforceprocessrects(greaterrects, lesserrects)
    return processed
# Whites out the part of `image` that lies outside the largest contour found
# in a thresholded grayscale copy, then (in the later section) ORs in an
# opened version of that mask and inpaints with it.
# NOTE(review): this span looks like two revisions of the function merged
# together (two threshold pipelines, two findContours/max passes, and a
# `return whitedbackground` ahead of further processing) -- confirm which
# statements are meant to be live before relying on the notes below.
def whiteoutbackground(image):
# imagecpy = image.copy()
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# Threshold pipeline A: TOZERO threshold on the inverted gray image at
# 1.6x its mean (clipped to 0..255), then inverted back.
# NOTE(review): `thresh` is reassigned by the adaptive threshold below before
# it is read, so this result appears to be discarded.
invertedgray = 255 - gray
# cv2.imshow("result2", mf.ResizeWithAspectRatio(invertedgray, height=1000))
# cv2.waitKey(0)
# cv2.destroyAllWindows()
# print(np.mean(invertedgray.flatten()))
# print(np.median(invertedgray.flatten()))
autothreshold = np.clip(np.mean(invertedgray)*1.6, 0, 255)
# print(autothreshold)
# autothreshold = 70
# print(autothreshold)
thresh = cv2.threshold(invertedgray, autothreshold, 255, cv2.THRESH_TOZERO)[1]
thresh = 255 - thresh
# blur = cv2.blur(gray, (7,7))
# window = 51
# Threshold pipeline B: Gaussian adaptive threshold whose window is one
# eighth of the image width, bumped to the next odd number when even.
window = gray.shape[1]//8
if window % 2 == 0:
window += 1
thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, window, 2)
# return thresh
# flattenedsortedgray = np.sort(gray.flatten())
# autothreshold = np.emath.logn(1.02, np.mean(flattenedsortedgray))
# print(autothreshold)
# print(st.mode(flattenedsortedgray)[0])
# print(np.median(flattenedsortedgray))
# Re-binarise `thresh` at its own mean, then erode with a 3x3 kernel, 3 times.
thresh = cv2.threshold(thresh, np.mean(thresh), 255, cv2.THRESH_BINARY)[1]
kernel = np.ones((3,3), np.uint8)
# numiters = 5- thresh.shape[0]//1000
thresh = cv2.morphologyEx(thresh, cv2.MORPH_ERODE, kernel, iterations = 3)
# cv2.imshow("result2", mf.ResizeWithAspectRatio(thresh, height=1000))
# cv2.waitKey(0)
# cv2.destroyAllWindows()
# return 0
# thresh = cv2.Canny(thresh, 0, 500, None, 3)
# Contour pass A (RETR_LIST): fills the largest contour with 0 on a 255 canvas.
# NOTE(review): `mask` and `contours` are reassigned by pass B below and
# `largestcontour` is not read again, so this pass appears to have no effect.
contours, heirarchy = cv2.findContours(thresh,cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
# whitedbackground = cv2.drawContours(image, contours, -1, (0,255,0))
largestcontour = max(contours, key=cv2.contourArea)
blank = np.full(thresh.shape, 255, dtype=np.uint8)
mask = cv2.drawContours(blank, [largestcontour], -1, (0,0,0), thickness=cv2.FILLED)
# Contour pass B (RETR_EXTERNAL): same mask construction on a fresh canvas.
contours, heirarchy = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
biggestcontour = max(contours, key=cv2.contourArea)
# imagecpy = cv2.drawContours(imagecpy, [biggestcontour], -1, (0,255,0), thickness=3)
# return imagecpy
blank = np.full(thresh.shape, 255, dtype=np.uint8)
mask = blank.copy()
mask = cv2.drawContours(mask, [biggestcontour], -1, (0,0,0), thickness=cv2.FILLED)
# Erode the inverted mask with a square kernel whose side is 1/100 of the
# shorter image dimension, then invert back.
invertmask = 255 - mask
dim = int(min(invertmask.shape[0], invertmask.shape[1])/100)
# # dim = 21
# print(dim)
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (dim, dim))
# invertmask = cv2.morphologyEx(invertmask, cv2.MORPH_DILATE, kernel)
mask = 255 - cv2.morphologyEx(invertmask, cv2.MORPH_ERODE, kernel, iterations=1)
# # return mask
# OR-ing with the 3-channel mask turns every pixel where the mask is 255 white.
mask = cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR)
whitedbackground = cv2.bitwise_or(image, mask)
# NOTE(review): if this return is live, everything below it is unreachable.
return whitedbackground
# return whitedbackground
gray2 = cv2.cvtColor(whitedbackground, cv2.COLOR_BGR2GRAY)
# NOTE(review): `canny` is computed but not read anywhere below.
canny = cv2.Canny(gray2, 0, 500, None, 3)
maskgray = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)
# Open the inverted mask (10 iterations) with a square kernel whose side is
# 1/50 of the shorter image dimension, invert back, and OR with the mask.
dim = int(min(maskgray.shape[0], maskgray.shape[1])/50)
invert = 255-maskgray
# dim = 21
# print(dim)
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (dim, dim))
morphedmask = cv2.morphologyEx(invert, cv2.MORPH_OPEN, kernel, iterations=10)
# return 255 - morphedmask
morphedmask = 255 - morphedmask
finalmask = cv2.bitwise_or(morphedmask, maskgray)
# edgecontours, _ = cv2.findContours(255-morphedmask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
finalmaskbgr = cv2.cvtColor(finalmask, cv2.COLOR_GRAY2BGR)
# return finalmaskbgr
whitedbackground = cv2.bitwise_or(whitedbackground, finalmaskbgr)
# return whitedbackground
# Inpaint the whited image using `finalmask` (Telea method, radius 3).
test = cv2.inpaint(whitedbackground, finalmask, 3, cv2.INPAINT_TELEA)
return test
def removeCardinalLines(image, horizontal=False):
    """Paint over long, thin, axis-aligned strokes (e.g. ruled lines) in ``image``.

    The image is morphologically opened with a 1-pixel-wide kernel, the
    inverted result is split into external contours, and each contour's
    minimum-area rectangle is measured.  Rectangles that are thinner than half
    the median thickness *and* longer than a fifth of the relevant image
    dimension are filled with 255.

    Args:
        image: Single-channel 8-bit image.  Presumably binarised with a white
            (255) background and dark strokes -- TODO confirm against callers.
            The fill is drawn directly onto this array.
        horizontal: When False (default) a (1, 15) kernel is used and the
            rectangle width is treated as the thickness; when True a (25, 1)
            kernel is used and the rectangle height is treated as the
            thickness.

    Returns:
        ``image`` with the selected rectangles filled with 255.
    """
    # `axis` indexes the *thin* side in a (width, height) size pair and, via
    # image.shape[axis], the image dimension the long side is compared against.
    if horizontal:
        axis = 1
        cardinal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (25, 1))
    else:
        axis = 0
        cardinal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 15))

    lines = cv2.morphologyEx(image, cv2.MORPH_OPEN, cardinal_kernel, iterations=2)
    contours, _ = cv2.findContours(255 - lines, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        # Nothing to measure; also avoids np.median() of an empty sequence (NaN + warning).
        return image

    rects = []
    for contour in contours:
        center, size, angle = cv2.minAreaRect(contour)
        size = list(size)
        # Normalise so that size[axis] is always the shorter side; swapping the
        # sides and turning by 90 degrees describes the same rectangle.
        if size[axis] > size[1 - axis]:
            size.reverse()
            angle -= 90
        # Keep the ((cx, cy), (w, h), angle) tuple layout that cv2.boxPoints documents.
        rects.append((center, tuple(size), angle))

    meddim = np.median([size[axis] for _, size, _ in rects])
    min_length = image.shape[axis] / 5

    for rect in rects:
        size = rect[1]
        if size[axis] < meddim / 2 and size[1 - axis] > min_length:
            box = np.intp(cv2.boxPoints(rect))
            image = cv2.drawContours(image, [box], -1, 255, thickness=cv2.FILLED)
    return image
def removeLinesFromText(image):
    """Apply removeCardinalLines twice: the default pass, then the horizontal pass.

    Returns whatever the second removeCardinalLines call returns.
    """
    after_default_pass = removeCardinalLines(image)
    return removeCardinalLines(after_default_pass, horizontal=True)
def cropclarifying(image):
# Purpose: white out the background of `image`, then produce a cleaned-up text image.
# As written the function returns `lineout` (textClarifying followed by
# removeLinesFromText) part-way through the body.
# NOTE(review): indentation is not visible in this view; if `return lineout` sits at
# function level, everything after it is unreachable and `demoedboxes` /
# `specleboxes` are computed but never consumed -- confirm before cleaning up.
whitedbackground = whiteoutbackground(image)
# return whitedbackground
gray = cv2.cvtColor(whitedbackground, cv2.COLOR_BGR2GRAY)
# Global threshold derived from the image itself: mean intensity / 1.3, kept within [0, 255].
autothreshold = np.clip(np.mean(gray)/1.3, 0, 255)
demoed = cv2.threshold(gray, autothreshold, 255, cv2.THRESH_BINARY)[1]
demoedcanny = cv2.Canny(demoed, 0, 500, None, 3)
demoedcontours, heirarchy = cv2.findContours(demoedcanny, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# One (x, y, w, h) bounding box per external contour of the globally thresholded edges.
demoedboxes = np.empty([len(demoedcontours), 4], dtype=int)
for i, contour in enumerate(demoedcontours):
demoedboxes[i] = cv2.boundingRect(contour)
# `b` is overwritten before it is ever read; looks like debugging residue.
b = demoedboxes[i]
textrefined = textClarifying(whitedbackground)
# return textrefined
#maybe now is when I put in the line removing function
# Locally thresholded variant (mean-adaptive, block size 21, C = 10); its contours are
# collected with RETR_LIST, i.e. without restricting to external contours.
specle = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 21, 10)
speclecanny = cv2.Canny(specle, 0, 500, None, 3)
speclecontours, heirarchy = cv2.findContours(speclecanny, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
specleboxes = np.empty([len(speclecontours), 4], dtype=int)
lineout = removeLinesFromText(textrefined)
for i, contour in enumerate(speclecontours):
specleboxes[i] = cv2.boundingRect(contour)
# Same as above: assigned but not read before being reassigned.
b = specleboxes[i]
# Current result of the function: the text-clarified image with cardinal lines removed.
return lineout
# implement a function that's called refine text
# --- Earlier box-based pipeline (see the NOTE at the top about reachability) ---
# Build a mask that is 255 inside every rectangle returned by processrects and 0 elsewhere.
mask = np.zeros(whitedbackground.shape[:2], dtype=np.uint8)
goodrects = processrects(specleboxes, demoedboxes)
for rect in goodrects:
b = rect
mask = cv2.rectangle(mask, (b[0], b[1]), (b[0]+b[2], b[1]+b[3]), 255, thickness=cv2.FILLED)
# Composite: white wherever the mask is 0, original pixels wherever the mask is 255.
whitedscreen = np.full(whitedbackground.shape, fill_value=255, dtype=np.uint8)
invertedmask = cv2.bitwise_not(mask)
whitedscreen = cv2.bitwise_and(whitedscreen, whitedscreen, mask=invertedmask)
whitedbackground = cv2.bitwise_and(whitedbackground, whitedbackground, mask=mask)
whitedbackground = cv2.bitwise_or(whitedscreen, whitedbackground)
finalgray = cv2.cvtColor(whitedbackground, cv2.COLOR_BGR2GRAY)
# Otsu binarisation followed by two morphological openings with a 4x4 cross kernel.
binaryImage = cv2.threshold(finalgray, 0, 255, cv2.THRESH_OTSU)[1]
kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (4, 4))
final = cv2.morphologyEx(binaryImage, cv2.MORPH_OPEN,kernel)
final = cv2.morphologyEx(final, cv2.MORPH_OPEN,kernel)
return final
def croptoblack(image, extraborder=10):
invertedimage = cv2.bitwise_not(image)
blackpixels = cv2.findNonZero(invertedimage)
@ -509,8 +624,89 @@ def croptoblack(image, extraborder=10):
maxy = min(maxs[0][1]+extraborder, image.shape[0])
# print(blackpixels)
return image[miny:maxy, minx:maxx]
def reduceColours(x, centering=127):
    """Remap intensities through a logistic (S-shaped) curve centred on ``centering``.

    Each value ``v`` becomes ``256 - 260 / (1 + exp((v - centering) / 40))``,
    which is then clipped to ``[0, 255]`` and truncated to ``uint8``.  The
    curve runs between the asymptotes -4 and 256 and is steeper than the
    identity around ``centering`` (slope of about 1.6 there), so mid-tone
    contrast is stretched; ``centering`` itself maps to 126.

    Args:
        x: Array-like of intensities, e.g. a ``uint8`` grayscale image.
        centering: Intensity at which the curve has its midpoint.

    Returns:
        ``numpy.ndarray`` of dtype ``uint8`` with the same shape as ``x``.
    """
    steepness = 40  # larger values give a gentler transition around `centering`
    ceiling = 256   # upper asymptote of the curve (before clipping)

    # Widen to a signed integer type first so that `values - centering`
    # cannot wrap around when the input is uint8.
    values = np.asarray(x).astype(int)
    curve = ceiling - (ceiling + 4) / (1 + np.exp((values - centering) / steepness))
    return np.clip(curve, 0, 255).astype(np.uint8)
def bwadjustment(image, center=127):
    """Return ``reduceColours(image, center)``; thin convenience wrapper."""
    return reduceColours(image, center)
def textClarifying(image):
    """Binarise a BGR image and keep thresholded detail only inside selected boxes.

    Works on the L (lightness) channel of the LAB colour space:

    1. A Gaussian adaptive threshold (block 201, C 35) gives a local binary
       image; one copy is dilated (3x3, twice), another eroded (2x2, twice).
    2. An Otsu threshold gives a global binary image, eroded with a 20x2 and
       then a 2x8 rectangle.
    3. The dilated local image is AND-ed with the inverse of the eroded global
       image and its contours are extracted with ``RETR_CCOMP``.
    4. For every top-level contour that has at least one child (hole), its
       bounding box is opened up in an otherwise all-white mask.
    5. The eroded local image is OR-ed with that mask, so everything outside
       the selected boxes becomes 255.

    Args:
        image: BGR image (it is passed to ``cv2.COLOR_BGR2LAB``).

    Returns:
        Single-channel ``uint8`` image of the same height and width.
    """
    # Only lightness is used; the A/B channels of LAB are discarded.
    lightness = cv2.cvtColor(image, cv2.COLOR_BGR2LAB)[:, :, 0]

    square3 = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    square2 = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
    wide_bar = cv2.getStructuringElement(cv2.MORPH_RECT, (20, 2))
    tall_bar = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 8))

    local_binary = cv2.adaptiveThreshold(
        lightness, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 201, 35
    )
    dilated_local = cv2.morphologyEx(local_binary, cv2.MORPH_DILATE, square3, iterations=2)
    eroded_local = cv2.morphologyEx(local_binary, cv2.MORPH_ERODE, square2, iterations=2)

    global_binary = cv2.threshold(lightness, 0, 255, cv2.THRESH_OTSU)[1]
    eroded_global = cv2.morphologyEx(global_binary, cv2.MORPH_ERODE, wide_bar)
    eroded_global = cv2.morphologyEx(eroded_global, cv2.MORPH_ERODE, tall_bar)

    candidates = cv2.bitwise_and(dilated_local, 255 - eroded_global)
    contours, hierarchy = cv2.findContours(candidates, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)

    # Start fully white; selected boxes are set to 0 so the OR below lets the
    # thresholded image show through only there.
    mask = np.full(lightness.shape, fill_value=255, dtype=np.uint8)
    for i, contour in enumerate(contours):
        # Each hierarchy row is [next, previous, first_child, parent]; -1 means "none".
        _, _, first_child, parent = hierarchy[0][i]
        if first_child != -1 and parent == -1:
            x, y, w, h = cv2.boundingRect(contour)
            mask = cv2.rectangle(mask, (x, y), (x + w, y + h), 0, thickness=cv2.FILLED)

    return cv2.bitwise_or(eroded_local, mask)
## ------------------------------specific to row summation deskewing------------------------------
@ -534,28 +730,18 @@ def sum_rows(img):
## ------------------------------active functions------------------------------
## ------------------------------cropping------------------------------
def morphologyCrop(image, special=False, withRectangle=False):
def morphologyCrop(image, withRectangle=False):
# convert to grayscale
gray = cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)
# threshold
thresh = cv2.threshold(gray, 170, 255, cv2.THRESH_BINARY)[1]\
thresh = cv2.threshold(gray, 170, 255, cv2.THRESH_BINARY)[1]
if (special):
# apply morphology
kernel = np.ones((9,9), np.uint8)
morph = cv2.morphologyEx(thresh, cv2.MORPH_ERODE, kernel)
# morph = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
kernel = np.ones((9,9), np.uint8)
morph = cv2.morphologyEx(morph, cv2.MORPH_CLOSE, kernel)
kernel = np.ones((3,3), np.uint8)
morph = cv2.morphologyEx(morph, cv2.MORPH_CLOSE, kernel)
else:
# apply morphology
kernel = np.ones((7,7), np.uint8)
morph = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
kernel = np.ones((9,9), np.uint8)
morph = cv2.morphologyEx(morph, cv2.MORPH_ERODE, kernel)
# apply morphology
kernel = np.ones((7,7), np.uint8)
morph = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
kernel = np.ones((9,9), np.uint8)
morph = cv2.morphologyEx(morph, cv2.MORPH_ERODE, kernel)
# get largest contour
@ -580,8 +766,6 @@ def morphologyCrop(image, special=False, withRectangle=False):
# apply mask to input
result1 = image.copy()
if (special):
mask = cv2.blur(mask,(3,3))
result1 = cv2.bitwise_and(result1, mask)
# crop result
@ -614,7 +798,7 @@ def morphologyCrop(image, special=False, withRectangle=False):
# cv.imwrite("result.png",image)
#CAN ALSO TRY USING NUMPY VECTORIZATION
#CAN ALSO TRY USING NUMPY VECTORIZATION
#------------------------------------------------------------------------------------------
def selectiveSearchCrop(image):
img, scale = ResizeWithAspectRatio(image,300, retscale=True)
@ -635,12 +819,12 @@ def cannyEdgeCrop(image, lower = 100, upper = 255, threshold1 = 50, threshold2 =
gray = cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)
scaled_gray = np.zeros(gray.shape, gray.dtype)
# for y in range(0,gray.shape[0]):
# for x in range(0,gray.shape[1]):
# scaled_gray[y][x] = colourscaler(gray[y][x], lower, upper)
scaled_gray = gray
blurred = cv2.GaussianBlur(scaled_gray, (15,15),0)
edged = cv2.Canny(blurred, threshold1, threshold2)
return edged
@ -655,7 +839,7 @@ def houghlineCrop(image):
cdstP = prepped.copy()
cdstPmargin = cdstP.copy()
linesP = cv2.HoughLinesP(dst1, 1, np.pi / 180, 30, None, 80, 30)
vmarginlines = WithinXDegrees(linesP, 7)
hmarginlines = WithinXDegrees(linesP, 7, baseangle=90)
vrect = lineBoundingRect(vmarginlines,asRect=False, returnint=True)
@ -678,7 +862,7 @@ def houghlineCrop(image):
# l = marginlines[i]
# cv2.line(cdstP, (int(l[0]), int(l[1])), (int(l[2]), int(l[3])), (255,0,0), 3, cv2.LINE_AA)
return cropped
@ -697,12 +881,12 @@ def rowsumdeskew(image, withangle=False):
src = cv2.threshold(src, 70, 255, cv2.THRESH_BINARY)[1]
src = ResizeWithAspectRatio(src, height=250)
angle = 0
finalangle = 0
while angle <= 360:
# Rotate the source image
img = rotate(src, angle)
img = rotate(src, angle)
# Crop the center 1/3rd of the image (roi is filled with text)
h,w = img.shape
buffer = min(h, w) - int(min(h,w)/1.5)
@ -730,12 +914,15 @@ def rowsumdeskew(image, withangle=False):
# cv2.destroyAllWindows()
if (withangle):
return rotate(image,finalangle), finalangle
return rotate(image, finalangle)
return rotate(image, finalangle)
def externaldeskew(image, fill=(0,0,0)):
def externaldeskew(image, fill=(0,0,0), alreadygray=False):
# image = io.imread(_img)
# print(type(image))
grayscale = cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)
if (alreadygray):
grayscale = image.copy()
else:
grayscale = cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)
grayscale = squarepad(grayscale,fill=255)
grayscale = ResizeWithAspectRatio(grayscale, height=300)
# print(type(grayscale))
@ -749,20 +936,22 @@ def externaldeskew(image, fill=(0,0,0)):
## ------------------------------Full deskewing and cropping------------------------------
def houghlineprocessing(image):
    """Run the full pipeline: Hough-line deskew/crop, clean-up, crop to ink, final deskew.

    Steps, in order: houghlinedeskewandcrop -> cropclarifying -> croptoblack ->
    GRAY2BGR conversion -> externaldeskew with a white fill.

    Returns:
        The image returned by externaldeskew.
    """
    # The second value returned by houghlinedeskewandcrop is currently ignored.
    # TODO: if that step did not change the image (turn the ignored value into
    #       something useful to detect this), cropclarifying should only do the
    #       text-isolation part and not try to white out any background.
    # TODO: if nothing was cropped, maybe run externaldeskew first, before
    #       handing the image to cropclarifying.
    deskewed_crop, _unused = houghlinedeskewandcrop(image)

    cleaned = croptoblack(cropclarifying(deskewed_crop))
    cleaned_bgr = cv2.cvtColor(cleaned, cv2.COLOR_GRAY2BGR)
    return externaldeskew(cleaned_bgr, fill=(255, 255, 255))
###### DESIRE: CONVERT STUFF RELATED TO THE HOUGHLINE PROCESSING INTO C SINCE IT ONLY REALLY USES OPENCV

View File

@ -1,257 +0,0 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.10/dist-packages/torchvision/datapoints/__init__.py:12: UserWarning: The torchvision.datapoints and torchvision.transforms.v2 namespaces are still Beta. While we do not expect major breaking changes, some APIs may still change according to user feedback. Please submit any feedback you may have in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check out https://github.com/pytorch/vision/issues/7319 to learn more about the APIs that we suspect might involve future changes. You can silence this warning by calling torchvision.disable_beta_transforms_warning().\n",
" warnings.warn(_BETA_TRANSFORMS_WARNING)\n",
"/usr/local/lib/python3.10/dist-packages/torchvision/transforms/v2/__init__.py:54: UserWarning: The torchvision.datapoints and torchvision.transforms.v2 namespaces are still Beta. While we do not expect major breaking changes, some APIs may still change according to user feedback. Please submit any feedback you may have in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check out https://github.com/pytorch/vision/issues/7319 to learn more about the APIs that we suspect might involve future changes. You can silence this warning by calling torchvision.disable_beta_transforms_warning().\n",
" warnings.warn(_BETA_TRANSFORMS_WARNING)\n"
]
}
],
"source": [
"import cv2\n",
"import numpy as np\n",
"\n",
"import myfunctions as mf\n",
"\n",
"\n",
"import scipy.stats as st\n",
"import math"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# read image as grayscale\n",
"img = cv2.imread('./test_images/IMG_7605.jpg')\n",
"# img = mf.ResizeWithAspectRatio(img,1000)\n",
"# img = mf.rotate(img, 54)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"def reduceColours(x):\n",
" b=10\n",
" c=1.2\n",
" x = x.astype(int)\n",
" value = ((x-b)*c) + (b*(c-1))\n",
" value = np.clip(value, 0, 255)\n",
" return value.astype(np.uint8)\n",
"\n",
"def bwadjustment(image):\n",
" # # print(image)\n",
" # gray = image.astype(int)\n",
" # gray += 1\n",
" # # print(gray)\n",
" # gray = np.emath.logn(1.0218, gray)\n",
" # # print(gray)\n",
" # gray = np.clip(gray, 0, 255)\n",
" # gray = gray.astype(np.uint8)\n",
" gray = reduceColours(image)\n",
" \n",
" return gray\n",
"\n",
"\n",
"\n",
"\n",
"def testingfunction(image):\n",
" gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)\n",
" \n",
" # sigma = 0.5\n",
" # v = np.median(image)\n",
" # lower = int(max(0, (1.0 - sigma) * v))\n",
" # upper = int(min(255, (1.0 + sigma) * v))\n",
" \n",
" # upper = 500\n",
" \n",
" \n",
" # thresh = cv2.Canny(gray, lower, upper, None, 3)\n",
" \n",
" gray = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 21, 20)\n",
" \n",
" \n",
" return gray\n",
"\n",
"\n",
"\n",
"\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"#####NEED TO WORK ON SCORING THE LINES SO IT PICKS THE CORRECT ORIENTATION (horizontal vs vertical) AND SO THAT THE CROPPING RECTANGLE MOVES/GET TRANSFORMED WITH IT\n",
"\n",
"\n",
"## CAN MAYBE ALSO USE NORMAL HOUGHLINE STUFF TO GET MORE LINES OR GET AN EXTRA BIT OF WEIGHTING OR SOMETHING"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"out = mf.houghlineprocessing(img)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# cropped, rotangle = houghlinedeskewandcrop(img)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# _, angle = mf.houghlinedeskew(img, withangle=True)\n",
"# print(angle)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# if (abs(rotangle - angle) - 90 <= 5):\n",
"# print(\"hi\")q"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"cv2.imshow(\"result2\", mf.ResizeWithAspectRatio(out, height=1000))\n",
"cv2.waitKey(0)\n",
"cv2.destroyAllWindows()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# prepped = mf.squareandthenresize(cropped, fill=255, width=1000)\n",
"# prepped, _ = mf.premorphCrop(prepped)\n",
"# prepped = mf.squareandthenresize(prepped, fill=255, width=1000)\n",
"# gray1 = cv2.cvtColor(prepped, cv2.COLOR_BGR2GRAY)\n",
"# dst1 = cv2.Canny(gray1, 0, 500, None, 3)\n",
"\n",
"# # cdstP = prepped.copy()\n",
"# # linesP = cv2.HoughLinesP(dst1, 1, np.pi / 180, 30, None, 90, 30)\n",
"# # if linesP is not None:\n",
"# # for i in range(0, len(linesP)):\n",
"# # l = linesP[i][0]\n",
"# # # anglesP[i] = mf.lineAngle(l)\n",
"# # cv2.line(cdstP, (l[0], l[1]), (l[2], l[3]), (0,0,255), 3, cv2.LINE_AA)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"# cv2.imshow(\"result2\", dst1)\n",
"# cv2.waitKey(0)\n",
"# cv2.destroyAllWindows()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"### tasks. use contours to get the biggest contour and get a mask from it and then white out the external area. and then use thresholding or whatever to make the paper white. can try and get the mean colour of the paper area and then use that to autothreshold or something."
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"# gray = cv2.cvtColor(cropped, cv2.COLOR_BGR2GRAY)\n",
"# thresh = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)[1]\n",
"# contours, heirarchy =cv2.findContours(thresh,cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)\n",
"# mx = (0,0,0,0)\n",
"# mx_area = 0\n",
"# for cont in contours:\n",
"# rect = cv2.boundingRect(cont)\n",
"# area = mf.rectArea(rect)\n",
"# if (area > mx_area):\n",
"# mx = rect\n",
"# mx_area = area\n",
"\n",
"# cropped = cv2.rectangle(cropped, (mx[0], mx[1]), (mx[0]+mx[2], mx[1]+mx[3]), (0,255,0), 3)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"# # # view result\n",
"# # # cv2.imshow(\"threshold\", thresh)\n",
"# # # cv2.imshow(\"morph\", morph)\n",
"# # # cv2.imshow(\"mask\", mask)\n",
"# # cv2.imshow(\"result1\", mf.ResizeWithAspectRatio(cdstP,height=1000))\n",
"# cv2.imshow(\"result2\", cropped)\n",
"# cv2.waitKey(0)\n",
"# cv2.destroyAllWindows()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

1240
code/autocropper/temp.ipynb Normal file

File diff suppressed because it is too large Load Diff