Update/fortify background whiteout and text clarification #25

Merged
ewellenr merged 2 commits from autocropper into main 2023-11-14 19:07:32 -05:00
2 changed files with 167 additions and 535 deletions

View File

@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 2312,
"metadata": {},
"outputs": [],
"source": [
@ -15,54 +15,69 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 2313,
"metadata": {},
"outputs": [],
"source": [
"img = cv2.imread('./test_images/IMG_7640.jpg')"
"# img = cv2.imread('/mnt/dataset/baseimages/1.jpg')\n",
"img = cv2.imread('/mnt/code/autocropper/test_images/IMG_7594.jpg')"
]
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": 2314,
"metadata": {},
"outputs": [],
"source": [
"def whiteoutbackground(image):\n",
" # imagecpy = image.copy()\n",
" imagecpy = image.copy()\n",
" gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)\n",
" \n",
" # blur = cv2.blur(gray, (7,7))\n",
" \n",
" # window = 51\n",
" window = gray.shape[1]//8\n",
" window = min(gray.shape[1], gray.shape[0])//20\n",
" if window % 2 == 0:\n",
" window += 1\n",
" thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, window, 2)\n",
" thresh1 = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, window, 5)\n",
" thresh2 = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU)[1]\n",
" thresh = cv2.bitwise_and(thresh1, thresh2)\n",
" # return thresh\n",
"\n",
" # dim = int(min(thresh.shape[0], thresh.shape[1])/400)\n",
" dim = 3\n",
" kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (dim, dim))\n",
" morphedthresh = cv2.morphologyEx(thresh, cv2.MORPH_ERODE, kernel)\n",
" # return morphedthresh\n",
" \n",
" \n",
" \n",
" contours, heirarchy = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)\n",
" # contours1, heirarchy = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)\n",
" contours2, heirarchy = cv2.findContours(morphedthresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)\n",
" \n",
" biggestcontour = max(contours, key=cv2.contourArea)\n",
" # imagecpy = cv2.drawContours(imagecpy, [biggestcontour], -1, (0,255,0), thickness=3)\n",
" # biggestcontour1 = max(contours1, key=cv2.contourArea)\n",
" biggestcontour2 = max(contours2, key=cv2.contourArea)\n",
" # imagecpy = cv2.drawContours(imagecpy, [biggestcontour1], -1, (0,255,0), thickness=3)\n",
" # imagecpy = cv2.drawContours(imagecpy, [biggestcontour2], -1, (0,0,255), thickness=3)\n",
" # return imagecpy\n",
" \n",
" blank = np.full(thresh.shape, 255, dtype=np.uint8)\n",
" mask = blank.copy()\n",
" mask = cv2.drawContours(mask, [biggestcontour], -1, (0,0,0), thickness=cv2.FILLED)\n",
" \n",
" # mask = cv2.drawContours(mask, [biggestcontour1], -1, (0,0,0), thickness=cv2.FILLED)\n",
" mask = cv2.drawContours(mask, [biggestcontour2], -1, (0,0,0), thickness=cv2.FILLED)\n",
"\n",
" # return mask\n",
"\n",
" invertmask = 255 - mask\n",
" \n",
" \n",
" dim = int(min(invertmask.shape[0], invertmask.shape[1])/100)\n",
" dim = int(min(invertmask.shape[0], invertmask.shape[1])/200)\n",
" # # dim = 21\n",
" # print(dim)\n",
" kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (dim, dim))\n",
" # invertmask = cv2.morphologyEx(invertmask, cv2.MORPH_DILATE, kernel)\n",
" mask = 255 - cv2.morphologyEx(invertmask, cv2.MORPH_ERODE, kernel, iterations=1)\n",
" # # return mask\n",
" # return mask\n",
" \n",
" mask = cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR)\n",
" whitedbackground = cv2.bitwise_or(image, mask)\n",
@ -75,16 +90,21 @@
" maskgray = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)\n",
" \n",
" \n",
" dim = int(min(maskgray.shape[0], maskgray.shape[1])/50)\n",
"\n",
" invert = 255-maskgray\n",
" # dim = 21\n",
" # print(dim)\n",
" dim = int(min(maskgray.shape[0], maskgray.shape[1])/200)\n",
" kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (dim, dim))\n",
" morphedmask = cv2.morphologyEx(invert, cv2.MORPH_OPEN, kernel, iterations=10)\n",
" morphedmask = cv2.morphologyEx(invert, cv2.MORPH_ERODE, kernel)\n",
" dim = int(min(maskgray.shape[0], maskgray.shape[1])/50)\n",
" kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (dim, dim))\n",
" morphedmask = cv2.morphologyEx(morphedmask, cv2.MORPH_OPEN, kernel, iterations=7)\n",
" # return 255 - morphedmask\n",
" morphedmask = 255 - morphedmask\n",
"\n",
" finalmask = cv2.bitwise_or(morphedmask, maskgray)\n",
" # return finalmask\n",
" \n",
" # edgecontours, _ = cv2.findContours(255-morphedmask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)\n",
" finalmaskbgr = cv2.cvtColor(finalmask, cv2.COLOR_GRAY2BGR)\n",
@ -99,473 +119,30 @@
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"def fillLines(image):\n",
" edges = cv2.Canny(image, 0, 500, 3)\n",
" # return edges\n",
" maxgap = int(min(image.shape[0], image.shape[1])/30)\n",
" # print(maxgap)\n",
" minlength = int(min(image.shape[0], image.shape[1])/2)\n",
" linesP = cv2.HoughLinesP(edges, 1, np.pi / 180, 50, None, minlength, maxgap)\n",
" for line in linesP:\n",
" l = line[0]\n",
" image = cv2.line(image, (l[0], l[1]), (l[2], l[3]), 0, thickness=1)\n",
" # colourimage = cv2.line(colourimage, (l[0], l[1]), (l[2], l[3]), (0,255,0), thickness=3)\n",
" return image\n",
"\n",
"\n",
"def removeCardinalLines(image, horizontal=False):\n",
" # kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,3))\n",
" axis = 0\n",
" if (horizontal):\n",
" cardinal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (25,1))\n",
" axis = 1\n",
" else:\n",
" cardinal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1,15))\n",
" lines = cv2.morphologyEx(image, cv2.MORPH_OPEN, cardinal_kernel, iterations=2)\n",
" # lines = cv2.morphologyEx(lines, cv2.MORPH_OPEN, kernel, iterations=2)\n",
" # return lines\n",
"\n",
" mask = np.zeros(image.shape, dtype=np.uint8)\n",
" contours, _ = cv2.findContours(255-lines, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)\n",
" # mask = cv2.drawContours(mask, contours, -1, 255, thickness=3)\n",
" # return mask\n",
" \n",
" \n",
" boxes = []\n",
" dims = np.array([])\n",
" rects = []\n",
" for contour in contours:\n",
" rect = cv2.minAreaRect(contour)\n",
" rect = list(rect)\n",
" rect[1]=list(rect[1])\n",
" if (rect[1][axis] > rect[1][1-axis]):\n",
" rect[2] = rect[2] -90\n",
" temp = rect[1][1]\n",
" rect[1][1]=rect[1][0]\n",
" rect[1][0]=temp\n",
" # print(rect)\n",
" rects.append(rect)\n",
" dims = np.append(dims, rect[1][axis])\n",
" \n",
" # box = cv2.boxPoints(rect)\n",
" # box = np.intp(box)\n",
" # boxes.append(box) \n",
" # mask = cv2.drawContours(mask, [box], -1, 255, thickness=2)\n",
" # break\n",
" # return mask\n",
" # print(dims)\n",
" meddim = np.median(dims)\n",
" # print(meddim)\n",
" \n",
" for rect in rects:\n",
" # print(rect[1][axis])\n",
" # print(meddim/2)\n",
" # print(rect[1][1-axis])\n",
" # print(rect[1][axis])\n",
" if (rect[1][axis] < meddim/2 and rect[1][1-axis] > image.shape[axis]/5):\n",
" adjustedrect = rect\n",
" adjustedrect[1][0] += 3\n",
" adjustedrect[1][1] += 3\n",
" box = cv2.boxPoints(adjustedrect)\n",
" box = np.intp(box)\n",
" # boxes.append(box) \n",
" # mask = cv2.drawContours(mask, [box], -1, 255, thickness=2)\n",
" image = cv2.drawContours(image, [box], -1, 255, thickness=cv2.FILLED)\n",
" \n",
" # return mask\n",
" \n",
" return image\n",
"\n",
"\n",
"def removeLinesFromText(image):\n",
" image = removeCardinalLines(image)\n",
" image = removeCardinalLines(image, horizontal=True)\n",
" return image\n",
" \n",
" \n",
" \n",
" colourimage = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)\n",
" imgcopy = image.copy()\n",
" \n",
" \n",
" for i in range(0,7):\n",
" imgcopy = fillLines(imgcopy)\n",
" \n",
" return imgcopy\n",
" \n",
" # maxgap = int(min(image.shape[0], image.shape[1])/20)\n",
" edges = cv2.Canny(imgcopy, 0, 500, 3)\n",
" # return edges\n",
" maxgap = int(min(imgcopy.shape[0], imgcopy.shape[1])/30)\n",
" # print(maxgap)\n",
" minlength = int(min(imgcopy.shape[0], imgcopy.shape[1])/2)\n",
" linesP = cv2.HoughLinesP(edges, 1, np.pi / 180, 50, None, minlength, maxgap)\n",
" \n",
" # print(len(linesP))\n",
" # print(linesP)\n",
" # mask = np.zeros(colourimage.shape, dtype=np.uint8)\n",
"\n",
" \n",
" for line in linesP:\n",
" l = line[0]\n",
" # colourimage = cv2.line(colourimage, (l[0], l[1]), (l[2], l[3]), (0,255,0), thickness=20)\n",
" image = cv2.line(image, (l[0], l[1]), (l[2], l[3]), 255, thickness=20)\n",
" # mask = cv2.line(mask, (l[0], l[1]), (l[2], l[3]), (255,255,255), thickness=3)\n",
" \n",
" # kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (4, 4))\n",
" # image = 255-cv2.morphologyEx(255-image, cv2.MORPH_OPEN, kernel)\n",
" # kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))\n",
" # image = 255-cv2.morphologyEx(255-image, cv2.MORPH_DILATE, kernel)\n",
"\n",
" # return colourimage\n",
" return image\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"def reduceColours(x, centering=127):\n",
" a=0.00008\n",
" b=40\n",
" c=256\n",
" x = x.astype(int)\n",
" # value = np.cbrt((x-centering)/a)+centering\n",
" value = -((c+4)/(1+np.exp((x-centering)/b)))+c\n",
" value = np.clip(value, 0, 255)\n",
" return value.astype(np.uint8)\n",
"\n",
"def bwadjustment(image, center=127):\n",
" gray = reduceColours(image,center)\n",
" \n",
" return gray"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"def textClarifying(image):\n",
" \n",
" ## Try using the LAB colour space???\n",
" gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)\n",
" autothreshold = np.clip(np.mean(gray)/1.2, 0, 255)\n",
" \n",
" lab = cv2.cvtColor(image, cv2.COLOR_BGR2LAB)\n",
" hls = cv2.cvtColor(image, cv2.COLOR_BGR2HLS)\n",
" \n",
" kernel1 = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))\n",
" kernel2 = cv2.getStructuringElement(cv2.MORPH_RECT, (4, 4))\n",
" kernel3 = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))\n",
" kernel4 = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))\n",
" kernel5 = cv2.getStructuringElement(cv2.MORPH_RECT, (8, 8))\n",
" kernel6 = cv2.getStructuringElement(cv2.MORPH_RECT, (20, 2))\n",
" kernel7 = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 8))\n",
" adaptivekernel = None\n",
" \n",
" # return lab[:,:,2]\n",
"\n",
" currentimgofatype = lab[:,:,0] # L-channel: expresses the brightness in the image\n",
" # currentimgofatype = lab[:,:,1] # A-channel: expresses variation of color in the image between red and green\n",
" # currentimgofatype = lab[:,:,2] # B-channel: expresses variation of color in the image between yellow and blue\n",
" \n",
" # currentimgofatype = hls[:,:,0]\n",
" # currentimgofatype = hls[:,:,1]\n",
" # currentimgofatype = hls[:,:,2]\n",
" # imglist = []\n",
"\n",
" Bthresh = cv2.adaptiveThreshold(currentimgofatype, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 201, 35)\n",
" \n",
" # return Bthresh\n",
"\n",
" contours, heirarchy = cv2.findContours(255-Bthresh,cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)\n",
" # imgcopy = cv2.drawContours(imgcopy, contours, -1, color=(0,255,0), thickness=1)\n",
" # return imgcopy\n",
" \n",
" boundingboxes = np.empty((len(contours), 4), dtype=int)\n",
"\n",
" for i, contour in enumerate(contours):\n",
" b = cv2.boundingRect(contour)\n",
" boundingboxes[i] = b\n",
" # imgcopy = cv2.rectangle(imgcopy, (b[0],b[1]), (b[0]+b[2], b[1]+b[3]), 128, thickness=3)\n",
" # return imgcopy\n",
" \n",
" epsilonvalue = np.median(boundingboxes, axis=0)[3]\n",
" \n",
" adaptivekernel = cv2.getStructuringElement(cv2.MORPH_RECT, (int(epsilonvalue/15), int(epsilonvalue/15)))\n",
" \n",
" # imglist.append(Bthresh)\n",
" # imglist.append(255-Bthresh)\n",
" \n",
" morphedBthresh = cv2.morphologyEx(Bthresh, cv2.MORPH_DILATE, kernel3, iterations=2)\n",
" # morphedBthresh = cv2.morphologyEx(Bthresh, cv2.MORPH_DILATE, adaptivekernel, iterations=2)\n",
" goodmorphBthresh = cv2.morphologyEx(Bthresh, cv2.MORPH_ERODE, kernel4, iterations=2)\n",
" # goodmorphBthresh = cv2.morphologyEx(Bthresh, cv2.MORPH_ERODE, adaptivekernel, iterations=3)\n",
" # morphedBthresh = cv2.morphologyEx(morphedBthresh, cv2.MORPH_DILATE, kernel7)\n",
" # imglist.append(morphedBthresh)\n",
" # imglist.append(goodmorphBthresh)\n",
" \n",
" \n",
" thresh = cv2.threshold(currentimgofatype, 0, 255, cv2.THRESH_OTSU)[1]\n",
" # imglist.append(thresh)\n",
" \n",
" morphedthresh = cv2.morphologyEx(thresh, cv2.MORPH_ERODE, kernel6)\n",
" morphedthresh = cv2.morphologyEx(morphedthresh, cv2.MORPH_ERODE, kernel7)\n",
" \n",
" \n",
" \n",
" # imglist.append(morphedthresh)\n",
" anded1 = cv2.bitwise_and(255-Bthresh, morphedthresh)\n",
" anded2 = cv2.bitwise_and(morphedBthresh, 255-morphedthresh)\n",
" # imglist.append(anded1)\n",
" # imglist.append(anded2)\n",
" \n",
" contours, other = cv2.findContours(anded2, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)\n",
" # print(other)\n",
" \n",
" mask = np.full(gray.shape,fill_value=255, dtype=np.uint8)\n",
" \n",
" for i, contour in enumerate(contours):\n",
" if (other[0][i][2] != -1 and other[0][i][3] == -1):\n",
" b = cv2.boundingRect(contour)\n",
" # image = cv2.rectangle(image, (b[0],b[1]), (b[0]+b[2], b[1]+b[3]), (0,255,0), thickness=3)\n",
" mask = cv2.rectangle(mask, (b[0],b[1]), (b[0]+b[2], b[1]+b[3]), 0, thickness=cv2.FILLED)\n",
" \n",
" bingus = cv2.bitwise_or(goodmorphBthresh, mask)\n",
" # bingus = cv2.morphologyEx(bingus, cv2.MORPH_CLOSE, adaptivekernel)\n",
" # imglist.append(bingus)\n",
" # return imglist\n",
" return bingus\n",
" \n",
" # imglist.append(image)\n",
" \n",
" # reversedxor = 255-cv2.bitwise_and(255-Bthresh, 255-morphedthresh)\n",
" # # imglist.append(reversedxor)\n",
" \n",
" # morphedanded = cv2.morphologyEx(anded1, cv2.MORPH_DILATE, kernel5)\n",
" # # imglist.append(morphedanded)\n",
" \n",
" # testout = cv2.bitwise_or(Bthresh, morphedanded)\n",
" # # imglist.append(testout)\n",
" \n",
" # # # blurred = cv2.GaussianBlur(anded, (21,21), 0)\n",
" # # # imglist.append(blurred)\n",
" \n",
" # # # blurthresh = cv2.adaptiveThreshold(255-blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 201, 35)\n",
" # # # imglist.append(blurthresh)\n",
" \n",
" # # # Bthreshmasked = cv2.bitwise_and(gray, gray, mask=anded)\n",
" # # # imglist.append(Bthreshmasked)\n",
" \n",
" # # # thresh2 = cv2.threshold(Bthreshmasked, autothreshold, 255, cv2.THRESH_BINARY)[1]\n",
" # # # imglist.append(thresh2)\n",
" \n",
" \n",
" \n",
" # # xored = cv2.bitwise_xor(Bthresh, 255-thresh)\n",
" # # # imglist.append(xored)\n",
"\n",
" # # morphxor = cv2.morphologyEx(xored, cv2.MORPH_DILATE, kernel4, iterations=2)\n",
" # # morphxor = cv2.morphologyEx(morphxor, cv2.MORPH_ERODE, kernel5, iterations=2)\n",
" # # # morphxor = cv2.morphologyEx(morphxor, cv2.MORPH_ERODE, kernel2, iterations=2)\n",
" # # # imglist.append(morphxor)\n",
" \n",
" \n",
" # # comboed = cv2.bitwise_or(Bthresh, 255-morphxor)\n",
" # # # imglist.append(comboed)\n",
" # # # maybe SOMETHING CAN BE DONE SINCE THEY KIND OF GET A HALO OF WHITE AROUND THE TEXT\n",
" \n",
" # final = cv2.morphologyEx(testout, cv2.MORPH_OPEN,kernel3)\n",
" # final = cv2.morphologyEx(final, cv2.MORPH_OPEN,kernel3)\n",
" # final = cv2.morphologyEx(final, cv2.MORPH_ERODE,kernel3)\n",
" # # imglist.append(final)\n",
" # return imglist\n",
" # return final\n",
"\n",
" \n",
" autothreshold = np.clip(np.mean(gray)/1.5, 0, 255)\n",
" thresh1 = cv2.threshold(gray, autothreshold, 255, cv2.THRESH_TOZERO)[1]\n",
" # return thresh1\n",
" ### FLATTEN colours?\n",
" ## do a threshold gradient thing first?\n",
" centervalue = np.clip(np.mean(thresh1)/1.6, 0, 255)\n",
" grayflattened = bwadjustment(thresh1, centervalue)\n",
" # return grayflattened\n",
" thresh = cv2.adaptiveThreshold(grayflattened, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 41, 25)\n",
" # autothreshold = np.clip(np.mean(grayflattened)/1.25, 0, 255)\n",
" # thresh = cv2.threshold(grayflattened, autothreshold, 255, cv2.THRESH_BINARY)[1]\n",
" # thresh = cv2.threshold(grayflattened, 0, 255, cv2.THRESH_OTSU)[1]\n",
" # return thresh\n",
" \n",
" kernel1 = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))\n",
" kernel2 = cv2.getStructuringElement(cv2.MORPH_RECT, (4, 4))\n",
" kernel3 = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))\n",
" \n",
" \n",
" # final = thresh\n",
" final = cv2.morphologyEx(thresh, cv2.MORPH_OPEN,kernel3)\n",
" final = cv2.morphologyEx(final, cv2.MORPH_OPEN,kernel3)\n",
" final = cv2.morphologyEx(final, cv2.MORPH_ERODE,kernel3)\n",
" # final = cv2.morphologyEx(final, cv2.MORPH_DILATE,kernel2)\n",
" # final = cv2.morphologyEx(final, cv2.MORPH_ERODE,kernel3)\n",
" # final = cv2.morphologyEx(final, cv2.MORPH_DILATE,kernel2)\n",
" # final = cv2.morphologyEx(final, cv2.MORPH_ERODE,kernel2)\n",
" # final = cv2.morphologyEx(final, cv2.MORPH_DILATE,kernel2)\n",
" # final = cv2.morphologyEx(final, cv2.MORPH_ERODE,kernel1)\n",
" # final = cv2.morphologyEx(final, cv2.MORPH_DILATE,kernel2)\n",
" # final = 255-cv2.morphologyEx(255-final, cv2.MORPH_CLOSE,kernel)\n",
" \n",
" \n",
" reduced = cv2.morphologyEx(final, cv2.MORPH_DILATE, kernel1)\n",
" reduced = cv2.morphologyEx(reduced, cv2.MORPH_ERODE, kernel2)\n",
" reduced = cv2.morphologyEx(reduced, cv2.MORPH_DILATE, kernel2)\n",
" reduced = cv2.morphologyEx(reduced, cv2.MORPH_ERODE, kernel2)\n",
" reduced = 255 - cv2.morphologyEx(255-reduced, cv2.MORPH_OPEN, kernel3)\n",
" \n",
" # return reduced\n",
" contours, _ = cv2.findContours(255-reduced, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)\n",
" \n",
" \n",
" mask = np.zeros(final.shape, dtype=np.uint8)\n",
" rects = []\n",
" for contour in contours:\n",
" b = cv2.boundingRect(contour)\n",
" rects.append(b)\n",
" mask = cv2.rectangle(mask, (b[0],b[1]), (b[0]+b[2], b[1]+b[3]), 255, thickness=cv2.FILLED)\n",
" \n",
" # return mask\n",
" final = cv2.bitwise_or(final, 255-mask)\n",
" \n",
" return final\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"# function to correct the median-angle to give it to the cv2.warpAffine() function\n",
"def anglecorrector(angle):\n",
" if 0 <= angle <= 90:\n",
" corrected_angle = angle - 90\n",
" elif -45 <= angle < 0:\n",
" corrected_angle = angle - 90\n",
" elif -90 <= angle < -45:\n",
" corrected_angle = 90 + angle\n",
" return corrected_angle"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"def padWithColour(img, hpadding=0, vpadding=0, fill=(0,0,0)):\n",
" borderType = cv2.BORDER_CONSTANT\n",
" out = cv2.copyMakeBorder(img, vpadding, vpadding, hpadding, hpadding, borderType, None, fill)\n",
" return out\n",
"\n",
"def mergecontours(contours):\n",
" cont = np.vstack(contours)\n",
" finalcontour = cv2.convexHull(cont)\n",
" return finalcontour"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
"def getSkewAngle(cvImage) -> float:\n",
" # Prep image, copy, convert to gray scale, blur, and threshold\n",
" newImage = padWithColour(cvImage, hpadding=50, vpadding=50, fill=(255,255,255))\n",
" # return newImage\n",
" gray = cv2.cvtColor(newImage, cv2.COLOR_BGR2GRAY)\n",
" blur = cv2.GaussianBlur(gray, (9, 9), 0)\n",
" thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]\n",
"\n",
" # Apply dilate to merge text into meaningful lines/paragraphs.\n",
" # Use larger kernel on X axis to merge characters into single line, cancelling out any spaces.\n",
" # But use smaller kernel on Y axis to separate between different blocks of text\n",
" kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (10, 5))\n",
" dilate = cv2.dilate(thresh, kernel, iterations=5)\n",
" # return dilate\n",
"\n",
" # Find all contours\n",
" contours, hierarchy = cv2.findContours(dilate, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)\n",
" contours = sorted(contours, key = cv2.contourArea, reverse = True)\n",
"\n",
" # Find largest contour and surround in min area box\n",
" largestContour = contours[0]\n",
"\n",
" mergedcontour = mergecontours(contours)\n",
"\n",
" # return cv2.drawContours(newImage, [mergedcontour], -1, (0,255,0), thickness=3)\n",
" minAreaRect = cv2.minAreaRect(mergedcontour)\n",
" minAreaRect = list(minAreaRect)\n",
" minAreaRect[1] = list(minAreaRect[1])\n",
" if (minAreaRect[1][0] > minAreaRect[1][1]):\n",
" temp = minAreaRect[1][0]\n",
" minAreaRect[1][0] = minAreaRect[1][1]\n",
" minAreaRect[1][1] = temp\n",
" minAreaRect[2] -= 90\n",
" # return cv2.drawContours(newImage, [largestContour], -1, (0,255,0), thickness=3)\n",
" # minAreaRect = cv2.minAreaRect(largestContour)\n",
"\n",
" box = cv2.boxPoints(minAreaRect)\n",
" box = np.intp(box) \n",
" newImage = cv2.drawContours(newImage, [box], -1, (0,255,0), thickness=3)\n",
" # return newImage\n",
"\n",
" # Determine the angle. Convert it to the value that was originally used to obtain skewed image\n",
" angle = minAreaRect[-1]\n",
" # print(angle)\n",
" angle = anglecorrector(angle)+90\n",
" # print(angle)\n",
" return angle\n",
"\n",
"def minboxdeskew(img, fill=(0,0,0)):\n",
" colourimg = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)\n",
" angle = getSkewAngle(colourimg)\n",
" padimg = padWithColour(img, hpadding=50, vpadding=50, fill=fill)\n",
" rotated = mf.rotate(padimg, angle, fill=fill)\n",
" return rotated"
]
},
{
"cell_type": "code",
"execution_count": 26,
"execution_count": 2315,
"metadata": {},
"outputs": [],
"source": [
"def cropclarifying(image):\n",
" whitedbackground = mf.whiteoutbackground(image)\n",
" whitedbackground = whiteoutbackground(image)\n",
" # return whitedbackground\n",
" \n",
" textrefined = textClarifying(whitedbackground)\n",
"\n",
" textrefined = mf.textClarifying(whitedbackground)\n",
" # return textrefined\n",
" #maybe now is when I put in the line removing function\n",
" \n",
"\n",
" lineout = mf.removeLinesFromText(textrefined)\n",
" \n",
"\n",
" return lineout\n",
" # implement a function that's called refine text\n",
"\n",
"\n",
"\n",
" # implement a function that's called refine text"
]
},
{
"cell_type": "code",
"execution_count": 2316,
"metadata": {},
"outputs": [],
"source": [
"def houghlineprocessing(image):\n",
" croppedanddeskewed, _ = mf.houghlinedeskewandcrop(image)\n",
" \n",
@ -577,7 +154,7 @@
" # return postprocessed\n",
" \n",
" # final = mf.externaldeskew(postprocessed, fill=(255,255,255))\n",
" final = minboxdeskew(postprocessed, fill=(255,255,255))\n",
" final = mf.receipttextdeskew(postprocessed, fill=(255,255,255))\n",
" \n",
" # final = mf.croptoblack(final)\n",
" \n",
@ -591,19 +168,21 @@
},
{
"cell_type": "code",
"execution_count": 27,
"execution_count": 2317,
"metadata": {},
"outputs": [],
"source": [
"# prepped, scaler, hp, vp = mf.squareandthenresize(img, fill=255, width=1000, returnscalerinfo=True)\n",
"outs = mf.houghlineprocessing(img)\n",
"# outs = houghlinedeskewandcrop(img)\n",
"# outs = outs[0]\n",
"# print(croprect)\n",
"#need to fix premorphCrop. it removes too much"
]
},
{
"cell_type": "code",
"execution_count": 28,
"execution_count": 2318,
"metadata": {},
"outputs": [],
"source": [
@ -617,7 +196,7 @@
},
{
"cell_type": "code",
"execution_count": 29,
"execution_count": 2319,
"metadata": {},
"outputs": [],
"source": [
@ -629,32 +208,63 @@
},
{
"cell_type": "code",
"execution_count": 35,
"execution_count": 2320,
"metadata": {},
"outputs": [],
"source": [
"# for out in outs:\n",
"# if (out.shape[0] > out.shape[1]):\n",
"# cv2.imshow(\"test1\", mf.ResizeWithAspectRatio(out, height=1000))\n",
"# else:\\\n",
"# cv2.imshow(\"test1\", mf.ResizeWithAspectRatio(out, width=1000))\n",
"# key = cv2.waitKey(0)\n",
"# cv2.destroyAllWindows()\n",
"# if (key == 107):\n",
"# break\n",
"if (isinstance(outs, np.ndarray)):\n",
" if (outs.shape[0] > outs.shape[1]):\n",
" cv2.imshow(\"test\", mf.ResizeWithAspectRatio(outs, height=1350))\n",
" else:\n",
" cv2.imshow(\"test\", mf.ResizeWithAspectRatio(outs, width=1000))\n",
"else:\n",
" for i, out in enumerate(outs):\n",
" if (out.shape[0] > out.shape[1]):\n",
" cv2.imshow(\"test\"+str(i), mf.ResizeWithAspectRatio(out, height=1350))\n",
"def showimgs(imgs):\n",
" if (isinstance(imgs, np.ndarray)):\n",
" if (imgs.shape[0] > imgs.shape[1]):\n",
" cv2.imshow(\"test\", mf.ResizeWithAspectRatio(imgs, height=1350))\n",
" else:\n",
" cv2.imshow(\"test\"+str(i), mf.ResizeWithAspectRatio(out, width=1000))\n",
"cv2.waitKey(0)\n",
"cv2.destroyAllWindows()"
" cv2.imshow(\"test\", mf.ResizeWithAspectRatio(imgs, width=1000))\n",
" else:\n",
" for i, out in enumerate(imgs):\n",
" if (out.shape[0] > out.shape[1]):\n",
" cv2.imshow(\"test\"+str(i), mf.ResizeWithAspectRatio(out, height=1350))\n",
" else:\n",
" cv2.imshow(\"test\"+str(i), mf.ResizeWithAspectRatio(out, width=1000))\n",
" cv2.waitKey(0)\n",
" cv2.destroyAllWindows()"
]
},
{
"cell_type": "code",
"execution_count": 2321,
"metadata": {},
"outputs": [],
"source": [
"showimgs(outs)"
]
},
{
"cell_type": "code",
"execution_count": 2322,
"metadata": {},
"outputs": [],
"source": [
"# # for out in outs:\n",
"# # if (out.shape[0] > out.shape[1]):\n",
"# # cv2.imshow(\"test1\", mf.ResizeWithAspectRatio(out, height=1000))\n",
"# # else:\\\n",
"# # cv2.imshow(\"test1\", mf.ResizeWithAspectRatio(out, width=1000))\n",
"# # key = cv2.waitKey(0)\n",
"# # cv2.destroyAllWindows()\n",
"# # if (key == 107):\n",
"# # break\n",
"# if (isinstance(outs, np.ndarray)):\n",
"# if (outs.shape[0] > outs.shape[1]):\n",
"# cv2.imshow(\"test\", mf.ResizeWithAspectRatio(outs, height=1350))\n",
"# else:\n",
"# cv2.imshow(\"test\", mf.ResizeWithAspectRatio(outs, width=1000))\n",
"# else:\n",
"# for i, out in enumerate(outs):\n",
"# if (out.shape[0] > out.shape[1]):\n",
"# cv2.imshow(\"test\"+str(i), mf.ResizeWithAspectRatio(out, height=1350))\n",
"# else:\n",
"# cv2.imshow(\"test\"+str(i), mf.ResizeWithAspectRatio(out, width=1000))\n",
"# cv2.waitKey(0)\n",
"# cv2.destroyAllWindows()"
]
}
],

View File

@ -230,6 +230,11 @@ def lineBoundingRect(lines, asRect=False, returnint=False):
y1 = int(y1)
x2 = int(x2)
y2 = int(y2)
x1 = max(0, x1)
x2 = max(0,x2)
y1 = max(0, y1)
y2 = max(0, y2)
return (x1,y1,x2,y2)
# print(lines.max(0))
@ -390,6 +395,7 @@ def houghlinedeskewthencrop(baseimage, preppedimage, rotationangle):
rotateddst1 = rotate(preppedimage, rotationangle)
rotatedbaseimage = rotate(baseimage, rotationangle)
sizemultiplier = rotatedbaseimage.shape[0]/rotateddst1.shape[0]
# print(sizemultiplier)
linesP = cv2.HoughLinesP(preppedimage, 1, np.pi / 180, 30, None, 90, 30)
@ -409,14 +415,15 @@ def houghlinedeskewthencrop(baseimage, preppedimage, rotationangle):
else:
marginlines = vmarginlines
# print(marginlines)
rect = lineBoundingRect(marginlines,asRect=False, returnint=True)
# print(rect)
scaledrect = (int(rect[0]*sizemultiplier), int(rect[1]*sizemultiplier), int(rect[2]*sizemultiplier), int(rect[3]*sizemultiplier))
croppedbaseimage = rotatedbaseimage[scaledrect[1]:scaledrect[3], scaledrect[0]:scaledrect[2], :]
# print(croppedbaseimage.shape)
shrunkencbi, sizemultiplier = ResizeWithAspectRatio(croppedbaseimage, width=1000, retscale=True)
gray = cv2.cvtColor(shrunkencbi, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)[1]
@ -438,18 +445,20 @@ def houghlinedeskewthencrop(baseimage, preppedimage, rotationangle):
return finalbaseimage, rotationangle
def houghlinedeskewandcrop(image):
dst1, croppedogimage = prepimageforhoughline(image) ## scaling and cropping occurs. need to also return the changes done
# print(dst1.shape)
canny, croppedogimage = prepimageforhoughline(image) ## scaling and cropping occurs. need to also return the changes done
# return canny, croppedogimage
# print(canny.shape)
# print(croppedogimage.shape)
## -----------------finding angle to deskew-----------------
rotationangle = houghlinedeskewangle(dst1)
rotationangle = houghlinedeskewangle(canny)
# print(rotationangle)
# -----------------end of finding angle to deskew-----------------
## -----------------deskewing and then cropping-----------------
return houghlinedeskewthencrop(croppedogimage, dst1, rotationangle)
return houghlinedeskewthencrop(croppedogimage, canny, rotationangle)
def bruteforceprocessrects(greaterrects, lesserrects):
# squaredgrects = np.array([mf.xywhrectto2prect(rect) for rect in greaterrects])
@ -489,40 +498,54 @@ def processrects(greaterrects, lesserrects):
return bruteforceprocessrects(greaterrects, lesserrects)
def whiteoutbackground(image):
# imagecpy = image.copy()
imagecpy = image.copy()
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# blur = cv2.blur(gray, (7,7))
# window = 51
window = gray.shape[1]//8
window = min(gray.shape[1], gray.shape[0])//20
if window % 2 == 0:
window += 1
thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, window, 2)
thresh1 = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, window, 5)
thresh2 = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU)[1]
thresh = cv2.bitwise_and(thresh1, thresh2)
# return thresh
# dim = int(min(thresh.shape[0], thresh.shape[1])/400)
dim = 3
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (dim, dim))
morphedthresh = cv2.morphologyEx(thresh, cv2.MORPH_ERODE, kernel)
# return morphedthresh
contours, heirarchy = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# contours1, heirarchy = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours2, heirarchy = cv2.findContours(morphedthresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
biggestcontour = max(contours, key=cv2.contourArea)
# imagecpy = cv2.drawContours(imagecpy, [biggestcontour], -1, (0,255,0), thickness=3)
# biggestcontour1 = max(contours1, key=cv2.contourArea)
biggestcontour2 = max(contours2, key=cv2.contourArea)
# imagecpy = cv2.drawContours(imagecpy, [biggestcontour1], -1, (0,255,0), thickness=3)
# imagecpy = cv2.drawContours(imagecpy, [biggestcontour2], -1, (0,0,255), thickness=3)
# return imagecpy
blank = np.full(thresh.shape, 255, dtype=np.uint8)
mask = blank.copy()
mask = cv2.drawContours(mask, [biggestcontour], -1, (0,0,0), thickness=cv2.FILLED)
# mask = cv2.drawContours(mask, [biggestcontour1], -1, (0,0,0), thickness=cv2.FILLED)
mask = cv2.drawContours(mask, [biggestcontour2], -1, (0,0,0), thickness=cv2.FILLED)
# return mask
invertmask = 255 - mask
dim = int(min(invertmask.shape[0], invertmask.shape[1])/100)
dim = int(min(invertmask.shape[0], invertmask.shape[1])/200)
# # dim = 21
# print(dim)
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (dim, dim))
# invertmask = cv2.morphologyEx(invertmask, cv2.MORPH_DILATE, kernel)
mask = 255 - cv2.morphologyEx(invertmask, cv2.MORPH_ERODE, kernel, iterations=1)
# # return mask
# return mask
mask = cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR)
whitedbackground = cv2.bitwise_or(image, mask)
@ -535,16 +558,21 @@ def whiteoutbackground(image):
maskgray = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)
dim = int(min(maskgray.shape[0], maskgray.shape[1])/50)
invert = 255-maskgray
# dim = 21
# print(dim)
dim = int(min(maskgray.shape[0], maskgray.shape[1])/200)
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (dim, dim))
morphedmask = cv2.morphologyEx(invert, cv2.MORPH_OPEN, kernel, iterations=10)
morphedmask = cv2.morphologyEx(invert, cv2.MORPH_ERODE, kernel)
dim = int(min(maskgray.shape[0], maskgray.shape[1])/50)
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (dim, dim))
morphedmask = cv2.morphologyEx(morphedmask, cv2.MORPH_OPEN, kernel, iterations=7)
# return 255 - morphedmask
morphedmask = 255 - morphedmask
finalmask = cv2.bitwise_or(morphedmask, maskgray)
# return finalmask
# edgecontours, _ = cv2.findContours(255-morphedmask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
finalmaskbgr = cv2.cvtColor(finalmask, cv2.COLOR_GRAY2BGR)
@ -639,7 +667,7 @@ def cropclarifying(image):
return lineout
# implement a function that's called refine text
def croptoblack(image, extraborder=10):
invertedimage = cv2.bitwise_not(image)
blackpixels = cv2.findNonZero(invertedimage)
@ -688,12 +716,7 @@ def textClarifying(image):
# return lab[:,:,2]
currentimgofatype = lab[:,:,0] # L-channel: expresses the brightness in the image
# currentimgofatype = lab[:,:,1] # A-channel: expresses variation of color in the image between red and green
# currentimgofatype = lab[:,:,2] # B-channel: expresses variation of color in the image between yellow and blue
# currentimgofatype = hls[:,:,0]
# currentimgofatype = hls[:,:,1]
# currentimgofatype = hls[:,:,2]
# imglist = []
Bthresh = cv2.adaptiveThreshold(currentimgofatype, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 201, 35)
@ -733,12 +756,14 @@ def textClarifying(image):
morphedthresh = cv2.morphologyEx(thresh, cv2.MORPH_ERODE, kernel6)
morphedthresh = cv2.morphologyEx(morphedthresh, cv2.MORPH_ERODE, kernel7)
reducedthresh = cv2.morphologyEx(thresh, cv2.MORPH_DILATE, adaptivekernel, iterations=1)
# imglist.append(morphedthresh)
# imglist.append(reducedthresh)
anded1 = cv2.bitwise_and(255-Bthresh, morphedthresh)
anded2 = cv2.bitwise_and(morphedBthresh, 255-morphedthresh)
anded2 = cv2.bitwise_and(reducedthresh, 255-morphedthresh)
# imglist.append(anded1)
# imglist.append(anded2)
@ -753,7 +778,8 @@ def textClarifying(image):
# image = cv2.rectangle(image, (b[0],b[1]), (b[0]+b[2], b[1]+b[3]), (0,255,0), thickness=3)
mask = cv2.rectangle(mask, (b[0],b[1]), (b[0]+b[2], b[1]+b[3]), 0, thickness=cv2.FILLED)
bingus = cv2.bitwise_or(goodmorphBthresh, mask)
# bingus = cv2.bitwise_or(goodmorphBthresh, mask)
bingus = cv2.bitwise_or(Bthresh, mask)
# bingus = cv2.morphologyEx(bingus, cv2.MORPH_CLOSE, adaptivekernel)
# imglist.append(bingus)
# return imglist
@ -1040,9 +1066,7 @@ def receipttextdeskew(img, fill=(0,0,0)):
## ------------------------------Full deskewing and cropping------------------------------
def houghlineprocessing(image):
croppedanddeskewed, _ = houghlinedeskewandcrop(image)
## TODO: if houghlinedeskewandcrop doesn't change the image (change the discarded `_` return value to something useful to detect this), then cropclarifying should only do the text-isolation section and not try to white out any background.
## TODO: if there's no cropping, maybe even jump straight to the external deskew first, before passing the image into cropclarifying.
postprocessed = cropclarifying(croppedanddeskewed)
# return postprocessed
postprocessed = croptoblack(postprocessed)
@ -1050,18 +1074,16 @@ def houghlineprocessing(image):
# postprocessed = cv2.cvtColor(postprocessed, cv2.COLOR_GRAY2BGR)
# return postprocessed
# final = externaldeskew(postprocessed, fill=(255,255,255))
# final = mf.externaldeskew(postprocessed, fill=(255,255,255))
final = receipttextdeskew(postprocessed, fill=(255,255,255))
final = cv2.cvtColor(final, cv2.COLOR_GRAY2BGR)
# final = mf.croptoblack(final)
# cv2.imshow("postprocessed", mf.ResizeWithAspectRatio(postprocessed, 1000))
# cv2.imshow("final", mf.ResizeWithAspectRatio(final, 1000))
# cv2.waitKey(0)
# cv2.destroyAllWindows()
return final
###### TODO (nice to have): convert the code related to the Hough-line processing into C, since it only really uses OpenCV