From c06408c783165c608443aac51be8156c691e1ca4 Mon Sep 17 00:00:00 2001 From: Ethan Wellenreiter Date: Tue, 14 Nov 2023 17:04:17 -0500 Subject: [PATCH 1/2] Updating the text clarification for a more faithful output Signed-off-by: Ethan Wellenreiter --- code/autocropper/houghlinedevspace.ipynb | 669 +++-------------------- code/autocropper/myfunctions.py | 49 +- 2 files changed, 99 insertions(+), 619 deletions(-) diff --git a/code/autocropper/houghlinedevspace.ipynb b/code/autocropper/houghlinedevspace.ipynb index f6313bc..fe6318d 100644 --- a/code/autocropper/houghlinedevspace.ipynb +++ b/code/autocropper/houghlinedevspace.ipynb @@ -2,9 +2,20 @@ "cells": [ { "cell_type": "code", - "execution_count": 17, + "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.10/dist-packages/torchvision/datapoints/__init__.py:12: UserWarning: The torchvision.datapoints and torchvision.transforms.v2 namespaces are still Beta. While we do not expect major breaking changes, some APIs may still change according to user feedback. Please submit any feedback you may have in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check out https://github.com/pytorch/vision/issues/7319 to learn more about the APIs that we suspect might involve future changes. You can silence this warning by calling torchvision.disable_beta_transforms_warning().\n", + " warnings.warn(_BETA_TRANSFORMS_WARNING)\n", + "/usr/local/lib/python3.10/dist-packages/torchvision/transforms/v2/__init__.py:54: UserWarning: The torchvision.datapoints and torchvision.transforms.v2 namespaces are still Beta. While we do not expect major breaking changes, some APIs may still change according to user feedback. Please submit any feedback you may have in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check out https://github.com/pytorch/vision/issues/7319 to learn more about the APIs that we suspect might involve future changes. You can silence this warning by calling torchvision.disable_beta_transforms_warning().\n", + " warnings.warn(_BETA_TRANSFORMS_WARNING)\n" + ] + } + ], "source": [ "import cv2\n", "import myfunctions as mf\n", @@ -15,595 +26,30 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ - "img = cv2.imread('./test_images/IMG_7640.jpg')" + "img = cv2.imread('/mnt/dataset/baseimages/1.jpg')" ] }, { "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [], - "source": [ - "def whiteoutbackground(image):\n", - " # imagecpy = image.copy()\n", - " gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)\n", - " \n", - " # blur = cv2.blur(gray, (7,7))\n", - " \n", - " # window = 51\n", - " window = gray.shape[1]//8\n", - " if window % 2 == 0:\n", - " window += 1\n", - " thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, window, 2)\n", - " # return thresh\n", - " \n", - " \n", - " \n", - " contours, heirarchy = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)\n", - " \n", - " biggestcontour = max(contours, key=cv2.contourArea)\n", - " # imagecpy = cv2.drawContours(imagecpy, [biggestcontour], -1, (0,255,0), thickness=3)\n", - " # return imagecpy\n", - " \n", - " blank = np.full(thresh.shape, 255, dtype=np.uint8)\n", - " mask = blank.copy()\n", - " mask = cv2.drawContours(mask, [biggestcontour], -1, (0,0,0), thickness=cv2.FILLED)\n", - " \n", - " invertmask = 255 - mask\n", - " \n", - " \n", - " dim = int(min(invertmask.shape[0], invertmask.shape[1])/100)\n", - " # # dim = 21\n", - " # print(dim)\n", - " kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (dim, dim))\n", - " # invertmask = cv2.morphologyEx(invertmask, cv2.MORPH_DILATE, kernel)\n", - " mask = 255 - cv2.morphologyEx(invertmask, cv2.MORPH_ERODE, kernel, iterations=1)\n", - " # # return mask\n", - " \n", - " mask = cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR)\n", - " whitedbackground = cv2.bitwise_or(image, mask)\n", - " # return whitedbackground\n", - " \n", - " gray2 = cv2.cvtColor(whitedbackground, cv2.COLOR_BGR2GRAY)\n", - "\n", - " canny = cv2.Canny(gray2, 0, 500, None, 3)\n", - " \n", - " maskgray = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)\n", - " \n", - " \n", - " dim = int(min(maskgray.shape[0], maskgray.shape[1])/50)\n", - " invert = 255-maskgray\n", - " # dim = 21\n", - " # print(dim)\n", - " kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (dim, dim))\n", - " morphedmask = cv2.morphologyEx(invert, cv2.MORPH_OPEN, kernel, iterations=10)\n", - " # return 255 - morphedmask\n", - " morphedmask = 255 - morphedmask\n", - "\n", - " finalmask = cv2.bitwise_or(morphedmask, maskgray)\n", - " \n", - " # edgecontours, _ = cv2.findContours(255-morphedmask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)\n", - " finalmaskbgr = cv2.cvtColor(finalmask, cv2.COLOR_GRAY2BGR)\n", - " # return finalmaskbgr\n", - "\n", - " whitedbackground = cv2.bitwise_or(whitedbackground, finalmaskbgr)\n", - " # return whitedbackground\n", - " \n", - " test = cv2.inpaint(whitedbackground, finalmask, 3, cv2.INPAINT_TELEA)\n", - " return test" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [], - "source": [ - "def fillLines(image):\n", - " edges = cv2.Canny(image, 0, 500, 3)\n", - " # return edges\n", - " maxgap = int(min(image.shape[0], image.shape[1])/30)\n", - " # print(maxgap)\n", - " minlength = int(min(image.shape[0], image.shape[1])/2)\n", - " linesP = cv2.HoughLinesP(edges, 1, np.pi / 180, 50, None, minlength, maxgap)\n", - " for line in linesP:\n", - " l = line[0]\n", - " image = cv2.line(image, (l[0], l[1]), (l[2], l[3]), 0, thickness=1)\n", - " # colourimage = cv2.line(colourimage, (l[0], l[1]), (l[2], l[3]), (0,255,0), thickness=3)\n", - " return image\n", - "\n", - "\n", - "def removeCardinalLines(image, horizontal=False):\n", - " # kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,3))\n", - " axis = 0\n", - " if (horizontal):\n", - " cardinal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (25,1))\n", - " axis = 1\n", - " else:\n", - " cardinal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1,15))\n", - " lines = cv2.morphologyEx(image, cv2.MORPH_OPEN, cardinal_kernel, iterations=2)\n", - " # lines = cv2.morphologyEx(lines, cv2.MORPH_OPEN, kernel, iterations=2)\n", - " # return lines\n", - "\n", - " mask = np.zeros(image.shape, dtype=np.uint8)\n", - " contours, _ = cv2.findContours(255-lines, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)\n", - " # mask = cv2.drawContours(mask, contours, -1, 255, thickness=3)\n", - " # return mask\n", - " \n", - " \n", - " boxes = []\n", - " dims = np.array([])\n", - " rects = []\n", - " for contour in contours:\n", - " rect = cv2.minAreaRect(contour)\n", - " rect = list(rect)\n", - " rect[1]=list(rect[1])\n", - " if (rect[1][axis] > rect[1][1-axis]):\n", - " rect[2] = rect[2] -90\n", - " temp = rect[1][1]\n", - " rect[1][1]=rect[1][0]\n", - " rect[1][0]=temp\n", - " # print(rect)\n", - " rects.append(rect)\n", - " dims = np.append(dims, rect[1][axis])\n", - " \n", - " # box = cv2.boxPoints(rect)\n", - " # box = np.intp(box)\n", - " # boxes.append(box) \n", - " # mask = cv2.drawContours(mask, [box], -1, 255, thickness=2)\n", - " # break\n", - " # return mask\n", - " # print(dims)\n", - " meddim = np.median(dims)\n", - " # print(meddim)\n", - " \n", - " for rect in rects:\n", - " # print(rect[1][axis])\n", - " # print(meddim/2)\n", - " # print(rect[1][1-axis])\n", - " # print(rect[1][axis])\n", - " if (rect[1][axis] < meddim/2 and rect[1][1-axis] > image.shape[axis]/5):\n", - " adjustedrect = rect\n", - " adjustedrect[1][0] += 3\n", - " adjustedrect[1][1] += 3\n", - " box = cv2.boxPoints(adjustedrect)\n", - " box = np.intp(box)\n", - " # boxes.append(box) \n", - " # mask = cv2.drawContours(mask, [box], -1, 255, thickness=2)\n", - " image = cv2.drawContours(image, [box], -1, 255, thickness=cv2.FILLED)\n", - " \n", - " # return mask\n", - " \n", - " return image\n", - "\n", - "\n", - "def removeLinesFromText(image):\n", - " image = removeCardinalLines(image)\n", - " image = removeCardinalLines(image, horizontal=True)\n", - " return image\n", - " \n", - " \n", - " \n", - " colourimage = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)\n", - " imgcopy = image.copy()\n", - " \n", - " \n", - " for i in range(0,7):\n", - " imgcopy = fillLines(imgcopy)\n", - " \n", - " return imgcopy\n", - " \n", - " # maxgap = int(min(image.shape[0], image.shape[1])/20)\n", - " edges = cv2.Canny(imgcopy, 0, 500, 3)\n", - " # return edges\n", - " maxgap = int(min(imgcopy.shape[0], imgcopy.shape[1])/30)\n", - " # print(maxgap)\n", - " minlength = int(min(imgcopy.shape[0], imgcopy.shape[1])/2)\n", - " linesP = cv2.HoughLinesP(edges, 1, np.pi / 180, 50, None, minlength, maxgap)\n", - " \n", - " # print(len(linesP))\n", - " # print(linesP)\n", - " # mask = np.zeros(colourimage.shape, dtype=np.uint8)\n", - "\n", - " \n", - " for line in linesP:\n", - " l = line[0]\n", - " # colourimage = cv2.line(colourimage, (l[0], l[1]), (l[2], l[3]), (0,255,0), thickness=20)\n", - " image = cv2.line(image, (l[0], l[1]), (l[2], l[3]), 255, thickness=20)\n", - " # mask = cv2.line(mask, (l[0], l[1]), (l[2], l[3]), (255,255,255), thickness=3)\n", - " \n", - " # kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (4, 4))\n", - " # image = 255-cv2.morphologyEx(255-image, cv2.MORPH_OPEN, kernel)\n", - " # kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))\n", - " # image = 255-cv2.morphologyEx(255-image, cv2.MORPH_DILATE, kernel)\n", - "\n", - " # return colourimage\n", - " return image\n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [], - "source": [ - "def reduceColours(x, centering=127):\n", - " a=0.00008\n", - " b=40\n", - " c=256\n", - " x = x.astype(int)\n", - " # value = np.cbrt((x-centering)/a)+centering\n", - " value = -((c+4)/(1+np.exp((x-centering)/b)))+c\n", - " value = np.clip(value, 0, 255)\n", - " return value.astype(np.uint8)\n", - "\n", - "def bwadjustment(image, center=127):\n", - " gray = reduceColours(image,center)\n", - " \n", - " return gray" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [], - "source": [ - "def textClarifying(image):\n", - " \n", - " ## Try using the LAB colour space???\n", - " gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)\n", - " autothreshold = np.clip(np.mean(gray)/1.2, 0, 255)\n", - " \n", - " lab = cv2.cvtColor(image, cv2.COLOR_BGR2LAB)\n", - " hls = cv2.cvtColor(image, cv2.COLOR_BGR2HLS)\n", - " \n", - " kernel1 = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))\n", - " kernel2 = cv2.getStructuringElement(cv2.MORPH_RECT, (4, 4))\n", - " kernel3 = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))\n", - " kernel4 = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))\n", - " kernel5 = cv2.getStructuringElement(cv2.MORPH_RECT, (8, 8))\n", - " kernel6 = cv2.getStructuringElement(cv2.MORPH_RECT, (20, 2))\n", - " kernel7 = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 8))\n", - " adaptivekernel = None\n", - " \n", - " # return lab[:,:,2]\n", - "\n", - " currentimgofatype = lab[:,:,0] # L-channel: expresses the brightness in the image\n", - " # currentimgofatype = lab[:,:,1] # A-channel: expresses variation of color in the image between red and green\n", - " # currentimgofatype = lab[:,:,2] # B-channel: expresses variation of color in the image between yellow and blue\n", - " \n", - " # currentimgofatype = hls[:,:,0]\n", - " # currentimgofatype = hls[:,:,1]\n", - " # currentimgofatype = hls[:,:,2]\n", - " # imglist = []\n", - "\n", - " Bthresh = cv2.adaptiveThreshold(currentimgofatype, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 201, 35)\n", - " \n", - " # return Bthresh\n", - "\n", - " contours, heirarchy = cv2.findContours(255-Bthresh,cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)\n", - " # imgcopy = cv2.drawContours(imgcopy, contours, -1, color=(0,255,0), thickness=1)\n", - " # return imgcopy\n", - " \n", - " boundingboxes = np.empty((len(contours), 4), dtype=int)\n", - "\n", - " for i, contour in enumerate(contours):\n", - " b = cv2.boundingRect(contour)\n", - " boundingboxes[i] = b\n", - " # imgcopy = cv2.rectangle(imgcopy, (b[0],b[1]), (b[0]+b[2], b[1]+b[3]), 128, thickness=3)\n", - " # return imgcopy\n", - " \n", - " epsilonvalue = np.median(boundingboxes, axis=0)[3]\n", - " \n", - " adaptivekernel = cv2.getStructuringElement(cv2.MORPH_RECT, (int(epsilonvalue/15), int(epsilonvalue/15)))\n", - " \n", - " # imglist.append(Bthresh)\n", - " # imglist.append(255-Bthresh)\n", - " \n", - " morphedBthresh = cv2.morphologyEx(Bthresh, cv2.MORPH_DILATE, kernel3, iterations=2)\n", - " # morphedBthresh = cv2.morphologyEx(Bthresh, cv2.MORPH_DILATE, adaptivekernel, iterations=2)\n", - " goodmorphBthresh = cv2.morphologyEx(Bthresh, cv2.MORPH_ERODE, kernel4, iterations=2)\n", - " # goodmorphBthresh = cv2.morphologyEx(Bthresh, cv2.MORPH_ERODE, adaptivekernel, iterations=3)\n", - " # morphedBthresh = cv2.morphologyEx(morphedBthresh, cv2.MORPH_DILATE, kernel7)\n", - " # imglist.append(morphedBthresh)\n", - " # imglist.append(goodmorphBthresh)\n", - " \n", - " \n", - " thresh = cv2.threshold(currentimgofatype, 0, 255, cv2.THRESH_OTSU)[1]\n", - " # imglist.append(thresh)\n", - " \n", - " morphedthresh = cv2.morphologyEx(thresh, cv2.MORPH_ERODE, kernel6)\n", - " morphedthresh = cv2.morphologyEx(morphedthresh, cv2.MORPH_ERODE, kernel7)\n", - " \n", - " \n", - " \n", - " # imglist.append(morphedthresh)\n", - " anded1 = cv2.bitwise_and(255-Bthresh, morphedthresh)\n", - " anded2 = cv2.bitwise_and(morphedBthresh, 255-morphedthresh)\n", - " # imglist.append(anded1)\n", - " # imglist.append(anded2)\n", - " \n", - " contours, other = cv2.findContours(anded2, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)\n", - " # print(other)\n", - " \n", - " mask = np.full(gray.shape,fill_value=255, dtype=np.uint8)\n", - " \n", - " for i, contour in enumerate(contours):\n", - " if (other[0][i][2] != -1 and other[0][i][3] == -1):\n", - " b = cv2.boundingRect(contour)\n", - " # image = cv2.rectangle(image, (b[0],b[1]), (b[0]+b[2], b[1]+b[3]), (0,255,0), thickness=3)\n", - " mask = cv2.rectangle(mask, (b[0],b[1]), (b[0]+b[2], b[1]+b[3]), 0, thickness=cv2.FILLED)\n", - " \n", - " bingus = cv2.bitwise_or(goodmorphBthresh, mask)\n", - " # bingus = cv2.morphologyEx(bingus, cv2.MORPH_CLOSE, adaptivekernel)\n", - " # imglist.append(bingus)\n", - " # return imglist\n", - " return bingus\n", - " \n", - " # imglist.append(image)\n", - " \n", - " # reversedxor = 255-cv2.bitwise_and(255-Bthresh, 255-morphedthresh)\n", - " # # imglist.append(reversedxor)\n", - " \n", - " # morphedanded = cv2.morphologyEx(anded1, cv2.MORPH_DILATE, kernel5)\n", - " # # imglist.append(morphedanded)\n", - " \n", - " # testout = cv2.bitwise_or(Bthresh, morphedanded)\n", - " # # imglist.append(testout)\n", - " \n", - " # # # blurred = cv2.GaussianBlur(anded, (21,21), 0)\n", - " # # # imglist.append(blurred)\n", - " \n", - " # # # blurthresh = cv2.adaptiveThreshold(255-blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 201, 35)\n", - " # # # imglist.append(blurthresh)\n", - " \n", - " # # # Bthreshmasked = cv2.bitwise_and(gray, gray, mask=anded)\n", - " # # # imglist.append(Bthreshmasked)\n", - " \n", - " # # # thresh2 = cv2.threshold(Bthreshmasked, autothreshold, 255, cv2.THRESH_BINARY)[1]\n", - " # # # imglist.append(thresh2)\n", - " \n", - " \n", - " \n", - " # # xored = cv2.bitwise_xor(Bthresh, 255-thresh)\n", - " # # # imglist.append(xored)\n", - "\n", - " # # morphxor = cv2.morphologyEx(xored, cv2.MORPH_DILATE, kernel4, iterations=2)\n", - " # # morphxor = cv2.morphologyEx(morphxor, cv2.MORPH_ERODE, kernel5, iterations=2)\n", - " # # # morphxor = cv2.morphologyEx(morphxor, cv2.MORPH_ERODE, kernel2, iterations=2)\n", - " # # # imglist.append(morphxor)\n", - " \n", - " \n", - " # # comboed = cv2.bitwise_or(Bthresh, 255-morphxor)\n", - " # # # imglist.append(comboed)\n", - " # # # maybe SOMETHING CAN BE DONE SINCE THEY KIND OF GET A HALO OF WHITE AROUND THE TEXT\n", - " \n", - " # final = cv2.morphologyEx(testout, cv2.MORPH_OPEN,kernel3)\n", - " # final = cv2.morphologyEx(final, cv2.MORPH_OPEN,kernel3)\n", - " # final = cv2.morphologyEx(final, cv2.MORPH_ERODE,kernel3)\n", - " # # imglist.append(final)\n", - " # return imglist\n", - " # return final\n", - "\n", - " \n", - " autothreshold = np.clip(np.mean(gray)/1.5, 0, 255)\n", - " thresh1 = cv2.threshold(gray, autothreshold, 255, cv2.THRESH_TOZERO)[1]\n", - " # return thresh1\n", - " ### FLATTEN colours?\n", - " ## do a threshold gradient thing first?\n", - " centervalue = np.clip(np.mean(thresh1)/1.6, 0, 255)\n", - " grayflattened = bwadjustment(thresh1, centervalue)\n", - " # return grayflattened\n", - " thresh = cv2.adaptiveThreshold(grayflattened, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 41, 25)\n", - " # autothreshold = np.clip(np.mean(grayflattened)/1.25, 0, 255)\n", - " # thresh = cv2.threshold(grayflattened, autothreshold, 255, cv2.THRESH_BINARY)[1]\n", - " # thresh = cv2.threshold(grayflattened, 0, 255, cv2.THRESH_OTSU)[1]\n", - " # return thresh\n", - " \n", - " kernel1 = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))\n", - " kernel2 = cv2.getStructuringElement(cv2.MORPH_RECT, (4, 4))\n", - " kernel3 = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))\n", - " \n", - " \n", - " # final = thresh\n", - " final = cv2.morphologyEx(thresh, cv2.MORPH_OPEN,kernel3)\n", - " final = cv2.morphologyEx(final, cv2.MORPH_OPEN,kernel3)\n", - " final = cv2.morphologyEx(final, cv2.MORPH_ERODE,kernel3)\n", - " # final = cv2.morphologyEx(final, cv2.MORPH_DILATE,kernel2)\n", - " # final = cv2.morphologyEx(final, cv2.MORPH_ERODE,kernel3)\n", - " # final = cv2.morphologyEx(final, cv2.MORPH_DILATE,kernel2)\n", - " # final = cv2.morphologyEx(final, cv2.MORPH_ERODE,kernel2)\n", - " # final = cv2.morphologyEx(final, cv2.MORPH_DILATE,kernel2)\n", - " # final = cv2.morphologyEx(final, cv2.MORPH_ERODE,kernel1)\n", - " # final = cv2.morphologyEx(final, cv2.MORPH_DILATE,kernel2)\n", - " # final = 255-cv2.morphologyEx(255-final, cv2.MORPH_CLOSE,kernel)\n", - " \n", - " \n", - " reduced = cv2.morphologyEx(final, cv2.MORPH_DILATE, kernel1)\n", - " reduced = cv2.morphologyEx(reduced, cv2.MORPH_ERODE, kernel2)\n", - " reduced = cv2.morphologyEx(reduced, cv2.MORPH_DILATE, kernel2)\n", - " reduced = cv2.morphologyEx(reduced, cv2.MORPH_ERODE, kernel2)\n", - " reduced = 255 - cv2.morphologyEx(255-reduced, cv2.MORPH_OPEN, kernel3)\n", - " \n", - " # return reduced\n", - " contours, _ = cv2.findContours(255-reduced, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)\n", - " \n", - " \n", - " mask = np.zeros(final.shape, dtype=np.uint8)\n", - " rects = []\n", - " for contour in contours:\n", - " b = cv2.boundingRect(contour)\n", - " rects.append(b)\n", - " mask = cv2.rectangle(mask, (b[0],b[1]), (b[0]+b[2], b[1]+b[3]), 255, thickness=cv2.FILLED)\n", - " \n", - " # return mask\n", - " final = cv2.bitwise_or(final, 255-mask)\n", - " \n", - " return final\n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [], - "source": [ - "# funtion to correct the median-angle to give it to the cv2.warpaffine() function\n", - "def anglecorrector(angle):\n", - " if 0 <= angle <= 90:\n", - " corrected_angle = angle - 90\n", - " elif -45 <= angle < 0:\n", - " corrected_angle = angle - 90\n", - " elif -90 <= angle < -45:\n", - " corrected_angle = 90 + angle\n", - " return corrected_angle" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [], - "source": [ - "def padWithColour(img, hpadding=0, vpadding=0, fill=(0,0,0)):\n", - " borderType = cv2.BORDER_CONSTANT\n", - " out = cv2.copyMakeBorder(img, vpadding, vpadding, hpadding, hpadding, borderType, None, fill)\n", - " return out\n", - "\n", - "def mergecontours(contours):\n", - " cont = np.vstack(contours)\n", - " finalcontour = cv2.convexHull(cont)\n", - " return finalcontour" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [], - "source": [ - "def getSkewAngle(cvImage) -> float:\n", - " # Prep image, copy, convert to gray scale, blur, and threshold\n", - " newImage = padWithColour(cvImage, hpadding=50, vpadding=50, fill=(255,255,255))\n", - " # return newImage\n", - " gray = cv2.cvtColor(newImage, cv2.COLOR_BGR2GRAY)\n", - " blur = cv2.GaussianBlur(gray, (9, 9), 0)\n", - " thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]\n", - "\n", - " # Apply dilate to merge text into meaningful lines/paragraphs.\n", - " # Use larger kernel on X axis to merge characters into single line, cancelling out any spaces.\n", - " # But use smaller kernel on Y axis to separate between different blocks of text\n", - " kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (10, 5))\n", - " dilate = cv2.dilate(thresh, kernel, iterations=5)\n", - " # return dilate\n", - "\n", - " # Find all contours\n", - " contours, hierarchy = cv2.findContours(dilate, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)\n", - " contours = sorted(contours, key = cv2.contourArea, reverse = True)\n", - "\n", - " # Find largest contour and surround in min area box\n", - " largestContour = contours[0]\n", - "\n", - " mergedcontour = mergecontours(contours)\n", - "\n", - " # return cv2.drawContours(newImage, [mergedcontour], -1, (0,255,0), thickness=3)\n", - " minAreaRect = cv2.minAreaRect(mergedcontour)\n", - " minAreaRect = list(minAreaRect)\n", - " minAreaRect[1] = list(minAreaRect[1])\n", - " if (minAreaRect[1][0] > minAreaRect[1][1]):\n", - " temp = minAreaRect[1][0]\n", - " minAreaRect[1][0] = minAreaRect[1][1]\n", - " minAreaRect[1][1] = temp\n", - " minAreaRect[2] -= 90\n", - " # return cv2.drawContours(newImage, [largestContour], -1, (0,255,0), thickness=3)\n", - " # minAreaRect = cv2.minAreaRect(largestContour)\n", - "\n", - " box = cv2.boxPoints(minAreaRect)\n", - " box = np.intp(box) \n", - " newImage = cv2.drawContours(newImage, [box], -1, (0,255,0), thickness=3)\n", - " # return newImage\n", - "\n", - " # Determine the angle. Convert it to the value that was originally used to obtain skewed image\n", - " angle = minAreaRect[-1]\n", - " # print(angle)\n", - " angle = anglecorrector(angle)+90\n", - " # print(angle)\n", - " return angle\n", - "\n", - "def minboxdeskew(img, fill=(0,0,0)):\n", - " colourimg = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)\n", - " angle = getSkewAngle(colourimg)\n", - " padimg = padWithColour(img, hpadding=50, vpadding=50, fill=fill)\n", - " rotated = mf.rotate(padimg, angle, fill=fill)\n", - " return rotated" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [], - "source": [ - "def cropclarifying(image):\n", - " whitedbackground = mf.whiteoutbackground(image)\n", - " # return whitedbackground\n", - " \n", - " textrefined = textClarifying(whitedbackground)\n", - " # return textrefined\n", - " #maybe now is when I put in the line removing function\n", - " \n", - " lineout = mf.removeLinesFromText(textrefined)\n", - " \n", - " return lineout\n", - " # implement a function that's called refine text\n", - "\n", - "\n", - "\n", - "def houghlineprocessing(image):\n", - " croppedanddeskewed, _ = mf.houghlinedeskewandcrop(image)\n", - " \n", - " postprocessed = cropclarifying(croppedanddeskewed)\n", - " # return postprocessed\n", - " postprocessed = mf.croptoblack(postprocessed)\n", - " \n", - " # postprocessed = cv2.cvtColor(postprocessed, cv2.COLOR_GRAY2BGR)\n", - " # return postprocessed\n", - " \n", - " # final = mf.externaldeskew(postprocessed, fill=(255,255,255))\n", - " final = minboxdeskew(postprocessed, fill=(255,255,255))\n", - " \n", - " # final = mf.croptoblack(final)\n", - " \n", - " # cv2.imshow(\"postprocessed\", mf.ResizeWithAspectRatio(postprocessed, 1000))\n", - " # cv2.imshow(\"final\", mf.ResizeWithAspectRatio(final, 1000))\n", - " # cv2.waitKey(0)\n", - " # cv2.destroyAllWindows()\n", - " \n", - " return final" - ] - }, - { - "cell_type": "code", - "execution_count": 27, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# prepped, scaler, hp, vp = mf.squareandthenresize(img, fill=255, width=1000, returnscalerinfo=True)\n", "outs = mf.houghlineprocessing(img)\n", + "# outs = houghlinedeskewandcrop(img)\n", + "# outs = outs[0]\n", "# print(croprect)\n", "#need to fix premorphCrop. it removes too much" ] }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -617,7 +63,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -629,32 +75,63 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ - "# for out in outs:\n", - "# if (out.shape[0] > out.shape[1]):\n", - "# cv2.imshow(\"test1\", mf.ResizeWithAspectRatio(out, height=1000))\n", - "# else:\\\n", - "# cv2.imshow(\"test1\", mf.ResizeWithAspectRatio(out, width=1000))\n", - "# key = cv2.waitKey(0)\n", - "# cv2.destroyAllWindows()\n", - "# if (key == 107):\n", - "# break\n", - "if (isinstance(outs, np.ndarray)):\n", - " if (outs.shape[0] > outs.shape[1]):\n", - " cv2.imshow(\"test\", mf.ResizeWithAspectRatio(outs, height=1350))\n", - " else:\n", - " cv2.imshow(\"test\", mf.ResizeWithAspectRatio(outs, width=1000))\n", - "else:\n", - " for i, out in enumerate(outs):\n", - " if (out.shape[0] > out.shape[1]):\n", - " cv2.imshow(\"test\"+str(i), mf.ResizeWithAspectRatio(out, height=1350))\n", + "def showimgs(imgs):\n", + " if (isinstance(imgs, np.ndarray)):\n", + " if (imgs.shape[0] > imgs.shape[1]):\n", + " cv2.imshow(\"test\", mf.ResizeWithAspectRatio(imgs, height=1350))\n", " else:\n", - " cv2.imshow(\"test\"+str(i), mf.ResizeWithAspectRatio(out, width=1000))\n", - "cv2.waitKey(0)\n", - "cv2.destroyAllWindows()" + " cv2.imshow(\"test\", mf.ResizeWithAspectRatio(imgs, width=1000))\n", + " else:\n", + " for i, out in enumerate(imgs):\n", + " if (out.shape[0] > out.shape[1]):\n", + " cv2.imshow(\"test\"+str(i), mf.ResizeWithAspectRatio(out, height=1350))\n", + " else:\n", + " cv2.imshow(\"test\"+str(i), mf.ResizeWithAspectRatio(out, width=1000))\n", + " cv2.waitKey(0)\n", + " cv2.destroyAllWindows()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "showimgs(outs)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "# # for out in outs:\n", + "# # if (out.shape[0] > out.shape[1]):\n", + "# # cv2.imshow(\"test1\", mf.ResizeWithAspectRatio(out, height=1000))\n", + "# # else:\\\n", + "# # cv2.imshow(\"test1\", mf.ResizeWithAspectRatio(out, width=1000))\n", + "# # key = cv2.waitKey(0)\n", + "# # cv2.destroyAllWindows()\n", + "# # if (key == 107):\n", + "# # break\n", + "# if (isinstance(outs, np.ndarray)):\n", + "# if (outs.shape[0] > outs.shape[1]):\n", + "# cv2.imshow(\"test\", mf.ResizeWithAspectRatio(outs, height=1350))\n", + "# else:\n", + "# cv2.imshow(\"test\", mf.ResizeWithAspectRatio(outs, width=1000))\n", + "# else:\n", + "# for i, out in enumerate(outs):\n", + "# if (out.shape[0] > out.shape[1]):\n", + "# cv2.imshow(\"test\"+str(i), mf.ResizeWithAspectRatio(out, height=1350))\n", + "# else:\n", + "# cv2.imshow(\"test\"+str(i), mf.ResizeWithAspectRatio(out, width=1000))\n", + "# cv2.waitKey(0)\n", + "# cv2.destroyAllWindows()" ] } ], diff --git a/code/autocropper/myfunctions.py b/code/autocropper/myfunctions.py index 98a1686..f4e9b70 100644 --- a/code/autocropper/myfunctions.py +++ b/code/autocropper/myfunctions.py @@ -230,6 +230,11 @@ def lineBoundingRect(lines, asRect=False, returnint=False): y1 = int(y1) x2 = int(x2) y2 = int(y2) + + x1 = max(0, x1) + x2 = max(0,x2) + y1 = max(0, y1) + y2 = max(0, y2) return (x1,y1,x2,y2) # print(lines.max(0)) @@ -390,6 +395,7 @@ def houghlinedeskewthencrop(baseimage, preppedimage, rotationangle): rotateddst1 = rotate(preppedimage, rotationangle) rotatedbaseimage = rotate(baseimage, rotationangle) sizemultiplier = rotatedbaseimage.shape[0]/rotateddst1.shape[0] + # print(sizemultiplier) linesP = cv2.HoughLinesP(preppedimage, 1, np.pi / 180, 30, None, 90, 30) @@ -409,14 +415,15 @@ def houghlinedeskewthencrop(baseimage, preppedimage, rotationangle): else: marginlines = vmarginlines - + # print(marginlines) rect = lineBoundingRect(marginlines,asRect=False, returnint=True) + # print(rect) scaledrect = (int(rect[0]*sizemultiplier), int(rect[1]*sizemultiplier), int(rect[2]*sizemultiplier), int(rect[3]*sizemultiplier)) croppedbaseimage = rotatedbaseimage[scaledrect[1]:scaledrect[3], scaledrect[0]:scaledrect[2], :] - + # print(croppedbaseimage.shape) shrunkencbi, sizemultiplier = ResizeWithAspectRatio(croppedbaseimage, width=1000, retscale=True) gray = cv2.cvtColor(shrunkencbi, cv2.COLOR_BGR2GRAY) thresh = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)[1] @@ -438,18 +445,20 @@ def houghlinedeskewthencrop(baseimage, preppedimage, rotationangle): return finalbaseimage, rotationangle def houghlinedeskewandcrop(image): - dst1, croppedogimage = prepimageforhoughline(image) ## scaling and cropping occurs. need to also return the changes done - # print(dst1.shape) + canny, croppedogimage = prepimageforhoughline(image) ## scaling and cropping occurs. need to also return the changes done + # return canny, croppedogimage + # print(canny.shape) # print(croppedogimage.shape) ## -----------------finding angle to deskew----------------- - rotationangle = houghlinedeskewangle(dst1) + rotationangle = houghlinedeskewangle(canny) + # print(rotationangle) # -----------------end of finding angle to deskew----------------- ## -----------------deskewing and then cropping----------------- - return houghlinedeskewthencrop(croppedogimage, dst1, rotationangle) + return houghlinedeskewthencrop(croppedogimage, canny, rotationangle) def bruteforceprocessrects(greaterrects, lesserrects): # squaredgrects = np.array([mf.xywhrectto2prect(rect) for rect in greaterrects]) @@ -639,7 +648,7 @@ def cropclarifying(image): return lineout # implement a function that's called refine text - + def croptoblack(image, extraborder=10): invertedimage = cv2.bitwise_not(image) blackpixels = cv2.findNonZero(invertedimage) @@ -688,12 +697,7 @@ def textClarifying(image): # return lab[:,:,2] currentimgofatype = lab[:,:,0] # L-channel: expresses the brightness in the image - # currentimgofatype = lab[:,:,1] # A-channel: expresses variation of color in the image between red and green - # currentimgofatype = lab[:,:,2] # B-channel: expresses variation of color in the image between yellow and blue - - # currentimgofatype = hls[:,:,0] - # currentimgofatype = hls[:,:,1] - # currentimgofatype = hls[:,:,2] + # imglist = [] Bthresh = cv2.adaptiveThreshold(currentimgofatype, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 201, 35) @@ -733,12 +737,14 @@ def textClarifying(image): morphedthresh = cv2.morphologyEx(thresh, cv2.MORPH_ERODE, kernel6) morphedthresh = cv2.morphologyEx(morphedthresh, cv2.MORPH_ERODE, kernel7) + reducedthresh = cv2.morphologyEx(thresh, cv2.MORPH_DILATE, adaptivekernel, iterations=1) # imglist.append(morphedthresh) + # imglist.append(reducedthresh) anded1 = cv2.bitwise_and(255-Bthresh, morphedthresh) - anded2 = cv2.bitwise_and(morphedBthresh, 255-morphedthresh) + anded2 = cv2.bitwise_and(reducedthresh, 255-morphedthresh) # imglist.append(anded1) # imglist.append(anded2) @@ -753,7 +759,8 @@ def textClarifying(image): # image = cv2.rectangle(image, (b[0],b[1]), (b[0]+b[2], b[1]+b[3]), (0,255,0), thickness=3) mask = cv2.rectangle(mask, (b[0],b[1]), (b[0]+b[2], b[1]+b[3]), 0, thickness=cv2.FILLED) - bingus = cv2.bitwise_or(goodmorphBthresh, mask) + # bingus = cv2.bitwise_or(goodmorphBthresh, mask) + bingus = cv2.bitwise_or(Bthresh, mask) # bingus = cv2.morphologyEx(bingus, cv2.MORPH_CLOSE, adaptivekernel) # imglist.append(bingus) # return imglist @@ -1040,9 +1047,7 @@ def receipttextdeskew(img, fill=(0,0,0)): ## ------------------------------Full deskewing and cropping------------------------------ def houghlineprocessing(image): croppedanddeskewed, _ = houghlinedeskewandcrop(image) - ##IF IT DOESN'T CHANGE THE IMAGE (CHANGE THE _ TO SOMETHING USEFUL), THEN CROPCLARIFYING SHOULD JUST DO THE TEXT ISOLATION SECTION AND NOT TRY AND WHITE OUT ANY BACKGROUND. - ## IF THERE'S NO CROPPING, MAYBE EVEN JUMP RIGHT TO USING THE EXTERNAL DESKEW FIRST BEFORE TOSSING IT INTO CROPCLARIFYING - + postprocessed = cropclarifying(croppedanddeskewed) # return postprocessed postprocessed = croptoblack(postprocessed) @@ -1050,18 +1055,16 @@ def houghlineprocessing(image): # postprocessed = cv2.cvtColor(postprocessed, cv2.COLOR_GRAY2BGR) # return postprocessed - # final = externaldeskew(postprocessed, fill=(255,255,255)) + # final = mf.externaldeskew(postprocessed, fill=(255,255,255)) final = receipttextdeskew(postprocessed, fill=(255,255,255)) - final = cv2.cvtColor(final, cv2.COLOR_GRAY2BGR) - # final = mf.croptoblack(final) - + # cv2.imshow("postprocessed", mf.ResizeWithAspectRatio(postprocessed, 1000)) # cv2.imshow("final", mf.ResizeWithAspectRatio(final, 1000)) # cv2.waitKey(0) # cv2.destroyAllWindows() - + return final ###### DESIRE: CONVERT STUFF RELATED TO THE HOUGHLINE PROCESSING INTO C SINCE IT ONLY REALLY USES OPENCV \ No newline at end of file From 70cabaabd49716b6f82e5db4df80acb69e415c19 Mon Sep 17 00:00:00 2001 From: Ethan Wellenreiter Date: Tue, 14 Nov 2023 19:06:18 -0500 Subject: [PATCH 2/2] Improved background whiteout Signed-off-by: Ethan Wellenreiter --- code/autocropper/houghlinedevspace.ipynb | 175 ++++++++++++++++++++--- code/autocropper/myfunctions.py | 43 ++++-- 2 files changed, 185 insertions(+), 33 deletions(-) diff --git a/code/autocropper/houghlinedevspace.ipynb b/code/autocropper/houghlinedevspace.ipynb index fe6318d..eb860d9 100644 --- a/code/autocropper/houghlinedevspace.ipynb +++ b/code/autocropper/houghlinedevspace.ipynb @@ -2,20 +2,9 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 2312, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.10/dist-packages/torchvision/datapoints/__init__.py:12: UserWarning: The torchvision.datapoints and torchvision.transforms.v2 namespaces are still Beta. While we do not expect major breaking changes, some APIs may still change according to user feedback. Please submit any feedback you may have in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check out https://github.com/pytorch/vision/issues/7319 to learn more about the APIs that we suspect might involve future changes. You can silence this warning by calling torchvision.disable_beta_transforms_warning().\n", - " warnings.warn(_BETA_TRANSFORMS_WARNING)\n", - "/usr/local/lib/python3.10/dist-packages/torchvision/transforms/v2/__init__.py:54: UserWarning: The torchvision.datapoints and torchvision.transforms.v2 namespaces are still Beta. While we do not expect major breaking changes, some APIs may still change according to user feedback. Please submit any feedback you may have in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check out https://github.com/pytorch/vision/issues/7319 to learn more about the APIs that we suspect might involve future changes. You can silence this warning by calling torchvision.disable_beta_transforms_warning().\n", - " warnings.warn(_BETA_TRANSFORMS_WARNING)\n" - ] - } - ], + "outputs": [], "source": [ "import cv2\n", "import myfunctions as mf\n", @@ -26,16 +15,160 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 2313, "metadata": {}, "outputs": [], "source": [ - "img = cv2.imread('/mnt/dataset/baseimages/1.jpg')" + "# img = cv2.imread('/mnt/dataset/baseimages/1.jpg')\n", + "img = cv2.imread('/mnt/code/autocropper/test_images/IMG_7594.jpg')" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2314, + "metadata": {}, + "outputs": [], + "source": [ + "def whiteoutbackground(image):\n", + " imagecpy = image.copy()\n", + " gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)\n", + " \n", + " # blur = cv2.blur(gray, (7,7))\n", + " \n", + " # window = 51\n", + " window = min(gray.shape[1], gray.shape[0])//20\n", + " if window % 2 == 0:\n", + " window += 1\n", + " thresh1 = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, window, 5)\n", + " thresh2 = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU)[1]\n", + " thresh = cv2.bitwise_and(thresh1, thresh2)\n", + " # return thresh\n", + "\n", + " # dim = int(min(thresh.shape[0], thresh.shape[1])/400)\n", + " dim = 3\n", + " kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (dim, dim))\n", + " morphedthresh = cv2.morphologyEx(thresh, cv2.MORPH_ERODE, kernel)\n", + " # return morphedthresh\n", + " \n", + " \n", + " \n", + " # contours1, heirarchy = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)\n", + " contours2, heirarchy = cv2.findContours(morphedthresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)\n", + " \n", + " # biggestcontour1 = max(contours1, key=cv2.contourArea)\n", + " biggestcontour2 = max(contours2, key=cv2.contourArea)\n", + " # imagecpy = cv2.drawContours(imagecpy, [biggestcontour1], -1, (0,255,0), thickness=3)\n", + " # imagecpy = cv2.drawContours(imagecpy, [biggestcontour2], -1, (0,0,255), thickness=3)\n", + " # return imagecpy\n", + " \n", + " blank = np.full(thresh.shape, 255, dtype=np.uint8)\n", + " mask = blank.copy()\n", + " # mask = cv2.drawContours(mask, [biggestcontour1], -1, (0,0,0), thickness=cv2.FILLED)\n", + " mask = cv2.drawContours(mask, [biggestcontour2], -1, (0,0,0), thickness=cv2.FILLED)\n", + "\n", + " # return mask\n", + "\n", + " invertmask = 255 - mask\n", + " \n", + " \n", + " dim = int(min(invertmask.shape[0], invertmask.shape[1])/200)\n", + " # # dim = 21\n", + " # print(dim)\n", + " kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (dim, dim))\n", + " # invertmask = cv2.morphologyEx(invertmask, cv2.MORPH_DILATE, kernel)\n", + " mask = 255 - cv2.morphologyEx(invertmask, cv2.MORPH_ERODE, kernel, iterations=1)\n", + " # return mask\n", + " \n", + " mask = cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR)\n", + " whitedbackground = cv2.bitwise_or(image, mask)\n", + " # return whitedbackground\n", + " \n", + " gray2 = cv2.cvtColor(whitedbackground, cv2.COLOR_BGR2GRAY)\n", + "\n", + " canny = cv2.Canny(gray2, 0, 500, None, 3)\n", + " \n", + " maskgray = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)\n", + " \n", + " \n", + "\n", + " invert = 255-maskgray\n", + " # dim = 21\n", + " # print(dim)\n", + " dim = int(min(maskgray.shape[0], maskgray.shape[1])/200)\n", + " kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (dim, dim))\n", + " morphedmask = cv2.morphologyEx(invert, cv2.MORPH_ERODE, kernel)\n", + " dim = int(min(maskgray.shape[0], maskgray.shape[1])/50)\n", + " kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (dim, dim))\n", + " morphedmask = cv2.morphologyEx(morphedmask, cv2.MORPH_OPEN, kernel, iterations=7)\n", + " # return 255 - morphedmask\n", + " morphedmask = 255 - morphedmask\n", + "\n", + " finalmask = cv2.bitwise_or(morphedmask, maskgray)\n", + " # return finalmask\n", + " \n", + " # edgecontours, _ = cv2.findContours(255-morphedmask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)\n", + " finalmaskbgr = cv2.cvtColor(finalmask, cv2.COLOR_GRAY2BGR)\n", + " # return finalmaskbgr\n", + "\n", + " whitedbackground = cv2.bitwise_or(whitedbackground, finalmaskbgr)\n", + " # return whitedbackground\n", + " \n", + " test = cv2.inpaint(whitedbackground, finalmask, 3, cv2.INPAINT_TELEA)\n", + " return test" + ] + }, + { + "cell_type": "code", + "execution_count": 2315, + "metadata": {}, + "outputs": [], + "source": [ + "def cropclarifying(image):\n", + " whitedbackground = whiteoutbackground(image)\n", + " # return whitedbackground\n", + "\n", + " textrefined = mf.textClarifying(whitedbackground)\n", + " # return textrefined\n", + " #maybe now is when I put in the line removing function\n", + "\n", + " lineout = mf.removeLinesFromText(textrefined)\n", + "\n", + " return lineout\n", + " # implement a function that's called refine text" + ] + }, + { + "cell_type": "code", + "execution_count": 2316, + "metadata": {}, + "outputs": [], + "source": [ + "def houghlineprocessing(image):\n", + " croppedanddeskewed, _ = mf.houghlinedeskewandcrop(image)\n", + " \n", + " postprocessed = cropclarifying(croppedanddeskewed)\n", + " # return postprocessed\n", + " postprocessed = mf.croptoblack(postprocessed)\n", + " \n", + " # postprocessed = cv2.cvtColor(postprocessed, cv2.COLOR_GRAY2BGR)\n", + " # return postprocessed\n", + " \n", + " # final = mf.externaldeskew(postprocessed, fill=(255,255,255))\n", + " final = mf.receipttextdeskew(postprocessed, fill=(255,255,255))\n", + " \n", + " # final = mf.croptoblack(final)\n", + " \n", + " # cv2.imshow(\"postprocessed\", mf.ResizeWithAspectRatio(postprocessed, 1000))\n", + " # cv2.imshow(\"final\", mf.ResizeWithAspectRatio(final, 1000))\n", + " # cv2.waitKey(0)\n", + " # cv2.destroyAllWindows()\n", + " \n", + " return final" + ] + }, + { + "cell_type": "code", + "execution_count": 2317, "metadata": {}, "outputs": [], "source": [ @@ -49,7 +182,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 2318, "metadata": {}, "outputs": [], "source": [ @@ -63,7 +196,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 2319, "metadata": {}, "outputs": [], "source": [ @@ -75,7 +208,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 2320, "metadata": {}, "outputs": [], "source": [ @@ -97,7 +230,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 2321, "metadata": {}, "outputs": [], "source": [ @@ -106,7 +239,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 2322, "metadata": {}, "outputs": [], "source": [ diff --git a/code/autocropper/myfunctions.py b/code/autocropper/myfunctions.py index f4e9b70..f937da8 100644 --- a/code/autocropper/myfunctions.py +++ b/code/autocropper/myfunctions.py @@ -498,40 +498,54 @@ def processrects(greaterrects, lesserrects): return bruteforceprocessrects(greaterrects, lesserrects) def whiteoutbackground(image): - # imagecpy = image.copy() + imagecpy = image.copy() gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) # blur = cv2.blur(gray, (7,7)) # window = 51 - window = gray.shape[1]//8 + window = min(gray.shape[1], gray.shape[0])//20 if window % 2 == 0: window += 1 - thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, window, 2) + thresh1 = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, window, 5) + thresh2 = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU)[1] + thresh = cv2.bitwise_and(thresh1, thresh2) # return thresh + + # dim = int(min(thresh.shape[0], thresh.shape[1])/400) + dim = 3 + kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (dim, dim)) + morphedthresh = cv2.morphologyEx(thresh, cv2.MORPH_ERODE, kernel) + # return morphedthresh - contours, heirarchy = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) + # contours1, heirarchy = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) + contours2, heirarchy = cv2.findContours(morphedthresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) - biggestcontour = max(contours, key=cv2.contourArea) - # imagecpy = cv2.drawContours(imagecpy, [biggestcontour], -1, (0,255,0), thickness=3) + # biggestcontour1 = max(contours1, key=cv2.contourArea) + biggestcontour2 = max(contours2, key=cv2.contourArea) + # imagecpy = cv2.drawContours(imagecpy, [biggestcontour1], -1, (0,255,0), thickness=3) + # imagecpy = cv2.drawContours(imagecpy, [biggestcontour2], -1, (0,0,255), thickness=3) # return imagecpy blank = np.full(thresh.shape, 255, dtype=np.uint8) mask = blank.copy() - mask = cv2.drawContours(mask, [biggestcontour], -1, (0,0,0), thickness=cv2.FILLED) - + # mask = cv2.drawContours(mask, [biggestcontour1], -1, (0,0,0), thickness=cv2.FILLED) + mask = cv2.drawContours(mask, [biggestcontour2], -1, (0,0,0), thickness=cv2.FILLED) + + # return mask + invertmask = 255 - mask - dim = int(min(invertmask.shape[0], invertmask.shape[1])/100) + dim = int(min(invertmask.shape[0], invertmask.shape[1])/200) # # dim = 21 # print(dim) kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (dim, dim)) # invertmask = cv2.morphologyEx(invertmask, cv2.MORPH_DILATE, kernel) mask = 255 - cv2.morphologyEx(invertmask, cv2.MORPH_ERODE, kernel, iterations=1) - # # return mask + # return mask mask = cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR) whitedbackground = cv2.bitwise_or(image, mask) @@ -544,16 +558,21 @@ def whiteoutbackground(image): maskgray = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY) - dim = int(min(maskgray.shape[0], maskgray.shape[1])/50) + invert = 255-maskgray # dim = 21 # print(dim) + dim = int(min(maskgray.shape[0], maskgray.shape[1])/200) kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (dim, dim)) - morphedmask = cv2.morphologyEx(invert, cv2.MORPH_OPEN, kernel, iterations=10) + morphedmask = cv2.morphologyEx(invert, cv2.MORPH_ERODE, kernel) + dim = int(min(maskgray.shape[0], maskgray.shape[1])/50) + kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (dim, dim)) + morphedmask = cv2.morphologyEx(morphedmask, cv2.MORPH_OPEN, kernel, iterations=7) # return 255 - morphedmask morphedmask = 255 - morphedmask finalmask = cv2.bitwise_or(morphedmask, maskgray) + # return finalmask # edgecontours, _ = cv2.findContours(255-morphedmask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) finalmaskbgr = cv2.cvtColor(finalmask, cv2.COLOR_GRAY2BGR)