Basic update/bug fix for autocropping.

Still doesn't make the image binary (black and white with
only the text being black).

Signed-off-by: Ethan Wellenreiter <ewellenreiter@gmail.com>
This commit is contained in:
Ethan Wellenreiter 2023-12-10 15:33:10 -05:00
parent 7fe6adad99
commit 613e7780b8
2 changed files with 536 additions and 109 deletions

View File

@ -2,20 +2,9 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 671,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.10/dist-packages/torchvision/datapoints/__init__.py:12: UserWarning: The torchvision.datapoints and torchvision.transforms.v2 namespaces are still Beta. While we do not expect major breaking changes, some APIs may still change according to user feedback. Please submit any feedback you may have in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check out https://github.com/pytorch/vision/issues/7319 to learn more about the APIs that we suspect might involve future changes. You can silence this warning by calling torchvision.disable_beta_transforms_warning().\n",
" warnings.warn(_BETA_TRANSFORMS_WARNING)\n",
"/usr/local/lib/python3.10/dist-packages/torchvision/transforms/v2/__init__.py:54: UserWarning: The torchvision.datapoints and torchvision.transforms.v2 namespaces are still Beta. While we do not expect major breaking changes, some APIs may still change according to user feedback. Please submit any feedback you may have in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check out https://github.com/pytorch/vision/issues/7319 to learn more about the APIs that we suspect might involve future changes. You can silence this warning by calling torchvision.disable_beta_transforms_warning().\n",
" warnings.warn(_BETA_TRANSFORMS_WARNING)\n"
]
}
],
"outputs": [],
"source": [
"import cv2\n",
"import myfunctions as mf\n",
@ -26,7 +15,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 672,
"metadata": {},
"outputs": [],
"source": [
@ -66,7 +55,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 673,
"metadata": {},
"outputs": [],
"source": [
@ -88,7 +77,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 674,
"metadata": {},
"outputs": [],
"source": [
@ -102,18 +91,18 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 675,
"metadata": {},
"outputs": [],
"source": [
"img = cv2.imread('/mnt/dataset/baseimages/12.jpg')\n",
"img = cv2.imread('/mnt/dataset/baseimages/21.jpg')\n",
"# img = cv2.imread('/mnt/code/autocropper/test_images/IMG_7605.jpg')\n",
"testall = False"
]
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 676,
"metadata": {},
"outputs": [],
"source": [
@ -333,7 +322,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 677,
"metadata": {},
"outputs": [],
"source": [
@ -472,7 +461,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 678,
"metadata": {},
"outputs": [],
"source": [
@ -493,17 +482,363 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 679,
"metadata": {},
"outputs": [],
"source": [
"def houghlinepcrop(baseimage, preppedimage, scalingmultiplier, returnrect=False):\n",
" hminlength = int(preppedimage.shape[1]/30)\n",
" hmaxgap = int(preppedimage.shape[1]/35)\n",
" \n",
" vminlength = int(preppedimage.shape[0]/25)\n",
" vmaxgap = int(preppedimage.shape[0]/35)\n",
" # print(dim)\n",
" \n",
" vrotatedlines = cv2.HoughLinesP(preppedimage, 1, np.pi / 180, 20, None, vminlength, vmaxgap)\n",
" hrotatedlines = cv2.HoughLinesP(preppedimage, 1, np.pi / 180, 20, None, hminlength, hmaxgap)\n",
" # if rotatedlines is None:\n",
" # print(\"hi\")\n",
" # return baseimage\n",
" vmarginlines = mf.WithinXDegrees(vrotatedlines, 7)\n",
" hmarginlines = mf.WithinXDegrees(hrotatedlines, 7, baseangle=90)\n",
" if (len(vmarginlines) != 0):\n",
" # print(\"hi\")\n",
" vrect = mf.lineBoundingRect(vmarginlines,asRect=False, returnint=True)\n",
" hmarginlines = mf.lineswithinrange(hmarginlines, (vrect[0], vrect[1]), (vrect[2],vrect[3]), x=True, y=False)\n",
" # print(vmarginlines)\n",
" # print(hmarginlines)\n",
" marginlines = np.append(vmarginlines, hmarginlines, axis=0)\n",
" \n",
" colourdst = cv2.cvtColor(preppedimage, cv2.COLOR_GRAY2BGR)\n",
" # print(len(marginlines))\n",
" # up = False\n",
" # if len(marginlines) != 0:\n",
" # # print(\"hi\")\n",
" # up = True\n",
" # for l in marginlines:\n",
" # cv2.line(colourdst, (int(l[0]), int(l[1])), (int(l[2]), int(l[3])), (0,0,255), 3, cv2.LINE_AA)\n",
" # # if up:\n",
" # # print(\"good\")\n",
" # # else:\n",
" # # print(\"bad\")\n",
" \n",
" # # print(\"bye\")\n",
" # return colourdst\n",
" rect = mf.lineBoundingRect(marginlines,asRect=False, returnint=True)\n",
" scaledrect = (int(rect[0]*scalingmultiplier), int(rect[1]*scalingmultiplier), int(rect[2]*scalingmultiplier), int(rect[3]*scalingmultiplier))\n",
" croppedbaseimage = baseimage[scaledrect[1]:scaledrect[3], scaledrect[0]:scaledrect[2], :]\n",
" if returnrect:\n",
" croprect = (scaledrect[0], scaledrect[1], scaledrect[2]-scaledrect[0], scaledrect[3]-scaledrect[1])\n",
" return croppedbaseimage, croprect\n",
" return croppedbaseimage"
]
},
{
"cell_type": "code",
"execution_count": 680,
"metadata": {},
"outputs": [],
"source": [
"def contourcrop(baseimage):\n",
" # return baseimage\n",
" shrunkencbi, sizemultiplier = mf.ResizeWithAspectRatio(baseimage, width=1000, retscale=True)\n",
" # gray = cv2.cvtColor(shrunkencbi, cv2.COLOR_BGR2GRAY)\n",
" gray = cv2.cvtColor(shrunkencbi, cv2.COLOR_BGR2LAB)[:,:,0]\n",
" # thresh = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)[1]\n",
" thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_TRIANGLE)[1]\n",
" # window = gray.shape[1]//7\n",
" # if window % 2 == 0:\n",
" # window += 1\n",
" # thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, window, 10)\n",
" # return thresh\n",
"\n",
"\n",
" kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5,5))\n",
" # thresh = cv2.morphologyEx(thresh, cv2.MORPH_ERODE, kernel, iterations=2)\n",
" thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)\n",
" # return thresh\n",
" \n",
" contours, heirarchy = cv2.findContours(thresh,cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)\n",
" \n",
" # temp = cv2.drawContours(shrunkencbi, contours, -1, (0,255,0), thickness=3)\n",
" # biggestcontour = max(contours, key=cv2.contourArea)\n",
" # temp = cv2.drawContours(shrunkencbi, [biggestcontour], -1, (0,255,0), thickness=3)\n",
" \n",
" # return temp\n",
" \n",
" mx = (0,0,0,0)\n",
" mx_area = 0\n",
"\n",
" for i, cont in enumerate(contours):\n",
" rect = cv2.boundingRect(cont)\n",
" area = mf.rectArea(rect)\n",
" if (area > mx_area):\n",
" mx = rect\n",
" mx_area = area\n",
"\n",
" \n",
" scaledmx = (int(mx[0]*sizemultiplier), int(mx[1]*sizemultiplier), int(mx[2]*sizemultiplier), int(mx[3]*sizemultiplier))\n",
" finalbaseimage = baseimage[scaledmx[1]:scaledmx[1]+scaledmx[3], scaledmx[0]:scaledmx[0]+scaledmx[2], :]\n",
" return finalbaseimage"
]
},
{
"cell_type": "code",
"execution_count": 681,
"metadata": {},
"outputs": [],
"source": [
"def houghlinedeskewthencrop(baseimage, preppedimage, rotationangle, mask): \n",
" rotatedbaseimage = mf.rotatewithexactpadding(baseimage, rotationangle, fill=(0,0,0))\n",
" rotateddst1 = mf.rotatewithexactpadding(preppedimage, rotationangle, fill=(0,0,0))\n",
" sizemultiplier = rotatedbaseimage.shape[0]/rotateddst1.shape[0]\n",
" \n",
" \n",
" kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,3))\n",
" shrunkmask = cv2.morphologyEx(mask, cv2.MORPH_ERODE, kernel)\n",
" \n",
" rotatedmask = mf.rotatewithexactpadding(shrunkmask, rotationangle, fill=0)\n",
" \n",
" # rotatedbaseimage = cv2.inpaint(rotatedbaseimage, rotatedmask, 3, cv2.INPAINT_TELEA)\n",
" # return rotatedmask, 5\n",
" # return rotatedbaseimage, 5\n",
" \n",
"\n",
" croppedbaseimage, croprect = houghlinepcrop(rotatedbaseimage, rotateddst1, sizemultiplier, returnrect=True)\n",
" # return croppedbaseimage, 5\n",
" croppedmask = rotatedmask[croprect[1]:croprect[1]+croprect[3], croprect[0]:croprect[0]+croprect[2]]\n",
" \n",
" # return croppedmask, 5\n",
" # return croppedbaseimage, 5\n",
" # t = time.time()\n",
" adjustedimage = cv2.inpaint(croppedbaseimage, 255-croppedmask, 3, cv2.INPAINT_TELEA)\n",
" # print(\"inpaint time:\", time.time()-t)\n",
" \n",
" # return adjustedimage, 5\n",
"\n",
" finalbaseimage = contourcrop(adjustedimage)\n",
" # finalbaseimage = croppedbaseimage\n",
" \n",
" return finalbaseimage, rotationangle"
]
},
{
"cell_type": "code",
"execution_count": 682,
"metadata": {},
"outputs": [],
"source": [
"def premorphCrop(image, mask):\n",
" colourmask = cv2.cvtColor(255-mask, cv2.COLOR_GRAY2BGR)\n",
" image = cv2.bitwise_or(image, colourmask)\n",
" # croppedimage \n",
" \n",
" croprect = mf.croptoblack(255-mask, extraborder=0, returnrect=True)\n",
" \n",
" croppedimage = image[croprect[1]:croprect[1]+croprect[3], croprect[0]:croprect[0]+croprect[2], :]\n",
" # return croppedimage, (0,0,0,0)\n",
" #IDEA, try cropping the image for triangle threshold. not too sure how triangle threshold actually works\n",
" ogshape = croppedimage.shape\n",
" miniimage = mf.ResizeWithAspectRatio(croppedimage, width=1000) if (ogshape[1] > ogshape[0]) else mf.ResizeWithAspectRatio(croppedimage, height=1000)\n",
" # miniimage = croppedimage\n",
" \n",
" # convert to grayscale\n",
" # gray = cv2.cvtColor(miniimage,cv2.COLOR_BGR2GRAY)\n",
" gray = cv2.cvtColor(miniimage, cv2.COLOR_BGR2LAB)[:,:,0]\n",
"\n",
" window = gray.shape[1]//8\n",
" if window % 2 == 0:\n",
" window += 1\n",
" # print(window)\n",
" # gray = cv2.blur(gray, (11,11))\n",
"\n",
" # threshold\n",
" # thresh = cv2.threshold(gray, 170, 255, cv2.THRESH_BINARY)[1]\n",
" # thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU)[1]\n",
" # thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_TRIANGLE)[1]\n",
"\n",
" thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, window, 2)\n",
" \n",
" \n",
" # thresh = cv2.bitwise_and(thresh, thresh1)\n",
" # thresh = cv2.bitwise_or(thresh, 255-mask)\n",
" \n",
" \n",
"\n",
" # return thresh, (0,0,0,0)\n",
"\n",
" # apply morphology\n",
" kernel = np.ones((3,3), np.uint8)\n",
" morph = cv2.morphologyEx(thresh, cv2.MORPH_ERODE, kernel, iterations=2)\n",
" # morph = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)\n",
" kernel = np.ones((9,9), np.uint8)\n",
" morph = cv2.morphologyEx(morph, cv2.MORPH_CLOSE, kernel, iterations=1)\n",
" # kernel = np.ones((3,3), np.uint8)\n",
" # morph = cv2.morphologyEx(morph, cv2.MORPH_CLOSE, kernel)\n",
"\n",
"\n",
" morph = cv2.resize(morph, (ogshape[1], ogshape[0]))\n",
" # morph = cv2.bitwise_and(bigbinary, mask)\n",
" # return morph, (0,0,0,0)\n",
" \n",
"\n",
" \n",
"\n",
"\n",
" # get largest contour\n",
" contours, heirarchy = cv2.findContours(morph, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)\n",
" big_contour = max(contours, key=cv2.contourArea)\n",
" \n",
" # temp = cv2.drawContours(croppedimage, [big_contour], -1, (255,0,0), thickness=2)\n",
" # return temp, (0,0,0,0)\n",
"\n",
"\n",
" # get bounding box\n",
" x,y,w,h = cv2.boundingRect(big_contour)\n",
" x += croprect[0]\n",
" y += croprect[1]\n",
"\n",
" # draw filled contour on black background\n",
" mask = np.zeros((ogshape[0], ogshape[1]), dtype=np.uint8)\n",
" # mask = cv2.merge([mask,mask,mask])\n",
" # mask = cv2.blur(mask,(121,121))\n",
" cv2.drawContours(mask, [big_contour], -1, 255, cv2.FILLED)\n",
" mask = cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR)\n",
" borderType = cv2.BORDER_CONSTANT\n",
" mask = cv2.copyMakeBorder(mask, croprect[1], (image.shape[0]-(croprect[1]+croprect[3])), croprect[0], (image.shape[1]-(croprect[0]+croprect[2])), borderType, None, (0,0,0))\n",
" # return mask, (0,0,0,0)\n",
"\n",
" # apply mask to input\n",
" result1 = image.copy()\n",
" mask = cv2.blur(mask,(3,3))\n",
" result1 = cv2.bitwise_and(result1, mask)\n",
"\n",
" # crop result\n",
" result2 = result1[y:y+h, x:x+w]\n",
" return result2, (x,y,w,h)"
]
},
{
"cell_type": "code",
"execution_count": 683,
"metadata": {},
"outputs": [],
"source": [
"def prepimageforhoughline(image, returnrect=True):\n",
" prepped, scaler, hp, vp, mask = mf.squareandthenresize(image, fill=255, width=1000, returnscalerinfo=True, returnmask=True)\n",
" ogpreppedshape = prepped.shape\n",
" # t = time.time()\n",
" prepped, croprect = premorphCrop(prepped, mask)\n",
" # print(\"premorphCrop time:\", time.time()-t)\n",
" # return prepped\n",
" if (prepped.shape[1] > prepped.shape[0]):\n",
" prepped, preppedscaler = mf.ResizeWithAspectRatio(prepped, width=1000, retscale=True)\n",
" else:\n",
" prepped, preppedscaler = mf.ResizeWithAspectRatio(prepped, height=1000, retscale=True)\n",
" finalcroprect = (int(croprect[0]*scaler - hp), int(croprect[1]*scaler - vp), int(croprect[2]*scaler), int(croprect[3]*scaler))\n",
" gray1 = cv2.cvtColor(prepped, cv2.COLOR_BGR2GRAY)\n",
" padding=30\n",
" gray1 = mf.padWithColour(gray1, hpadding=padding, vpadding=padding, fill=0)\n",
" # return gray1\n",
" \n",
"\n",
" dst1 = cv2.Canny(gray1, 0, 500, None, 3)\n",
" # return dst1\n",
"\n",
" \n",
" kernel = np.ones((5,5), np.uint8)\n",
" out = cv2.morphologyEx(dst1, cv2.MORPH_DILATE, kernel)\n",
" out = cv2.blur(out, (5,5))\n",
" kernel = np.ones((6,6), np.uint8)\n",
" dst1 = cv2.morphologyEx(out, cv2.MORPH_ERODE, kernel)\n",
" # return dst1\n",
"\n",
" dst1 = cv2.Canny(dst1, 0, 500, None, 3)\n",
" dst1 = dst1[padding:-padding, padding:-padding]\n",
" # return dst1\n",
" \n",
" accompaniedimage = image[finalcroprect[1]:finalcroprect[1]+finalcroprect[3], finalcroprect[0]:finalcroprect[0]+finalcroprect[2], :]\n",
" if returnrect:\n",
" borderType = cv2.BORDER_CONSTANT\n",
" preppadding = [croprect[0], croprect[1], ogpreppedshape[1]-(croprect[0]+croprect[2]), ogpreppedshape[0]-(croprect[1]+croprect[3])]\n",
" preppadding = [int(s/preppedscaler) for s in preppadding]\n",
" # print(preppadding)\n",
" paddedprepped = cv2.copyMakeBorder(dst1, preppadding[1], preppadding[3], preppadding[0], preppadding[2], borderType, 0)\n",
" \n",
" squaredimage = mf.squarepad(image, fill=0)\n",
" mask = np.full((image.shape[0],image.shape[1]), fill_value=255, dtype=np.uint8)\n",
" mask = mf.squarepad(mask, fill=0)\n",
" \n",
" # print(finalcroprect)\n",
" \n",
" return dst1, accompaniedimage, paddedprepped, squaredimage, mask, finalcroprect\n",
" else:\n",
" return dst1, accompaniedimage"
]
},
{
"cell_type": "code",
"execution_count": 684,
"metadata": {},
"outputs": [],
"source": [
"def houghlinedeskewandcrop(image):\n",
" croppedcanny, croppedimage, canny, squaredimage, ogimagemask, rect = prepimageforhoughline(image, returnrect=True) ## scaling and cropping occurs. need to also return the changes done\n",
" # return prepimageforhoughline(image, returnrect=True), 5\n",
" # return canny, ogimage\n",
" # print(canny.shape)\n",
" # print(croppedogimage.shape)\n",
" # return croppedimage, 5\n",
"\n",
" ## -----------------finding angle to deskew-----------------\n",
" rotationangle = mf.houghlinedeskewangle(croppedcanny)\n",
" # print(croppedcanny.shape)\n",
" # print(abs(rotationangle))\n",
" if (croppedcanny.shape[0] > croppedcanny.shape[1]):\n",
" if (rotationangle > 45):\n",
" rotationangle -= 90\n",
" elif rotationangle < -45:\n",
" rotationangle += 90\n",
" # print(rotationangle)\n",
" # elif (croppedcanny.shape[1] > croppedcanny.shape[0]):\n",
" # if (rotationangle > 45):\n",
" # rotationangle -= 90\n",
" # elif rotationangle < -45:\n",
" # rotationangle += 90\n",
" # print(rotationangle)\n",
" \n",
" # return croppedimage, 5\n",
"\n",
" \n",
" # rotatorrect = findcroprectforangle(rect, angle)\n",
"\n",
" # -----------------end of finding angle to deskew-----------------\n",
"\n",
" ## -----------------deskewing and then cropping-----------------\n",
" outimg, angle = houghlinedeskewthencrop(squaredimage, canny, rotationangle, ogimagemask)\n",
" return outimg, angle"
]
},
{
"cell_type": "code",
"execution_count": 685,
"metadata": {},
"outputs": [],
"source": [
"def houghlineprocessing(image):\n",
" croppedanddeskewed, angle = mf.houghlinedeskewandcrop(image)\n",
" croppedanddeskewed, angle = houghlinedeskewandcrop(image)\n",
" # return croppedanddeskewed\n",
" \n",
" \n",
" # postprocessed = cropclarifying(croppedanddeskewed)\n",
" postprocessed = croppedanddeskewed\n",
" # postprocessed = croppedanddeskewed\n",
" # return postprocessed\n",
" # postprocessed = mf.croptoblack(postprocessed)\n",
" \n",
@ -512,7 +847,7 @@
" \n",
" # final = mf.externaldeskew(postprocessed, fill=(255,255,255))\n",
" # rotangle = mf.receipttextdeskew(postprocessed, fill=(255,255,255), returnangle=True)\n",
" final = postprocessed\n",
" # final = postprocessed\n",
" \n",
" \n",
" # final = mf.croptoblack(final)\n",
@ -522,12 +857,13 @@
" # cv2.waitKey(0)\n",
" # cv2.destroyAllWindows()\n",
" \n",
" return final"
" # return final\n",
" return croppedanddeskewed"
]
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 686,
"metadata": {},
"outputs": [],
"source": [
@ -536,17 +872,9 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 687,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.0\n"
]
}
],
"outputs": [],
"source": [
"# prepped, scaler, hp, vp = mf.squareandthenresize(img, fill=255, width=1000, returnscalerinfo=True)\n",
"outs = houghlineprocessing(img)\n",
@ -560,7 +888,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 688,
"metadata": {},
"outputs": [],
"source": [
@ -574,7 +902,7 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 689,
"metadata": {},
"outputs": [],
"source": [
@ -586,7 +914,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 690,
"metadata": {},
"outputs": [],
"source": [
@ -595,7 +923,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 691,
"metadata": {},
"outputs": [],
"source": [
@ -605,7 +933,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 692,
"metadata": {},
"outputs": [],
"source": [
@ -635,30 +963,14 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 693,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.9740282517223996\n",
"-2.0053522829578814\n",
"-0.9740282517223996\n",
"0.0\n",
"0.9740282517223996\n",
"-0.9740282517223996\n",
"-0.011669615052326776\n",
"2.0053522829578814\n",
"0.0\n",
"0.0\n",
"0.0\n",
"-2.979380534680281\n",
"0.0\n",
"0.0\n",
"-2.0053522829578814\n",
"-11.000789666511807\n",
"average time: 0.19967518746852875(s)\n"
"average time: 0.42403123296540357(s)\n"
]
}
],
@ -669,7 +981,7 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 694,
"metadata": {},
"outputs": [],
"source": [
@ -679,7 +991,7 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": 695,
"metadata": {},
"outputs": [],
"source": [
@ -688,7 +1000,7 @@
},
{
"cell_type": "code",
"execution_count": 20,
"execution_count": 696,
"metadata": {},
"outputs": [],
"source": [
@ -699,7 +1011,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},

View File

@ -28,7 +28,12 @@ def ResizeWithAspectRatio(image, width=None, height=None, inter=cv2.INTER_AREA,
return cv2.resize(image, dim, interpolation=inter)
def squareandthenresize(image, fill=0, width=None, height=None, inter=cv2.INTER_AREA, returnscalerinfo=False):
def squareandthenresize(image, fill=0, width=None, height=None, inter=cv2.INTER_AREA, returnscalerinfo=False, returnmask=False):
if (returnmask):
h,w = image.shape[0], image.shape[1]
mask = np.full((h,w), fill_value=255, dtype=np.uint8)
mask = squarepad(mask, fill=0, returnoffset=False)
mask = ResizeWithAspectRatio(mask, width=width, height=height, inter=inter, retscale=False)
out = squarepad(image, fill=fill, returnoffset=returnscalerinfo)
if (returnscalerinfo):
squaredimage, hp, vp = out
@ -37,9 +42,13 @@ def squareandthenresize(image, fill=0, width=None, height=None, inter=cv2.INTER_
out = ResizeWithAspectRatio(squaredimage, width=width, height=height, inter=inter, retscale=returnscalerinfo)
if (returnscalerinfo):
finalimage, scaler = out
if (returnmask):
return finalimage, scaler, hp, vp, mask
return finalimage, scaler, hp, vp
else:
finalimage = out
if (returnmask):
return finalimage, mask
return finalimage
@ -209,6 +218,8 @@ def WithinXDegrees(lines, margin, baseangle=0):
# outlines = np.array([[]])
outlines = np.empty((0, 4))
# print(outlines.shape)
if lines is None:
return outlines
for line in lines:
# print(type(line))
# print(abs(lineAngle(line[0])))
@ -246,16 +257,32 @@ def lineswithinrange(lines, pt1, pt2, x=True, y=False):
if (x):
minx = min(pt1[0], pt2[0])
maxx = max(pt1[0], pt2[0])
out_lines = [line for line in out_lines if ((min(line[0],line[2]) >= minx) and (max(line[0],line[2]) <= maxx))]
out_lines = [list(line) for line in out_lines if ((min(line[0],line[2]) >= minx) and (max(line[0],line[2]) <= maxx))]
if (y):
miny = min(pt1[1], pt2[1])
maxy = max(pt1[1], pt2[1])
out_lines = [line for line in out_lines if ((min(line[1],line[3]) >= minx) and (max(line[1],line[3]) <= maxx))]
out_lines = [list(line) for line in out_lines if ((min(line[1],line[3]) >= minx) and (max(line[1],line[3]) <= maxx))]
if len(out_lines) == 0:
out_lines = np.empty((0, 4))
return out_lines
def premorphCrop(image):
def premorphCrop(image, mask):
colourmask = cv2.cvtColor(255-mask, cv2.COLOR_GRAY2BGR)
image = cv2.bitwise_or(image, colourmask)
# croppedimage
croprect = croptoblack(255-mask, extraborder=0, returnrect=True)
croppedimage = image[croprect[1]:croprect[1]+croprect[3], croprect[0]:croprect[0]+croprect[2], :]
# return croppedimage, (0,0,0,0)
#IDEA, try cropping the image for triangle threshold. not too sure how triangle threshold actually works
ogshape = croppedimage.shape
miniimage = ResizeWithAspectRatio(croppedimage, width=1000) if (ogshape[1] > ogshape[0]) else ResizeWithAspectRatio(croppedimage, height=1000)
# miniimage = croppedimage
# convert to grayscale
gray = cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)
# gray = cv2.cvtColor(miniimage,cv2.COLOR_BGR2GRAY)
gray = cv2.cvtColor(miniimage, cv2.COLOR_BGR2LAB)[:,:,0]
window = gray.shape[1]//8
if window % 2 == 0:
@ -265,43 +292,59 @@ def premorphCrop(image):
# threshold
# thresh = cv2.threshold(gray, 170, 255, cv2.THRESH_BINARY)[1]
# thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU)[1]
# thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_TRIANGLE)[1]
thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, window, 2)
# thresh = cv2.bitwise_and(thresh, thresh1)
# thresh = cv2.bitwise_or(thresh, 255-mask)
# return thresh
# return thresh, (0,0,0,0)
# apply morphology
kernel = np.ones((9,9), np.uint8)
morph = cv2.morphologyEx(thresh, cv2.MORPH_ERODE, kernel)
kernel = np.ones((3,3), np.uint8)
morph = cv2.morphologyEx(thresh, cv2.MORPH_ERODE, kernel, iterations=2)
# morph = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
kernel = np.ones((9,9), np.uint8)
morph = cv2.morphologyEx(morph, cv2.MORPH_CLOSE, kernel)
kernel = np.ones((3,3), np.uint8)
morph = cv2.morphologyEx(morph, cv2.MORPH_CLOSE, kernel)
morph = cv2.morphologyEx(morph, cv2.MORPH_CLOSE, kernel, iterations=1)
# kernel = np.ones((3,3), np.uint8)
# morph = cv2.morphologyEx(morph, cv2.MORPH_CLOSE, kernel)
# return morph
morph = cv2.resize(morph, (ogshape[1], ogshape[0]))
# morph = cv2.bitwise_and(bigbinary, mask)
# return morph, (0,0,0,0)
# get largest contour
contours = cv2.findContours(morph, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
contours = contours[0] if len(contours) == 2 else contours[1]
area_thresh = 0
for c in contours:
area = cv2.contourArea(c)
if area > area_thresh:
area_thresh = area
big_contour = c
contours, heirarchy = cv2.findContours(morph, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
big_contour = max(contours, key=cv2.contourArea)
# temp = cv2.drawContours(croppedimage, [big_contour], -1, (255,0,0), thickness=2)
# return temp, (0,0,0,0)
# get bounding box
x,y,w,h = cv2.boundingRect(big_contour)
x += croprect[0]
y += croprect[1]
# draw filled contour on black background
mask = np.zeros_like(gray)
mask = cv2.merge([mask,mask,mask])
mask = np.zeros((ogshape[0], ogshape[1]), dtype=np.uint8)
# mask = cv2.merge([mask,mask,mask])
# mask = cv2.blur(mask,(121,121))
cv2.drawContours(mask, [big_contour], -1, (255,255,255), cv2.FILLED)
cv2.drawContours(mask, [big_contour], -1, 255, cv2.FILLED)
mask = cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR)
borderType = cv2.BORDER_CONSTANT
mask = cv2.copyMakeBorder(mask, croprect[1], (image.shape[0]-(croprect[1]+croprect[3])), croprect[0], (image.shape[1]-(croprect[0]+croprect[2])), borderType, None, (0,0,0))
# return mask, (0,0,0,0)
# apply mask to input
result1 = image.copy()
@ -343,17 +386,25 @@ def rotateLine(img, line, angle, returnint=True):
return (pt1[0], pt1[1], pt2[0], pt2[1])
def prepimageforhoughline(image, returnrect=True):
prepped, scaler, hp, vp = squareandthenresize(image, fill=255, width=1000, returnscalerinfo=True)
prepped, scaler, hp, vp, mask = squareandthenresize(image, fill=255, width=1000, returnscalerinfo=True, returnmask=True)
ogpreppedshape = prepped.shape
prepped, croprect = premorphCrop(prepped)
# t = time.time()
prepped, croprect = premorphCrop(prepped, mask)
# print("premorphCrop time:", time.time()-t)
# return prepped
if (prepped.shape[1] > prepped.shape[0]):
prepped, preppedscaler = ResizeWithAspectRatio(prepped, width=1000, retscale=True)
else:
prepped, preppedscaler = ResizeWithAspectRatio(prepped, height=1000, retscale=True)
finalcroprect = (int(croprect[0]*scaler - hp), int(croprect[1]*scaler - vp), int(croprect[2]*scaler), int(croprect[3]*scaler))
gray1 = cv2.cvtColor(prepped, cv2.COLOR_BGR2GRAY)
padding=30
gray1 = padWithColour(gray1, hpadding=padding, vpadding=padding, fill=0)
# return gray1
dst1 = cv2.Canny(gray1, 0, 500, None, 3)
# return dst1
kernel = np.ones((5,5), np.uint8)
@ -364,24 +415,32 @@ def prepimageforhoughline(image, returnrect=True):
# return dst1
dst1 = cv2.Canny(dst1, 0, 500, None, 3)
dst1 = dst1[padding:-padding, padding:-padding]
# return dst1
accompaniedimage = image[finalcroprect[1]:finalcroprect[1]+finalcroprect[3], finalcroprect[0]:finalcroprect[0]+finalcroprect[2], :]
if returnrect:
borderType = cv2.BORDER_CONSTANT
preppadding = [croprect[0], croprect[1], ogpreppedshape[1]-(croprect[0]+croprect[2]), ogpreppedshape[0]-(croprect[1]+croprect[3])]
preppadding = [int(s/preppedscaler) for s in preppadding]
# print(preppadding)
paddedprepped = cv2.copyMakeBorder(dst1, preppadding[1], preppadding[3], preppadding[0], preppadding[2], borderType, 0)
squaredimage = squarepad(image, fill=0)
mask = np.full((image.shape[0],image.shape[1]), fill_value=255, dtype=np.uint8)
mask = squarepad(mask, fill=0)
return dst1, accompaniedimage, paddedprepped, squaredimage, finalcroprect
# print(finalcroprect)
return dst1, accompaniedimage, paddedprepped, squaredimage, mask, finalcroprect
else:
return dst1, accompaniedimage
def houghlinedeskewangle(image):
lines = cv2.HoughLines(image, 1, np.pi/180, int(max(image.shape[0], image.shape[1])/6), None, 0, 0)
angles = np.zeros(len(lines))
angles = np.array([0])
if lines is not None:
angles = np.zeros(len(lines))
for i in range(0, len(lines)):
rho = lines[i][0][0]
theta = lines[i][0][1]
@ -429,35 +488,65 @@ def rotatewithexactpadding(img, angle, fill=(0,0,0)):
rotatedimg = rotate(baseimage, angle,fill=fill)
return rotatedimg
def houghlinepcrop(baseimage, preppedimage, scalingmultiplier):
rotatedlines = cv2.HoughLinesP(preppedimage, 1, np.pi / 180, 30, None, 90, 30)
def houghlinepcrop(baseimage, preppedimage, scalingmultiplier, returnrect=False):
hminlength = int(preppedimage.shape[1]/30)
hmaxgap = int(preppedimage.shape[1]/35)
vmarginlines = WithinXDegrees(rotatedlines, 7)
hmarginlines = WithinXDegrees(rotatedlines, 7, baseangle=90)
# vrect = lineBoundingRect(vmarginlines,asRect=False, returnint=True)
# hmarginlines = lineswithinrange(hmarginlines, (vrect[0], vrect[1]), (vrect[2],vrect[3]), x=True, y=False)
vminlength = int(preppedimage.shape[0]/25)
vmaxgap = int(preppedimage.shape[0]/35)
# print(dim)
vrotatedlines = cv2.HoughLinesP(preppedimage, 1, np.pi / 180, 20, None, vminlength, vmaxgap)
hrotatedlines = cv2.HoughLinesP(preppedimage, 1, np.pi / 180, 20, None, hminlength, hmaxgap)
# if rotatedlines is None:
# print("hi")
# return baseimage
vmarginlines = WithinXDegrees(vrotatedlines, 7)
hmarginlines = WithinXDegrees(hrotatedlines, 7, baseangle=90)
if (len(vmarginlines) != 0):
# print("hi")
vrect = lineBoundingRect(vmarginlines,asRect=False, returnint=True)
hmarginlines = lineswithinrange(hmarginlines, (vrect[0], vrect[1]), (vrect[2],vrect[3]), x=True, y=False)
# print(vmarginlines)
# print(hmarginlines)
marginlines = np.append(vmarginlines, hmarginlines, axis=0)
# colourdst = cv2.cvtColor(preppedimage, cv2.COLOR_GRAY2BGR)
# if marginlines is not None:
colourdst = cv2.cvtColor(preppedimage, cv2.COLOR_GRAY2BGR)
# print(len(marginlines))
# up = False
# if len(marginlines) != 0:
# # print("hi")
# up = True
# for l in marginlines:
# cv2.line(colourdst, (int(l[0]), int(l[1])), (int(l[2]), int(l[3])), (0,0,255), 3, cv2.LINE_AA)
# # if up:
# # print("good")
# # else:
# # print("bad")
# # print("bye")
# return colourdst
rect = lineBoundingRect(marginlines,asRect=False, returnint=True)
scaledrect = (int(rect[0]*scalingmultiplier), int(rect[1]*scalingmultiplier), int(rect[2]*scalingmultiplier), int(rect[3]*scalingmultiplier))
croppedbaseimage = baseimage[scaledrect[1]:scaledrect[3], scaledrect[0]:scaledrect[2], :]
if returnrect:
croprect = (scaledrect[0], scaledrect[1], scaledrect[2]-scaledrect[0], scaledrect[3]-scaledrect[1])
return croppedbaseimage, croprect
return croppedbaseimage
def contourcrop(baseimage):
# return baseimage
shrunkencbi, sizemultiplier = ResizeWithAspectRatio(baseimage, width=1000, retscale=True)
gray = cv2.cvtColor(shrunkencbi, cv2.COLOR_BGR2GRAY)
# gray = cv2.cvtColor(shrunkencbi, cv2.COLOR_BGR2GRAY)
gray = cv2.cvtColor(shrunkencbi, cv2.COLOR_BGR2LAB)[:,:,0]
# thresh = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)[1]
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_TRIANGLE)[1]
# window = gray.shape[1]//7
# if window % 2 == 0:
# window += 1
# thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, window, 10)
# return thresh
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5,5))
# thresh = cv2.morphologyEx(thresh, cv2.MORPH_ERODE, kernel, iterations=2)
@ -487,23 +576,46 @@ def contourcrop(baseimage):
finalbaseimage = baseimage[scaledmx[1]:scaledmx[1]+scaledmx[3], scaledmx[0]:scaledmx[0]+scaledmx[2], :]
return finalbaseimage
def houghlinedeskewthencrop(baseimage, preppedimage, rotationangle, croprect):
def houghlinedeskewthencrop(baseimage, preppedimage, rotationangle, mask):
rotatedbaseimage = rotatewithexactpadding(baseimage, rotationangle, fill=(0,0,0))
rotateddst1 = rotatewithexactpadding(preppedimage, rotationangle, fill=(0,0,0))
sizemultiplier = rotatedbaseimage.shape[0]/rotateddst1.shape[0]
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,3))
shrunkmask = cv2.morphologyEx(mask, cv2.MORPH_ERODE, kernel)
rotatedmask = rotatewithexactpadding(shrunkmask, rotationangle, fill=0)
# rotatedbaseimage = cv2.inpaint(rotatedbaseimage, rotatedmask, 3, cv2.INPAINT_TELEA)
# return rotatedmask, 5
# return rotatedbaseimage, 5
croppedbaseimage = houghlinepcrop(rotatedbaseimage, rotateddst1, sizemultiplier)
croppedbaseimage, croprect = houghlinepcrop(rotatedbaseimage, rotateddst1, sizemultiplier, returnrect=True)
# return croppedbaseimage, 5
croppedmask = rotatedmask[croprect[1]:croprect[1]+croprect[3], croprect[0]:croprect[0]+croprect[2]]
# return croppedmask, 5
# return croppedbaseimage, 5
# t = time.time()
adjustedimage = cv2.inpaint(croppedbaseimage, 255-croppedmask, 3, cv2.INPAINT_TELEA)
# print("inpaint time:", time.time()-t)
# return adjustedimage, 5
finalbaseimage = contourcrop(croppedbaseimage)
finalbaseimage = contourcrop(adjustedimage)
# finalbaseimage = croppedbaseimage
return finalbaseimage, rotationangle
def houghlinedeskewandcrop(image):
croppedcanny, croppedimage, canny, ogimage, rect = prepimageforhoughline(image, returnrect=True) ## scaling and cropping occurs. need to also return the changes done
croppedcanny, croppedimage, canny, squaredimage, ogimagemask, rect = prepimageforhoughline(image, returnrect=True) ## scaling and cropping occurs. need to also return the changes done
# return prepimageforhoughline(image, returnrect=True), 5
# return canny, ogimage
# print(canny.shape)
# print(croppedogimage.shape)
# return croppedimage, 5
## -----------------finding angle to deskew-----------------
rotationangle = houghlinedeskewangle(croppedcanny)
@ -521,6 +633,8 @@ def houghlinedeskewandcrop(image):
# elif rotationangle < -45:
# rotationangle += 90
# print(rotationangle)
# return croppedimage, 5
# rotatorrect = findcroprectforangle(rect, angle)
@ -528,7 +642,7 @@ def houghlinedeskewandcrop(image):
# -----------------end of finding angle to deskew-----------------
## -----------------deskewing and then cropping-----------------
outimg, angle = houghlinedeskewthencrop(ogimage, canny, rotationangle, rect)
outimg, angle = houghlinedeskewthencrop(squaredimage, canny, rotationangle, ogimagemask)
return outimg, angle
def bruteforceprocessrects(greaterrects, lesserrects):
@ -1154,7 +1268,7 @@ def houghlineprocessing(image):
# postprocessed = cropclarifying(croppedanddeskewed)
postprocessed = croppedanddeskewed
# postprocessed = croppedanddeskewed
# return postprocessed
# postprocessed = mf.croptoblack(postprocessed)
@ -1163,7 +1277,7 @@ def houghlineprocessing(image):
# final = mf.externaldeskew(postprocessed, fill=(255,255,255))
# rotangle = mf.receipttextdeskew(postprocessed, fill=(255,255,255), returnangle=True)
final = postprocessed
# final = postprocessed
# final = mf.croptoblack(final)
@ -1173,6 +1287,7 @@ def houghlineprocessing(image):
# cv2.waitKey(0)
# cv2.destroyAllWindows()
return final
# return final
return croppedanddeskewed
###### DESIRE: CONVERT STUFF RELATED TO THE HOUGHLINE PROCESSING INTO C SINCE IT ONLY REALLY USES OPENCV