First complete implementation of hough line deskewing

Now to work on hough line cropping.

Signed-off-by: Ethan Wellenreiter <ewellenreiter@gmail.com>
This commit is contained in:
Ethan Wellenreiter 2023-10-09 00:18:12 -04:00
parent a62f628cc1
commit b2f3e89014
10 changed files with 861 additions and 624 deletions

View File

@ -44,7 +44,8 @@
"\n",
"\n",
"import os\n",
"import cv2"
"import cv2\n",
"import myfunctions as mf"
]
},
{
@ -73,20 +74,20 @@
"metadata": {},
"outputs": [],
"source": [
"def ResizeWithAspectRatio(image, width=None, height=None, inter=cv2.INTER_AREA):\n",
" dim = None\n",
" (h, w) = image.shape[:2]\n",
"# def ResizeWithAspectRatio(image, width=None, height=None, inter=cv2.INTER_AREA):\n",
"# dim = None\n",
"# (h, w) = image.shape[:2]\n",
"\n",
" if width is None and height is None:\n",
" return image\n",
" if width is None:\n",
" r = height / float(h)\n",
" dim = (int(w * r), height)\n",
" else:\n",
" r = width / float(w)\n",
" dim = (width, int(h * r))\n",
"# if width is None and height is None:\n",
"# return image\n",
"# if width is None:\n",
"# r = height / float(h)\n",
"# dim = (int(w * r), height)\n",
"# else:\n",
"# r = width / float(w)\n",
"# dim = (width, int(h * r))\n",
"\n",
" return cv2.resize(image, dim, interpolation=inter)"
"# return cv2.resize(image, dim, interpolation=inter)"
]
},
{
@ -171,7 +172,7 @@
" activeimage = active_dataset[index]['image']\n",
" # img = cv2.imread(active_dataset[index]['image'], 0)\n",
" open_cv_image = np.array(t.ToPILImage()(activeimage))\n",
" open_cv_image = ResizeWithAspectRatio(open_cv_image, 1000)\n",
" open_cv_image = mf.ResizeWithAspectRatio(open_cv_image, 1000)\n",
" # cv2.namedWindow(\"image\", cv2.WINDOW_NORMAL)\n",
" # cv2.setWindowProperty(\"image\", cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)\n",
" cv2.imshow(\"image\", open_cv_image)\n",

View File

@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 335,
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
@ -13,115 +13,125 @@
},
{
"cell_type": "code",
"execution_count": 336,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#initially for deskewing and cropping. moving to a doc for just cropping now that deskewing"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"import cv2\n",
"import numpy as np\n",
"import math\n",
"import myfunctions as mf\n",
"\n",
"import scipy.stats as st"
]
},
{
"cell_type": "code",
"execution_count": 337,
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"def ResizeWithAspectRatio(image, width=None, height=None, inter=cv2.INTER_AREA, retscale=False):\n",
" dim = None\n",
" (h, w) = image.shape[:2]\n",
"# def ResizeWithAspectRatio(image, width=None, height=None, inter=cv2.INTER_AREA, retscale=False):\n",
"# dim = None\n",
"# (h, w) = image.shape[:2]\n",
"\n",
" if width is None and height is None:\n",
" if (retscale == True):\n",
" return (image, 1)\n",
" return image\n",
" if width is None:\n",
" r = height / float(h)\n",
" dim = (int(w * r), height)\n",
" else:\n",
" r = width / float(w)\n",
" dim = (width, int(h * r))\n",
"# if width is None and height is None:\n",
"# if (retscale == True):\n",
"# return (image, 1)\n",
"# return image\n",
"# if width is None:\n",
"# r = height / float(h)\n",
"# dim = (int(w * r), height)\n",
"# else:\n",
"# r = width / float(w)\n",
"# dim = (width, int(h * r))\n",
"\n",
" if (retscale == True):\n",
" # print(\"hi\")\n",
" return (cv2.resize(image, dim, interpolation=inter), 1/r)\n",
" return cv2.resize(image, dim, interpolation=inter)\n",
"# if (retscale == True):\n",
"# # print(\"hi\")\n",
"# return (cv2.resize(image, dim, interpolation=inter), 1/r)\n",
"# return cv2.resize(image, dim, interpolation=inter)\n",
"\n",
"\n",
"class SquarePad:\n",
" def __init__(self, fill):\n",
" self.fill = fill\n",
"# class SquarePad:\n",
"# def __init__(self, fill):\n",
"# self.fill = fill\n",
" \n",
" def __call__(self, image):\n",
" w, h = image.shape[1], image.shape[0]\n",
" max_wh = np.max([w, h])\n",
" hp = int((max_wh - w) / 2)\n",
" vp = int((max_wh - h) / 2)\n",
" padding = (hp, vp, hp, vp)\n",
" return cv2.copyMakeBorder(image, vp, vp, hp, hp, cv2.BORDER_CONSTANT, self.fill)\n",
"# def __call__(self, image):\n",
"# w, h = image.shape[1], image.shape[0]\n",
"# max_wh = np.max([w, h])\n",
"# hp = int((max_wh - w) / 2)\n",
"# vp = int((max_wh - h) / 2)\n",
"# padding = (hp, vp, hp, vp)\n",
"# return cv2.copyMakeBorder(image, vp, vp, hp, hp, cv2.BORDER_CONSTANT, self.fill)\n",
" \n",
" \n",
" \n",
"def rotate(img, angle):\n",
" rows,cols = img.shape[0], img.shape[1]\n",
" M = cv2.getRotationMatrix2D((cols/2,rows/2),angle,1)\n",
" dst = cv2.warpAffine(img,M,(cols,rows))\n",
" return dst"
"# def rotate(img, angle):\n",
"# rows,cols = img.shape[0], img.shape[1]\n",
"# M = cv2.getRotationMatrix2D((cols/2,rows/2),angle,1)\n",
"# dst = cv2.warpAffine(img,M,(cols,rows))\n",
"# return dst"
]
},
{
"cell_type": "code",
"execution_count": 338,
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"def morphologyCrop(image):\n",
" # convert to grayscale\n",
" gray = cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)\n",
"# def morphologyCrop(image):\n",
"# # convert to grayscale\n",
"# gray = cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)\n",
"\n",
" # threshold\n",
" thresh = cv2.threshold(gray, 170, 255, cv2.THRESH_BINARY)[1]\n",
"# # threshold\n",
"# thresh = cv2.threshold(gray, 170, 255, cv2.THRESH_BINARY)[1]\n",
"\n",
" # apply morphology\n",
" kernel = np.ones((7,7), np.uint8)\n",
" morph = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)\n",
" kernel = np.ones((9,9), np.uint8)\n",
" morph = cv2.morphologyEx(morph, cv2.MORPH_ERODE, kernel)\n",
"# # apply morphology\n",
"# kernel = np.ones((7,7), np.uint8)\n",
"# morph = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)\n",
"# kernel = np.ones((9,9), np.uint8)\n",
"# morph = cv2.morphologyEx(morph, cv2.MORPH_ERODE, kernel)\n",
"\n",
" # get largest contour\n",
" contours = cv2.findContours(morph, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)\n",
" contours = contours[0] if len(contours) == 2 else contours[1]\n",
" area_thresh = 0\n",
" for c in contours:\n",
" area = cv2.contourArea(c)\n",
" if area > area_thresh:\n",
" area_thresh = area\n",
" big_contour = c\n",
"# # get largest contour\n",
"# contours = cv2.findContours(morph, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)\n",
"# contours = contours[0] if len(contours) == 2 else contours[1]\n",
"# area_thresh = 0\n",
"# for c in contours:\n",
"# area = cv2.contourArea(c)\n",
"# if area > area_thresh:\n",
"# area_thresh = area\n",
"# big_contour = c\n",
"\n",
"\n",
" # get bounding box\n",
" x,y,w,h = cv2.boundingRect(big_contour)\n",
"# # get bounding box\n",
"# x,y,w,h = cv2.boundingRect(big_contour)\n",
"\n",
" # draw filled contour on black background\n",
" mask = np.zeros_like(gray)\n",
" mask = cv2.merge([mask,mask,mask])\n",
" cv2.drawContours(mask, [big_contour], -1, (255,255,255), cv2.FILLED)\n",
"# # draw filled contour on black background\n",
"# mask = np.zeros_like(gray)\n",
"# mask = cv2.merge([mask,mask,mask])\n",
"# cv2.drawContours(mask, [big_contour], -1, (255,255,255), cv2.FILLED)\n",
"\n",
" # apply mask to input\n",
" result1 = image.copy()\n",
" result1 = cv2.bitwise_and(result1, mask)\n",
"# # apply mask to input\n",
"# result1 = image.copy()\n",
"# result1 = cv2.bitwise_and(result1, mask)\n",
"\n",
" # crop result\n",
" result2 = result1[y:y+h, x:x+w]\n",
" return result2"
"# # crop result\n",
"# result2 = result1[y:y+h, x:x+w]\n",
"# return result2"
]
},
{
"cell_type": "code",
"execution_count": 339,
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
@ -134,71 +144,82 @@
},
{
"cell_type": "code",
"execution_count": 340,
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"def lineAngle(line):\n",
" # print(line)\n",
" angle = (math.atan2(line[3] - line[1], line[2] - line[0]) % np.pi) - (np.pi/2)\n",
" return angle\n",
"# def lineAngle(line):\n",
"# # print(line)\n",
"# angle = (math.atan2(line[3] - line[1], line[2] - line[0]) % np.pi) - (np.pi/2)\n",
"# return angle\n",
" \n",
"def WithinXDegrees(lines, margin):\n",
" # outlines = np.array([[]])\n",
" outlines = np.empty((0, 4))\n",
" # print(outlines.shape)\n",
" for line in lines:\n",
" # print(type(line))\n",
" # print(abs(lineAngle(line[0])))\n",
" if (np.rad2deg(abs(lineAngle(line[0]))) <= margin):\n",
" outlines = np.append(outlines, [line[0]], axis=0)\n",
" return outlines\n",
"# def WithinXDegrees(lines, margin):\n",
"# # outlines = np.array([[]])\n",
"# outlines = np.empty((0, 4))\n",
"# # print(outlines.shape)\n",
"# for line in lines:\n",
"# # print(type(line))\n",
"# # print(abs(lineAngle(line[0])))\n",
"# if (np.rad2deg(abs(lineAngle(line[0]))) <= margin):\n",
"# outlines = np.append(outlines, [line[0]], axis=0)\n",
"# return outlines\n",
"\n",
"def lineBoundingRect(lines):\n",
" maxvals = lines.max(0)\n",
" minvals = lines.min(0)\n",
" boundingrect = (min(minvals[0],minvals[2]), min(minvals[1],minvals[3]), max(maxvals[0],maxvals[2]),max(maxvals[1],maxvals[3]))\n",
" return boundingrect\n",
" # print(lines.max(0))\n",
" # print(type(lines))"
"# def lineBoundingRect(lines):\n",
"# maxvals = lines.max(0)\n",
"# minvals = lines.min(0)\n",
"# boundingrect = (min(minvals[0],minvals[2]), min(minvals[1],minvals[3]), max(maxvals[0],maxvals[2]),max(maxvals[1],maxvals[3]))\n",
"# return boundingrect\n",
"# # print(lines.max(0))\n",
"# # print(type(lines))"
]
},
{
"cell_type": "code",
"execution_count": 341,
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"img = cv2.imread('./test_images/IMG_7605.jpg')\n",
"img = SquarePad(fill=255)(img)\n",
"img = rotate(img, 54)\n",
"img = morphologyCrop(img)"
"img = mf.SquarePad(fill=255)(img)\n",
"img = mf.rotate(img, 54)\n",
"img = mf.morphologyCrop(mf.ResizeWithAspectRatio(img,1000))"
]
},
{
"cell_type": "code",
"execution_count": 342,
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"cv2.imshow(\"Detected Lines (in red) - Standard Hough Line Transform\", ResizeWithAspectRatio(SquarePad(fill=255)(img), 500))\n",
"# img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)\n",
"# img = cv2.threshold(img, 200, 255, cv2.THRESH_BINARY)[1]"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"cv2.imshow(\"Detected Lines (in red) - Standard Hough Line Transform\", mf.ResizeWithAspectRatio(mf.SquarePad(fill=255)(img), 1000))\n",
"cv2.waitKey(0)\n",
"cv2.destroyAllWindows()"
]
},
{
"cell_type": "code",
"execution_count": 343,
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"resizedimg = ResizeWithAspectRatio(SquarePad(fill=255)(img), 500)\n",
"resizedimg = mf.ResizeWithAspectRatio(mf.SquarePad(fill=255)(img), 500)\n",
"\n",
"# cv2.imshow(\"Detected Lines (in red) - Standard Hough Line Transform\", img)\n",
"# cv2.waitKey(0)\n",
"# cv2.destroyAllWindows()\n",
"\n",
"gray = cv2.cvtColor(resizedimg ,cv2.COLOR_BGR2GRAY)\n",
"gray = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)[1]\n",
"cdst = resizedimg.copy()\n",
"\n",
"\n",
@ -208,7 +229,7 @@
},
{
"cell_type": "code",
"execution_count": 344,
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
@ -230,21 +251,21 @@
" # print(math.atan2(unroundedpt2[1] - unroundedpt1[1], unroundedpt2[0] - unroundedpt1[0]) % np.pi)\n",
" # print(lineAngle((unroundedpt1[0], unroundedpt1[1], unroundedpt2[0], unroundedpt2[1])))\n",
" # angles[i] = math.atan2(unroundedpt2[1] - unroundedpt1[1], unroundedpt2[0] - unroundedpt1[0]) % np.pi\n",
" angles[i] = lineAngle((unroundedpt1[0], unroundedpt1[1], unroundedpt2[0], unroundedpt2[1]))\n",
" angles[i] = mf.lineAngle((unroundedpt1[0], unroundedpt1[1], unroundedpt2[0], unroundedpt2[1]))\n",
" cv2.line(cdst, pt1, pt2, (0,0,255), 3, cv2.LINE_AA)"
]
},
{
"cell_type": "code",
"execution_count": 345,
"execution_count": 25,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"-45.26366581533504\n",
"-45.26366581533504\n"
"-56.7228217179515\n",
"-56.7228217179515\n"
]
}
],
@ -270,18 +291,18 @@
},
{
"cell_type": "code",
"execution_count": 346,
"execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
"# cv2.imshow(\"Detected Lines (in red) - Standard Hough Line Transform\", cdst)\n",
"# cv2.waitKey(0)\n",
"# cv2.destroyAllWindows()"
"cv2.imshow(\"Detected Lines (in red) - Standard Hough Line Transform\", cdst)\n",
"cv2.waitKey(0)\n",
"cv2.destroyAllWindows()"
]
},
{
"cell_type": "code",
"execution_count": 347,
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
@ -292,16 +313,16 @@
},
{
"cell_type": "code",
"execution_count": 348,
"execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
"rotatedimg = SquarePad(fill=255)(rotate(img, rotationangle))\n"
"rotatedimg = mf.SquarePad(fill=255)(mf.rotate(img, rotationangle))\n"
]
},
{
"cell_type": "code",
"execution_count": 349,
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
@ -312,11 +333,11 @@
},
{
"cell_type": "code",
"execution_count": 350,
"execution_count": 30,
"metadata": {},
"outputs": [],
"source": [
"resizedrotatedimg = ResizeWithAspectRatio(rotatedimg, 500)\n",
"resizedrotatedimg = mf.ResizeWithAspectRatio(rotatedimg, 500)\n",
"gray1 = cv2.cvtColor(resizedrotatedimg, cv2.COLOR_BGR2GRAY)\n",
"dst1 = cv2.Canny(gray1, 0, 500, None, 3)\n",
"cdstP = resizedrotatedimg.copy()\n",
@ -326,7 +347,7 @@
},
{
"cell_type": "code",
"execution_count": 351,
"execution_count": 31,
"metadata": {},
"outputs": [],
"source": [
@ -338,45 +359,37 @@
},
{
"cell_type": "code",
"execution_count": 352,
"execution_count": 32,
"metadata": {},
"outputs": [],
"source": [
"# cv2.imshow(\"Detected Lines (in red) - Standard Hough Line Transform\", cdstP)\n",
"# cv2.waitKey(0)\n",
"# cv2.destroyAllWindows()"
"cv2.imshow(\"Detected Lines (in red) - Standard Hough Line Transform\", cdstP)\n",
"cv2.waitKey(0)\n",
"cv2.destroyAllWindows()"
]
},
{
"cell_type": "code",
"execution_count": 353,
"execution_count": 33,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(274.0, 75.0, 355.0, 458.0)\n"
]
}
],
"outputs": [],
"source": [
"# print(linesP)\n",
"marginlines = WithinXDegrees(linesP, 2)\n",
"marginlines = mf.WithinXDegrees(linesP, 2)\n",
"# print(marginlines)\n",
"# if marginlines is not None:\n",
"# for i in range(0, len(marginlines)):\n",
"# l = marginlines[i]\n",
"# cv2.line(cdstPmargin, (int(l[0]), int(l[1])), (int(l[2]), int(l[3])), (0,0,255), 3, cv2.LINE_AA)\n",
"if marginlines is not None:\n",
" for i in range(0, len(marginlines)):\n",
" l = marginlines[i]\n",
" cv2.line(cdstPmargin, (int(l[0]), int(l[1])), (int(l[2]), int(l[3])), (0,0,255), 3, cv2.LINE_AA)\n",
" \n",
"boundingrectout = lineBoundingRect(marginlines)\n",
"print(boundingrectout)\n",
"cdstPmargin = cv2.rectangle(cdstPmargin,(int(boundingrectout[0]),int(boundingrectout[1])),(int(boundingrectout[2]),int(boundingrectout[3])),(0,255,0),2)"
"# boundingrectout = mf.lineBoundingRect(marginlines)\n",
"# # print(boundingrectout)\n",
"# cdstPmargin = cv2.rectangle(cdstPmargin,(int(boundingrectout[0]),int(boundingrectout[1])),(int(boundingrectout[2]),int(boundingrectout[3])),(0,255,0),2)"
]
},
{
"cell_type": "code",
"execution_count": 354,
"execution_count": 34,
"metadata": {},
"outputs": [],
"source": [

View File

@ -197,20 +197,20 @@
"metadata": {},
"outputs": [],
"source": [
"def ResizeWithAspectRatio(image, width=None, height=None, inter=cv2.INTER_AREA):\n",
" dim = None\n",
" (h, w) = image.shape[:2]\n",
"# def ResizeWithAspectRatio(image, width=None, height=None, inter=cv2.INTER_AREA):\n",
"# dim = None\n",
"# (h, w) = image.shape[:2]\n",
"\n",
" if width is None and height is None:\n",
" return image\n",
" if width is None:\n",
" r = height / float(h)\n",
" dim = (int(w * r), height)\n",
" else:\n",
" r = width / float(w)\n",
" dim = (width, int(h * r))\n",
"# if width is None and height is None:\n",
"# return image\n",
"# if width is None:\n",
"# r = height / float(h)\n",
"# dim = (int(w * r), height)\n",
"# else:\n",
"# r = width / float(w)\n",
"# dim = (width, int(h * r))\n",
"\n",
" return cv2.resize(image, dim, interpolation=inter)"
"# return cv2.resize(image, dim, interpolation=inter)"
]
},
{
@ -263,7 +263,7 @@
"rotation = model(adjustedtensorizedimage).item()\n",
"print(rotation)\n",
"rotatedimage = t.Resize(size=1000)(tvf.rotate(adjustedtensorizedimage, rotation))\n",
"# imS = ResizeWithAspectRatio(filereadimage, 1000)\n",
"# imS = mf.ResizeWithAspectRatio(filereadimage, 1000)\n",
"# imS = cv2.resize(filereadimage, (960, 540)) \n",
"open_cv_image = np.array(t.ToPILImage()(rotatedimage))\n",
"cv2.imshow(f'image', open_cv_image)\n",

View File

@ -2,20 +2,9 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.10/dist-packages/torchvision/datapoints/__init__.py:12: UserWarning: The torchvision.datapoints and torchvision.transforms.v2 namespaces are still Beta. While we do not expect major breaking changes, some APIs may still change according to user feedback. Please submit any feedback you may have in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check out https://github.com/pytorch/vision/issues/7319 to learn more about the APIs that we suspect might involve future changes. You can silence this warning by calling torchvision.disable_beta_transforms_warning().\n",
" warnings.warn(_BETA_TRANSFORMS_WARNING)\n",
"/usr/local/lib/python3.10/dist-packages/torchvision/transforms/v2/__init__.py:54: UserWarning: The torchvision.datapoints and torchvision.transforms.v2 namespaces are still Beta. While we do not expect major breaking changes, some APIs may still change according to user feedback. Please submit any feedback you may have in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check out https://github.com/pytorch/vision/issues/7319 to learn more about the APIs that we suspect might involve future changes. You can silence this warning by calling torchvision.disable_beta_transforms_warning().\n",
" warnings.warn(_BETA_TRANSFORMS_WARNING)\n"
]
}
],
"outputs": [],
"source": [
"import cv2\n",
"import numpy as np\n",
@ -23,183 +12,19 @@
"import torch\n",
"import torchvision.transforms.functional as tvf\n",
"import torchvision.transforms.v2 as v2\n",
"import torchvision.transforms as t"
"import torchvision.transforms as t\n",
"import myfunctions as mf\n",
"\n",
"from skimage import io\n",
"from matplotlib import pyplot as plt\n",
"import time\n",
"\n",
"import myfunctions as mf"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"def ResizeWithAspectRatio(image, width=None, height=None, inter=cv2.INTER_AREA, retscale=False):\n",
" dim = None\n",
" (h, w) = image.shape[:2]\n",
"\n",
" if width is None and height is None:\n",
" if (retscale == True):\n",
" return (image, 1)\n",
" return image\n",
" if width is None:\n",
" r = height / float(h)\n",
" dim = (int(w * r), height)\n",
" else:\n",
" r = width / float(w)\n",
" dim = (width, int(h * r))\n",
"\n",
" if (retscale == True):\n",
" # print(\"hi\")\n",
" return (cv2.resize(image, dim, interpolation=inter), 1/r)\n",
" return cv2.resize(image, dim, interpolation=inter)\n",
"\n",
"# modify this or add a function/parameter to also return r, the scale factor"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"class SquarePad:\n",
" def __init__(self, fill):\n",
" self.fill = fill\n",
" \n",
" def __call__(self, image):\n",
" w, h = image.shape\n",
" max_wh = np.max([w, h])\n",
" hp = int((max_wh - w) / 2)\n",
" vp = int((max_wh - h) / 2)\n",
" padding = (hp, vp, hp, vp)\n",
" return cv2.copyMakeBorder(image, vp, vp, hp, hp, cv2.BORDER_CONSTANT, self.fill)\n",
" \n",
"tensorize = v2.Compose([v2.ToImageTensor(), v2.ConvertImageDtype()])"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# rotation checker tools\n",
"def rotate(img, angle):\n",
" rows,cols = img.shape[0], img.shape[1]\n",
" M = cv2.getRotationMatrix2D((cols/2,rows/2),angle,1)\n",
" dst = cv2.warpAffine(img,M,(cols,rows))\n",
" return dst\n",
"\n",
"def sum_rows(img):\n",
" # Create a list to store the row sums\n",
" row_sums = []\n",
" # Iterate through the rows\n",
" for r in range(img.shape[0]-1):\n",
" # Sum the row\n",
" row_sum = sum(sum(img[r:r+1,:]))\n",
" # Add the sum to the list\n",
" row_sums.append(row_sum)\n",
" # Normalize range to (0,255)\n",
" row_sums = (row_sums/max(row_sums)) * 255\n",
" # Return\n",
" return row_sums\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# selective search rectangle tools\n",
"\n",
"import heapq as hq\n",
"\n",
"class MaxHeapObj(object):\n",
" def __init__(self, val): self.val = val\n",
" def __lt__(self, other): return self.val > other.val\n",
" def __eq__(self, other): return self.val == other.val\n",
" def __str__(self): return str(self.val)\n",
" \n",
"class MinHeap(object):\n",
" def __init__(self): self.h = []\n",
" def heappush(self, x): heapq.heappush(self.h, x)\n",
" def heappop(self): return heapq.heappop(self.h)\n",
" def __getitem__(self, i): return self.h[i]\n",
" def __len__(self): return len(self.h)\n",
" \n",
"class MaxHeap(MinHeap):\n",
" def heappush(self, x): heapq.heappush(self.h, MaxHeapObj(x))\n",
" def heappop(self): return heapq.heappop(self.h).val\n",
" def __getitem__(self, i): return self.h[i].val\n",
"\n",
"\n",
"\n",
"def rectArea(rect):\n",
" # print(rect)\n",
" return rect[2]*rect[3]\n",
"\n",
"def biggestRects(n, rects):\n",
" dict = {}\n",
" # outrects = np.zeros(shape=(n, 4))\n",
" for rect in rects:\n",
" dict[tuple(rect)] = rectArea(rect)\n",
" # maxh.heappush(rectArea(rect))\n",
" # print(maxh[0])\n",
" \n",
" \n",
" heap = [(-value, key) for key,value in dict.items()]\n",
" largest = hq.nsmallest(n, heap)\n",
" \n",
"\n",
" # hq.heapify(list(dict.items()))\n",
" # for i in range(0,n):\n",
" # outrects[i] = maxh.heappop()\n",
" # print(outrects)\n",
" return [key for value, key in largest]\n",
"\n",
"def overlapRect(rects):\n",
" leftwall = -1\n",
" rightwall = -1\n",
" topwall = -1\n",
" bottomwall = -1\n",
" for (x, y, w, h) in rects:\n",
" if (leftwall == -1):\n",
" leftwall = x\n",
" rightwall = x + w\n",
" topwall = y\n",
" bottomwall = y + h\n",
" continue\n",
" leftwall = max(leftwall, x)\n",
" rightwall = min(rightwall, x+w)\n",
" topwall = max(topwall, y)\n",
" bottomwall = min(bottomwall, y+h)\n",
" \n",
" if (topwall >= bottomwall or leftwall >= rightwall):\n",
" return (-1, -1, -1, -1)\n",
" return (leftwall, topwall, rightwall-leftwall, bottomwall-topwall)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# canny edge crop tools\n",
"\n",
"def clip(n, lower, upper):\n",
" return max(lower, min(n, upper))\n",
"\n",
"def colourscaler(n, min, max):\n",
" temp = n-min\n",
" diff = abs(max - min)\n",
" return clip((temp/diff)*255, 0, 255)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
@ -209,195 +34,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"def morphologyCrop(image):\n",
" # convert to grayscale\n",
" gray = cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)\n",
"\n",
" # threshold\n",
" thresh = cv2.threshold(gray, 170, 255, cv2.THRESH_BINARY)[1]\n",
"\n",
" # apply morphology\n",
" kernel = np.ones((7,7), np.uint8)\n",
" morph = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)\n",
" kernel = np.ones((9,9), np.uint8)\n",
" morph = cv2.morphologyEx(morph, cv2.MORPH_ERODE, kernel)\n",
"\n",
" # get largest contour\n",
" contours = cv2.findContours(morph, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)\n",
" contours = contours[0] if len(contours) == 2 else contours[1]\n",
" area_thresh = 0\n",
" for c in contours:\n",
" area = cv2.contourArea(c)\n",
" if area > area_thresh:\n",
" area_thresh = area\n",
" big_contour = c\n",
"\n",
"\n",
" # get bounding box\n",
" x,y,w,h = cv2.boundingRect(big_contour)\n",
"\n",
" # draw filled contour on black background\n",
" mask = np.zeros_like(gray)\n",
" mask = cv2.merge([mask,mask,mask])\n",
" cv2.drawContours(mask, [big_contour], -1, (255,255,255), cv2.FILLED)\n",
"\n",
" # apply mask to input\n",
" result1 = image.copy()\n",
" result1 = cv2.bitwise_and(result1, mask)\n",
"\n",
" # crop result\n",
" result2 = result1[y:y+h, x:x+w]\n",
" return result2"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"def rotator(image):\n",
" src = 255 - cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)\n",
" scores = []\n",
"\n",
"\n",
" # # square the image\n",
" # h,w = src.shape\n",
" # small_dimention = min(h,w)\n",
" # src = src[:small_dimention, :small_dimention]\n",
" src = SquarePad(fill=255)(src)\n",
"\n",
"\n",
" src = cv2.threshold(src, 70, 255, cv2.THRESH_BINARY)[1]\n",
" src = ResizeWithAspectRatio(src, height=250)\n",
" \n",
" angle = 0\n",
" finalangle = 0\n",
" while angle <= 360:\n",
" # Rotate the source image\n",
" img = rotate(src, angle) \n",
" # Crop the center 1/3rd of the image (roi is filled with text)\n",
" h,w = img.shape\n",
" buffer = min(h, w) - int(min(h,w)/1.5)\n",
" roi = img[int(h/2-buffer):int(h/2+buffer), int(w/2-buffer):int(w/2+buffer)]\n",
" # # Create background to draw transform on\n",
" # bg = np.zeros((buffer*2, buffer*2), np.uint8)\n",
" # Compute the sums of the rows\n",
" row_sums = sum_rows(roi)\n",
" # High score --> Zebra stripes\n",
" score = np.count_nonzero(row_sums)\n",
" scores.append(score)\n",
" # othercount = othercount + 1\n",
" # Image has best rotation\n",
" if score <= min(scores):\n",
" # count = count + 1\n",
" # Save the rotatied image\n",
" # print('found optimal rotation')\n",
" # best_rotation = img.copy()\n",
" finalangle = angle\n",
" # goodangle = angle\n",
" # k = display_data(roi, row_sums, buffer)\n",
" # if k == 27: break\n",
" # Increment angle and try again\n",
" angle += .75\n",
" # cv2.destroyAllWindows()\n",
" return rotate(image, finalangle) "
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"def selectiveSearchSegmentationImp(image):\n",
" ss = cv2.ximgproc.segmentation.createSelectiveSearchSegmentation()\n",
" ss.setBaseImage(image)\n",
" ss.switchToSelectiveSearchFast()\n",
" return ss.process()\n",
"\n",
"def selectiveSearchCrop(image):\n",
" img, scale = ResizeWithAspectRatio(image,300, retscale=True)\n",
" rects = selectiveSearchSegmentationImp(cv2.GaussianBlur(img, (15,15),0))\n",
" bigRects = biggestRects(20, rects)\n",
" overlaprectangle = overlapRect(bigRects)\n",
" if (overlaprectangle[0] == -1):\n",
" print(\"hi\")\n",
" return image\n",
" # print(image.shape)\n",
" finalrect = (int(overlaprectangle[0]*scale), int(overlaprectangle[1]*scale), int(overlaprectangle[2]*scale), int(overlaprectangle[3]*scale))\n",
" print(finalrect)\n",
" return image[finalrect[0]: finalrect[0]+finalrect[2], finalrect[1]: finalrect[1]+finalrect[3], :]\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"def cannyEdgeCrop(image, lower = 100, upper = 255, threshold1 = 50, threshold2 = 350):\n",
" lower = max(0,lower)\n",
" upper = min(255, upper)\n",
" gray = cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)\n",
"\n",
" scaled_gray = np.zeros(gray.shape, gray.dtype)\n",
" \n",
" # for y in range(0,gray.shape[0]):\n",
" # for x in range(0,gray.shape[1]):\n",
" # scaled_gray[y][x] = colourscaler(gray[y][x], lower, upper)\n",
" scaled_gray = gray\n",
" \n",
" blurred = cv2.GaussianBlur(scaled_gray, (15,15),0)\n",
" edged = cv2.Canny(blurred, threshold1, threshold2)\n",
" return edged"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"from skimage import io\n",
"# import skimage.transform as st\n",
"# from skimage.transform import rotate\n",
"# from skimage.color import rgb2gray\n",
"from deskew import determine_skew\n",
"from matplotlib import pyplot as plt\n",
"\n",
"\n",
"\n",
"def deskew(image):\n",
" # image = io.imread(_img)\n",
" # print(type(image))\n",
" grayscale = cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)\n",
" grayscale = SquarePad(fill=255)(grayscale)\n",
" grayscale = ResizeWithAspectRatio(grayscale, height=300)\n",
" # print(type(grayscale))\n",
" angle = determine_skew(grayscale)\n",
" # print(angle)\n",
" rotated = rotate(image, angle)\n",
" return rotated"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"import time"
]
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 19,
"metadata": {},
"outputs": [
{
@ -406,13 +43,13 @@
"True"
]
},
"execution_count": 15,
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cropped = morphologyCrop(img)\n",
"cropped = mf.morphologyCrop(img)\n",
"# rotated = deskew(cropped)\n",
"# cropped2 = morphologyCrop(rotated)\n",
"# cropped2 = selectiveSearchCrop(rotated)\n",
@ -423,7 +60,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [

View File

@ -0,0 +1,374 @@
import cv2
import numpy as np
import math
from deskew import determine_skew
import heapq as hq
import torchvision.transforms.v2 as v2
import scipy.stats as st
## ------------------------------helper functions------------------------------
def ResizeWithAspectRatio(image, width=None, height=None, inter=cv2.INTER_AREA, retscale=False):
    """Resize ``image`` to a target width or height while keeping its aspect ratio.

    Args:
        image: Image array; ``image.shape[:2]`` is unpacked as ``(height, width)``.
        width: Target width in pixels. Takes precedence over ``height`` when both are given.
        height: Target height in pixels. Only used when ``width`` is None.
        inter: Interpolation flag forwarded to ``cv2.resize``.
        retscale: When equal to True, the inverse of the applied scale factor is returned too.

    Returns:
        The resized image, or ``(resized_image, 1 / ratio)`` when ``retscale`` is True.
        If neither ``width`` nor ``height`` is given the input is returned unchanged
        (paired with a scale of 1 when ``retscale`` is True).
    """
    want_scale = (retscale == True)
    rows, cols = image.shape[:2]

    # Nothing requested: hand the image back untouched.
    if width is None and height is None:
        return (image, 1) if want_scale else image

    if width is not None:
        ratio = width / float(cols)
        target = (width, int(rows * ratio))
    else:
        ratio = height / float(rows)
        target = (int(cols * ratio), height)

    resized = cv2.resize(image, target, interpolation=inter)
    # 1 / ratio maps coordinates in the resized image back to the original one.
    return (resized, 1 / ratio) if want_scale else resized
class SquarePad:
    """Pad an image with a constant-colour border so that it becomes square.

    The shorter dimension is padded on both sides so the original content
    stays centred; when the size difference is odd the extra pixel goes to
    the right / bottom edge so the output is exactly square.
    """

    def __init__(self, fill):
        # Border colour: a single number (used for every channel) or a
        # per-channel sequence understood by cv2.copyMakeBorder.
        self.fill = fill

    def __call__(self, image):
        """Return ``image`` padded to ``max(h, w)`` pixels on each side."""
        h, w = image.shape[0], image.shape[1]
        side = max(h, w)
        left = (side - w) // 2
        top = (side - h) // 2
        right = side - w - left
        bottom = side - h - top
        # A bare number would only set the first channel of the border on a
        # multi-channel image, so replicate it across all scalar slots.
        border_value = (self.fill,) * 4 if np.isscalar(self.fill) else self.fill
        # `value` must be passed by keyword: the 7th positional parameter of
        # cv2.copyMakeBorder is `dst`, so a positional fill is silently ignored.
        return cv2.copyMakeBorder(image, top, bottom, left, right,
                                  cv2.BORDER_CONSTANT, value=border_value)
def rotate(img, angle):
    """Rotate ``img`` about its centre, keeping the original canvas size.

    ``angle`` is forwarded unchanged to ``cv2.getRotationMatrix2D`` with a
    scale of 1; the output has the same width and height as the input.
    """
    height, width = img.shape[0], img.shape[1]
    centre = (width / 2, height / 2)
    matrix = cv2.getRotationMatrix2D(centre, angle, 1)
    return cv2.warpAffine(img, matrix, (width, height))
def clip(n, lower, upper):
    """Clamp ``n`` into the closed interval ``[lower, upper]``."""
    # Cap from above first, then raise to the lower bound if needed.
    capped = upper if upper < n else n
    return capped if capped > lower else lower
def colourscaler(n, min, max):
    """Linearly map ``n`` from the range ``[min, max]`` onto ``[0, 255]``.

    The result is clamped to ``[0, 255]`` via ``clip``. Note that the
    parameter names shadow the ``min``/``max`` builtins inside this function.
    """
    span = abs(max - min)
    offset = n - min
    scaled = (offset / span) * 255
    return clip(scaled, 0, 255)
## Transform that converts an image (usually a PIL image) to a pytorch tensor by
## chaining v2.ToImageTensor() and v2.ConvertImageDtype().
tensorize = v2.Compose([v2.ToImageTensor(), v2.ConvertImageDtype()])
## ------------------------------for selective segmentation search crop------------------------------
def rectArea(rect):
    """Return the area of an ``(x, y, w, h)`` rectangle (``w * h``)."""
    width, height = rect[2], rect[3]
    return width * height
def biggestRects(n, rects):
    """Return up to ``n`` distinct rectangles with the largest areas.

    Args:
        n: Maximum number of rectangles to return.
        rects: Iterable of ``(x, y, w, h)`` sequences.

    Returns:
        List of rectangle tuples ordered from largest area to smallest;
        rectangles of equal area are ordered by ascending tuple value.
    """
    # Keyed by the rectangle tuple so duplicate rectangles collapse to one entry.
    area_by_rect = {}
    for rect in rects:
        area_by_rect[tuple(rect)] = rect[2] * rect[3]

    # Negating the area lets nsmallest pick the largest rectangles first.
    ranked = hq.nsmallest(n, ((-area, key) for key, area in area_by_rect.items()))
    return [key for _, key in ranked]
def overlapRect(rects):
    """Compute the region shared by every rectangle in ``rects``.

    Args:
        rects: Iterable of ``(x, y, w, h)`` rectangles.

    Returns:
        The common intersection as ``(x, y, w, h)``, or ``(-1, -1, -1, -1)``
        when the rectangles do not all overlap (or ``rects`` is empty).
    """
    # -1 marks "no rectangle seen yet"; the first rectangle seeds the walls.
    left = right = top = bottom = -1
    for (x, y, w, h) in rects:
        if left == -1:
            left, top = x, y
            right, bottom = x + w, y + h
        else:
            # Shrink the running intersection to what this rectangle also covers.
            left, top = max(left, x), max(top, y)
            right, bottom = min(right, x + w), min(bottom, y + h)

    has_area = top < bottom and left < right
    if not has_area:
        return (-1, -1, -1, -1)
    return (left, top, right - left, bottom - top)
def selectiveSearchSegmentationImp(image):
    """Run OpenCV selective search in fast mode on image and return its output."""
    searcher = cv2.ximgproc.segmentation.createSelectiveSearchSegmentation()
    searcher.setBaseImage(image)
    searcher.switchToSelectiveSearchFast()
    proposals = searcher.process()
    return proposals
## ------------------------------specific to houghline crop------------------------------
def lineAngle(line):
    """Return the angle of a segment (x1, y1, x2, y2) in radians, in [-pi/2, pi/2).

    The direction from atan2 is folded modulo pi, so the order of the two
    endpoints does not matter, then shifted down by pi/2. A segment along the
    y axis therefore maps to 0 and one along the x axis maps to -pi/2.
    """
    delta_y = line[3] - line[1]
    delta_x = line[2] - line[0]
    folded = math.atan2(delta_y, delta_x) % np.pi
    return folded - (np.pi / 2)
def WithinXDegrees(lines, margin):
    """Keep the segments whose lineAngle() is within margin degrees of zero.

    Parameters:
        lines: iterable whose entries hold an (x1, y1, x2, y2) segment at
            index 0, or None. Presumably the output of cv2.HoughLinesP,
            which can be None when nothing is found -- confirm against callers.
        margin: largest accepted absolute angle, in degrees.

    Returns:
        float64 array of shape (k, 4) holding the accepted segments;
        shape (0, 4) when lines is None, empty, or nothing qualifies.
    """
    if lines is None:
        return np.empty((0, 4))

    # Collect in a list and convert once; appending to an ndarray inside the
    # loop would copy the whole array on every accepted line.
    kept = [
        line[0]
        for line in lines
        if np.rad2deg(abs(lineAngle(line[0]))) <= margin
    ]
    if not kept:
        return np.empty((0, 4))
    return np.asarray(kept, dtype=np.float64)
def lineBoundingRect(lines):
    """Return (min_x, min_y, max_x, max_y) enclosing every segment in lines.

    lines is an array with one (x1, y1, x2, y2) segment per row. Columns 0 and
    2 are treated as x coordinates, columns 1 and 3 as y coordinates.
    """
    column_min = lines.min(0)
    column_max = lines.max(0)
    x_low = min(column_min[0], column_min[2])
    y_low = min(column_min[1], column_min[3])
    x_high = max(column_max[0], column_max[2])
    y_high = max(column_max[1], column_max[3])
    return (x_low, y_low, x_high, y_high)
def premorphCrop(image):
    """Crop image using morphologyCrop with its special mode switched on."""
    cropped = morphologyCrop(image, special=True)
    return cropped
## ------------------------------specific to row summation deskewing------------------------------
def sum_rows(img):
    """Sum each row of a 2-D image and rescale so the largest sum becomes 255.

    Parameters:
        img: 2-D numpy array (single-channel image).

    Returns:
        1-D float array of length img.shape[0] - 1; entry r equals
        255 * sum(img[r]) / (largest row sum).

    Raises:
        ValueError: if img has fewer than two rows (nothing to normalise).
    """
    # The final row is left out so the output keeps the length callers have
    # always received.
    # NOTE(review): this looks like an off-by-one -- confirm before including
    # the last row, since that changes the returned length.
    #
    # ndarray.sum widens small integer dtypes before accumulating, so uint8
    # rows cannot wrap around, and it avoids a per-pixel Python loop.
    row_totals = img[:-1].sum(axis=1)

    # Normalize range to (0, 255). An all-zero image divides by zero here and
    # produces NaN entries (with a RuntimeWarning) rather than raising.
    return (row_totals / row_totals.max()) * 255
## ------------------------------active functions------------------------------
## ------------------------------cropping------------------------------
def morphologyCrop(image, special=False):
    """Crop image to the bounding box of its largest bright region.

    The image is converted to grayscale, thresholded at 170 and cleaned up
    with morphological operations; the external contour with the largest
    area then defines the crop.

    Parameters:
        image: 3-channel image (it is passed to cv2.COLOR_BGR2GRAY).
        special: when True, use the erode -> close -> erode sequence and
            additionally mask the image with the (3x3-blurred) filled contour
            before cropping. When False, use close -> erode and crop the
            unmasked image.

    Returns:
        The cropped image. If no contour with a positive area is found, an
        unmodified copy of image is returned instead of raising.
    """
    # convert to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # threshold
    thresh = cv2.threshold(gray, 170, 255, cv2.THRESH_BINARY)[1]

    # apply morphology
    if special:
        morph = cv2.morphologyEx(thresh, cv2.MORPH_ERODE, np.ones((9, 9), np.uint8))
        morph = cv2.morphologyEx(morph, cv2.MORPH_CLOSE, np.ones((9, 9), np.uint8))
        morph = cv2.morphologyEx(morph, cv2.MORPH_ERODE, np.ones((2, 2), np.uint8))
    else:
        morph = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, np.ones((7, 7), np.uint8))
        morph = cv2.morphologyEx(morph, cv2.MORPH_ERODE, np.ones((9, 9), np.uint8))

    # get largest contour; findContours returns 2 or 3 values depending on
    # the OpenCV version, the contour list being first or second respectively
    contours = cv2.findContours(morph, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    contours = contours[0] if len(contours) == 2 else contours[1]
    big_contour = max(contours, key=cv2.contourArea, default=None)
    if big_contour is None or cv2.contourArea(big_contour) <= 0:
        # Nothing usable was detected; fall back to the uncropped input.
        return image.copy()

    # get bounding box
    x, y, w, h = cv2.boundingRect(big_contour)

    if not special:
        # crop result (no masking in the default mode)
        return image[y:y+h, x:x+w].copy()

    # draw filled contour on black background
    mask = np.zeros_like(gray)
    mask = cv2.merge([mask, mask, mask])
    cv2.drawContours(mask, [big_contour], -1, (255, 255, 255), cv2.FILLED)

    # apply mask to input
    mask = cv2.blur(mask, (3, 3))
    masked = cv2.bitwise_and(image, mask)

    # crop result
    return masked[y:y+h, x:x+w]
##### ------------------------------TEST CODE FOR SELECTIVESEARCHCROP------------------------------
# ## Test this code for the masking/colour squishing. it essentially can just speed up clipping the edges.
# #!/usr/local/bin/python3
# import cv2 as cv
# import numpy as np
# # Load the aerial image and convert to HSV colourspace
# image = cv.imread("aerial.png")
# hsv=cv.cvtColor(image,cv.COLOR_BGR2HSV)
# # Define lower and upper limits of what we call "brown"
# brown_lo=np.array([10,0,0])
# brown_hi=np.array([20,255,255])
# # Mask image to only select browns
# mask=cv.inRange(hsv,brown_lo,brown_hi)
# # Change image to red where we found brown
# image[mask>0]=(0,0,255)
# cv.imwrite("result.png",image)
#CAN ALSO TRY USING NUMPY VECTORIZATION
#------------------------------------------------------------------------------------------
def selectiveSearchCrop(image):
    """Crop image to the region shared by its 20 largest selective-search proposals.

    Selective search runs on a blurred copy of the image resized to a width
    of 300. The rectangle found there is scaled back to the coordinates of
    the original image before cropping.

    Parameters:
        image: 3-dimensional image array (rows, columns, channels).

    Returns:
        The cropped view of image, or image itself when the proposals have no
        common overlap.
    """
    # With retscale=True the helper also returns the factor that maps the
    # resized coordinates back onto the original image.
    small, scale = ResizeWithAspectRatio(image, 300, retscale=True)
    proposals = selectiveSearchSegmentationImp(cv2.GaussianBlur(small, (15, 15), 0))
    overlap = overlapRect(biggestRects(20, proposals))
    if overlap[0] == -1:
        # overlapRect signals "no common region" with a tuple of -1s.
        return image

    x, y, w, h = (int(value * scale) for value in overlap)
    # Rectangles are (x, y, w, h) but arrays are indexed [row, column], so y
    # selects the rows and x selects the columns.
    return image[y:y + h, x:x + w, :]
def cannyEdgeCrop(image, lower = 100, upper = 255, threshold1 = 50, threshold2 = 350):
    """Return the Canny edge map of a blurred grayscale version of image.

    Parameters:
        image: 3-channel image (it is passed to cv2.COLOR_BGR2GRAY).
        lower, upper: accepted for backward compatibility but currently
            unused; the intensity rescaling they were meant to control is
            not applied.
        threshold1, threshold2: thresholds forwarded to cv2.Canny.

    Returns:
        The edge image produced by cv2.Canny. Despite the function name, no
        cropping is performed.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (15, 15), 0)
    return cv2.Canny(blurred, threshold1, threshold2)
## ------------------------------deskewing------------------------------
def rowsumdeskew(image):
    """Deskew image by brute-force search over rotation angles.

    An inverted, padded, thresholded copy resized to a height of 250 is
    rotated from 0 to 360 degrees in 0.75 degree steps. For each angle the
    rows of a central region are summed with sum_rows(), and the angle with
    the fewest non-zero row sums wins; on ties the largest such angle is
    kept. The original image is then rotated by that angle.

    Parameters:
        image: 3-channel image (it is passed to cv2.COLOR_BGR2GRAY).

    Returns:
        image rotated by the selected angle.
    """
    angle_step = 0.75
    step_count = int(360 / angle_step) + 1  # covers 0 and 360 inclusive

    src = 255 - cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    src = SquarePad(fill=255)(src)
    src = cv2.threshold(src, 70, 255, cv2.THRESH_BINARY)[1]
    src = ResizeWithAspectRatio(src, height=250)

    best_angle = 0
    best_score = None
    for step in range(step_count):
        # Multiplying an integer step avoids accumulating float error.
        angle = step * angle_step

        # Rotate the source image
        img = rotate(src, angle)

        # Crop a centred square of half-width `buffer` (about a third of the
        # shorter side), i.e. roughly the central two-thirds of the image.
        h, w = img.shape
        buffer = min(h, w) - int(min(h, w) / 1.5)
        roi = img[int(h/2-buffer):int(h/2+buffer), int(w/2-buffer):int(w/2+buffer)]

        # Fewer non-zero row sums means more completely empty rows.
        score = np.count_nonzero(sum_rows(roi))

        # `<=` keeps the latest angle among equal scores.
        if best_score is None or score <= best_score:
            best_score = score
            best_angle = angle

    return rotate(image, best_angle)
def externaldeskew(image):
    """Deskew image using the angle reported by determine_skew().

    The angle is estimated on a grayscale copy that is padded with SquarePad
    and resized to a height of 300; the original image is what gets rotated.
    """
    probe = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    probe = ResizeWithAspectRatio(SquarePad(fill=255)(probe), height=300)
    skew_angle = determine_skew(probe)
    return rotate(image, skew_angle)
def houghlinedeskew(image):
    """Deskew image using the most common Hough-line angle.

    Lines are detected on a padded, morphology-cropped, thresholded copy.
    Each line's angle is computed with lineAngle() and rounded to two decimal
    places (radians); the most frequent rounded value (the smallest one on
    ties), converted to degrees, is used to rotate the original image.

    Parameters:
        image: 3-channel image (it is passed to cv2.COLOR_BGR2GRAY after
            cropping).

    Returns:
        image rotated by the selected angle, or image unchanged when no
        Hough lines are detected.
    """
    img = SquarePad(fill=255)(image)
    img = premorphCrop(img)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    gray = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)[1]
    edges = cv2.Canny(gray, 50, 200, None, 3)
    lines = cv2.HoughLines(edges, 1, np.pi/180, 150, None, 0, 0)
    if lines is None:
        # No lines were detected, so there is nothing to estimate an angle from.
        return image

    angles = np.zeros(len(lines))
    for i in range(len(lines)):
        rho = lines[i][0][0]
        theta = lines[i][0][1]
        a = math.cos(theta)
        b = math.sin(theta)
        # (x0, y0) is the point at offset rho along the direction (a, b);
        # stepping from it along (-b, a) gives two points on the line.
        x0 = a * rho
        y0 = b * rho
        pt1 = (x0 + 1000*(-b), y0 + 1000*(a))
        pt2 = (x0 - 1000*(-b), y0 - 1000*(a))
        angles[i] = lineAngle((pt1[0], pt1[1], pt2[0], pt2[1]))

    # Mode of the rounded angles. np.unique returns sorted values, and argmax
    # picks the first maximum, so ties resolve to the smallest angle.
    values, counts = np.unique(np.around(angles, decimals=2), return_counts=True)
    mode = values[np.argmax(counts)]
    rotationangle = float(np.rad2deg(mode))
    return rotate(image, rotationangle)

View File

@ -1,5 +1,14 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"## ORIGINAL FILE FOR SELECTIVE SEGMENTATION SEARCH"
]
},
{
"cell_type": "code",
"execution_count": 350,
@ -9,6 +18,7 @@
"import cv2\n",
"import numpy as np\n",
"from queue import PriorityQueue\n",
"import myfunctions as mf\n",
"\n",
"import matplotlib.pyplot as plt\n",
"import random"
@ -20,20 +30,20 @@
"metadata": {},
"outputs": [],
"source": [
"def ResizeWithAspectRatio(image, width=None, height=None, inter=cv2.INTER_AREA):\n",
" dim = None\n",
" (h, w) = image.shape[:2]\n",
"# def ResizeWithAspectRatio(image, width=None, height=None, inter=cv2.INTER_AREA):\n",
"# dim = None\n",
"# (h, w) = image.shape[:2]\n",
"\n",
" if width is None and height is None:\n",
" return image\n",
" if width is None:\n",
" r = height / float(h)\n",
" dim = (int(w * r), height)\n",
" else:\n",
" r = width / float(w)\n",
" dim = (width, int(h * r))\n",
"# if width is None and height is None:\n",
"# return image\n",
"# if width is None:\n",
"# r = height / float(h)\n",
"# dim = (int(w * r), height)\n",
"# else:\n",
"# r = width / float(w)\n",
"# dim = (width, int(h * r))\n",
"\n",
" return cv2.resize(image, dim, interpolation=inter)"
"# return cv2.resize(image, dim, interpolation=inter)"
]
},
{
@ -69,13 +79,13 @@
"metadata": {},
"outputs": [],
"source": [
"def clip(n, lower, upper):\n",
" return max(lower, min(n, upper))\n",
"# def clip(n, lower, upper):\n",
"# return max(lower, min(n, upper))\n",
"\n",
"def colourscaler(n, min, max):\n",
" temp = n-min\n",
" diff = abs(max - min)\n",
" return clip((temp/diff)*255, 0, 255)"
"# def colourscaler(n, min, max):\n",
"# temp = n-min\n",
"# diff = abs(max - min)\n",
"# return clip((temp/diff)*255, 0, 255)"
]
},
{
@ -131,11 +141,11 @@
"metadata": {},
"outputs": [],
"source": [
"def rotate(img, angle):\n",
" rows,cols = img.shape[0], img.shape[1]\n",
" M = cv2.getRotationMatrix2D((cols/2,rows/2),angle,1)\n",
" dst = cv2.warpAffine(img,M,(cols,rows))\n",
" return dst"
"# def rotate(img, angle):\n",
"# rows,cols = img.shape[0], img.shape[1]\n",
"# M = cv2.getRotationMatrix2D((cols/2,rows/2),angle,1)\n",
"# dst = cv2.warpAffine(img,M,(cols,rows))\n",
"# return dst"
]
},
{
@ -194,49 +204,49 @@
"metadata": {},
"outputs": [],
"source": [
"def rectArea(rect):\n",
" # print(rect)\n",
" return rect[2]*rect[3]\n",
"# def rectArea(rect):\n",
"# # print(rect)\n",
"# return rect[2]*rect[3]\n",
"\n",
"def biggestRects(n, rects):\n",
" dict = {}\n",
" # outrects = np.zeros(shape=(n, 4))\n",
" for rect in rects:\n",
" dict[tuple(rect)] = rectArea(rect)\n",
" # maxh.heappush(rectArea(rect))\n",
" # print(maxh[0])\n",
"# def biggestRects(n, rects):\n",
"# dict = {}\n",
"# # outrects = np.zeros(shape=(n, 4))\n",
"# for rect in rects:\n",
"# dict[tuple(rect)] = mf.rectArea(rect)\n",
"# # maxh.heappush(mf.rectArea(rect))\n",
"# # print(maxh[0])\n",
" \n",
" \n",
" heap = [(-value, key) for key,value in dict.items()]\n",
" largest = hq.nsmallest(n, heap)\n",
"# heap = [(-value, key) for key,value in dict.items()]\n",
"# largest = hq.nsmallest(n, heap)\n",
" \n",
"\n",
" # hq.heapify(list(dict.items()))\n",
" # for i in range(0,n):\n",
" # outrects[i] = maxh.heappop()\n",
" # print(outrects)\n",
" return [key for value, key in largest]\n",
"# # hq.heapify(list(dict.items()))\n",
"# # for i in range(0,n):\n",
"# # outrects[i] = maxh.heappop()\n",
"# # print(outrects)\n",
"# return [key for value, key in largest]\n",
"\n",
"def overlapRect(rects):\n",
" leftwall = -1\n",
" rightwall = -1\n",
" topwall = -1\n",
" bottomwall = -1\n",
" for (x, y, w, h) in rects:\n",
" if (leftwall == -1):\n",
" leftwall = x\n",
" rightwall = x + w\n",
" topwall = y\n",
" bottomwall = y + h\n",
" continue\n",
" leftwall = max(leftwall, x)\n",
" rightwall = min(rightwall, x+w)\n",
" topwall = max(topwall, y)\n",
" bottomwall = min(bottomwall, y+h)\n",
"# def overlapRect(rects):\n",
"# leftwall = -1\n",
"# rightwall = -1\n",
"# topwall = -1\n",
"# bottomwall = -1\n",
"# for (x, y, w, h) in rects:\n",
"# if (leftwall == -1):\n",
"# leftwall = x\n",
"# rightwall = x + w\n",
"# topwall = y\n",
"# bottomwall = y + h\n",
"# continue\n",
"# leftwall = max(leftwall, x)\n",
"# rightwall = min(rightwall, x+w)\n",
"# topwall = max(topwall, y)\n",
"# bottomwall = min(bottomwall, y+h)\n",
" \n",
" if (topwall >= bottomwall or leftwall >= rightwall):\n",
" return (-1, -1, -1, -1)\n",
" return (leftwall, topwall, rightwall-leftwall, bottomwall-topwall)"
"# if (topwall >= bottomwall or leftwall >= rightwall):\n",
"# return (-1, -1, -1, -1)\n",
"# return (leftwall, topwall, rightwall-leftwall, bottomwall-topwall)"
]
},
{
@ -258,11 +268,11 @@
"# _, thresholded = cv2.threshold(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY), 200, 255, cv2.THRESH_BINARY)\n",
"\n",
"rects = selectiveSearchSegmentationImp(cv2.GaussianBlur(ResizeWithAspectRatio(img,300), (15,15),0))\n",
"# rectArea(rects[0])\n",
"bigRects = biggestRects(20, rects)\n",
"# mf.rectArea(rects[0])\n",
"bigRects = mf.biggestRects(20, rects)\n",
"# print(bigRects)\n",
"\n",
"finalrect = overlapRect(bigRects)\n",
"finalrect = mf.overlapRect(bigRects)\n",
"print(finalrect)\n",
"output = ResizeWithAspectRatio(img,300)\n",
"for (x, y, w, h) in [finalrect]:\n",

View File

@ -1,5 +1,14 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# can probably be deleted or put somewhere. Was the original code for the rowsumdeskew"
]
},
{
"cell_type": "code",
"execution_count": 36,

View File

@ -1,5 +1,14 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# ORIGINAL DOCUMENT FOR MORPHOLOGY CROP can maybe be deleted"
]
},
{
"cell_type": "code",
"execution_count": 27,
@ -19,6 +28,7 @@
"import torchvision.models as models\n",
"import torchvision.transforms as t\n",
"\n",
"import myfunctions as mf\n",
"\n",
"from PIL import Image"
]
@ -39,20 +49,20 @@
"metadata": {},
"outputs": [],
"source": [
"def ResizeWithAspectRatio(image, width=None, height=None, inter=cv2.INTER_AREA):\n",
" dim = None\n",
" (h, w) = image.shape[:2]\n",
"# def ResizeWithAspectRatio(image, width=None, height=None, inter=cv2.INTER_AREA):\n",
"# dim = None\n",
"# (h, w) = image.shape[:2]\n",
"\n",
" if width is None and height is None:\n",
" return image\n",
" if width is None:\n",
" r = height / float(h)\n",
" dim = (int(w * r), height)\n",
" else:\n",
" r = width / float(w)\n",
" dim = (width, int(h * r))\n",
"# if width is None and height is None:\n",
"# return image\n",
"# if width is None:\n",
"# r = height / float(h)\n",
"# dim = (int(w * r), height)\n",
"# else:\n",
"# r = width / float(w)\n",
"# dim = (width, int(h * r))\n",
"\n",
" return cv2.resize(image, dim, interpolation=inter)"
"# return cv2.resize(image, dim, interpolation=inter)"
]
},
{
@ -112,7 +122,7 @@
"# cv2.imshow(\"morph\", morph)\n",
"# cv2.imshow(\"mask\", mask)\n",
"# cv2.imshow(\"result1\", result1)\n",
"resizedresult2 = ResizeWithAspectRatio(result2, 1000)\n",
"resizedresult2 = mf.ResizeWithAspectRatio(result2, 1000)\n",
"cv2.imwrite(\"./testing_space/cropped1.jpg\", resizedresult2)\n",
"cv2.imshow(\"result2\", resizedresult2)\n",
"cv2.waitKey(0)\n",
@ -287,7 +297,7 @@
"rotation = model(adjustedtensorizedimage).item()\n",
"print(rotation)\n",
"rotatedimage = t.Resize(size=1000)(tvf.rotate(adjustedtensorizedimage, rotation))\n",
"# imS = ResizeWithAspectRatio(filereadimage, 1000)\n",
"# imS = mf.ResizeWithAspectRatio(filereadimage, 1000)\n",
"# imS = cv2.resize(filereadimage, (960, 540)) \n",
"open_cv_image = np.array(t.ToPILImage()(rotatedimage))\n",
"cv2.imshow(f'image', open_cv_image)\n",

127
code/autocropper/temp.ipynb Normal file
View File

@ -0,0 +1,127 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 203,
"metadata": {},
"outputs": [],
"source": [
"import cv2\n",
"import numpy as np\n",
"\n",
"import myfunctions as mf"
]
},
{
"cell_type": "code",
"execution_count": 204,
"metadata": {},
"outputs": [],
"source": [
"# read image as grayscale\n",
"img = cv2.imread('./test_images/IMG_7605.jpg')\n",
"img = mf.ResizeWithAspectRatio(mf.SquarePad(fill=255)(img),1000)\n",
"# img = mf.rotate(img, 54)"
]
},
{
"cell_type": "code",
"execution_count": 205,
"metadata": {},
"outputs": [],
"source": [
"# cv2.imshow(\"Detected Lines (in red) - Standard Hough Line Transform\", img)\n",
"# cv2.waitKey(0)\n",
"# cv2.destroyAllWindows()"
]
},
{
"cell_type": "code",
"execution_count": 206,
"metadata": {},
"outputs": [],
"source": [
"# convert to grayscale\n",
"gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)\n",
"\n",
"# threshold\n",
"thresh = cv2.threshold(gray, 170, 255, cv2.THRESH_BINARY)[1]\\\n",
"\n",
"# apply morphology\n",
"kernel = np.ones((9,9), np.uint8)\n",
"morph = cv2.morphologyEx(thresh, cv2.MORPH_ERODE, kernel)\n",
"# morph = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)\n",
"kernel = np.ones((11,11), np.uint8)\n",
"morph = cv2.morphologyEx(morph, cv2.MORPH_CLOSE, kernel)\n",
"kernel = np.ones((2,2), np.uint8)\n",
"morph = cv2.morphologyEx(morph, cv2.MORPH_ERODE, kernel)\n",
"\n",
"# get largest contour\n",
"contours = cv2.findContours(morph, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)\n",
"contours = contours[0] if len(contours) == 2 else contours[1]\n",
"area_thresh = 0\n",
"for c in contours:\n",
" area = cv2.contourArea(c)\n",
" if area > area_thresh:\n",
" area_thresh = area\n",
" big_contour = c\n",
"\n",
"\n",
"# get bounding box\n",
"x,y,w,h = cv2.boundingRect(big_contour)\n",
"\n",
"# draw filled contour on black background\n",
"mask = np.zeros_like(gray)\n",
"mask = cv2.merge([mask,mask,mask])\n",
"# mask = cv2.blur(mask,(121,121))\n",
"cv2.drawContours(mask, [big_contour], -1, (255,255,255), cv2.FILLED)\n",
"\n",
"# apply mask to input\n",
"result1 = img.copy()\n",
"mask = cv2.blur(mask,(5,5))\n",
"result1 = cv2.bitwise_and(result1, mask)\n",
"\n",
"# crop result\n",
"result2 = result1[y:y+h, x:x+w]\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 207,
"metadata": {},
"outputs": [],
"source": [
"# view result\n",
"# cv2.imshow(\"threshold\", thresh)\n",
"# cv2.imshow(\"morph\", morph)\n",
"# cv2.imshow(\"mask\", mask)\n",
"cv2.imshow(\"result1\", result1)\n",
"# cv2.imshow(\"result2\", result2)\n",
"cv2.waitKey(0)\n",
"cv2.destroyAllWindows()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
@ -13,9 +13,20 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 2,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.10/dist-packages/torchvision/datapoints/__init__.py:12: UserWarning: The torchvision.datapoints and torchvision.transforms.v2 namespaces are still Beta. While we do not expect major breaking changes, some APIs may still change according to user feedback. Please submit any feedback you may have in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check out https://github.com/pytorch/vision/issues/7319 to learn more about the APIs that we suspect might involve future changes. You can silence this warning by calling torchvision.disable_beta_transforms_warning().\n",
" warnings.warn(_BETA_TRANSFORMS_WARNING)\n",
"/usr/local/lib/python3.10/dist-packages/torchvision/transforms/v2/__init__.py:54: UserWarning: The torchvision.datapoints and torchvision.transforms.v2 namespaces are still Beta. While we do not expect major breaking changes, some APIs may still change according to user feedback. Please submit any feedback you may have in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check out https://github.com/pytorch/vision/issues/7319 to learn more about the APIs that we suspect might involve future changes. You can silence this warning by calling torchvision.disable_beta_transforms_warning().\n",
" warnings.warn(_BETA_TRANSFORMS_WARNING)\n"
]
}
],
"source": [
"import torch\n",
"from torch.utils.data import DataLoader\n",
@ -36,15 +47,17 @@
"\n",
"import matplotlib.pyplot as plt\n",
"\n",
"import numpy as np\n",
"\n",
"import cv2\n",
"import numpy as np\n",
"import myfunctions as mf\n",
"\n",
"torch.cuda.empty_cache()\n"
]
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
@ -54,13 +67,56 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# print(array)\n",
"# print(counter)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"img = cv2.imread('./test_images/IMG_7605.jpg')\n",
"img = mf.ResizeWithAspectRatio(img, 1000)\n",
"# img = mf.ResizeWithAspectRatio(mf.SquarePad(fill=255)(img),1000)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"rotatedimg = mf.houghlinedeskew(img)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# out = mf.morphologyCrop(img)\n",
"# out = cv2.cvtColor(out, cv2.COLOR_BGR2GRAY)\n",
"# out = cv2.threshold(out, 200, 255, cv2.THRESH_BINARY)[1]"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"cv2.imshow(\"result1\", rotatedimg)\n",
"# cv2.imshow(\"result2\", result2)\n",
"cv2.waitKey(0)\n",
"cv2.destroyAllWindows()"
]
}
],
"metadata": {