First complete implementation of hough line deskewing
Now to work on hough line cropping. Signed-off-by: Ethan Wellenreiter <ewellenreiter@gmail.com>
This commit is contained in:
parent
a62f628cc1
commit
b2f3e89014
@ -44,7 +44,8 @@
|
||||
"\n",
|
||||
"\n",
|
||||
"import os\n",
|
||||
"import cv2"
|
||||
"import cv2\n",
|
||||
"import myfunctions as mf"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -73,20 +74,20 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def ResizeWithAspectRatio(image, width=None, height=None, inter=cv2.INTER_AREA):\n",
|
||||
" dim = None\n",
|
||||
" (h, w) = image.shape[:2]\n",
|
||||
"# def ResizeWithAspectRatio(image, width=None, height=None, inter=cv2.INTER_AREA):\n",
|
||||
"# dim = None\n",
|
||||
"# (h, w) = image.shape[:2]\n",
|
||||
"\n",
|
||||
" if width is None and height is None:\n",
|
||||
" return image\n",
|
||||
" if width is None:\n",
|
||||
" r = height / float(h)\n",
|
||||
" dim = (int(w * r), height)\n",
|
||||
" else:\n",
|
||||
" r = width / float(w)\n",
|
||||
" dim = (width, int(h * r))\n",
|
||||
"# if width is None and height is None:\n",
|
||||
"# return image\n",
|
||||
"# if width is None:\n",
|
||||
"# r = height / float(h)\n",
|
||||
"# dim = (int(w * r), height)\n",
|
||||
"# else:\n",
|
||||
"# r = width / float(w)\n",
|
||||
"# dim = (width, int(h * r))\n",
|
||||
"\n",
|
||||
" return cv2.resize(image, dim, interpolation=inter)"
|
||||
"# return cv2.resize(image, dim, interpolation=inter)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -171,7 +172,7 @@
|
||||
" activeimage = active_dataset[index]['image']\n",
|
||||
" # img = cv2.imread(active_dataset[index]['image'], 0)\n",
|
||||
" open_cv_image = np.array(t.ToPILImage()(activeimage))\n",
|
||||
" open_cv_image = ResizeWithAspectRatio(open_cv_image, 1000)\n",
|
||||
" open_cv_image = mf.ResizeWithAspectRatio(open_cv_image, 1000)\n",
|
||||
" # cv2.namedWindow(\"image\", cv2.WINDOW_NORMAL)\n",
|
||||
" # cv2.setWindowProperty(\"image\", cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)\n",
|
||||
" cv2.imshow(\"image\", open_cv_image)\n",
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 335,
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -13,115 +13,125 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 336,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#initially for deskewing and cropping. moving to a doc for just cropping now that deskewing"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import cv2\n",
|
||||
"import numpy as np\n",
|
||||
"import math\n",
|
||||
"import myfunctions as mf\n",
|
||||
"\n",
|
||||
"import scipy.stats as st"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 337,
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def ResizeWithAspectRatio(image, width=None, height=None, inter=cv2.INTER_AREA, retscale=False):\n",
|
||||
" dim = None\n",
|
||||
" (h, w) = image.shape[:2]\n",
|
||||
"# def ResizeWithAspectRatio(image, width=None, height=None, inter=cv2.INTER_AREA, retscale=False):\n",
|
||||
"# dim = None\n",
|
||||
"# (h, w) = image.shape[:2]\n",
|
||||
"\n",
|
||||
" if width is None and height is None:\n",
|
||||
" if (retscale == True):\n",
|
||||
" return (image, 1)\n",
|
||||
" return image\n",
|
||||
" if width is None:\n",
|
||||
" r = height / float(h)\n",
|
||||
" dim = (int(w * r), height)\n",
|
||||
" else:\n",
|
||||
" r = width / float(w)\n",
|
||||
" dim = (width, int(h * r))\n",
|
||||
"# if width is None and height is None:\n",
|
||||
"# if (retscale == True):\n",
|
||||
"# return (image, 1)\n",
|
||||
"# return image\n",
|
||||
"# if width is None:\n",
|
||||
"# r = height / float(h)\n",
|
||||
"# dim = (int(w * r), height)\n",
|
||||
"# else:\n",
|
||||
"# r = width / float(w)\n",
|
||||
"# dim = (width, int(h * r))\n",
|
||||
"\n",
|
||||
" if (retscale == True):\n",
|
||||
" # print(\"hi\")\n",
|
||||
" return (cv2.resize(image, dim, interpolation=inter), 1/r)\n",
|
||||
" return cv2.resize(image, dim, interpolation=inter)\n",
|
||||
"# if (retscale == True):\n",
|
||||
"# # print(\"hi\")\n",
|
||||
"# return (cv2.resize(image, dim, interpolation=inter), 1/r)\n",
|
||||
"# return cv2.resize(image, dim, interpolation=inter)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class SquarePad:\n",
|
||||
" def __init__(self, fill):\n",
|
||||
" self.fill = fill\n",
|
||||
"# class SquarePad:\n",
|
||||
"# def __init__(self, fill):\n",
|
||||
"# self.fill = fill\n",
|
||||
" \n",
|
||||
" def __call__(self, image):\n",
|
||||
" w, h = image.shape[1], image.shape[0]\n",
|
||||
" max_wh = np.max([w, h])\n",
|
||||
" hp = int((max_wh - w) / 2)\n",
|
||||
" vp = int((max_wh - h) / 2)\n",
|
||||
" padding = (hp, vp, hp, vp)\n",
|
||||
" return cv2.copyMakeBorder(image, vp, vp, hp, hp, cv2.BORDER_CONSTANT, self.fill)\n",
|
||||
"# def __call__(self, image):\n",
|
||||
"# w, h = image.shape[1], image.shape[0]\n",
|
||||
"# max_wh = np.max([w, h])\n",
|
||||
"# hp = int((max_wh - w) / 2)\n",
|
||||
"# vp = int((max_wh - h) / 2)\n",
|
||||
"# padding = (hp, vp, hp, vp)\n",
|
||||
"# return cv2.copyMakeBorder(image, vp, vp, hp, hp, cv2.BORDER_CONSTANT, self.fill)\n",
|
||||
" \n",
|
||||
" \n",
|
||||
" \n",
|
||||
"def rotate(img, angle):\n",
|
||||
" rows,cols = img.shape[0], img.shape[1]\n",
|
||||
" M = cv2.getRotationMatrix2D((cols/2,rows/2),angle,1)\n",
|
||||
" dst = cv2.warpAffine(img,M,(cols,rows))\n",
|
||||
" return dst"
|
||||
"# def rotate(img, angle):\n",
|
||||
"# rows,cols = img.shape[0], img.shape[1]\n",
|
||||
"# M = cv2.getRotationMatrix2D((cols/2,rows/2),angle,1)\n",
|
||||
"# dst = cv2.warpAffine(img,M,(cols,rows))\n",
|
||||
"# return dst"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 338,
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def morphologyCrop(image):\n",
|
||||
" # convert to grayscale\n",
|
||||
" gray = cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)\n",
|
||||
"# def morphologyCrop(image):\n",
|
||||
"# # convert to grayscale\n",
|
||||
"# gray = cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)\n",
|
||||
"\n",
|
||||
" # threshold\n",
|
||||
" thresh = cv2.threshold(gray, 170, 255, cv2.THRESH_BINARY)[1]\n",
|
||||
"# # threshold\n",
|
||||
"# thresh = cv2.threshold(gray, 170, 255, cv2.THRESH_BINARY)[1]\n",
|
||||
"\n",
|
||||
" # apply morphology\n",
|
||||
" kernel = np.ones((7,7), np.uint8)\n",
|
||||
" morph = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)\n",
|
||||
" kernel = np.ones((9,9), np.uint8)\n",
|
||||
" morph = cv2.morphologyEx(morph, cv2.MORPH_ERODE, kernel)\n",
|
||||
"# # apply morphology\n",
|
||||
"# kernel = np.ones((7,7), np.uint8)\n",
|
||||
"# morph = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)\n",
|
||||
"# kernel = np.ones((9,9), np.uint8)\n",
|
||||
"# morph = cv2.morphologyEx(morph, cv2.MORPH_ERODE, kernel)\n",
|
||||
"\n",
|
||||
" # get largest contour\n",
|
||||
" contours = cv2.findContours(morph, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)\n",
|
||||
" contours = contours[0] if len(contours) == 2 else contours[1]\n",
|
||||
" area_thresh = 0\n",
|
||||
" for c in contours:\n",
|
||||
" area = cv2.contourArea(c)\n",
|
||||
" if area > area_thresh:\n",
|
||||
" area_thresh = area\n",
|
||||
" big_contour = c\n",
|
||||
"# # get largest contour\n",
|
||||
"# contours = cv2.findContours(morph, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)\n",
|
||||
"# contours = contours[0] if len(contours) == 2 else contours[1]\n",
|
||||
"# area_thresh = 0\n",
|
||||
"# for c in contours:\n",
|
||||
"# area = cv2.contourArea(c)\n",
|
||||
"# if area > area_thresh:\n",
|
||||
"# area_thresh = area\n",
|
||||
"# big_contour = c\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" # get bounding box\n",
|
||||
" x,y,w,h = cv2.boundingRect(big_contour)\n",
|
||||
"# # get bounding box\n",
|
||||
"# x,y,w,h = cv2.boundingRect(big_contour)\n",
|
||||
"\n",
|
||||
" # draw filled contour on black background\n",
|
||||
" mask = np.zeros_like(gray)\n",
|
||||
" mask = cv2.merge([mask,mask,mask])\n",
|
||||
" cv2.drawContours(mask, [big_contour], -1, (255,255,255), cv2.FILLED)\n",
|
||||
"# # draw filled contour on black background\n",
|
||||
"# mask = np.zeros_like(gray)\n",
|
||||
"# mask = cv2.merge([mask,mask,mask])\n",
|
||||
"# cv2.drawContours(mask, [big_contour], -1, (255,255,255), cv2.FILLED)\n",
|
||||
"\n",
|
||||
" # apply mask to input\n",
|
||||
" result1 = image.copy()\n",
|
||||
" result1 = cv2.bitwise_and(result1, mask)\n",
|
||||
"# # apply mask to input\n",
|
||||
"# result1 = image.copy()\n",
|
||||
"# result1 = cv2.bitwise_and(result1, mask)\n",
|
||||
"\n",
|
||||
" # crop result\n",
|
||||
" result2 = result1[y:y+h, x:x+w]\n",
|
||||
" return result2"
|
||||
"# # crop result\n",
|
||||
"# result2 = result1[y:y+h, x:x+w]\n",
|
||||
"# return result2"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 339,
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -134,71 +144,82 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 340,
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def lineAngle(line):\n",
|
||||
" # print(line)\n",
|
||||
" angle = (math.atan2(line[3] - line[1], line[2] - line[0]) % np.pi) - (np.pi/2)\n",
|
||||
" return angle\n",
|
||||
"# def lineAngle(line):\n",
|
||||
"# # print(line)\n",
|
||||
"# angle = (math.atan2(line[3] - line[1], line[2] - line[0]) % np.pi) - (np.pi/2)\n",
|
||||
"# return angle\n",
|
||||
" \n",
|
||||
"def WithinXDegrees(lines, margin):\n",
|
||||
" # outlines = np.array([[]])\n",
|
||||
" outlines = np.empty((0, 4))\n",
|
||||
" # print(outlines.shape)\n",
|
||||
" for line in lines:\n",
|
||||
" # print(type(line))\n",
|
||||
" # print(abs(lineAngle(line[0])))\n",
|
||||
" if (np.rad2deg(abs(lineAngle(line[0]))) <= margin):\n",
|
||||
" outlines = np.append(outlines, [line[0]], axis=0)\n",
|
||||
" return outlines\n",
|
||||
"# def WithinXDegrees(lines, margin):\n",
|
||||
"# # outlines = np.array([[]])\n",
|
||||
"# outlines = np.empty((0, 4))\n",
|
||||
"# # print(outlines.shape)\n",
|
||||
"# for line in lines:\n",
|
||||
"# # print(type(line))\n",
|
||||
"# # print(abs(lineAngle(line[0])))\n",
|
||||
"# if (np.rad2deg(abs(lineAngle(line[0]))) <= margin):\n",
|
||||
"# outlines = np.append(outlines, [line[0]], axis=0)\n",
|
||||
"# return outlines\n",
|
||||
"\n",
|
||||
"def lineBoundingRect(lines):\n",
|
||||
" maxvals = lines.max(0)\n",
|
||||
" minvals = lines.min(0)\n",
|
||||
" boundingrect = (min(minvals[0],minvals[2]), min(minvals[1],minvals[3]), max(maxvals[0],maxvals[2]),max(maxvals[1],maxvals[3]))\n",
|
||||
" return boundingrect\n",
|
||||
" # print(lines.max(0))\n",
|
||||
" # print(type(lines))"
|
||||
"# def lineBoundingRect(lines):\n",
|
||||
"# maxvals = lines.max(0)\n",
|
||||
"# minvals = lines.min(0)\n",
|
||||
"# boundingrect = (min(minvals[0],minvals[2]), min(minvals[1],minvals[3]), max(maxvals[0],maxvals[2]),max(maxvals[1],maxvals[3]))\n",
|
||||
"# return boundingrect\n",
|
||||
"# # print(lines.max(0))\n",
|
||||
"# # print(type(lines))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 341,
|
||||
"execution_count": 20,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"img = cv2.imread('./test_images/IMG_7605.jpg')\n",
|
||||
"img = SquarePad(fill=255)(img)\n",
|
||||
"img = rotate(img, 54)\n",
|
||||
"img = morphologyCrop(img)"
|
||||
"img = mf.SquarePad(fill=255)(img)\n",
|
||||
"img = mf.rotate(img, 54)\n",
|
||||
"img = mf.morphologyCrop(mf.ResizeWithAspectRatio(img,1000))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 342,
|
||||
"execution_count": 21,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"cv2.imshow(\"Detected Lines (in red) - Standard Hough Line Transform\", ResizeWithAspectRatio(SquarePad(fill=255)(img), 500))\n",
|
||||
"# img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)\n",
|
||||
"# img = cv2.threshold(img, 200, 255, cv2.THRESH_BINARY)[1]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"cv2.imshow(\"Detected Lines (in red) - Standard Hough Line Transform\", mf.ResizeWithAspectRatio(mf.SquarePad(fill=255)(img), 1000))\n",
|
||||
"cv2.waitKey(0)\n",
|
||||
"cv2.destroyAllWindows()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 343,
|
||||
"execution_count": 23,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"resizedimg = ResizeWithAspectRatio(SquarePad(fill=255)(img), 500)\n",
|
||||
"resizedimg = mf.ResizeWithAspectRatio(mf.SquarePad(fill=255)(img), 500)\n",
|
||||
"\n",
|
||||
"# cv2.imshow(\"Detected Lines (in red) - Standard Hough Line Transform\", img)\n",
|
||||
"# cv2.waitKey(0)\n",
|
||||
"# cv2.destroyAllWindows()\n",
|
||||
"\n",
|
||||
"gray = cv2.cvtColor(resizedimg ,cv2.COLOR_BGR2GRAY)\n",
|
||||
"gray = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)[1]\n",
|
||||
"cdst = resizedimg.copy()\n",
|
||||
"\n",
|
||||
"\n",
|
||||
@ -208,7 +229,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 344,
|
||||
"execution_count": 24,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -230,21 +251,21 @@
|
||||
" # print(math.atan2(unroundedpt2[1] - unroundedpt1[1], unroundedpt2[0] - unroundedpt1[0]) % np.pi)\n",
|
||||
" # print(lineAngle((unroundedpt1[0], unroundedpt1[1], unroundedpt2[0], unroundedpt2[1])))\n",
|
||||
" # angles[i] = math.atan2(unroundedpt2[1] - unroundedpt1[1], unroundedpt2[0] - unroundedpt1[0]) % np.pi\n",
|
||||
" angles[i] = lineAngle((unroundedpt1[0], unroundedpt1[1], unroundedpt2[0], unroundedpt2[1]))\n",
|
||||
" angles[i] = mf.lineAngle((unroundedpt1[0], unroundedpt1[1], unroundedpt2[0], unroundedpt2[1]))\n",
|
||||
" cv2.line(cdst, pt1, pt2, (0,0,255), 3, cv2.LINE_AA)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 345,
|
||||
"execution_count": 25,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"-45.26366581533504\n",
|
||||
"-45.26366581533504\n"
|
||||
"-56.7228217179515\n",
|
||||
"-56.7228217179515\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@ -270,18 +291,18 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 346,
|
||||
"execution_count": 26,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# cv2.imshow(\"Detected Lines (in red) - Standard Hough Line Transform\", cdst)\n",
|
||||
"# cv2.waitKey(0)\n",
|
||||
"# cv2.destroyAllWindows()"
|
||||
"cv2.imshow(\"Detected Lines (in red) - Standard Hough Line Transform\", cdst)\n",
|
||||
"cv2.waitKey(0)\n",
|
||||
"cv2.destroyAllWindows()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 347,
|
||||
"execution_count": 27,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -292,16 +313,16 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 348,
|
||||
"execution_count": 28,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"rotatedimg = SquarePad(fill=255)(rotate(img, rotationangle))\n"
|
||||
"rotatedimg = mf.SquarePad(fill=255)(mf.rotate(img, rotationangle))\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 349,
|
||||
"execution_count": 29,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -312,11 +333,11 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 350,
|
||||
"execution_count": 30,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"resizedrotatedimg = ResizeWithAspectRatio(rotatedimg, 500)\n",
|
||||
"resizedrotatedimg = mf.ResizeWithAspectRatio(rotatedimg, 500)\n",
|
||||
"gray1 = cv2.cvtColor(resizedrotatedimg, cv2.COLOR_BGR2GRAY)\n",
|
||||
"dst1 = cv2.Canny(gray1, 0, 500, None, 3)\n",
|
||||
"cdstP = resizedrotatedimg.copy()\n",
|
||||
@ -326,7 +347,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 351,
|
||||
"execution_count": 31,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -338,45 +359,37 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 352,
|
||||
"execution_count": 32,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# cv2.imshow(\"Detected Lines (in red) - Standard Hough Line Transform\", cdstP)\n",
|
||||
"# cv2.waitKey(0)\n",
|
||||
"# cv2.destroyAllWindows()"
|
||||
"cv2.imshow(\"Detected Lines (in red) - Standard Hough Line Transform\", cdstP)\n",
|
||||
"cv2.waitKey(0)\n",
|
||||
"cv2.destroyAllWindows()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 353,
|
||||
"execution_count": 33,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"(274.0, 75.0, 355.0, 458.0)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# print(linesP)\n",
|
||||
"marginlines = WithinXDegrees(linesP, 2)\n",
|
||||
"marginlines = mf.WithinXDegrees(linesP, 2)\n",
|
||||
"# print(marginlines)\n",
|
||||
"# if marginlines is not None:\n",
|
||||
"# for i in range(0, len(marginlines)):\n",
|
||||
"# l = marginlines[i]\n",
|
||||
"# cv2.line(cdstPmargin, (int(l[0]), int(l[1])), (int(l[2]), int(l[3])), (0,0,255), 3, cv2.LINE_AA)\n",
|
||||
"if marginlines is not None:\n",
|
||||
" for i in range(0, len(marginlines)):\n",
|
||||
" l = marginlines[i]\n",
|
||||
" cv2.line(cdstPmargin, (int(l[0]), int(l[1])), (int(l[2]), int(l[3])), (0,0,255), 3, cv2.LINE_AA)\n",
|
||||
" \n",
|
||||
"boundingrectout = lineBoundingRect(marginlines)\n",
|
||||
"print(boundingrectout)\n",
|
||||
"cdstPmargin = cv2.rectangle(cdstPmargin,(int(boundingrectout[0]),int(boundingrectout[1])),(int(boundingrectout[2]),int(boundingrectout[3])),(0,255,0),2)"
|
||||
"# boundingrectout = mf.lineBoundingRect(marginlines)\n",
|
||||
"# # print(boundingrectout)\n",
|
||||
"# cdstPmargin = cv2.rectangle(cdstPmargin,(int(boundingrectout[0]),int(boundingrectout[1])),(int(boundingrectout[2]),int(boundingrectout[3])),(0,255,0),2)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 354,
|
||||
"execution_count": 34,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
||||
@ -197,20 +197,20 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def ResizeWithAspectRatio(image, width=None, height=None, inter=cv2.INTER_AREA):\n",
|
||||
" dim = None\n",
|
||||
" (h, w) = image.shape[:2]\n",
|
||||
"# def ResizeWithAspectRatio(image, width=None, height=None, inter=cv2.INTER_AREA):\n",
|
||||
"# dim = None\n",
|
||||
"# (h, w) = image.shape[:2]\n",
|
||||
"\n",
|
||||
" if width is None and height is None:\n",
|
||||
" return image\n",
|
||||
" if width is None:\n",
|
||||
" r = height / float(h)\n",
|
||||
" dim = (int(w * r), height)\n",
|
||||
" else:\n",
|
||||
" r = width / float(w)\n",
|
||||
" dim = (width, int(h * r))\n",
|
||||
"# if width is None and height is None:\n",
|
||||
"# return image\n",
|
||||
"# if width is None:\n",
|
||||
"# r = height / float(h)\n",
|
||||
"# dim = (int(w * r), height)\n",
|
||||
"# else:\n",
|
||||
"# r = width / float(w)\n",
|
||||
"# dim = (width, int(h * r))\n",
|
||||
"\n",
|
||||
" return cv2.resize(image, dim, interpolation=inter)"
|
||||
"# return cv2.resize(image, dim, interpolation=inter)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -263,7 +263,7 @@
|
||||
"rotation = model(adjustedtensorizedimage).item()\n",
|
||||
"print(rotation)\n",
|
||||
"rotatedimage = t.Resize(size=1000)(tvf.rotate(adjustedtensorizedimage, rotation))\n",
|
||||
"# imS = ResizeWithAspectRatio(filereadimage, 1000)\n",
|
||||
"# imS = mf.ResizeWithAspectRatio(filereadimage, 1000)\n",
|
||||
"# imS = cv2.resize(filereadimage, (960, 540)) \n",
|
||||
"open_cv_image = np.array(t.ToPILImage()(rotatedimage))\n",
|
||||
"cv2.imshow(f'image', open_cv_image)\n",
|
||||
|
||||
@ -2,20 +2,9 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/usr/local/lib/python3.10/dist-packages/torchvision/datapoints/__init__.py:12: UserWarning: The torchvision.datapoints and torchvision.transforms.v2 namespaces are still Beta. While we do not expect major breaking changes, some APIs may still change according to user feedback. Please submit any feedback you may have in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check out https://github.com/pytorch/vision/issues/7319 to learn more about the APIs that we suspect might involve future changes. You can silence this warning by calling torchvision.disable_beta_transforms_warning().\n",
|
||||
" warnings.warn(_BETA_TRANSFORMS_WARNING)\n",
|
||||
"/usr/local/lib/python3.10/dist-packages/torchvision/transforms/v2/__init__.py:54: UserWarning: The torchvision.datapoints and torchvision.transforms.v2 namespaces are still Beta. While we do not expect major breaking changes, some APIs may still change according to user feedback. Please submit any feedback you may have in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check out https://github.com/pytorch/vision/issues/7319 to learn more about the APIs that we suspect might involve future changes. You can silence this warning by calling torchvision.disable_beta_transforms_warning().\n",
|
||||
" warnings.warn(_BETA_TRANSFORMS_WARNING)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import cv2\n",
|
||||
"import numpy as np\n",
|
||||
@ -23,183 +12,19 @@
|
||||
"import torch\n",
|
||||
"import torchvision.transforms.functional as tvf\n",
|
||||
"import torchvision.transforms.v2 as v2\n",
|
||||
"import torchvision.transforms as t"
|
||||
"import torchvision.transforms as t\n",
|
||||
"import myfunctions as mf\n",
|
||||
"\n",
|
||||
"from skimage import io\n",
|
||||
"from matplotlib import pyplot as plt\n",
|
||||
"import time\n",
|
||||
"\n",
|
||||
"import myfunctions as mf"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def ResizeWithAspectRatio(image, width=None, height=None, inter=cv2.INTER_AREA, retscale=False):\n",
|
||||
" dim = None\n",
|
||||
" (h, w) = image.shape[:2]\n",
|
||||
"\n",
|
||||
" if width is None and height is None:\n",
|
||||
" if (retscale == True):\n",
|
||||
" return (image, 1)\n",
|
||||
" return image\n",
|
||||
" if width is None:\n",
|
||||
" r = height / float(h)\n",
|
||||
" dim = (int(w * r), height)\n",
|
||||
" else:\n",
|
||||
" r = width / float(w)\n",
|
||||
" dim = (width, int(h * r))\n",
|
||||
"\n",
|
||||
" if (retscale == True):\n",
|
||||
" # print(\"hi\")\n",
|
||||
" return (cv2.resize(image, dim, interpolation=inter), 1/r)\n",
|
||||
" return cv2.resize(image, dim, interpolation=inter)\n",
|
||||
"\n",
|
||||
"# modify this or add a function/parameter to also return r, the scale factor"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"class SquarePad:\n",
|
||||
" def __init__(self, fill):\n",
|
||||
" self.fill = fill\n",
|
||||
" \n",
|
||||
" def __call__(self, image):\n",
|
||||
" w, h = image.shape\n",
|
||||
" max_wh = np.max([w, h])\n",
|
||||
" hp = int((max_wh - w) / 2)\n",
|
||||
" vp = int((max_wh - h) / 2)\n",
|
||||
" padding = (hp, vp, hp, vp)\n",
|
||||
" return cv2.copyMakeBorder(image, vp, vp, hp, hp, cv2.BORDER_CONSTANT, self.fill)\n",
|
||||
" \n",
|
||||
"tensorize = v2.Compose([v2.ToImageTensor(), v2.ConvertImageDtype()])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# rotation checker tools\n",
|
||||
"def rotate(img, angle):\n",
|
||||
" rows,cols = img.shape[0], img.shape[1]\n",
|
||||
" M = cv2.getRotationMatrix2D((cols/2,rows/2),angle,1)\n",
|
||||
" dst = cv2.warpAffine(img,M,(cols,rows))\n",
|
||||
" return dst\n",
|
||||
"\n",
|
||||
"def sum_rows(img):\n",
|
||||
" # Create a list to store the row sums\n",
|
||||
" row_sums = []\n",
|
||||
" # Iterate through the rows\n",
|
||||
" for r in range(img.shape[0]-1):\n",
|
||||
" # Sum the row\n",
|
||||
" row_sum = sum(sum(img[r:r+1,:]))\n",
|
||||
" # Add the sum to the list\n",
|
||||
" row_sums.append(row_sum)\n",
|
||||
" # Normalize range to (0,255)\n",
|
||||
" row_sums = (row_sums/max(row_sums)) * 255\n",
|
||||
" # Return\n",
|
||||
" return row_sums\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# selective search rectangle tools\n",
|
||||
"\n",
|
||||
"import heapq as hq\n",
|
||||
"\n",
|
||||
"class MaxHeapObj(object):\n",
|
||||
" def __init__(self, val): self.val = val\n",
|
||||
" def __lt__(self, other): return self.val > other.val\n",
|
||||
" def __eq__(self, other): return self.val == other.val\n",
|
||||
" def __str__(self): return str(self.val)\n",
|
||||
" \n",
|
||||
"class MinHeap(object):\n",
|
||||
" def __init__(self): self.h = []\n",
|
||||
" def heappush(self, x): heapq.heappush(self.h, x)\n",
|
||||
" def heappop(self): return heapq.heappop(self.h)\n",
|
||||
" def __getitem__(self, i): return self.h[i]\n",
|
||||
" def __len__(self): return len(self.h)\n",
|
||||
" \n",
|
||||
"class MaxHeap(MinHeap):\n",
|
||||
" def heappush(self, x): heapq.heappush(self.h, MaxHeapObj(x))\n",
|
||||
" def heappop(self): return heapq.heappop(self.h).val\n",
|
||||
" def __getitem__(self, i): return self.h[i].val\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def rectArea(rect):\n",
|
||||
" # print(rect)\n",
|
||||
" return rect[2]*rect[3]\n",
|
||||
"\n",
|
||||
"def biggestRects(n, rects):\n",
|
||||
" dict = {}\n",
|
||||
" # outrects = np.zeros(shape=(n, 4))\n",
|
||||
" for rect in rects:\n",
|
||||
" dict[tuple(rect)] = rectArea(rect)\n",
|
||||
" # maxh.heappush(rectArea(rect))\n",
|
||||
" # print(maxh[0])\n",
|
||||
" \n",
|
||||
" \n",
|
||||
" heap = [(-value, key) for key,value in dict.items()]\n",
|
||||
" largest = hq.nsmallest(n, heap)\n",
|
||||
" \n",
|
||||
"\n",
|
||||
" # hq.heapify(list(dict.items()))\n",
|
||||
" # for i in range(0,n):\n",
|
||||
" # outrects[i] = maxh.heappop()\n",
|
||||
" # print(outrects)\n",
|
||||
" return [key for value, key in largest]\n",
|
||||
"\n",
|
||||
"def overlapRect(rects):\n",
|
||||
" leftwall = -1\n",
|
||||
" rightwall = -1\n",
|
||||
" topwall = -1\n",
|
||||
" bottomwall = -1\n",
|
||||
" for (x, y, w, h) in rects:\n",
|
||||
" if (leftwall == -1):\n",
|
||||
" leftwall = x\n",
|
||||
" rightwall = x + w\n",
|
||||
" topwall = y\n",
|
||||
" bottomwall = y + h\n",
|
||||
" continue\n",
|
||||
" leftwall = max(leftwall, x)\n",
|
||||
" rightwall = min(rightwall, x+w)\n",
|
||||
" topwall = max(topwall, y)\n",
|
||||
" bottomwall = min(bottomwall, y+h)\n",
|
||||
" \n",
|
||||
" if (topwall >= bottomwall or leftwall >= rightwall):\n",
|
||||
" return (-1, -1, -1, -1)\n",
|
||||
" return (leftwall, topwall, rightwall-leftwall, bottomwall-topwall)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# canny edge crop tools\n",
|
||||
"\n",
|
||||
"def clip(n, lower, upper):\n",
|
||||
" return max(lower, min(n, upper))\n",
|
||||
"\n",
|
||||
"def colourscaler(n, min, max):\n",
|
||||
" temp = n-min\n",
|
||||
" diff = abs(max - min)\n",
|
||||
" return clip((temp/diff)*255, 0, 255)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -209,195 +34,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def morphologyCrop(image):\n",
|
||||
" # convert to grayscale\n",
|
||||
" gray = cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)\n",
|
||||
"\n",
|
||||
" # threshold\n",
|
||||
" thresh = cv2.threshold(gray, 170, 255, cv2.THRESH_BINARY)[1]\n",
|
||||
"\n",
|
||||
" # apply morphology\n",
|
||||
" kernel = np.ones((7,7), np.uint8)\n",
|
||||
" morph = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)\n",
|
||||
" kernel = np.ones((9,9), np.uint8)\n",
|
||||
" morph = cv2.morphologyEx(morph, cv2.MORPH_ERODE, kernel)\n",
|
||||
"\n",
|
||||
" # get largest contour\n",
|
||||
" contours = cv2.findContours(morph, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)\n",
|
||||
" contours = contours[0] if len(contours) == 2 else contours[1]\n",
|
||||
" area_thresh = 0\n",
|
||||
" for c in contours:\n",
|
||||
" area = cv2.contourArea(c)\n",
|
||||
" if area > area_thresh:\n",
|
||||
" area_thresh = area\n",
|
||||
" big_contour = c\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" # get bounding box\n",
|
||||
" x,y,w,h = cv2.boundingRect(big_contour)\n",
|
||||
"\n",
|
||||
" # draw filled contour on black background\n",
|
||||
" mask = np.zeros_like(gray)\n",
|
||||
" mask = cv2.merge([mask,mask,mask])\n",
|
||||
" cv2.drawContours(mask, [big_contour], -1, (255,255,255), cv2.FILLED)\n",
|
||||
"\n",
|
||||
" # apply mask to input\n",
|
||||
" result1 = image.copy()\n",
|
||||
" result1 = cv2.bitwise_and(result1, mask)\n",
|
||||
"\n",
|
||||
" # crop result\n",
|
||||
" result2 = result1[y:y+h, x:x+w]\n",
|
||||
" return result2"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def rotator(image):\n",
|
||||
" src = 255 - cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)\n",
|
||||
" scores = []\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" # # square the image\n",
|
||||
" # h,w = src.shape\n",
|
||||
" # small_dimention = min(h,w)\n",
|
||||
" # src = src[:small_dimention, :small_dimention]\n",
|
||||
" src = SquarePad(fill=255)(src)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" src = cv2.threshold(src, 70, 255, cv2.THRESH_BINARY)[1]\n",
|
||||
" src = ResizeWithAspectRatio(src, height=250)\n",
|
||||
" \n",
|
||||
" angle = 0\n",
|
||||
" finalangle = 0\n",
|
||||
" while angle <= 360:\n",
|
||||
" # Rotate the source image\n",
|
||||
" img = rotate(src, angle) \n",
|
||||
" # Crop the center 1/3rd of the image (roi is filled with text)\n",
|
||||
" h,w = img.shape\n",
|
||||
" buffer = min(h, w) - int(min(h,w)/1.5)\n",
|
||||
" roi = img[int(h/2-buffer):int(h/2+buffer), int(w/2-buffer):int(w/2+buffer)]\n",
|
||||
" # # Create background to draw transform on\n",
|
||||
" # bg = np.zeros((buffer*2, buffer*2), np.uint8)\n",
|
||||
" # Compute the sums of the rows\n",
|
||||
" row_sums = sum_rows(roi)\n",
|
||||
" # High score --> Zebra stripes\n",
|
||||
" score = np.count_nonzero(row_sums)\n",
|
||||
" scores.append(score)\n",
|
||||
" # othercount = othercount + 1\n",
|
||||
" # Image has best rotation\n",
|
||||
" if score <= min(scores):\n",
|
||||
" # count = count + 1\n",
|
||||
" # Save the rotatied image\n",
|
||||
" # print('found optimal rotation')\n",
|
||||
" # best_rotation = img.copy()\n",
|
||||
" finalangle = angle\n",
|
||||
" # goodangle = angle\n",
|
||||
" # k = display_data(roi, row_sums, buffer)\n",
|
||||
" # if k == 27: break\n",
|
||||
" # Increment angle and try again\n",
|
||||
" angle += .75\n",
|
||||
" # cv2.destroyAllWindows()\n",
|
||||
" return rotate(image, finalangle) "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def selectiveSearchSegmentationImp(image):\n",
|
||||
" ss = cv2.ximgproc.segmentation.createSelectiveSearchSegmentation()\n",
|
||||
" ss.setBaseImage(image)\n",
|
||||
" ss.switchToSelectiveSearchFast()\n",
|
||||
" return ss.process()\n",
|
||||
"\n",
|
||||
"def selectiveSearchCrop(image):\n",
|
||||
" img, scale = ResizeWithAspectRatio(image,300, retscale=True)\n",
|
||||
" rects = selectiveSearchSegmentationImp(cv2.GaussianBlur(img, (15,15),0))\n",
|
||||
" bigRects = biggestRects(20, rects)\n",
|
||||
" overlaprectangle = overlapRect(bigRects)\n",
|
||||
" if (overlaprectangle[0] == -1):\n",
|
||||
" print(\"hi\")\n",
|
||||
" return image\n",
|
||||
" # print(image.shape)\n",
|
||||
" finalrect = (int(overlaprectangle[0]*scale), int(overlaprectangle[1]*scale), int(overlaprectangle[2]*scale), int(overlaprectangle[3]*scale))\n",
|
||||
" print(finalrect)\n",
|
||||
" return image[finalrect[0]: finalrect[0]+finalrect[2], finalrect[1]: finalrect[1]+finalrect[3], :]\n",
|
||||
" "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def cannyEdgeCrop(image, lower = 100, upper = 255, threshold1 = 50, threshold2 = 350):\n",
|
||||
" lower = max(0,lower)\n",
|
||||
" upper = min(255, upper)\n",
|
||||
" gray = cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)\n",
|
||||
"\n",
|
||||
" scaled_gray = np.zeros(gray.shape, gray.dtype)\n",
|
||||
" \n",
|
||||
" # for y in range(0,gray.shape[0]):\n",
|
||||
" # for x in range(0,gray.shape[1]):\n",
|
||||
" # scaled_gray[y][x] = colourscaler(gray[y][x], lower, upper)\n",
|
||||
" scaled_gray = gray\n",
|
||||
" \n",
|
||||
" blurred = cv2.GaussianBlur(scaled_gray, (15,15),0)\n",
|
||||
" edged = cv2.Canny(blurred, threshold1, threshold2)\n",
|
||||
" return edged"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from skimage import io\n",
|
||||
"# import skimage.transform as st\n",
|
||||
"# from skimage.transform import rotate\n",
|
||||
"# from skimage.color import rgb2gray\n",
|
||||
"from deskew import determine_skew\n",
|
||||
"from matplotlib import pyplot as plt\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def deskew(image):\n",
|
||||
" # image = io.imread(_img)\n",
|
||||
" # print(type(image))\n",
|
||||
" grayscale = cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)\n",
|
||||
" grayscale = SquarePad(fill=255)(grayscale)\n",
|
||||
" grayscale = ResizeWithAspectRatio(grayscale, height=300)\n",
|
||||
" # print(type(grayscale))\n",
|
||||
" angle = determine_skew(grayscale)\n",
|
||||
" # print(angle)\n",
|
||||
" rotated = rotate(image, angle)\n",
|
||||
" return rotated"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import time"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -406,13 +43,13 @@
|
||||
"True"
|
||||
]
|
||||
},
|
||||
"execution_count": 15,
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"cropped = morphologyCrop(img)\n",
|
||||
"cropped = mf.morphologyCrop(img)\n",
|
||||
"# rotated = deskew(cropped)\n",
|
||||
"# cropped2 = morphologyCrop(rotated)\n",
|
||||
"# cropped2 = selectiveSearchCrop(rotated)\n",
|
||||
@ -423,7 +60,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 20,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
||||
374
code/autocropper/myfunctions.py
Normal file
374
code/autocropper/myfunctions.py
Normal file
@ -0,0 +1,374 @@
|
||||
import cv2
|
||||
import numpy as np
|
||||
import math
|
||||
from deskew import determine_skew
|
||||
import heapq as hq
|
||||
import torchvision.transforms.v2 as v2
|
||||
import scipy.stats as st
|
||||
|
||||
## ------------------------------helper functions------------------------------
|
||||
def ResizeWithAspectRatio(image, width=None, height=None, inter=cv2.INTER_AREA, retscale=False):
    """Resize ``image`` to a target width or height, keeping its aspect ratio.

    Args:
        image: Array whose first two axes are (rows, columns).
        width: Target width in pixels. Wins over ``height`` when both are given.
        height: Target height in pixels; only used when ``width`` is None.
        inter: Interpolation flag forwarded to ``cv2.resize``.
        retscale: When true, also return ``1 / resize_ratio``, i.e. the factor
            that maps coordinates in the resized image back to the original.

    Returns:
        The resized image, or ``(resized_image, scale)`` when ``retscale`` is
        true. When neither ``width`` nor ``height`` is given the input is
        returned untouched (with a scale of ``1``).
    """
    (h, w) = image.shape[:2]

    if width is None and height is None:
        return (image, 1) if retscale else image

    if width is None:
        r = height / float(h)
        # Keep the derived side at >= 1 px so an extreme aspect ratio cannot
        # truncate it to 0, which is not a valid target size.
        dim = (max(1, int(w * r)), height)
    else:
        r = width / float(w)
        dim = (width, max(1, int(h * r)))

    resized = cv2.resize(image, dim, interpolation=inter)
    return (resized, 1 / r) if retscale else resized
|
||||
|
||||
|
||||
class SquarePad:
    """Callable that adds a constant border to an image until it is square.

    The shorter axis is padded on both sides so the original content stays
    centred; the longer axis is left as is.
    """

    def __init__(self, fill):
        # Border value handed to cv2.copyMakeBorder on every call.
        self.fill = fill

    def __call__(self, image):
        """Return ``image`` padded to ``max(height, width)`` on both axes."""
        h, w = image.shape[0], image.shape[1]
        side = max(w, h)

        # Split the missing pixels between the two sides. When the difference
        # is odd the extra pixel goes to the right/bottom so the result is
        # exactly square (halving and using the same pad twice loses a pixel).
        left = (side - w) // 2
        right = (side - w) - left
        top = (side - h) // 2
        bottom = (side - h) - top

        # NOTE(review): ``self.fill`` is passed as the 7th positional argument.
        # The documented Python signature is
        # copyMakeBorder(src, top, bottom, left, right, borderType[, dst[, value]]),
        # so this slot looks like ``dst`` rather than ``value`` - confirm the
        # border really comes out in the requested colour before relying on it.
        return cv2.copyMakeBorder(image, top, bottom, left, right, cv2.BORDER_CONSTANT, self.fill)
|
||||
|
||||
|
||||
def rotate(img, angle):
    """Rotate ``img`` about its centre by ``angle`` and return the result.

    ``angle`` is forwarded unchanged to ``cv2.getRotationMatrix2D`` with a
    scale of 1. The output canvas has the same (rows, cols) as the input.
    """
    height, width = img.shape[0], img.shape[1]
    centre = (width / 2, height / 2)
    matrix = cv2.getRotationMatrix2D(centre, angle, 1)
    return cv2.warpAffine(img, matrix, (width, height))
|
||||
|
||||
|
||||
def clip(n, lower, upper):
    """Clamp ``n`` to the interval [lower, upper] and return the result."""
    # First cap from above, then raise to the lower bound if needed.
    capped = upper if upper < n else n
    return capped if capped > lower else lower
|
||||
|
||||
def colourscaler(n, min, max):
    """Linearly map ``n`` from the range [min, max] onto [0, 255], clamped.

    Args:
        n: Scalar value to rescale (e.g. a single grey-level pixel).
        min: Input value that maps to 0.
        max: Input value that maps to 255.

    Returns:
        The rescaled value, clamped to [0, 255] via ``clip``.

    Raises:
        ZeroDivisionError: If ``min`` equals ``max``.
    """
    # Work in Python floats: with fixed-width unsigned inputs (such as numpy
    # uint8 pixels) ``n - min`` can wrap around instead of going negative,
    # which would turn values below ``min`` into bright output.
    offset = float(n) - float(min)
    span = abs(float(max) - float(min))
    return clip((offset / span) * 255, 0, 255)
|
||||
|
||||
# Transform pipeline for converting an image (usually a PIL image) to a PyTorch tensor.
# NOTE(review): ToImageTensor / ConvertImageDtype come from the early
# torchvision.transforms.v2 API - confirm they exist in the torchvision
# version this project pins.
tensorize = v2.Compose([v2.ToImageTensor(), v2.ConvertImageDtype()])
|
||||
|
||||
## ------------------------------for selective segmentation search crop------------------------------
|
||||
def rectArea(rect):
    """Return the area of an ``(x, y, w, h)`` rectangle: width times height."""
    width = rect[2]
    height = rect[3]
    return width * height
|
||||
|
||||
def biggestRects(n, rects):
    """Return up to ``n`` distinct rectangles from ``rects``, largest first.

    Args:
        n: Maximum number of rectangles to return.
        rects: Iterable of ``(x, y, w, h)`` sequences; duplicates are merged.

    Returns:
        A list of rectangle tuples ordered by descending area. Rectangles of
        equal area are ordered by ascending tuple value.
    """
    # A set drops repeated rectangles (and avoids shadowing the builtin
    # ``dict`` with a local name, as the previous version did).
    unique = {tuple(rect) for rect in rects}

    # Smallest (-area, rect) first == largest area first.
    return hq.nsmallest(n, unique, key=lambda rect: (-rectArea(rect), rect))
|
||||
|
||||
def overlapRect(rects):
    """Return the region shared by every rectangle in ``rects``.

    Args:
        rects: Iterable of ``(x, y, w, h)`` rectangles.

    Returns:
        ``(x, y, w, h)`` of the common intersection, or ``(-1, -1, -1, -1)``
        when ``rects`` is empty or the rectangles have no overlap with
        positive width and height.
    """
    # Running intersection as (left, top, right, bottom). ``None`` marks
    # "nothing seen yet"; using a coordinate value such as -1 as the marker
    # would restart the intersection whenever a rectangle begins at x == -1.
    walls = None
    for (x, y, w, h) in rects:
        if walls is None:
            walls = (x, y, x + w, y + h)
            continue
        left, top, right, bottom = walls
        walls = (max(left, x), max(top, y), min(right, x + w), min(bottom, y + h))

    if walls is None:
        return (-1, -1, -1, -1)

    left, top, right, bottom = walls
    if top >= bottom or left >= right:
        return (-1, -1, -1, -1)
    return (left, top, right - left, bottom - top)
|
||||
|
||||
|
||||
def selectiveSearchSegmentationImp(image):
    """Run OpenCV selective search in fast mode on ``image``.

    Uses ``cv2.ximgproc.segmentation`` and returns whatever ``process()``
    yields (consumed elsewhere in this module as ``(x, y, w, h)`` rectangles).
    """
    searcher = cv2.ximgproc.segmentation.createSelectiveSearchSegmentation()
    searcher.setBaseImage(image)
    searcher.switchToSelectiveSearchFast()
    proposals = searcher.process()
    return proposals
|
||||
|
||||
|
||||
## ------------------------------specific to houghline crop------------------------------
|
||||
def lineAngle(line):
    """Return the angle of a segment measured from the vertical, in radians.

    Args:
        line: Sequence ``(x1, y1, x2, y2)`` with the segment's end points.

    Returns:
        The segment direction folded into [0, pi) and shifted by -pi/2, so
        the result lies in [-pi/2, pi/2): 0 for a vertical segment and -pi/2
        for a horizontal one. The order of the end points does not matter
        (up to floating-point rounding).
    """
    x1, y1, x2, y2 = line[0], line[1], line[2], line[3]
    direction = math.atan2(y2 - y1, x2 - x1)
    folded = direction % np.pi
    return folded - (np.pi / 2)
|
||||
|
||||
def WithinXDegrees(lines, margin):
    """Keep the segments whose ``lineAngle`` is within ``margin`` degrees of 0.

    Args:
        lines: Iterable whose items hold an ``(x1, y1, x2, y2)`` segment at
            index 0 (presumably the array returned by ``cv2.HoughLinesP`` -
            confirm against the caller). ``None`` is treated as "no lines".
        margin: Largest accepted absolute angle, in degrees.

    Returns:
        A float array of shape ``(k, 4)`` with the accepted segments, in
        their original order; ``(0, 4)`` when nothing qualifies.
    """
    if lines is None:
        return np.empty((0, 4))

    kept = [
        line[0]
        for line in lines
        if np.rad2deg(abs(lineAngle(line[0]))) <= margin
    ]
    # Build the array once instead of calling np.append per hit, which
    # reallocates and copies the whole result every time.
    return np.array(kept, dtype=float).reshape(-1, 4)
|
||||
|
||||
def lineBoundingRect(lines):
    """Return the axis-aligned box enclosing every segment in ``lines``.

    Args:
        lines: 2-D array with one ``(x1, y1, x2, y2)`` segment per row.

    Returns:
        ``(min_x, min_y, max_x, max_y)`` - two corners, not a width/height.
    """
    # Columns 0 and 2 hold x coordinates, columns 1 and 3 hold y coordinates.
    xs = lines[:, (0, 2)]
    ys = lines[:, (1, 3)]
    return (xs.min(), ys.min(), xs.max(), ys.max())
|
||||
|
||||
def premorphCrop(image):
    """Crop ``image`` with ``morphologyCrop`` using its ``special`` variant."""
    cropped = morphologyCrop(image, special=True)
    return cropped
|
||||
|
||||
|
||||
|
||||
## ------------------------------specific to row summation deskewing------------------------------
|
||||
def sum_rows(img):
    """Sum each row of a 2-D image and rescale so the largest sum is 255.

    Args:
        img: 2-D array (rows x columns), e.g. a thresholded grey image.

    Returns:
        A 1-D float array with one entry per row of ``img``. If every row
        sums to zero (or ``img`` has no rows) the unscaled sums are returned
        as is, so the result never contains NaN from a division by zero.
    """
    # Every row is included (the earlier loop stopped one row short), and the
    # sums are accumulated in float64 so narrow integer pixels cannot overflow.
    totals = np.asarray(img).sum(axis=1, dtype=np.float64)
    if totals.size == 0:
        return totals

    peak = totals.max()
    if peak == 0:
        # Nothing to normalise against; all sums are already 0.
        return totals

    # Normalize range to (0, 255)
    return (totals / peak) * 255
|
||||
|
||||
|
||||
## ------------------------------active functions------------------------------
|
||||
|
||||
## ------------------------------cropping------------------------------
|
||||
def morphologyCrop(image, special=False):
    """Mask and crop ``image`` to its largest bright region.

    The image is converted to grey, thresholded at 170, cleaned up with
    morphological operations, and the external contour with the largest area
    is kept. Everything outside that contour is masked out and the result is
    cropped to the contour's bounding box.

    Args:
        image: 3-channel image (converted with ``cv2.COLOR_BGR2GRAY``).
        special: Selects the alternative clean-up used by ``premorphCrop``:
            erode 9x9, close 9x9, erode 2x2, plus a 3x3 blur of the mask.
            The default is close 7x7 followed by erode 9x9.

    Returns:
        The masked crop, or ``image`` unchanged when no contour with a
        positive area is found.
    """
    # convert to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # threshold
    thresh = cv2.threshold(gray, 170, 255, cv2.THRESH_BINARY)[1]

    # apply morphology: (operation, square kernel size) applied in order
    if special:
        steps = ((cv2.MORPH_ERODE, 9), (cv2.MORPH_CLOSE, 9), (cv2.MORPH_ERODE, 2))
    else:
        steps = ((cv2.MORPH_CLOSE, 7), (cv2.MORPH_ERODE, 9))
    morph = thresh
    for operation, size in steps:
        kernel = np.ones((size, size), np.uint8)
        morph = cv2.morphologyEx(morph, operation, kernel)

    # get largest contour
    contours = cv2.findContours(morph, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    # findContours returns 2 or 3 values depending on the OpenCV version.
    contours = contours[0] if len(contours) == 2 else contours[1]
    big_contour = None
    area_thresh = 0
    for c in contours:
        area = cv2.contourArea(c)
        if area > area_thresh:
            area_thresh = area
            big_contour = c

    # Nothing usable was segmented: hand the input back instead of failing
    # on an unbound contour further down.
    if big_contour is None:
        return image

    # get bounding box
    x, y, w, h = cv2.boundingRect(big_contour)

    # draw filled contour on black background
    mask = np.zeros_like(gray)
    mask = cv2.merge([mask, mask, mask])
    cv2.drawContours(mask, [big_contour], -1, (255, 255, 255), cv2.FILLED)
    if special:
        mask = cv2.blur(mask, (3, 3))

    # apply mask to input (bitwise_and allocates its own output)
    masked = cv2.bitwise_and(image, mask)

    # crop result
    return masked[y:y + h, x:x + w]
|
||||
|
||||
|
||||
|
||||
##### ------------------------------TEST CODE FOR SELECTIVESEARCHCROP------------------------------
|
||||
# ## Test this code for the masking/colour squishing. it essentially can just speed up clipping the edges.
|
||||
# #!/usr/local/bin/python3
|
||||
# import cv2 as cv
|
||||
# import numpy as np
|
||||
|
||||
# # Load the aerial image and convert to HSV colourspace
|
||||
# image = cv.imread("aerial.png")
|
||||
# hsv=cv.cvtColor(image,cv.COLOR_BGR2HSV)
|
||||
|
||||
# # Define lower and upper limits of what we call "brown"
|
||||
# brown_lo=np.array([10,0,0])
|
||||
# brown_hi=np.array([20,255,255])
|
||||
|
||||
# # Mask image to only select browns
|
||||
# mask=cv.inRange(hsv,brown_lo,brown_hi)
|
||||
|
||||
# # Change image to red where we found brown
|
||||
# image[mask>0]=(0,0,255)
|
||||
|
||||
# cv.imwrite("result.png",image)
|
||||
|
||||
#CAN ALSO TRY USING NUMPY VECTORIZATION
|
||||
#------------------------------------------------------------------------------------------
|
||||
def selectiveSearchCrop(image):
    """Crop ``image`` to the region shared by its largest search proposals.

    Selective search runs on a blurred copy resized to a width of 300 px.
    The 20 largest distinct proposals are intersected and the intersection
    is scaled back to the coordinates of the full-size image.

    Returns:
        The cropped view of ``image``, or ``image`` unchanged when the
        proposals have no common overlap.
    """
    img, scale = ResizeWithAspectRatio(image, 300, retscale=True)
    rects = selectiveSearchSegmentationImp(cv2.GaussianBlur(img, (15, 15), 0))
    overlap = overlapRect(biggestRects(20, rects))
    if overlap[0] == -1:
        return image

    # overlapRect yields (x, y, w, h): x/w run along columns and y/h along
    # rows, so rows must be sliced with y and columns with x.
    x, y, w, h = (int(value * scale) for value in overlap)
    return image[y:y + h, x:x + w]
|
||||
|
||||
def cannyEdgeCrop(image, lower=100, upper=255, threshold1=50, threshold2=350):
    """Return the Canny edge map of a blurred grey version of ``image``.

    Despite the name, nothing is cropped here; the edge image is returned.

    Args:
        image: 3-channel image (converted with ``cv2.COLOR_BGR2GRAY``).
        lower: Currently unused; kept for interface compatibility. It
            belonged to a per-pixel colour-scaling step that is disabled.
        upper: Currently unused, see ``lower``.
        threshold1: First hysteresis threshold passed to ``cv2.Canny``.
        threshold2: Second hysteresis threshold passed to ``cv2.Canny``.

    Returns:
        The single-channel edge image produced by ``cv2.Canny``.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (15, 15), 0)
    return cv2.Canny(blurred, threshold1, threshold2)
|
||||
|
||||
|
||||
|
||||
## ------------------------------deskewing------------------------------
|
||||
def rowsumdeskew(image):
    """Deskew ``image`` by brute-force search over rotation angles.

    A small, inverted, thresholded copy of the image is rotated from 0 to 360
    degrees in 0.75 degree steps. For each angle the number of rows in the
    central region that contain any non-zero pixel is counted; the angle with
    the fewest such rows is applied to the full-size image. When several
    angles tie, the last one tried wins.

    Args:
        image: 3-channel image (converted with ``cv2.COLOR_BGR2GRAY``).

    Returns:
        ``image`` rotated by the selected angle.
    """
    src = 255 - cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    src = SquarePad(fill=255)(src)
    src = cv2.threshold(src, 70, 255, cv2.THRESH_BINARY)[1]
    src = ResizeWithAspectRatio(src, height=250)

    best_score = None
    finalangle = 0
    angle = 0
    while angle <= 360:
        # Rotate the source image
        img = rotate(src, angle)

        # Keep a centred square window: ``buffer`` is about a third of the
        # shorter side and the window extends ``buffer`` either way from the
        # centre, i.e. it covers roughly the middle two thirds.
        h, w = img.shape
        buffer = min(h, w) - int(min(h, w) / 1.5)
        roi = img[int(h / 2 - buffer):int(h / 2 + buffer), int(w / 2 - buffer):int(w / 2 + buffer)]

        # Number of rows with any content; fewer means more empty rows.
        score = np.count_nonzero(sum_rows(roi))

        # Track the running minimum instead of re-scanning a growing list.
        if best_score is None or score <= best_score:
            best_score = score
            finalangle = angle

        # Increment angle and try again
        angle += .75

    return rotate(image, finalangle)
|
||||
|
||||
|
||||
def externaldeskew(image):
    """Deskew ``image`` using the angle estimated by ``deskew.determine_skew``.

    The estimate is computed on a grey, square-padded copy resized to a
    height of 300 px; the rotation is applied to the full-size input.

    Args:
        image: 3-channel image (converted with ``cv2.COLOR_BGR2GRAY``).

    Returns:
        The rotated image, or ``image`` unchanged when no skew angle could be
        determined.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    grayscale = SquarePad(fill=255)(grayscale)
    grayscale = ResizeWithAspectRatio(grayscale, height=300)

    angle = determine_skew(grayscale)
    # determine_skew reports None when it cannot find an angle; rotating by
    # None would fail, so leave the image as it is.
    if angle is None:
        return image

    return rotate(image, angle)
|
||||
|
||||
|
||||
def houghlinedeskew(image):
    """Deskew ``image`` using the most common Hough-line direction.

    The image is square-padded, cropped with ``premorphCrop``, thresholded
    and edge-detected. Every line found by the standard Hough transform is
    turned into an angle with ``lineAngle``; the angles are rounded to two
    decimals (radians) and the most frequent value is used as the rotation.

    Args:
        image: 3-channel image (converted with ``cv2.COLOR_BGR2GRAY``).

    Returns:
        ``image`` rotated by the dominant angle, or ``image`` unchanged when
        the Hough transform finds no lines.
    """
    img = SquarePad(fill=255)(image)
    img = premorphCrop(img)

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    gray = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)[1]

    dst = cv2.Canny(gray, 50, 200, None, 3)
    lines = cv2.HoughLines(dst, 1, np.pi / 180, 150, None, 0, 0)

    # HoughLines yields None when nothing is detected; check before len().
    if lines is None:
        return image

    angles = np.zeros(len(lines))
    for i, line in enumerate(lines):
        rho = line[0][0]
        theta = line[0][1]
        a = math.cos(theta)
        b = math.sin(theta)
        # (x0, y0) is the point of the line closest to the origin; step 1000
        # units along the line in both directions to get two end points.
        x0 = a * rho
        y0 = b * rho
        angles[i] = lineAngle((x0 + 1000 * (-b), y0 + 1000 * (a), x0 - 1000 * (-b), y0 - 1000 * (a)))

    mode = st.mode(np.around(angles, decimals=2))[0]
    # Depending on the SciPy version the mode comes back as a scalar or as a
    # one-element array; normalise to a plain float before rotating.
    rotationangle = float(np.rad2deg(np.ravel(mode)[0]))
    return rotate(image, rotationangle)
|
||||
|
||||
@ -1,5 +1,14 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## ORIGINAL FILE FOR SELECTIVE SEGMENTATION SEARCH"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 350,
|
||||
@ -9,6 +18,7 @@
|
||||
"import cv2\n",
|
||||
"import numpy as np\n",
|
||||
"from queue import PriorityQueue\n",
|
||||
"import myfunctions as mf\n",
|
||||
"\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import random"
|
||||
@ -20,20 +30,20 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def ResizeWithAspectRatio(image, width=None, height=None, inter=cv2.INTER_AREA):\n",
|
||||
" dim = None\n",
|
||||
" (h, w) = image.shape[:2]\n",
|
||||
"# def ResizeWithAspectRatio(image, width=None, height=None, inter=cv2.INTER_AREA):\n",
|
||||
"# dim = None\n",
|
||||
"# (h, w) = image.shape[:2]\n",
|
||||
"\n",
|
||||
" if width is None and height is None:\n",
|
||||
" return image\n",
|
||||
" if width is None:\n",
|
||||
" r = height / float(h)\n",
|
||||
" dim = (int(w * r), height)\n",
|
||||
" else:\n",
|
||||
" r = width / float(w)\n",
|
||||
" dim = (width, int(h * r))\n",
|
||||
"# if width is None and height is None:\n",
|
||||
"# return image\n",
|
||||
"# if width is None:\n",
|
||||
"# r = height / float(h)\n",
|
||||
"# dim = (int(w * r), height)\n",
|
||||
"# else:\n",
|
||||
"# r = width / float(w)\n",
|
||||
"# dim = (width, int(h * r))\n",
|
||||
"\n",
|
||||
" return cv2.resize(image, dim, interpolation=inter)"
|
||||
"# return cv2.resize(image, dim, interpolation=inter)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -69,13 +79,13 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def clip(n, lower, upper):\n",
|
||||
" return max(lower, min(n, upper))\n",
|
||||
"# def clip(n, lower, upper):\n",
|
||||
"# return max(lower, min(n, upper))\n",
|
||||
"\n",
|
||||
"def colourscaler(n, min, max):\n",
|
||||
" temp = n-min\n",
|
||||
" diff = abs(max - min)\n",
|
||||
" return clip((temp/diff)*255, 0, 255)"
|
||||
"# def colourscaler(n, min, max):\n",
|
||||
"# temp = n-min\n",
|
||||
"# diff = abs(max - min)\n",
|
||||
"# return clip((temp/diff)*255, 0, 255)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -131,11 +141,11 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def rotate(img, angle):\n",
|
||||
" rows,cols = img.shape[0], img.shape[1]\n",
|
||||
" M = cv2.getRotationMatrix2D((cols/2,rows/2),angle,1)\n",
|
||||
" dst = cv2.warpAffine(img,M,(cols,rows))\n",
|
||||
" return dst"
|
||||
"# def rotate(img, angle):\n",
|
||||
"# rows,cols = img.shape[0], img.shape[1]\n",
|
||||
"# M = cv2.getRotationMatrix2D((cols/2,rows/2),angle,1)\n",
|
||||
"# dst = cv2.warpAffine(img,M,(cols,rows))\n",
|
||||
"# return dst"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -194,49 +204,49 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def rectArea(rect):\n",
|
||||
" # print(rect)\n",
|
||||
" return rect[2]*rect[3]\n",
|
||||
"# def rectArea(rect):\n",
|
||||
"# # print(rect)\n",
|
||||
"# return rect[2]*rect[3]\n",
|
||||
"\n",
|
||||
"def biggestRects(n, rects):\n",
|
||||
" dict = {}\n",
|
||||
" # outrects = np.zeros(shape=(n, 4))\n",
|
||||
" for rect in rects:\n",
|
||||
" dict[tuple(rect)] = rectArea(rect)\n",
|
||||
" # maxh.heappush(rectArea(rect))\n",
|
||||
" # print(maxh[0])\n",
|
||||
"# def biggestRects(n, rects):\n",
|
||||
"# dict = {}\n",
|
||||
"# # outrects = np.zeros(shape=(n, 4))\n",
|
||||
"# for rect in rects:\n",
|
||||
"# dict[tuple(rect)] = mf.rectArea(rect)\n",
|
||||
"# # maxh.heappush(mf.rectArea(rect))\n",
|
||||
"# # print(maxh[0])\n",
|
||||
" \n",
|
||||
" \n",
|
||||
" heap = [(-value, key) for key,value in dict.items()]\n",
|
||||
" largest = hq.nsmallest(n, heap)\n",
|
||||
"# heap = [(-value, key) for key,value in dict.items()]\n",
|
||||
"# largest = hq.nsmallest(n, heap)\n",
|
||||
" \n",
|
||||
"\n",
|
||||
" # hq.heapify(list(dict.items()))\n",
|
||||
" # for i in range(0,n):\n",
|
||||
" # outrects[i] = maxh.heappop()\n",
|
||||
" # print(outrects)\n",
|
||||
" return [key for value, key in largest]\n",
|
||||
"# # hq.heapify(list(dict.items()))\n",
|
||||
"# # for i in range(0,n):\n",
|
||||
"# # outrects[i] = maxh.heappop()\n",
|
||||
"# # print(outrects)\n",
|
||||
"# return [key for value, key in largest]\n",
|
||||
"\n",
|
||||
"def overlapRect(rects):\n",
|
||||
" leftwall = -1\n",
|
||||
" rightwall = -1\n",
|
||||
" topwall = -1\n",
|
||||
" bottomwall = -1\n",
|
||||
" for (x, y, w, h) in rects:\n",
|
||||
" if (leftwall == -1):\n",
|
||||
" leftwall = x\n",
|
||||
" rightwall = x + w\n",
|
||||
" topwall = y\n",
|
||||
" bottomwall = y + h\n",
|
||||
" continue\n",
|
||||
" leftwall = max(leftwall, x)\n",
|
||||
" rightwall = min(rightwall, x+w)\n",
|
||||
" topwall = max(topwall, y)\n",
|
||||
" bottomwall = min(bottomwall, y+h)\n",
|
||||
"# def overlapRect(rects):\n",
|
||||
"# leftwall = -1\n",
|
||||
"# rightwall = -1\n",
|
||||
"# topwall = -1\n",
|
||||
"# bottomwall = -1\n",
|
||||
"# for (x, y, w, h) in rects:\n",
|
||||
"# if (leftwall == -1):\n",
|
||||
"# leftwall = x\n",
|
||||
"# rightwall = x + w\n",
|
||||
"# topwall = y\n",
|
||||
"# bottomwall = y + h\n",
|
||||
"# continue\n",
|
||||
"# leftwall = max(leftwall, x)\n",
|
||||
"# rightwall = min(rightwall, x+w)\n",
|
||||
"# topwall = max(topwall, y)\n",
|
||||
"# bottomwall = min(bottomwall, y+h)\n",
|
||||
" \n",
|
||||
" if (topwall >= bottomwall or leftwall >= rightwall):\n",
|
||||
" return (-1, -1, -1, -1)\n",
|
||||
" return (leftwall, topwall, rightwall-leftwall, bottomwall-topwall)"
|
||||
"# if (topwall >= bottomwall or leftwall >= rightwall):\n",
|
||||
"# return (-1, -1, -1, -1)\n",
|
||||
"# return (leftwall, topwall, rightwall-leftwall, bottomwall-topwall)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -258,11 +268,11 @@
|
||||
"# _, thresholded = cv2.threshold(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY), 200, 255, cv2.THRESH_BINARY)\n",
|
||||
"\n",
|
||||
"rects = selectiveSearchSegmentationImp(cv2.GaussianBlur(ResizeWithAspectRatio(img,300), (15,15),0))\n",
|
||||
"# rectArea(rects[0])\n",
|
||||
"bigRects = biggestRects(20, rects)\n",
|
||||
"# mf.rectArea(rects[0])\n",
|
||||
"bigRects = mf.biggestRects(20, rects)\n",
|
||||
"# print(bigRects)\n",
|
||||
"\n",
|
||||
"finalrect = overlapRect(bigRects)\n",
|
||||
"finalrect = mf.overlapRect(bigRects)\n",
|
||||
"print(finalrect)\n",
|
||||
"output = ResizeWithAspectRatio(img,300)\n",
|
||||
"for (x, y, w, h) in [finalrect]:\n",
|
||||
@ -1,5 +1,14 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# can probably be deleted or put somewhere. Was the original code for the rowsumdeskew"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 36,
|
||||
@ -1,5 +1,14 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# ORIGINAL DOCUMENT FOR MORPHOLOGY CROP can maybe be deleted"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 27,
|
||||
@ -19,6 +28,7 @@
|
||||
"import torchvision.models as models\n",
|
||||
"import torchvision.transforms as t\n",
|
||||
"\n",
|
||||
"import myfunctions as mf\n",
|
||||
"\n",
|
||||
"from PIL import Image"
|
||||
]
|
||||
@ -39,20 +49,20 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def ResizeWithAspectRatio(image, width=None, height=None, inter=cv2.INTER_AREA):\n",
|
||||
" dim = None\n",
|
||||
" (h, w) = image.shape[:2]\n",
|
||||
"# def ResizeWithAspectRatio(image, width=None, height=None, inter=cv2.INTER_AREA):\n",
|
||||
"# dim = None\n",
|
||||
"# (h, w) = image.shape[:2]\n",
|
||||
"\n",
|
||||
" if width is None and height is None:\n",
|
||||
" return image\n",
|
||||
" if width is None:\n",
|
||||
" r = height / float(h)\n",
|
||||
" dim = (int(w * r), height)\n",
|
||||
" else:\n",
|
||||
" r = width / float(w)\n",
|
||||
" dim = (width, int(h * r))\n",
|
||||
"# if width is None and height is None:\n",
|
||||
"# return image\n",
|
||||
"# if width is None:\n",
|
||||
"# r = height / float(h)\n",
|
||||
"# dim = (int(w * r), height)\n",
|
||||
"# else:\n",
|
||||
"# r = width / float(w)\n",
|
||||
"# dim = (width, int(h * r))\n",
|
||||
"\n",
|
||||
" return cv2.resize(image, dim, interpolation=inter)"
|
||||
"# return cv2.resize(image, dim, interpolation=inter)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -112,7 +122,7 @@
|
||||
"# cv2.imshow(\"morph\", morph)\n",
|
||||
"# cv2.imshow(\"mask\", mask)\n",
|
||||
"# cv2.imshow(\"result1\", result1)\n",
|
||||
"resizedresult2 = ResizeWithAspectRatio(result2, 1000)\n",
|
||||
"resizedresult2 = mf.ResizeWithAspectRatio(result2, 1000)\n",
|
||||
"cv2.imwrite(\"./testing_space/cropped1.jpg\", resizedresult2)\n",
|
||||
"cv2.imshow(\"result2\", resizedresult2)\n",
|
||||
"cv2.waitKey(0)\n",
|
||||
@ -287,7 +297,7 @@
|
||||
"rotation = model(adjustedtensorizedimage).item()\n",
|
||||
"print(rotation)\n",
|
||||
"rotatedimage = t.Resize(size=1000)(tvf.rotate(adjustedtensorizedimage, rotation))\n",
|
||||
"# imS = ResizeWithAspectRatio(filereadimage, 1000)\n",
|
||||
"# imS = mf.ResizeWithAspectRatio(filereadimage, 1000)\n",
|
||||
"# imS = cv2.resize(filereadimage, (960, 540)) \n",
|
||||
"open_cv_image = np.array(t.ToPILImage()(rotatedimage))\n",
|
||||
"cv2.imshow(f'image', open_cv_image)\n",
|
||||
127
code/autocropper/temp.ipynb
Normal file
127
code/autocropper/temp.ipynb
Normal file
@ -0,0 +1,127 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 203,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import cv2\n",
|
||||
"import numpy as np\n",
|
||||
"\n",
|
||||
"import myfunctions as mf"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 204,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# read image as grayscale\n",
|
||||
"img = cv2.imread('./test_images/IMG_7605.jpg')\n",
|
||||
"img = mf.ResizeWithAspectRatio(mf.SquarePad(fill=255)(img),1000)\n",
|
||||
"# img = mf.rotate(img, 54)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 205,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# cv2.imshow(\"Detected Lines (in red) - Standard Hough Line Transform\", img)\n",
|
||||
"# cv2.waitKey(0)\n",
|
||||
"# cv2.destroyAllWindows()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 206,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# convert to grayscale\n",
|
||||
"gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)\n",
|
||||
"\n",
|
||||
"# threshold\n",
|
||||
"thresh = cv2.threshold(gray, 170, 255, cv2.THRESH_BINARY)[1]\\\n",
|
||||
"\n",
|
||||
"# apply morphology\n",
|
||||
"kernel = np.ones((9,9), np.uint8)\n",
|
||||
"morph = cv2.morphologyEx(thresh, cv2.MORPH_ERODE, kernel)\n",
|
||||
"# morph = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)\n",
|
||||
"kernel = np.ones((11,11), np.uint8)\n",
|
||||
"morph = cv2.morphologyEx(morph, cv2.MORPH_CLOSE, kernel)\n",
|
||||
"kernel = np.ones((2,2), np.uint8)\n",
|
||||
"morph = cv2.morphologyEx(morph, cv2.MORPH_ERODE, kernel)\n",
|
||||
"\n",
|
||||
"# get largest contour\n",
|
||||
"contours = cv2.findContours(morph, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)\n",
|
||||
"contours = contours[0] if len(contours) == 2 else contours[1]\n",
|
||||
"area_thresh = 0\n",
|
||||
"for c in contours:\n",
|
||||
" area = cv2.contourArea(c)\n",
|
||||
" if area > area_thresh:\n",
|
||||
" area_thresh = area\n",
|
||||
" big_contour = c\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# get bounding box\n",
|
||||
"x,y,w,h = cv2.boundingRect(big_contour)\n",
|
||||
"\n",
|
||||
"# draw filled contour on black background\n",
|
||||
"mask = np.zeros_like(gray)\n",
|
||||
"mask = cv2.merge([mask,mask,mask])\n",
|
||||
"# mask = cv2.blur(mask,(121,121))\n",
|
||||
"cv2.drawContours(mask, [big_contour], -1, (255,255,255), cv2.FILLED)\n",
|
||||
"\n",
|
||||
"# apply mask to input\n",
|
||||
"result1 = img.copy()\n",
|
||||
"mask = cv2.blur(mask,(5,5))\n",
|
||||
"result1 = cv2.bitwise_and(result1, mask)\n",
|
||||
"\n",
|
||||
"# crop result\n",
|
||||
"result2 = result1[y:y+h, x:x+w]\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 207,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# view result\n",
|
||||
"# cv2.imshow(\"threshold\", thresh)\n",
|
||||
"# cv2.imshow(\"morph\", morph)\n",
|
||||
"# cv2.imshow(\"mask\", mask)\n",
|
||||
"cv2.imshow(\"result1\", result1)\n",
|
||||
"# cv2.imshow(\"result2\", result2)\n",
|
||||
"cv2.waitKey(0)\n",
|
||||
"cv2.destroyAllWindows()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@ -2,7 +2,7 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -13,9 +13,20 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/usr/local/lib/python3.10/dist-packages/torchvision/datapoints/__init__.py:12: UserWarning: The torchvision.datapoints and torchvision.transforms.v2 namespaces are still Beta. While we do not expect major breaking changes, some APIs may still change according to user feedback. Please submit any feedback you may have in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check out https://github.com/pytorch/vision/issues/7319 to learn more about the APIs that we suspect might involve future changes. You can silence this warning by calling torchvision.disable_beta_transforms_warning().\n",
|
||||
" warnings.warn(_BETA_TRANSFORMS_WARNING)\n",
|
||||
"/usr/local/lib/python3.10/dist-packages/torchvision/transforms/v2/__init__.py:54: UserWarning: The torchvision.datapoints and torchvision.transforms.v2 namespaces are still Beta. While we do not expect major breaking changes, some APIs may still change according to user feedback. Please submit any feedback you may have in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check out https://github.com/pytorch/vision/issues/7319 to learn more about the APIs that we suspect might involve future changes. You can silence this warning by calling torchvision.disable_beta_transforms_warning().\n",
|
||||
" warnings.warn(_BETA_TRANSFORMS_WARNING)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import torch\n",
|
||||
"from torch.utils.data import DataLoader\n",
|
||||
@ -36,15 +47,17 @@
|
||||
"\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"\n",
|
||||
"import numpy as np\n",
|
||||
"\n",
|
||||
"import cv2\n",
|
||||
"import numpy as np\n",
|
||||
"import myfunctions as mf\n",
|
||||
"\n",
|
||||
"torch.cuda.empty_cache()\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -54,13 +67,56 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# print(array)\n",
|
||||
"# print(counter)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"img = cv2.imread('./test_images/IMG_7605.jpg')\n",
|
||||
"img = mf.ResizeWithAspectRatio(img, 1000)\n",
|
||||
"# img = mf.ResizeWithAspectRatio(mf.SquarePad(fill=255)(img),1000)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"rotatedimg = mf.houghlinedeskew(img)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# out = mf.morphologyCrop(img)\n",
|
||||
"# out = cv2.cvtColor(out, cv2.COLOR_BGR2GRAY)\n",
|
||||
"# out = cv2.threshold(out, 200, 255, cv2.THRESH_BINARY)[1]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"cv2.imshow(\"result1\", rotatedimg)\n",
|
||||
"# cv2.imshow(\"result2\", result2)\n",
|
||||
"cv2.waitKey(0)\n",
|
||||
"cv2.destroyAllWindows()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
Loading…
Reference in New Issue
Block a user