receipt_indexer/code/autocropper/temp.ipynb
Ethan Wellenreiter abe1b2358d First testing steps towards dewarping.
Too hard. High level math. For later.

Signed-off-by: Ethan Wellenreiter <ewellenreiter@gmail.com>
2023-10-30 00:36:17 -04:00

1241 lines
43 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.10/dist-packages/torchvision/datapoints/__init__.py:12: UserWarning: The torchvision.datapoints and torchvision.transforms.v2 namespaces are still Beta. While we do not expect major breaking changes, some APIs may still change according to user feedback. Please submit any feedback you may have in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check out https://github.com/pytorch/vision/issues/7319 to learn more about the APIs that we suspect might involve future changes. You can silence this warning by calling torchvision.disable_beta_transforms_warning().\n",
" warnings.warn(_BETA_TRANSFORMS_WARNING)\n",
"/usr/local/lib/python3.10/dist-packages/torchvision/transforms/v2/__init__.py:54: UserWarning: The torchvision.datapoints and torchvision.transforms.v2 namespaces are still Beta. While we do not expect major breaking changes, some APIs may still change according to user feedback. Please submit any feedback you may have in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check out https://github.com/pytorch/vision/issues/7319 to learn more about the APIs that we suspect might involve future changes. You can silence this warning by calling torchvision.disable_beta_transforms_warning().\n",
" warnings.warn(_BETA_TRANSFORMS_WARNING)\n"
]
}
],
"source": [
"import cv2\n",
"import numpy as np\n",
"\n",
"import myfunctions as mf\n",
"\n",
"\n",
"import scipy.stats as st\n",
"import math"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# read image as grayscale\n",
"img = cv2.imread('./test_images/IMG_7605.jpg')\n",
"# img = mf.ResizeWithAspectRatio(img,1000)\n",
"# img = mf.rotate(img, 54)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"def reduceColours(x):\n",
" b=10\n",
" c=1.2\n",
" x = x.astype(int)\n",
" value = ((x-b)*c) + (b*(c-1))\n",
" value = np.clip(value, 0, 255)\n",
" return value.astype(np.uint8)\n",
"\n",
"def bwadjustment(image):\n",
" # # print(image)\n",
" # gray = image.astype(int)\n",
" # gray += 1\n",
" # # print(gray)\n",
" # gray = np.emath.logn(1.0218, gray)\n",
" # # print(gray)\n",
" # gray = np.clip(gray, 0, 255)\n",
" # gray = gray.astype(np.uint8)\n",
" gray = reduceColours(image)\n",
" \n",
" return gray\n",
"\n",
"\n",
"\n",
"\n",
"def testingfunction(image):\n",
" gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)\n",
" \n",
" # sigma = 0.5\n",
" # v = np.median(image)\n",
" # lower = int(max(0, (1.0 - sigma) * v))\n",
" # upper = int(min(255, (1.0 + sigma) * v))\n",
" \n",
" # upper = 500\n",
" \n",
" \n",
" # thresh = cv2.Canny(gray, lower, upper, None, 3)\n",
" \n",
" gray = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 21, 20)\n",
" \n",
" \n",
" return gray\n",
"\n",
"\n",
"\n",
"\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"#####NEED TO WORK ON SCORING THE LINES SO IT PICKS THE CORRECT ORIENTATION (horizontal vs vertical) AND SO THAT THE CROPPING RECTANGLE MOVES/GET TRANSFORMED WITH IT\n",
"\n",
"\n",
"## CAN MAYBE ALSO USE NORMAL HOUGHLINE STUFF TO GET MORE LINES OR GET AN EXTRA BIT OF WEIGHTING OR SOMETHING"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"### NON-UTILITY FUNCTIONS"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"### IGNORED UTILITY FUNCTIONS\n",
"def resize_to_screen(src, maxw=1280, maxh=700, copy=False):\n",
"\n",
" height, width = src.shape[:2]\n",
"\n",
" scl_x = float(width)/maxw\n",
" scl_y = float(height)/maxh\n",
"\n",
" scl = int(np.ceil(max(scl_x, scl_y)))\n",
"\n",
" if scl > 1.0:\n",
" inv_scl = 1.0/scl\n",
" img = cv2.resize(src, (0, 0), None, inv_scl, inv_scl, cv2.INTER_AREA)\n",
" elif copy:\n",
" img = src.copy()\n",
" else:\n",
" img = src\n",
"\n",
" return img\n",
"\n",
"def main():\n",
"\n",
" if len(sys.argv) < 2:\n",
" print 'usage:', sys.argv[0], 'IMAGE1 [IMAGE2 ...]'\n",
" sys.exit(0)\n",
"\n",
" if DEBUG_LEVEL > 0 and DEBUG_OUTPUT != 'file':\n",
" cv2.namedWindow(WINDOW_NAME)\n",
"\n",
" outfiles = []\n",
"\n",
" for imgfile in sys.argv[1:]:\n",
"\n",
" img = cv2.imread(imgfile)\n",
" small = resize_to_screen(img)\n",
" basename = os.path.basename(imgfile)\n",
" name, _ = os.path.splitext(basename)\n",
"\n",
" print 'loaded', basename, 'with size', imgsize(img),\n",
" print 'and resized to', imgsize(small)\n",
"\n",
" if DEBUG_LEVEL >= 3:\n",
" debug_show(name, 0.0, 'original', small)\n",
"\n",
" pagemask, page_outline = get_page_extents(small)\n",
"\n",
" cinfo_list = get_contours(name, small, pagemask, 'text')\n",
" spans = assemble_spans(name, small, pagemask, cinfo_list)\n",
"\n",
" if len(spans) < 3:\n",
" print ' detecting lines because only', len(spans), 'text spans'\n",
" cinfo_list = get_contours(name, small, pagemask, 'line')\n",
" spans2 = assemble_spans(name, small, pagemask, cinfo_list)\n",
" if len(spans2) > len(spans):\n",
" spans = spans2\n",
"\n",
" if len(spans) < 1:\n",
" print 'skipping', name, 'because only', len(spans), 'spans'\n",
" continue\n",
"\n",
" span_points = sample_spans(small.shape, spans)\n",
"\n",
" print ' got', len(spans), 'spans',\n",
" print 'with', sum([len(pts) for pts in span_points]), 'points.'\n",
"\n",
" corners, ycoords, xcoords = keypoints_from_samples(name, small,\n",
" pagemask,\n",
" page_outline,\n",
" span_points)\n",
"\n",
" rough_dims, span_counts, params = get_default_params(corners,\n",
" ycoords, xcoords)\n",
"\n",
" dstpoints = np.vstack((corners[0].reshape((1, 1, 2)),) +\n",
" tuple(span_points))\n",
"\n",
" params = optimize_params(name, small,\n",
" dstpoints,\n",
" span_counts, params)\n",
"\n",
" page_dims = get_page_dims(corners, rough_dims, params)\n",
"\n",
" outfile = remap_image(name, img, small, page_dims, params)\n",
"\n",
" outfiles.append(outfile)\n",
"\n",
" print ' wrote', outfile\n",
" print\n",
"\n",
" print 'to convert to PDF (requires ImageMagick):'\n",
" print ' convert -compress Group4 ' + ' '.join(outfiles) + ' output.pdf'\n",
"\n",
"\n",
"if __name__ == '__main__':\n",
" main()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#!/usr/bin/env python\n",
"######################################################################\n",
"# page_dewarp.py - Proof-of-concept of page-dewarping based on a\n",
"# \"cubic sheet\" model. Requires OpenCV (version 3 or greater),\n",
"# PIL/Pillow, and scipy.optimize.\n",
"######################################################################\n",
"# Author: Matt Zucker\n",
"# Date: July 2016\n",
"# License: MIT License (see LICENSE.txt)\n",
"######################################################################\n",
"\n",
"import os\n",
"import sys\n",
"import datetime\n",
"import cv2\n",
"from PIL import Image\n",
"import numpy as np\n",
"import scipy.optimize\n",
"\n",
"# for some reason pylint complains about cv2 members being undefined :(\n",
"# pylint: disable=E1101\n",
"\n",
"OUTPUT_ZOOM = 1.0 # how much to zoom output relative to *original* image\n",
"OUTPUT_DPI = 300 # just affects stated DPI of PNG, not appearance\n",
"REMAP_DECIMATE = 16 # downscaling factor for remapping image\n",
"\n",
"ADAPTIVE_WINSZ = 55 # window size for adaptive threshold in reduced px\n",
"\n",
"TEXT_MIN_WIDTH = 15 # min reduced px width of detected text contour\n",
"TEXT_MIN_HEIGHT = 2 # min reduced px height of detected text contour\n",
"TEXT_MIN_ASPECT = 1.5 # filter out text contours below this w/h ratio\n",
"TEXT_MAX_THICKNESS = 10 # max reduced px thickness of detected text contour\n",
"\n",
"EDGE_MAX_OVERLAP = 1.0 # max reduced px horiz. overlap of contours in span\n",
"EDGE_MAX_LENGTH = 100.0 # max reduced px length of edge connecting contours\n",
"EDGE_ANGLE_COST = 10.0 # cost of angles in edges (tradeoff vs. length)\n",
"EDGE_MAX_ANGLE = 7.5 # maximum change in angle allowed between contours\n",
"\n",
"RVEC_IDX = slice(0, 3) # index of rvec in params vector\n",
"TVEC_IDX = slice(3, 6) # index of tvec in params vector\n",
"CUBIC_IDX = slice(6, 8) # index of cubic slopes in params vector\n",
"\n",
"SPAN_MIN_WIDTH = 30 # minimum reduced px width for span\n",
"SPAN_PX_PER_STEP = 20 # reduced px spacing for sampling along spans\n",
"FOCAL_LENGTH = 1.2 # normalized focal length of camera\n",
"\n",
"DEBUG_LEVEL = 0 # 0=none, 1=some, 2=lots, 3=all\n",
"DEBUG_OUTPUT = 'file' # file, screen, both\n",
"\n",
"WINDOW_NAME = 'Dewarp' # Window name for visualization\n",
"\n",
"# nice color palette for visualizing contours, etc.\n",
"CCOLORS = [\n",
" (255, 0, 0),\n",
" (255, 63, 0),\n",
" (255, 127, 0),\n",
" (255, 191, 0),\n",
" (255, 255, 0),\n",
" (191, 255, 0),\n",
" (127, 255, 0),\n",
" (63, 255, 0),\n",
" (0, 255, 0),\n",
" (0, 255, 63),\n",
" (0, 255, 127),\n",
" (0, 255, 191),\n",
" (0, 255, 255),\n",
" (0, 191, 255),\n",
" (0, 127, 255),\n",
" (0, 63, 255),\n",
" (0, 0, 255),\n",
" (63, 0, 255),\n",
" (127, 0, 255),\n",
" (191, 0, 255),\n",
" (255, 0, 255),\n",
" (255, 0, 191),\n",
" (255, 0, 127),\n",
" (255, 0, 63),\n",
"]\n",
"\n",
"# default intrinsic parameter matrix\n",
"K = np.array([\n",
" [FOCAL_LENGTH, 0, 0],\n",
" [0, FOCAL_LENGTH, 0],\n",
" [0, 0, 1]], dtype=np.float32)\n",
"\n",
"\n",
"def debug_show(name, step, text, display):\n",
"\n",
" if DEBUG_OUTPUT != 'screen':\n",
" filetext = text.replace(' ', '_')\n",
" outfile = name + '_debug_' + str(step) + '_' + filetext + '.png'\n",
" cv2.imwrite(outfile, display)\n",
"\n",
" if DEBUG_OUTPUT != 'file':\n",
"\n",
" image = display.copy()\n",
" height = image.shape[0]\n",
"\n",
" cv2.putText(image, text, (16, height-16),\n",
" cv2.FONT_HERSHEY_SIMPLEX, 1.0,\n",
" (0, 0, 0), 3, cv2.LINE_AA)\n",
"\n",
" cv2.putText(image, text, (16, height-16),\n",
" cv2.FONT_HERSHEY_SIMPLEX, 1.0,\n",
" (255, 255, 255), 1, cv2.LINE_AA)\n",
"\n",
" cv2.imshow(WINDOW_NAME, image)\n",
"\n",
" while cv2.waitKey(5) < 0:\n",
" pass\n",
"\n",
"\n",
"def round_nearest_multiple(i, factor):\n",
" i = int(i)\n",
" rem = i % factor\n",
" if not rem:\n",
" return i\n",
" else:\n",
" return i + factor - rem\n",
"\n",
"\n",
"def pix2norm(shape, pts):\n",
" height, width = shape[:2]\n",
" scl = 2.0/(max(height, width))\n",
" offset = np.array([width, height], dtype=pts.dtype).reshape((-1, 1, 2))*0.5\n",
" return (pts - offset) * scl\n",
"\n",
"\n",
"def norm2pix(shape, pts, as_integer):\n",
" height, width = shape[:2]\n",
" scl = max(height, width)*0.5\n",
" offset = np.array([0.5*width, 0.5*height],\n",
" dtype=pts.dtype).reshape((-1, 1, 2))\n",
" rval = pts * scl + offset\n",
" if as_integer:\n",
" return (rval + 0.5).astype(int)\n",
" else:\n",
" return rval\n",
"\n",
"\n",
"def fltp(point):\n",
" return tuple(point.astype(int).flatten())\n",
"\n",
"\n",
"def draw_correspondences(img, dstpoints, projpts):\n",
"\n",
" display = img.copy()\n",
" dstpoints = norm2pix(img.shape, dstpoints, True)\n",
" projpts = norm2pix(img.shape, projpts, True)\n",
"\n",
" for pts, color in [(projpts, (255, 0, 0)),\n",
" (dstpoints, (0, 0, 255))]:\n",
"\n",
" for point in pts:\n",
" cv2.circle(display, fltp(point), 3, color, -1, cv2.LINE_AA)\n",
"\n",
" for point_a, point_b in zip(projpts, dstpoints):\n",
" cv2.line(display, fltp(point_a), fltp(point_b),\n",
" (255, 255, 255), 1, cv2.LINE_AA)\n",
"\n",
" return display\n",
"\n",
"\n",
"def get_default_params(corners, ycoords, xcoords):\n",
"\n",
" # page width and height\n",
" page_width = np.linalg.norm(corners[1] - corners[0])\n",
" page_height = np.linalg.norm(corners[-1] - corners[0])\n",
" rough_dims = (page_width, page_height)\n",
"\n",
" # our initial guess for the cubic has no slope\n",
" cubic_slopes = [0.0, 0.0]\n",
"\n",
" # object points of flat page in 3D coordinates\n",
" corners_object3d = np.array([\n",
" [0, 0, 0],\n",
" [page_width, 0, 0],\n",
" [page_width, page_height, 0],\n",
" [0, page_height, 0]])\n",
"\n",
" # estimate rotation and translation from four 2D-to-3D point\n",
" # correspondences\n",
" _, rvec, tvec = cv2.solvePnP(corners_object3d,\n",
" corners, K, np.zeros(5))\n",
"\n",
" span_counts = [len(xc) for xc in xcoords]\n",
"\n",
" params = np.hstack((np.array(rvec).flatten(),\n",
" np.array(tvec).flatten(),\n",
" np.array(cubic_slopes).flatten(),\n",
" ycoords.flatten()) +\n",
" tuple(xcoords))\n",
"\n",
" return rough_dims, span_counts, params\n",
"\n",
"\n",
"def project_xy(xy_coords, pvec):\n",
"\n",
" # get cubic polynomial coefficients given\n",
" #\n",
" # f(0) = 0, f'(0) = alpha\n",
" # f(1) = 0, f'(1) = beta\n",
"\n",
" alpha, beta = tuple(pvec[CUBIC_IDX])\n",
"\n",
" poly = np.array([\n",
" alpha + beta,\n",
" -2*alpha - beta,\n",
" alpha,\n",
" 0])\n",
"\n",
" xy_coords = xy_coords.reshape((-1, 2))\n",
" z_coords = np.polyval(poly, xy_coords[:, 0])\n",
"\n",
" objpoints = np.hstack((xy_coords, z_coords.reshape((-1, 1))))\n",
"\n",
" image_points, _ = cv2.projectPoints(objpoints,\n",
" pvec[RVEC_IDX],\n",
" pvec[TVEC_IDX],\n",
" K, np.zeros(5))\n",
"\n",
" return image_points\n",
"\n",
"\n",
"def project_keypoints(pvec, keypoint_index):\n",
"\n",
" xy_coords = pvec[keypoint_index]\n",
" xy_coords[0, :] = 0\n",
"\n",
" return project_xy(xy_coords, pvec)\n",
"\n",
"\n",
"def box(width, height):\n",
" return np.ones((height, width), dtype=np.uint8)\n",
"\n",
"\n",
"\n",
"def get_mask(name, small, pagemask, masktype):\n",
"\n",
" sgray = cv2.cvtColor(small, cv2.COLOR_RGB2GRAY)\n",
"\n",
" if masktype == 'text':\n",
"\n",
" mask = cv2.adaptiveThreshold(sgray, 255, cv2.ADAPTIVE_THRESH_MEAN_C,\n",
" cv2.THRESH_BINARY_INV,\n",
" ADAPTIVE_WINSZ,\n",
" 25)\n",
"\n",
" if DEBUG_LEVEL >= 3:\n",
" debug_show(name, 0.1, 'thresholded', mask)\n",
"\n",
" mask = cv2.dilate(mask, box(9, 1))\n",
"\n",
" if DEBUG_LEVEL >= 3:\n",
" debug_show(name, 0.2, 'dilated', mask)\n",
"\n",
" mask = cv2.erode(mask, box(1, 3))\n",
"\n",
" if DEBUG_LEVEL >= 3:\n",
" debug_show(name, 0.3, 'eroded', mask)\n",
"\n",
" else:\n",
"\n",
" mask = cv2.adaptiveThreshold(sgray, 255, cv2.ADAPTIVE_THRESH_MEAN_C,\n",
" cv2.THRESH_BINARY_INV,\n",
" ADAPTIVE_WINSZ,\n",
" 7)\n",
"\n",
" if DEBUG_LEVEL >= 3:\n",
" debug_show(name, 0.4, 'thresholded', mask)\n",
"\n",
" mask = cv2.erode(mask, box(3, 1), iterations=3)\n",
"\n",
" if DEBUG_LEVEL >= 3:\n",
" debug_show(name, 0.5, 'eroded', mask)\n",
"\n",
" mask = cv2.dilate(mask, box(8, 2))\n",
"\n",
" if DEBUG_LEVEL >= 3:\n",
" debug_show(name, 0.6, 'dilated', mask)\n",
"\n",
" return np.minimum(mask, pagemask)\n",
"\n",
"\n",
"def interval_measure_overlap(int_a, int_b):\n",
" return min(int_a[1], int_b[1]) - max(int_a[0], int_b[0])\n",
"\n",
"\n",
"def angle_dist(angle_b, angle_a):\n",
"\n",
" diff = angle_b - angle_a\n",
"\n",
" while diff > np.pi:\n",
" diff -= 2*np.pi\n",
"\n",
" while diff < -np.pi:\n",
" diff += 2*np.pi\n",
"\n",
" return np.abs(diff)\n",
"\n",
"\n",
"def blob_mean_and_tangent(contour):\n",
"\n",
" moments = cv2.moments(contour)\n",
"\n",
" area = moments['m00']\n",
"\n",
" mean_x = moments['m10'] / area\n",
" mean_y = moments['m01'] / area\n",
"\n",
" moments_matrix = np.array([\n",
" [moments['mu20'], moments['mu11']],\n",
" [moments['mu11'], moments['mu02']]\n",
" ]) / area\n",
"\n",
" _, svd_u, _ = cv2.SVDecomp(moments_matrix)\n",
"\n",
" center = np.array([mean_x, mean_y])\n",
" tangent = svd_u[:, 0].flatten().copy()\n",
"\n",
" return center, tangent\n",
"\n",
"\n",
"class ContourInfo(object):\n",
"\n",
" def __init__(self, contour, rect, mask):\n",
"\n",
" self.contour = contour\n",
" self.rect = rect\n",
" self.mask = mask\n",
"\n",
" self.center, self.tangent = blob_mean_and_tangent(contour)\n",
"\n",
" self.angle = np.arctan2(self.tangent[1], self.tangent[0])\n",
"\n",
" clx = [self.proj_x(point) for point in contour]\n",
"\n",
" lxmin = min(clx)\n",
" lxmax = max(clx)\n",
"\n",
" self.local_xrng = (lxmin, lxmax)\n",
"\n",
" self.point0 = self.center + self.tangent * lxmin\n",
" self.point1 = self.center + self.tangent * lxmax\n",
"\n",
" self.pred = None\n",
" self.succ = None\n",
"\n",
" def proj_x(self, point):\n",
" return np.dot(self.tangent, point.flatten()-self.center)\n",
"\n",
" def local_overlap(self, other):\n",
" xmin = self.proj_x(other.point0)\n",
" xmax = self.proj_x(other.point1)\n",
" return interval_measure_overlap(self.local_xrng, (xmin, xmax))\n",
"\n",
"\n",
"def generate_candidate_edge(cinfo_a, cinfo_b):\n",
"\n",
" # we want a left of b (so a's successor will be b and b's\n",
" # predecessor will be a) make sure right endpoint of b is to the\n",
" # right of left endpoint of a.\n",
" if cinfo_a.point0[0] > cinfo_b.point1[0]:\n",
" tmp = cinfo_a\n",
" cinfo_a = cinfo_b\n",
" cinfo_b = tmp\n",
"\n",
" x_overlap_a = cinfo_a.local_overlap(cinfo_b)\n",
" x_overlap_b = cinfo_b.local_overlap(cinfo_a)\n",
"\n",
" overall_tangent = cinfo_b.center - cinfo_a.center\n",
" overall_angle = np.arctan2(overall_tangent[1], overall_tangent[0])\n",
"\n",
" delta_angle = max(angle_dist(cinfo_a.angle, overall_angle),\n",
" angle_dist(cinfo_b.angle, overall_angle)) * 180/np.pi\n",
"\n",
" # we want the largest overlap in x to be small\n",
" x_overlap = max(x_overlap_a, x_overlap_b)\n",
"\n",
" dist = np.linalg.norm(cinfo_b.point0 - cinfo_a.point1)\n",
"\n",
" if (dist > EDGE_MAX_LENGTH or\n",
" x_overlap > EDGE_MAX_OVERLAP or\n",
" delta_angle > EDGE_MAX_ANGLE):\n",
" return None\n",
" else:\n",
" score = dist + delta_angle*EDGE_ANGLE_COST\n",
" return (score, cinfo_a, cinfo_b)\n",
"\n",
"\n",
"def make_tight_mask(contour, xmin, ymin, width, height):\n",
"\n",
" tight_mask = np.zeros((height, width), dtype=np.uint8)\n",
" tight_contour = contour - np.array((xmin, ymin)).reshape((-1, 1, 2))\n",
"\n",
" cv2.drawContours(tight_mask, [tight_contour], 0,\n",
" (1, 1, 1), -1)\n",
"\n",
" return tight_mask\n",
"\n",
"\n",
"def get_contours(name, small, pagemask, masktype):\n",
"\n",
" mask = get_mask(name, small, pagemask, masktype)\n",
"\n",
" _, contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL,\n",
" cv2.CHAIN_APPROX_NONE)\n",
"\n",
" contours_out = []\n",
"\n",
" for contour in contours:\n",
"\n",
" rect = cv2.boundingRect(contour)\n",
" xmin, ymin, width, height = rect\n",
"\n",
" if (width < TEXT_MIN_WIDTH or\n",
" height < TEXT_MIN_HEIGHT or\n",
" width < TEXT_MIN_ASPECT*height):\n",
" continue\n",
"\n",
" tight_mask = make_tight_mask(contour, xmin, ymin, width, height)\n",
"\n",
" if tight_mask.sum(axis=0).max() > TEXT_MAX_THICKNESS:\n",
" continue\n",
"\n",
" contours_out.append(ContourInfo(contour, rect, tight_mask))\n",
"\n",
" if DEBUG_LEVEL >= 2:\n",
" visualize_contours(name, small, contours_out)\n",
"\n",
" return contours_out\n",
"\n",
"\n",
"def assemble_spans(name, small, pagemask, cinfo_list):\n",
"\n",
" # sort list\n",
" cinfo_list = sorted(cinfo_list, key=lambda cinfo: cinfo.rect[1])\n",
"\n",
" # generate all candidate edges\n",
" candidate_edges = []\n",
"\n",
" for i, cinfo_i in enumerate(cinfo_list):\n",
" for j in range(i):\n",
" # note e is of the form (score, left_cinfo, right_cinfo)\n",
" edge = generate_candidate_edge(cinfo_i, cinfo_list[j])\n",
" if edge is not None:\n",
" candidate_edges.append(edge)\n",
"\n",
" # sort candidate edges by score (lower is better)\n",
" candidate_edges.sort()\n",
"\n",
" # for each candidate edge\n",
" for _, cinfo_a, cinfo_b in candidate_edges:\n",
" # if left and right are unassigned, join them\n",
" if cinfo_a.succ is None and cinfo_b.pred is None:\n",
" cinfo_a.succ = cinfo_b\n",
" cinfo_b.pred = cinfo_a\n",
"\n",
" # generate list of spans as output\n",
" spans = []\n",
"\n",
" # until we have removed everything from the list\n",
" while cinfo_list:\n",
"\n",
" # get the first on the list\n",
" cinfo = cinfo_list[0]\n",
"\n",
" # keep following predecessors until none exists\n",
" while cinfo.pred:\n",
" cinfo = cinfo.pred\n",
"\n",
" # start a new span\n",
" cur_span = []\n",
"\n",
" width = 0.0\n",
"\n",
" # follow successors til end of span\n",
" while cinfo:\n",
" # remove from list (sadly making this loop *also* O(n^2)\n",
" cinfo_list.remove(cinfo)\n",
" # add to span\n",
" cur_span.append(cinfo)\n",
" width += cinfo.local_xrng[1] - cinfo.local_xrng[0]\n",
" # set successor\n",
" cinfo = cinfo.succ\n",
"\n",
" # add if long enough\n",
" if width > SPAN_MIN_WIDTH:\n",
" spans.append(cur_span)\n",
"\n",
" if DEBUG_LEVEL >= 2:\n",
" visualize_spans(name, small, pagemask, spans)\n",
"\n",
" return spans\n",
"\n",
"\n",
"def sample_spans(shape, spans):\n",
"\n",
" span_points = []\n",
"\n",
" for span in spans:\n",
"\n",
" contour_points = []\n",
"\n",
" for cinfo in span:\n",
"\n",
" yvals = np.arange(cinfo.mask.shape[0]).reshape((-1, 1))\n",
" totals = (yvals * cinfo.mask).sum(axis=0)\n",
" means = totals / cinfo.mask.sum(axis=0)\n",
"\n",
" xmin, ymin = cinfo.rect[:2]\n",
"\n",
" step = SPAN_PX_PER_STEP\n",
" start = ((len(means)-1) % step) / 2\n",
"\n",
" contour_points += [(x+xmin, means[x]+ymin)\n",
" for x in range(start, len(means), step)]\n",
"\n",
" contour_points = np.array(contour_points,\n",
" dtype=np.float32).reshape((-1, 1, 2))\n",
"\n",
" contour_points = pix2norm(shape, contour_points)\n",
"\n",
" span_points.append(contour_points)\n",
"\n",
" return span_points\n",
"\n",
"\n",
"def keypoints_from_samples(name, small, pagemask, page_outline,\n",
" span_points):\n",
"\n",
" all_evecs = np.array([[0.0, 0.0]])\n",
" all_weights = 0\n",
"\n",
" for points in span_points:\n",
"\n",
" _, evec = cv2.PCACompute(points.reshape((-1, 2)),\n",
" None, maxComponents=1)\n",
"\n",
" weight = np.linalg.norm(points[-1] - points[0])\n",
"\n",
" all_evecs += evec * weight\n",
" all_weights += weight\n",
"\n",
" evec = all_evecs / all_weights\n",
"\n",
" x_dir = evec.flatten()\n",
"\n",
" if x_dir[0] < 0:\n",
" x_dir = -x_dir\n",
"\n",
" y_dir = np.array([-x_dir[1], x_dir[0]])\n",
"\n",
" pagecoords = cv2.convexHull(page_outline)\n",
" pagecoords = pix2norm(pagemask.shape, pagecoords.reshape((-1, 1, 2)))\n",
" pagecoords = pagecoords.reshape((-1, 2))\n",
"\n",
" px_coords = np.dot(pagecoords, x_dir)\n",
" py_coords = np.dot(pagecoords, y_dir)\n",
"\n",
" px0 = px_coords.min()\n",
" px1 = px_coords.max()\n",
"\n",
" py0 = py_coords.min()\n",
" py1 = py_coords.max()\n",
"\n",
" p00 = px0 * x_dir + py0 * y_dir\n",
" p10 = px1 * x_dir + py0 * y_dir\n",
" p11 = px1 * x_dir + py1 * y_dir\n",
" p01 = px0 * x_dir + py1 * y_dir\n",
"\n",
" corners = np.vstack((p00, p10, p11, p01)).reshape((-1, 1, 2))\n",
"\n",
" ycoords = []\n",
" xcoords = []\n",
"\n",
" for points in span_points:\n",
" pts = points.reshape((-1, 2))\n",
" px_coords = np.dot(pts, x_dir)\n",
" py_coords = np.dot(pts, y_dir)\n",
" ycoords.append(py_coords.mean() - py0)\n",
" xcoords.append(px_coords - px0)\n",
"\n",
" if DEBUG_LEVEL >= 2:\n",
" visualize_span_points(name, small, span_points, corners)\n",
"\n",
" return corners, np.array(ycoords), xcoords\n",
"\n",
"\n",
"def visualize_contours(name, small, cinfo_list):\n",
"\n",
" regions = np.zeros_like(small)\n",
"\n",
" for j, cinfo in enumerate(cinfo_list):\n",
"\n",
" cv2.drawContours(regions, [cinfo.contour], 0,\n",
" CCOLORS[j % len(CCOLORS)], -1)\n",
"\n",
" mask = (regions.max(axis=2) != 0)\n",
"\n",
" display = small.copy()\n",
" display[mask] = (display[mask]/2) + (regions[mask]/2)\n",
"\n",
" for j, cinfo in enumerate(cinfo_list):\n",
" color = CCOLORS[j % len(CCOLORS)]\n",
" color = tuple([c/4 for c in color])\n",
"\n",
" cv2.circle(display, fltp(cinfo.center), 3,\n",
" (255, 255, 255), 1, cv2.LINE_AA)\n",
"\n",
" cv2.line(display, fltp(cinfo.point0), fltp(cinfo.point1),\n",
" (255, 255, 255), 1, cv2.LINE_AA)\n",
"\n",
" debug_show(name, 1, 'contours', display)\n",
"\n",
"\n",
"def visualize_spans(name, small, pagemask, spans):\n",
"\n",
" regions = np.zeros_like(small)\n",
"\n",
" for i, span in enumerate(spans):\n",
" contours = [cinfo.contour for cinfo in span]\n",
" cv2.drawContours(regions, contours, -1,\n",
" CCOLORS[i*3 % len(CCOLORS)], -1)\n",
"\n",
" mask = (regions.max(axis=2) != 0)\n",
"\n",
" display = small.copy()\n",
" display[mask] = (display[mask]/2) + (regions[mask]/2)\n",
" display[pagemask == 0] /= 4\n",
"\n",
" debug_show(name, 2, 'spans', display)\n",
"\n",
"\n",
"def visualize_span_points(name, small, span_points, corners):\n",
"\n",
" display = small.copy()\n",
"\n",
" for i, points in enumerate(span_points):\n",
"\n",
" points = norm2pix(small.shape, points, False)\n",
"\n",
" mean, small_evec = cv2.PCACompute(points.reshape((-1, 2)),\n",
" None,\n",
" maxComponents=1)\n",
"\n",
" dps = np.dot(points.reshape((-1, 2)), small_evec.reshape((2, 1)))\n",
" dpm = np.dot(mean.flatten(), small_evec.flatten())\n",
"\n",
" point0 = mean + small_evec * (dps.min()-dpm)\n",
" point1 = mean + small_evec * (dps.max()-dpm)\n",
"\n",
" for point in points:\n",
" cv2.circle(display, fltp(point), 3,\n",
" CCOLORS[i % len(CCOLORS)], -1, cv2.LINE_AA)\n",
"\n",
" cv2.line(display, fltp(point0), fltp(point1),\n",
" (255, 255, 255), 1, cv2.LINE_AA)\n",
"\n",
" cv2.polylines(display, [norm2pix(small.shape, corners, True)],\n",
" True, (255, 255, 255))\n",
"\n",
" debug_show(name, 3, 'span points', display)\n",
"\n",
"\n",
"def imgsize(img):\n",
" height, width = img.shape[:2]\n",
" return '{}x{}'.format(width, height)\n",
"\n",
"\n",
"def make_keypoint_index(span_counts):\n",
"\n",
" nspans = len(span_counts)\n",
" npts = sum(span_counts)\n",
" keypoint_index = np.zeros((npts+1, 2), dtype=int)\n",
" start = 1\n",
"\n",
" for i, count in enumerate(span_counts):\n",
" end = start + count\n",
" keypoint_index[start:start+end, 1] = 8+i\n",
" start = end\n",
"\n",
" keypoint_index[1:, 0] = np.arange(npts) + 8 + nspans\n",
"\n",
" return keypoint_index\n",
"\n",
"\n",
"def optimize_params(name, small, dstpoints, span_counts, params):\n",
"\n",
" keypoint_index = make_keypoint_index(span_counts)\n",
"\n",
" def objective(pvec):\n",
" ppts = project_keypoints(pvec, keypoint_index)\n",
" return np.sum((dstpoints - ppts)**2)\n",
"\n",
" print ' initial objective is', objective(params)\n",
"\n",
" if DEBUG_LEVEL >= 1:\n",
" projpts = project_keypoints(params, keypoint_index)\n",
" display = draw_correspondences(small, dstpoints, projpts)\n",
" debug_show(name, 4, 'keypoints before', display)\n",
"\n",
" print ' optimizing', len(params), 'parameters...'\n",
" start = datetime.datetime.now()\n",
" res = scipy.optimize.minimize(objective, params,\n",
" method='Powell')\n",
" end = datetime.datetime.now()\n",
" print ' optimization took', round((end-start).total_seconds(), 2), 'sec.'\n",
" print ' final objective is', res.fun\n",
" params = res.x\n",
"\n",
" if DEBUG_LEVEL >= 1:\n",
" projpts = project_keypoints(params, keypoint_index)\n",
" display = draw_correspondences(small, dstpoints, projpts)\n",
" debug_show(name, 5, 'keypoints after', display)\n",
"\n",
" return params\n",
"\n",
"\n",
"def get_page_dims(corners, rough_dims, params):\n",
"\n",
" dst_br = corners[2].flatten()\n",
"\n",
" dims = np.array(rough_dims)\n",
"\n",
" def objective(dims):\n",
" proj_br = project_xy(dims, params)\n",
" return np.sum((dst_br - proj_br.flatten())**2)\n",
"\n",
" res = scipy.optimize.minimize(objective, dims, method='Powell')\n",
" dims = res.x\n",
"\n",
" print ' got page dims', dims[0], 'x', dims[1]\n",
"\n",
" return dims\n",
"\n",
"\n",
"def remap_image(name, img, small, page_dims, params):\n",
"\n",
" height = 0.5 * page_dims[1] * OUTPUT_ZOOM * img.shape[0]\n",
" height = round_nearest_multiple(height, REMAP_DECIMATE)\n",
"\n",
" width = round_nearest_multiple(height * page_dims[0] / page_dims[1],\n",
" REMAP_DECIMATE)\n",
"\n",
" print ' output will be {}x{}'.format(width, height)\n",
"\n",
" height_small = height / REMAP_DECIMATE\n",
" width_small = width / REMAP_DECIMATE\n",
"\n",
" page_x_range = np.linspace(0, page_dims[0], width_small)\n",
" page_y_range = np.linspace(0, page_dims[1], height_small)\n",
"\n",
" page_x_coords, page_y_coords = np.meshgrid(page_x_range, page_y_range)\n",
"\n",
" page_xy_coords = np.hstack((page_x_coords.flatten().reshape((-1, 1)),\n",
" page_y_coords.flatten().reshape((-1, 1))))\n",
"\n",
" page_xy_coords = page_xy_coords.astype(np.float32)\n",
"\n",
" image_points = project_xy(page_xy_coords, params)\n",
" image_points = norm2pix(img.shape, image_points, False)\n",
"\n",
" image_x_coords = image_points[:, 0, 0].reshape(page_x_coords.shape)\n",
" image_y_coords = image_points[:, 0, 1].reshape(page_y_coords.shape)\n",
"\n",
" image_x_coords = cv2.resize(image_x_coords, (width, height),\n",
" interpolation=cv2.INTER_CUBIC)\n",
"\n",
" image_y_coords = cv2.resize(image_y_coords, (width, height),\n",
" interpolation=cv2.INTER_CUBIC)\n",
"\n",
" img_gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)\n",
"\n",
" remapped = cv2.remap(img_gray, image_x_coords, image_y_coords,\n",
" cv2.INTER_CUBIC,\n",
" None, cv2.BORDER_REPLICATE)\n",
"\n",
" thresh = cv2.adaptiveThreshold(remapped, 255, cv2.ADAPTIVE_THRESH_MEAN_C,\n",
" cv2.THRESH_BINARY, ADAPTIVE_WINSZ, 25)\n",
"\n",
" pil_image = Image.fromarray(thresh)\n",
" pil_image = pil_image.convert('1')\n",
"\n",
" threshfile = name + '_thresh.png'\n",
" pil_image.save(threshfile, dpi=(OUTPUT_DPI, OUTPUT_DPI))\n",
"\n",
" if DEBUG_LEVEL >= 1:\n",
" height = small.shape[0]\n",
" width = int(round(height * float(thresh.shape[1])/thresh.shape[0]))\n",
" display = cv2.resize(thresh, (width, height),\n",
" interpolation=cv2.INTER_AREA)\n",
" debug_show(name, 6, 'output', display)\n",
"\n",
" return threshfile\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"PAGE_MARGIN_X = 0 # reduced px to ignore near L/R edge\n",
"PAGE_MARGIN_Y = 0 # reduced px to ignore near T/B edge\n",
"\n",
"\n",
"def get_page_extents(small):\n",
"\n",
" height, width = small.shape[:2]\n",
"\n",
" xmin = PAGE_MARGIN_X\n",
" ymin = PAGE_MARGIN_Y\n",
" xmax = width-PAGE_MARGIN_X\n",
" ymax = height-PAGE_MARGIN_Y\n",
"\n",
" page = np.zeros((height, width), dtype=np.uint8)\n",
" cv2.rectangle(page, (xmin, ymin), (xmax, ymax), (255, 255, 255), -1)\n",
"\n",
" outline = np.array([\n",
" [xmin, ymin],\n",
" [xmin, ymax],\n",
" [xmax, ymax],\n",
" [xmax, ymin]])\n",
"\n",
" return page, outline\n",
"\n",
"\n",
"def cubicsheetdewarp(image):\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def houghlineprocessing(image):\n",
" croppedanddeskewed, _ = mf.houghlinedeskewandcrop(image)\n",
" ##IF IT DOESN'T CHANGE THE IMAGE (CHANGE THE _ TO SOMETHING USEFUL), THEN CROPCLARIFYING SHOULD JUST DO THE TEXT ISOLATION SECTION AND NOT TRY AND WHITE OUT ANY BACKGROUND. \n",
" ## IF THERE'S NO CROPPING, MAYBE EVEN JUMP RIGHT TO USING THE EXTERNAL DESKEW FIRST BEFORE TOSSING IT INTO CROPCLARIFYING\n",
" \n",
" postprocessed = mf.cropclarifying(croppedanddeskewed)\n",
" # return postprocessed\n",
" \n",
" # dewarp here\n",
" postprocessed = cubicsheetdewarp(postprocessed)\n",
" \n",
" return postprocessed\n",
" \n",
" postprocessed = mf.croptoblack(postprocessed)\n",
" \n",
" postprocessed = cv2.cvtColor(postprocessed, cv2.COLOR_GRAY2BGR)\n",
" \n",
" final = mf.externaldeskew(postprocessed, fill=(255,255,255))\n",
" \n",
" # cv2.imshow(\"postprocessed\", mf.ResizeWithAspectRatio(postprocessed, 1000))\n",
" # cv2.imshow(\"final\", mf.ResizeWithAspectRatio(final, 1000))\n",
" # cv2.waitKey(0)\n",
" # cv2.destroyAllWindows()\n",
" \n",
" return final"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"out = houghlineprocessing(img)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# cropped, rotangle = houghlinedeskewandcrop(img)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# _, angle = mf.houghlinedeskew(img, withangle=True)\n",
"# print(angle)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# if (abs(rotangle - angle) - 90 <= 5):\n",
"# print(\"hi\")q"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"cv2.imshow(\"result2\", mf.ResizeWithAspectRatio(out, height=1000))\n",
"cv2.waitKey(0)\n",
"cv2.destroyAllWindows()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# prepped = mf.squareandthenresize(cropped, fill=255, width=1000)\n",
"# prepped, _ = mf.premorphCrop(prepped)\n",
"# prepped = mf.squareandthenresize(prepped, fill=255, width=1000)\n",
"# gray1 = cv2.cvtColor(prepped, cv2.COLOR_BGR2GRAY)\n",
"# dst1 = cv2.Canny(gray1, 0, 500, None, 3)\n",
"\n",
"# # cdstP = prepped.copy()\n",
"# # linesP = cv2.HoughLinesP(dst1, 1, np.pi / 180, 30, None, 90, 30)\n",
"# # if linesP is not None:\n",
"# # for i in range(0, len(linesP)):\n",
"# # l = linesP[i][0]\n",
"# # # anglesP[i] = mf.lineAngle(l)\n",
"# # cv2.line(cdstP, (l[0], l[1]), (l[2], l[3]), (0,0,255), 3, cv2.LINE_AA)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"# cv2.imshow(\"result2\", dst1)\n",
"# cv2.waitKey(0)\n",
"# cv2.destroyAllWindows()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"### tasks. use contours to get the biggest contour and get a mask from it and then white out the external area. and then use thresholding or whatever to make the paper white. can try and get the mean colour of the paper area and then use that to autothreshold or something."
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"# gray = cv2.cvtColor(cropped, cv2.COLOR_BGR2GRAY)\n",
"# thresh = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)[1]\n",
"# contours, heirarchy =cv2.findContours(thresh,cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)\n",
"# mx = (0,0,0,0)\n",
"# mx_area = 0\n",
"# for cont in contours:\n",
"# rect = cv2.boundingRect(cont)\n",
"# area = mf.rectArea(rect)\n",
"# if (area > mx_area):\n",
"# mx = rect\n",
"# mx_area = area\n",
"\n",
"# cropped = cv2.rectangle(cropped, (mx[0], mx[1]), (mx[0]+mx[2], mx[1]+mx[3]), (0,255,0), 3)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"# # # view result\n",
"# # # cv2.imshow(\"threshold\", thresh)\n",
"# # # cv2.imshow(\"morph\", morph)\n",
"# # # cv2.imshow(\"mask\", mask)\n",
"# # cv2.imshow(\"result1\", mf.ResizeWithAspectRatio(cdstP,height=1000))\n",
"# cv2.imshow(\"result2\", cropped)\n",
"# cv2.waitKey(0)\n",
"# cv2.destroyAllWindows()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 2
}