V1 of line extractor #8

Merged
ewellenr merged 2 commits from textextractor-test into textextractor 2023-10-24 15:08:24 -04:00
14 changed files with 764 additions and 119 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 412 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 426 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 550 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 168 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 121 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.0 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.1 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.3 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.3 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 139 KiB

View File

@ -0,0 +1,292 @@
import cv2
import numpy as np
import sys
sys.path.insert(0, '../../autocropper')
import myfunctions as mf
## helper functions
def rectcenterpt(rect, xywhrect=True, retint=False):
    """Return the centre of a rectangle as an ``(x, y)`` tuple.

    Parameters:
      `rect`     - 4-item sequence. Read as ``(x, y, w, h)`` when `xywhrect`
                   is true, otherwise as two corners ``(x1, y1, x2, y2)``.
      `xywhrect` - Selects which of the two layouts above `rect` uses.
      `retint`   - When true, both coordinates are truncated with ``int()``.
    """
    if xywhrect:
        # Origin plus half of the extent.
        centre = (rect[0] + rect[2] / 2, rect[1] + rect[3] / 2)
    else:
        # Midpoint between the two corners.
        centre = ((rect[0] + rect[2]) / 2, (rect[1] + rect[3]) / 2)
    return tuple(int(value) for value in centre) if retint else centre
def containsamount(outerrect, innerrect, percentage=1):
    """Tell whether enough of `innerrect` is covered by `outerrect`.

    The rectangle returned by ``mf.overlapRect([outerrect, innerrect])`` is
    compared, by area (item 2 times item 3), against the area of `innerrect`.
    NOTE(review): presumably ``overlapRect`` returns the intersection as
    ``(x, y, w, h)`` -- confirm in ``myfunctions``.

    Returns True when overlap area / inner area is at least `percentage`
    (a fraction, default 1), otherwise False.
    """
    overlap = mf.overlapRect([outerrect, innerrect])
    overlap_area = overlap[2] * overlap[3]
    inner_area = innerrect[2] * innerrect[3]
    covered_fraction = overlap_area / inner_area
    return bool(covered_fraction >= percentage)
def aboveandbelow(outerrect, innerrect):
    """Tell whether `outerrect` strictly spans `innerrect` vertically.

    Both rectangles are read as ``(x, y, w, h)``; only the y and h items are
    used. Returns True when the outer top edge is above the inner top edge
    and the outer bottom edge is below the inner bottom edge.
    """
    # Guard clause: the outer top must lie strictly above the inner top.
    if not (outerrect[1] < innerrect[1]):
        return False
    outer_bottom = outerrect[1] + outerrect[3]
    inner_bottom = innerrect[1] + innerrect[3]
    return bool(outer_bottom > inner_bottom)
## Below code is an almost direct copy from https://github.com/scrunts23/CS-Data-Science-Build-Week-1/blob/master/model/dbscan.py
def dbscan(D, eps, MinPts):
    '''
    Cluster the dataset `D` with the DBSCAN algorithm.

    Parameters:
      `D`      - The dataset (a sequence of items understood by region_query)
      `eps`    - Threshold distance passed on to region_query
      `MinPts` - Minimum neighbour count for a point to seed a cluster

    Returns a list with one label per item of `D`: -1 marks noise and
    clusters are numbered from 1 in the order they are discovered.
    '''
    # Reserved label values while the scan is running.
    UNVISITED = 0
    NOISE = -1

    labels = [UNVISITED] * len(D)
    cluster_id = 0

    # Walk the dataset by index looking for seed points. Growing a cluster is
    # delegated to grow_cluster, which claims points by writing into `labels`.
    for idx in range(len(D)):
        # A point that already carries a label cannot start a new cluster.
        if labels[idx] != UNVISITED:
            continue

        seeds = region_query(D, idx, eps)

        if len(seeds) >= MinPts:
            # Dense enough: open the next cluster and expand it from here.
            cluster_id += 1
            grow_cluster(D, labels, idx, seeds, cluster_id, eps, MinPts)
        else:
            # Too few neighbours: mark as noise. grow_cluster may later
            # relabel it as a member of some cluster.
            labels[idx] = NOISE

    return labels
def grow_cluster(D, labels, P, NeighborPts, C, eps, MinPts):
    '''
    Grow a new cluster with label `C` from the seed point `P`.
    Labels are written into `labels` in place; nothing is returned.
    Parameters:
    `D` - The dataset (a list of vectors)
    `labels` - List storing the cluster labels for all dataset points
    `P` - Index of the seed point for this new cluster
    `NeighborPts` - Indices of all of the neighbors of `P` (not modified)
    `C` - The label for this new cluster.
    `eps` - Threshold distance
    `MinPts` - Minimum required number of neighbors
    '''
    # Assign the cluster label to the seed point.
    labels[P] = C
    # FIFO queue of point indices still to be examined. It is a private copy
    # so the caller's list is left untouched, and it is grown in place with
    # extend() instead of being rebuilt by list concatenation on every branch
    # point (which copied the whole queue each time).
    queue = list(NeighborPts)
    i = 0
    while i < len(queue):
        Pn = queue[i]
        i += 1
        if labels[Pn] == -1:
            # Previously marked as noise, so it is not a branch point:
            # attach it to C as a leaf and move on.
            labels[Pn] = C
        elif labels[Pn] == 0:
            # Unclaimed point: claim it for C.
            labels[Pn] = C
            PnNeighborPts = region_query(D, Pn, eps)
            # With at least MinPts neighbors Pn is a branch point, so its
            # neighbors are queued for examination too.
            if len(PnNeighborPts) >= MinPts:
                # Points that already belong to a cluster are skipped when
                # dequeued and their label never reverts, so leaving them
                # out here keeps the result identical with a shorter queue.
                queue.extend(q for q in PnNeighborPts if labels[q] in (0, -1))
    # We've finished growing cluster C!
def region_query(D, P, eps):
    '''
    Find all points in dataset `D` within distance `eps` of point `P`.
    Items of `D` are rectangles read as (x, y, w, h), and the distance between
    two of them is the absolute difference of their vertical centres; the
    horizontal position is ignored.
    Parameters:
    `D` - The dataset (a sequence of (x, y, w, h) rectangles)
    `P` - Index of the query rectangle in `D`
    `eps` - Threshold distance (strict: a distance equal to `eps` is excluded)
    Returns the list of indices into `D` (always including `P` itself when
    `eps` is positive) whose distance to `P` is below `eps`.
    '''
    def center_y(rect):
        # Same value as rectcenterpt(rect)[1] for an (x, y, w, h) rectangle.
        return rect[1] + rect[3] / 2

    # The query centre does not change, so compute it once.
    target = center_y(D[P])
    # abs() is essential: with a signed difference every rectangle lying
    # lower on the page than P (larger y) gives a negative value and would
    # pass the `< eps` test no matter how far away it is.
    return [Pn for Pn in range(len(D)) if abs(target - center_y(D[Pn])) < eps]
def linerectretriever(image):
    """Find one bounding rectangle per group of contours in `image`.

    Steps: dilate then erode with a 3x3 kernel, run Canny, take the external
    contours, cluster their bounding boxes with dbscan (eps is one third of
    the median box height, MinPts is 1), merge each cluster with
    ``mf.mergerects``, then prune redundant merged boxes.
    NOTE(review): presumably ``mf.mergerects`` returns the enclosing
    ``(x, y, w, h)`` of the boxes it is given -- confirm in ``myfunctions``.

    Returns ``(mergedboxes, lineboxes)``:
      mergedboxes - int array of shape (k, 4), one row per surviving group
      lineboxes   - list of k lists holding the member boxes of each group,
                    in the same order as `mergedboxes`
    When no contour is found, an empty (0, 4) array and an empty list are
    returned.
    """
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    reducedimage = cv2.morphologyEx(image, cv2.MORPH_DILATE, kernel)
    reducedimage = cv2.morphologyEx(reducedimage, cv2.MORPH_ERODE, kernel)
    canny = cv2.Canny(reducedimage, 0, 500, None, 3)
    contours, _hierarchy = cv2.findContours(canny, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Without contours the median below would be nan and max(labels) would
    # raise ValueError, so report "nothing found" explicitly.
    if len(contours) == 0:
        return np.empty((0, 4), dtype=int), []

    boundingboxes = np.empty((len(contours), 4), dtype=int)
    for i, contour in enumerate(contours):
        boundingboxes[i] = cv2.boundingRect(contour)

    # Column 3 of a bounding box is its height.
    epsilonvalue = np.median(boundingboxes, axis=0)[3] / 3
    labels = dbscan(boundingboxes, epsilonvalue, 1)

    numclusters = max(labels)
    lineboxes = [[] for _ in range(numclusters)]
    for label, box in zip(labels, boundingboxes):
        # Cluster ids start at 1; a noise label (-1) has no bucket and must
        # not wrap around to a negative list index.
        if label < 1:
            continue
        lineboxes[label - 1].append(box.tolist())

    mergedboxes = np.empty((numclusters, 4), dtype=int)
    for i in range(numclusters):
        mergedboxes[i] = mf.mergerects(lineboxes[i])

    # Prune: compare every box j (outer) with every other box i (inner) and
    # drop the inner one when it is fully covered by the outer box, when the
    # outer box spans it vertically, or when it is shorter than epsilon.
    # mergedboxes and lineboxes are kept index-aligned.
    j = 0
    while j < len(mergedboxes):
        i = 0
        while i < len(mergedboxes):
            if i == j:
                i += 1
                continue
            outerbox = mergedboxes[j]
            innerbox = mergedboxes[i]
            if containsamount(outerbox, innerbox, 1) or aboveandbelow(outerbox, innerbox) or innerbox[3] < epsilonvalue:
                mergedboxes = np.delete(mergedboxes, i, axis=0)
                lineboxes.pop(i)
                # Removing a row in front of j shifts the outer box down one.
                if i < j:
                    j -= 1
                # Stay on the same index: it now holds the next box.
                i -= 1
            i += 1
        j += 1
    return mergedboxes, lineboxes
def lineimagemaker(thresholded):
    """Cut `thresholded` into one image per group found by linerectretriever.

    Groups are ordered by the y item of their merged box (top to bottom). For
    each group the pixels inside its member boxes are kept from
    `thresholded`, everything else is painted white (255), and the result is
    cropped to the merged box.

    Returns a list of cropped images, one per group.
    """
    mergedboxes, originalboxes = linerectretriever(thresholded)

    # Sort both structures with the same permutation so they stay aligned.
    ordering = mergedboxes[:, 1].argsort()
    mergedboxes = mergedboxes[ordering]
    originalboxes = [originalboxes[i] for i in ordering]

    # Only ever read, never written, so a single instance serves every line.
    whitebackground = np.full(thresholded.shape, fill_value=255, dtype=np.uint8)

    lineimages = []
    for box, memberboxes in zip(mergedboxes, originalboxes):
        # 255 inside every member box of this group, 0 elsewhere.
        mask = np.zeros(thresholded.shape, dtype=np.uint8)
        for lb in memberboxes:
            mask = cv2.rectangle(mask, (lb[0], lb[1]), (lb[0] + lb[2], lb[1] + lb[3]), (255, 255, 255), thickness=cv2.FILLED)
        invertedmask = cv2.bitwise_not(mask)
        # White wherever the group has no member box ...
        whitedscreen = cv2.bitwise_and(whitebackground, whitebackground, mask=invertedmask)
        # ... and the source pixels wherever it has one.
        lineimage = cv2.bitwise_and(thresholded, thresholded, mask=mask)
        x, y, w, h = box
        lineimages.append(cv2.bitwise_or(whitedscreen, lineimage)[y:y + h, x:x + w])
    return lineimages
### actual function
def lineisolator(image):
    """Split a BGR image into a list of line images.

    The image is converted to grayscale and binarised with Otsu's threshold.
    lineimagemaker is run on the binarised image, then run again on each
    image it produced; the results of the second pass are concatenated in
    order and returned.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    thresholded = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

    finallineimages = []
    # Second pass: re-run the extraction on every first-pass crop.
    for lineimage in lineimagemaker(thresholded):
        finallineimages.extend(lineimagemaker(lineimage))
    return finallineimages

View File

@ -2,15 +2,13 @@
"cells": [
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 97,
"metadata": {},
"outputs": [],
"source": [
"import cv2\n",
"import numpy as np\n",
"\n",
"import myfunctions as mf\n",
"\n",
"\n",
"import scipy.stats as st\n",
"import math\n",
@ -20,36 +18,495 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 98,
"metadata": {},
"outputs": [],
"source": [
"img = cv2.imread('./test_images/IMG_7594.jpg')"
"import sys\n",
"sys.path.insert(0, '../../autocropper')\n",
"import myfunctions as mf\n"
]
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 99,
"metadata": {},
"outputs": [],
"source": [
"out = mf.houghlineprocessing(img)"
"def rectcenterpt(rect, xywhrect=True, retint=False):\n",
" if (xywhrect):\n",
" x = rect[0] + rect[2]/2\n",
" y = rect[1] + rect[3]/2\n",
" else:\n",
" x = (rect[0]+rect[2])/2\n",
" y = (rect[1]+rect[3])/2\n",
" if (retint):\n",
" x = int(x)\n",
" y = int(y)\n",
" return (x,y)\n",
"\n",
"def containsamount(outerrect, innerrect, percentage=1):\n",
" tinyrect = mf.overlapRect([outerrect, innerrect])\n",
" tinyarea = tinyrect[2]*tinyrect[3]\n",
" innerrectarea = innerrect[2]*innerrect[3]\n",
" if (tinyarea/innerrectarea >= percentage):\n",
" return True\n",
" return False\n",
"\n",
"def aboveandbelow(outerrect, innerrect):\n",
" if (outerrect[1] < innerrect[1] and outerrect[1]+outerrect[3] > innerrect[1]+innerrect[3]):\n",
" return True\n",
" return False\n",
"\n",
"## Below code is an almost direct copy from https://github.com/scrunts23/CS-Data-Science-Build-Week-1/blob/master/model/dbscan.py\n",
"\n",
"def dbscan(D, eps, MinPts):\n",
" '''\n",
" Cluster the dataset `D` using the DBSCAN algorithm.\n",
" \n",
" dbscan takes a dataset `D` (a list of vectors), a threshold distance\n",
" `eps`, and a required number of points `MinPts`.\n",
" \n",
" It will return a list of cluster labels. The label -1 means noise, and then\n",
" the clusters are numbered starting from 1.\n",
" '''\n",
" \n",
" # This list will hold the final cluster assignment for each point in D.\n",
" # There are two reserved values:\n",
" # -1 - Indicates a noise point\n",
" # 0 - Means the point hasn't been considered yet.\n",
" # Initially all labels are 0. \n",
" labels = [0]*len(D)\n",
"\n",
" # C is the ID of the current cluster. \n",
" C = 0\n",
" \n",
" # This outer loop is just responsible for picking new seed points--a point\n",
" # from which to grow a new cluster.\n",
" # Once a valid seed point is found, a new cluster is created, and the \n",
" # cluster growth is all handled by the 'expandCluster' routine.\n",
" \n",
" # For each point P in the Dataset D...\n",
" # ('P' is the index of the datapoint, rather than the datapoint itself.)\n",
" for P in range(0, len(D)):\n",
" \n",
" # Only points that have not already been claimed can be picked as new \n",
" # seed points. \n",
" # If the point's label is not 0, continue to the next point.\n",
" if not (labels[P] == 0):\n",
" continue\n",
" \n",
" # Find all of P's neighboring points.\n",
" NeighborPts = region_query(D, P, eps)\n",
" \n",
" # If the number is below MinPts, this point is noise. \n",
" # This is the only condition under which a point is labeled \n",
" # NOISE--when it's not a valid seed point. A NOISE point may later \n",
" # be picked up by another cluster as a boundary point (this is the only\n",
" # condition under which a cluster label can change--from NOISE to \n",
" # something else).\n",
" if len(NeighborPts) < MinPts:\n",
" labels[P] = -1\n",
" # Otherwise, if there are at least MinPts nearby, use this point as the \n",
" # seed for a new cluster. \n",
" else: \n",
" C += 1\n",
" grow_cluster(D, labels, P, NeighborPts, C, eps, MinPts)\n",
" \n",
" # All data has been clustered!\n",
" return labels\n",
"\n",
"\n",
"def grow_cluster(D, labels, P, NeighborPts, C, eps, MinPts):\n",
" '''\n",
" Grow a new cluster with label `C` from the seed point `P`.\n",
" \n",
" This function searches through the dataset to find all points that belong\n",
" to this new cluster. When this function returns, cluster `C` is complete.\n",
" \n",
" Parameters:\n",
" `D` - The dataset (a list of vectors)\n",
" `labels` - List storing the cluster labels for all dataset points\n",
" `P` - Index of the seed point for this new cluster\n",
" `NeighborPts` - All of the neighbors of `P`\n",
" `C` - The label for this new cluster. \n",
" `eps` - Threshold distance\n",
" `MinPts` - Minimum required number of neighbors\n",
" '''\n",
"\n",
" # Assign the cluster label to the seed point.\n",
" labels[P] = C\n",
" \n",
" # Look at each neighbor of P (neighbors are referred to as Pn). \n",
" # NeighborPts will be used as a FIFO queue of points to search--that is, it\n",
" # will grow as we discover new branch points for the cluster. The FIFO\n",
" # behavior is accomplished by using a while-loop rather than a for-loop.\n",
" # In NeighborPts, the points are represented by their index in the original\n",
" # dataset.\n",
" i = 0\n",
" while i < len(NeighborPts): \n",
" \n",
" # Get the next point from the queue. \n",
" Pn = NeighborPts[i]\n",
" \n",
" # If Pn was labelled NOISE during the seed search, then we\n",
" # know it's not a branch point (it doesn't have enough neighbors), so\n",
" # make it a leaf point of cluster C and move on.\n",
" if labels[Pn] == -1:\n",
" labels[Pn] = C\n",
" \n",
" # Otherwise, if Pn isn't already claimed, claim it as part of C.\n",
" elif labels[Pn] == 0:\n",
" # Add Pn to cluster C (Assign cluster label C).\n",
" labels[Pn] = C\n",
" \n",
" # Find all the neighbors of Pn\n",
" PnNeighborPts = region_query(D, Pn, eps)\n",
" \n",
" # If Pn has at least MinPts neighbors, it's a branch point!\n",
" # Add all of its neighbors to the FIFO queue to be searched. \n",
" if len(PnNeighborPts) >= MinPts:\n",
" NeighborPts = NeighborPts + PnNeighborPts\n",
" # If Pn *doesn't* have enough neighbors, then it's a leaf point.\n",
" # Don't queue up it's neighbors as expansion points.\n",
" #else:\n",
" # Do nothing \n",
" #NeighborPts = NeighborPts \n",
" \n",
" # Advance to the next point in the FIFO queue.\n",
" i += 1 \n",
" \n",
" # We've finished growing cluster C!\n",
"\n",
"\n",
"def region_query(D, P, eps):\n",
" '''\n",
" Find all points in dataset `D` within distance `eps` of point `P`.\n",
" \n",
" This function calculates the distance between a point P and every other \n",
" point in the dataset, and then returns only those points which are within a\n",
" threshold distance `eps`.\n",
" '''\n",
" neighbors = []\n",
" \n",
" # For each point in the dataset...\n",
" for Pn in range(0, len(D)):\n",
" \n",
" # If the distance is below the threshold, add it to the neighbors list.\n",
" if (rectcenterpt(D[P])[1] - rectcenterpt(D[Pn])[1]) < eps:\n",
" neighbors.append(Pn)\n",
" \n",
" return neighbors"
]
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 100,
"metadata": {},
"outputs": [],
"source": [
"cv2.imshow(\"result2\", mf.ResizeWithAspectRatio(out, height=1000))\n",
"def tempfunc(image):\n",
" shape = image.shape\n",
" \n",
"\n",
" # blackout = np.zeros(tempout.shape, dtype=np.uint8)\n",
" \n",
" kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,3))\n",
" reducedimage = cv2.morphologyEx(image, cv2.MORPH_DILATE, kernel)\n",
" reducedimage = cv2.morphologyEx(reducedimage, cv2.MORPH_ERODE, kernel)\n",
" \n",
" tempout = cv2.cvtColor(reducedimage, cv2.COLOR_GRAY2BGR)\n",
" \n",
" \n",
" canny = cv2.Canny(reducedimage, 0, 500, None, 3)\n",
" \n",
" \n",
" contours, heirarchy = cv2.findContours(canny,cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)\n",
" boundingboxes = np.empty((len(contours), 4), dtype=int)\n",
" \n",
" for i, contour in enumerate(contours):\n",
" boundingboxes[i] = cv2.boundingRect(contour)\n",
" \n",
" epsilonvalue = np.median(boundingboxes, axis=0)[3]/2\n",
" \n",
" labels = dbscan(boundingboxes, epsilonvalue, 1)\n",
" print(labels)\n",
" numclusters = max(labels)\n",
" lineboxes = [[] for _ in range(numclusters)]\n",
"\n",
" for i, item in enumerate(labels):\n",
" lineboxes[item-1].append(boundingboxes[i].tolist())\n",
" \n",
" \n",
" mergedboxes = np.empty((numclusters,4), dtype=int)\n",
" \n",
" \n",
" for i in range(numclusters):\n",
" b = mf.mergerects(lineboxes[i])\n",
" mergedboxes[i] = b\n",
" \n",
" j = 0\n",
" while (j < len(mergedboxes)):\n",
" i = 0\n",
" while (i < len(mergedboxes)):\n",
" if (i == j):\n",
" i += 1\n",
" continue\n",
" outerbox = mergedboxes[j]\n",
" innerbox = mergedboxes[i]\n",
" if containsamount(outerbox, innerbox, 1) or aboveandbelow(outerbox, innerbox) or innerbox[3] < epsilonvalue:\n",
" mergedboxes = np.delete(mergedboxes, i, axis=0)\n",
" lineboxes.pop(i)\n",
" if (i < j):\n",
" j -= 1\n",
" i -= 1\n",
" i += 1\n",
" j += 1\n",
" \n",
" # return mergedboxes, lineboxes\n",
" for i, b in enumerate(mergedboxes):\n",
" tempout = cv2.rectangle(tempout, (b[0],b[1]), (b[0]+b[2], b[1]+b[3]), (0,255,0), thickness=1)\n",
" for t in lineboxes[i]:\n",
" tempout = cv2.rectangle(tempout, (t[0],t[1]), (t[0]+t[2], t[1]+t[3]), (0,0,255), thickness=1)\n",
" \n",
" print(epsilonvalue)\n",
" return tempout"
]
},
{
"cell_type": "code",
"execution_count": 101,
"metadata": {},
"outputs": [],
"source": [
"def linerectretriever(image):\n",
" shape = image.shape\n",
" \n",
" kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,3))\n",
" reducedimage = cv2.morphologyEx(image, cv2.MORPH_DILATE, kernel)\n",
" reducedimage = cv2.morphologyEx(reducedimage, cv2.MORPH_ERODE, kernel)\n",
" \n",
" canny = cv2.Canny(reducedimage, 0, 500, None, 3)\n",
" \n",
" \n",
" contours, heirarchy = cv2.findContours(canny,cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)\n",
" boundingboxes = np.empty((len(contours), 4), dtype=int)\n",
" \n",
" for i, contour in enumerate(contours):\n",
" boundingboxes[i] = cv2.boundingRect(contour)\n",
" \n",
" epsilonvalue = np.median(boundingboxes, axis=0)[3]/3\n",
" \n",
" labels = dbscan(boundingboxes, epsilonvalue, 1)\n",
" # print(labels)\n",
" numclusters = max(labels)\n",
" lineboxes = [[] for _ in range(numclusters)]\n",
"\n",
" for i, item in enumerate(labels):\n",
" lineboxes[item-1].append(boundingboxes[i].tolist())\n",
" \n",
" \n",
" mergedboxes = np.empty((numclusters,4), dtype=int)\n",
" \n",
" \n",
" for i in range(numclusters):\n",
" b = mf.mergerects(lineboxes[i])\n",
" mergedboxes[i] = b\n",
" \n",
" j = 0\n",
" while (j < len(mergedboxes)):\n",
" i = 0\n",
" while (i < len(mergedboxes)):\n",
" if (i == j):\n",
" i += 1\n",
" continue\n",
" outerbox = mergedboxes[j]\n",
" innerbox = mergedboxes[i]\n",
" if containsamount(outerbox, innerbox, 1) or aboveandbelow(outerbox, innerbox) or innerbox[3] < epsilonvalue:\n",
" mergedboxes = np.delete(mergedboxes, i, axis=0)\n",
" lineboxes.pop(i)\n",
" if (i < j):\n",
" j -= 1\n",
" i -= 1\n",
" i += 1\n",
" j += 1\n",
" \n",
" return mergedboxes, lineboxes\n",
" \n",
"def lineimagemaker(thresholded):\n",
" lineimages = []\n",
" mergedboxes, originalboxes = linerectretriever(thresholded)\n",
" \n",
" mergedboxesordering = (mergedboxes[:,1]).argsort() # sorted by y value (aka lines from top to bottom)\n",
" mergedboxes = mergedboxes[mergedboxesordering]\n",
" originalboxes = [originalboxes[i] for i in mergedboxesordering]\n",
" for i, box in enumerate(mergedboxes):\n",
" mask = np.zeros(thresholded.shape, dtype=np.uint8)\n",
" whitebackground = np.full(thresholded.shape, fill_value=255, dtype=np.uint8)\n",
" for lb in originalboxes[i]:\n",
" mask = cv2.rectangle(mask, (lb[0],lb[1]), (lb[0]+lb[2], lb[1]+lb[3]), (255,255,255), thickness=cv2.FILLED)\n",
"\n",
" invertedmask = cv2.bitwise_not(mask)\n",
" whitedscreen = cv2.bitwise_and(whitebackground, whitebackground, mask=invertedmask)\n",
" lineimage = cv2.bitwise_and(thresholded, thresholded, mask=mask)\n",
" lineimage = cv2.bitwise_or(whitedscreen, lineimage)[box[1]:box[1]+box[3], box[0]:box[0]+box[2]]\n",
" # lineimage = mf.externaldeskew(lineimage, fill=(255,255,255), alreadygray=True)\n",
" # lineimage = thresholded[box[1]:box[1]+box[3], box[0]:box[0]+box[2]]\n",
" lineimages.append(lineimage)\n",
" # lineimages.append(mask)\n",
" return lineimages\n",
" \n",
"\n",
"def lineisolator(image):\n",
" imgcopy = image.copy()\n",
" gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)\n",
" thresholded = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]\n",
" \n",
" \n",
" kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,3))\n",
" \n",
" \n",
" \n",
" lineimages = lineimagemaker(thresholded)\n",
" \n",
" # for i, lineimage in enumerate(lineimages):\n",
" # lineimages[i] = cv2.morphologyEx(lineimage, cv2.MORPH_ERODE, kernel)\n",
"\n",
" \n",
" finallineimages = []\n",
" for i, lineimage in enumerate(lineimages):\n",
" templineimages = lineimagemaker(lineimage)\n",
" finallineimages += templineimages\n",
" \n",
" \n",
" # mergedboxes, originalboxes = linerectretriever(thresholded) \n",
" # mask = np.zeros(thresholded.shape, dtype=np.uint8)\n",
" # for i, box in enumerate(mergedboxes):\n",
" # for lb in originalboxes[i]:\n",
" # mask = cv2.rectangle(mask, (lb[0],lb[1]), (lb[0]+lb[2], lb[1]+lb[3]), (255,255,255), thickness=cv2.FILLED)\n",
"\n",
" # return mask\n",
" \n",
" \n",
" # out = tempfunc(thresholded)\n",
" # return out\n",
" \n",
" return finallineimages\n",
" \n",
" "
]
},
{
"cell_type": "code",
"execution_count": 102,
"metadata": {},
"outputs": [],
"source": [
"pathname = \"../adjusted_test_images/\"\n",
"filename = \"IMG_7594.jpg\"\n",
"\n",
"# print(pathname+filename)\n",
"img = cv2.imread(pathname+filename)"
]
},
{
"cell_type": "code",
"execution_count": 103,
"metadata": {},
"outputs": [],
"source": [
"outs = lineisolator(img)"
]
},
{
"cell_type": "code",
"execution_count": 104,
"metadata": {},
"outputs": [],
"source": [
"# gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)\n",
"# thresholded = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]\n",
"# monke = tempfunc(thresholded)\n",
"# cv2.imwrite(\"../temp/monke.jpg\", monke)"
]
},
{
"cell_type": "code",
"execution_count": 105,
"metadata": {},
"outputs": [],
"source": [
"# cv2.imshow(\"test\", mf.ResizeWithAspectRatio(img, height=1000))\n",
"# # cv2.imshow(\"test1\", mf.ResizeWithAspectRatio(out, height=1000))\n",
"# cv2.waitKey(0)\n",
"# cv2.destroyAllWindows()"
]
},
{
"cell_type": "code",
"execution_count": 106,
"metadata": {},
"outputs": [],
"source": [
"# for out in outs:\n",
"# if (out.shape[0] > out.shape[1]):\n",
"# cv2.imshow(\"test1\", mf.ResizeWithAspectRatio(out, height=1000))\n",
"# else:\n",
"# cv2.imshow(\"test1\", mf.ResizeWithAspectRatio(out, width=1000))\n",
"# key = cv2.waitKey(0)\n",
"# cv2.destroyAllWindows()\n",
"# if (key == 107):\n",
"# break\n",
"if (isinstance(outs, np.ndarray)):\n",
" if (outs.shape[0] > outs.shape[1]):\n",
" cv2.imshow(\"test\", mf.ResizeWithAspectRatio(outs, height=1350))\n",
" else:\n",
" cv2.imshow(\"test\", mf.ResizeWithAspectRatio(outs, width=1000))\n",
"else:\n",
" for i, out in enumerate(outs):\n",
" if (out.shape[0] > out.shape[1]):\n",
" cv2.imshow(\"test\"+str(i), mf.ResizeWithAspectRatio(out, height=1350))\n",
" else:\n",
" cv2.imshow(\"test\"+str(i), mf.ResizeWithAspectRatio(out, width=1000))\n",
"cv2.waitKey(0)\n",
"cv2.destroyAllWindows()"
"cv2.destroyAllWindows()\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 107,
"metadata": {},
"outputs": [],
"source": [
"# cv2.imshow(\"test\", mf.ResizeWithAspectRatio(outs[30], width=1000))\n",
"# cv2.waitKey(0)\n",
"# cv2.destroyAllWindows()"
]
},
{
"cell_type": "code",
"execution_count": 108,
"metadata": {},
"outputs": [],
"source": [
"# results = tempfunc(outs[30])"
]
},
{
"cell_type": "code",
"execution_count": 109,
"metadata": {},
"outputs": [],
"source": [
"# cv2.imshow(\"test\", mf.ResizeWithAspectRatio(results, width=1000))\n",
"# cv2.waitKey(0)\n",
"# cv2.destroyAllWindows()"
]
},
{
"cell_type": "code",
"execution_count": 110,
"metadata": {},
"outputs": [],
"source": [
@ -59,7 +516,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 111,
"metadata": {},
"outputs": [],
"source": [
@ -68,7 +525,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 112,
"metadata": {},
"outputs": [],
"source": [

View File

@ -1,104 +0,0 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"import cv2\n",
"import numpy as np\n",
"\n",
"import myfunctions as mf\n",
"\n",
"\n",
"import scipy.stats as st\n",
"import math\n",
"\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"img = cv2.imread('./test_images/IMG_7594.jpg')"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"out = mf.houghlineprocessing(img)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"cv2.imshow(\"result2\", mf.ResizeWithAspectRatio(out, height=1000))\n",
"cv2.waitKey(0)\n",
"cv2.destroyAllWindows()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# https://medium.com/@vatvenger/extracting-lines-from-ocr-a8f410448fc\n",
"# https://www.width.ai/post/the-best-ways-to-extract-text-from-images-without-tesseract-python"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"## Potential Next Steps. Isolate a line of text and then feed that into the OCR Model to extract the text."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# #IDEA:\n",
"# 1. Isolate lines into rectangles\n",
"# 2. feed that rectangle portion of the image into an OCR model\n",
"# 3. append that to the final output string with the end character for nextline\n",
"# 4. give the whole final string to a model which gives the outputs"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@ -10,14 +10,14 @@ RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
# -y automatically answers yes when apt asks for confirmation before installing the packages
RUN apt-get update && \
apt-get install -y build-essential cmake git gdb pkg-config valgrind systemd-coredump python3 python3-opencv libopencv-dev python3-pip python3-dev && \
apt-get install -y build-essential cmake git gdb pkg-config valgrind systemd-coredump python3-opencv libopencv-dev python3-pip python3-dev && \
apt-get -y clean && apt-get -y autoremove
RUN python3 -m pip install --upgrade pip
RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu117
RUN pip3 install datasets && pip3 install jupyter notebook && pip3 install matplotlib
RUN pip3 install datasets && pip3 install jupyter notebook && pip3 install matplotlib && pip3 install deskew
RUN pip3 install easyocr && pip3 uninstall -y opencv-python-headless