V1 of line extractor #8
BIN
code/textdataretriever/adjusted_test_images/IMG_7594.jpg
Normal file
|
After Width: | Height: | Size: 412 KiB |
BIN
code/textdataretriever/adjusted_test_images/IMG_7604.jpg
Normal file
|
After Width: | Height: | Size: 426 KiB |
BIN
code/textdataretriever/adjusted_test_images/IMG_7605.jpg
Normal file
|
After Width: | Height: | Size: 550 KiB |
BIN
code/textdataretriever/adjusted_test_images/IMG_7640.jpg
Normal file
|
After Width: | Height: | Size: 168 KiB |
BIN
code/textdataretriever/adjusted_test_images/IvV2y.png
Normal file
|
After Width: | Height: | Size: 121 KiB |
BIN
code/textdataretriever/test_images/IMG_7594.jpg
Normal file
|
After Width: | Height: | Size: 3.0 MiB |
BIN
code/textdataretriever/test_images/IMG_7604.jpg
Normal file
|
After Width: | Height: | Size: 2.1 MiB |
BIN
code/textdataretriever/test_images/IMG_7605.jpg
Normal file
|
After Width: | Height: | Size: 5.3 MiB |
BIN
code/textdataretriever/test_images/IMG_7640.jpg
Normal file
|
After Width: | Height: | Size: 2.3 MiB |
BIN
code/textdataretriever/test_images/IvV2y.png
Normal file
|
After Width: | Height: | Size: 139 KiB |
292
code/textdataretriever/textextractor/extractorfunctions.py
Normal file
@ -0,0 +1,292 @@
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
import sys
|
||||
sys.path.insert(0, '../../autocropper')
|
||||
import myfunctions as mf
|
||||
|
||||
|
||||
|
||||
## helper functions
|
||||
def rectcenterpt(rect, xywhrect=True, retint=False):
    '''
    Return the centre point of a rectangle as an `(x, y)` tuple.

    Parameters:
      `rect`     - Indexable holding four numbers.
      `xywhrect` - When true, `rect` is read as (x, y, width, height);
                   otherwise it is read as two corners (x1, y1, x2, y2).
      `retint`   - When true, both coordinates are truncated with `int()`.
    '''
    if xywhrect:
        # Origin plus half of the extent along each axis.
        center = (rect[0] + rect[2]/2, rect[1] + rect[3]/2)
    else:
        # Midpoint between the two corners.
        center = ((rect[0]+rect[2])/2, (rect[1]+rect[3])/2)

    return (int(center[0]), int(center[1])) if retint else center
|
||||
|
||||
def containsamount(outerrect, innerrect, percentage=1):
    '''
    Tell whether the overlap of the two rectangles covers at least
    `percentage` of `innerrect`.

    The overlap comes from `mf.overlapRect` (presumably the intersection
    rectangle -- confirm against myfunctions). Areas are taken as
    index 2 times index 3, i.e. the rectangles are treated as
    (x, y, width, height).

    Returns True when overlap area / inner area >= `percentage`,
    False otherwise.
    '''
    overlap = mf.overlapRect([outerrect, innerrect])
    coveredfraction = (overlap[2]*overlap[3]) / (innerrect[2]*innerrect[3])
    return bool(coveredfraction >= percentage)
|
||||
|
||||
def aboveandbelow(outerrect, innerrect):
    '''
    Return True when `outerrect` strictly spans `innerrect` vertically.

    Both rectangles are read as (x, y, width, height); only the y and
    height entries are used. The outer box must start above the inner
    box's top edge and end below its bottom edge.
    '''
    outertop = outerrect[1]
    outerbottom = outertop + outerrect[3]
    innertop = innerrect[1]
    innerbottom = innertop + innerrect[3]

    return bool(outertop < innertop and outerbottom > innerbottom)
|
||||
|
||||
## Below code is an almost direct copy from https://github.com/scrunts23/CS-Data-Science-Build-Week-1/blob/master/model/dbscan.py
|
||||
|
||||
def dbscan(D, eps, MinPts):
    '''
    Run DBSCAN over the dataset `D` and return one label per element.

    Parameters:
      `D`      - The dataset (indexable, supports `len`)
      `eps`    - Neighbourhood threshold handed to `region_query`
      `MinPts` - Minimum neighbourhood size for a point to seed a cluster

    Returns a list of labels the same length as `D`. Clusters are
    numbered from 1 upward; -1 marks noise.
    '''
    # Reserved label values; every point starts out unvisited.
    unvisited = 0
    noise = -1
    labels = [unvisited] * len(D)

    # ID of the most recently created cluster.
    clusterid = 0

    # Walk the points by index, looking for seeds of new clusters.
    for seed in range(len(D)):

        # A point that already carries a label (cluster or noise) cannot
        # seed another cluster.
        if labels[seed] != unvisited:
            continue

        seedneighbors = region_query(D, seed, eps)

        if len(seedneighbors) >= MinPts:
            # Dense enough: open a new cluster and let grow_cluster claim
            # everything reachable from this seed.
            clusterid += 1
            grow_cluster(D, labels, seed, seedneighbors, clusterid, eps, MinPts)
        else:
            # Too sparse to seed a cluster. grow_cluster may still relabel
            # this point later as a border point of some cluster.
            labels[seed] = noise

    return labels
|
||||
|
||||
|
||||
def grow_cluster(D, labels, P, NeighborPts, C, eps, MinPts):
    '''
    Grow a new cluster with label `C` from the seed point `P`.

    Every point reachable from the seed is labelled `C` in place in
    `labels`. When this function returns, cluster `C` is complete.
    Nothing is returned, and the caller's `NeighborPts` list is not
    modified.

    Parameters:
      `D`           - The dataset (a list of vectors)
      `labels`      - List storing the cluster labels for all dataset points
      `P`           - Index of the seed point for this new cluster
      `NeighborPts` - All of the neighbors of `P`
      `C`           - The label for this new cluster.
      `eps`         - Threshold distance
      `MinPts`      - Minimum required number of neighbors
    '''
    # Assign the cluster label to the seed point.
    labels[P] = C

    # FIFO queue of point indices still to examine. It is a private copy,
    # so appending below never touches the list owned by the caller.
    queue = list(NeighborPts)

    # Indices that have ever been queued. A point's label is settled the
    # first time it is dequeued, so queuing it again would only be wasted
    # work; skipping duplicates keeps the queue bounded by len(D) instead
    # of growing with every branch point.
    queued = set(queue)

    i = 0
    while i < len(queue):
        Pn = queue[i]
        i += 1

        if labels[Pn] == -1:
            # Marked NOISE during the seed search, so it is known not to
            # be a branch point: attach it as a leaf of cluster C.
            labels[Pn] = C

        elif labels[Pn] == 0:
            # Unclaimed point: claim it for cluster C.
            labels[Pn] = C

            PnNeighborPts = region_query(D, Pn, eps)

            # With at least MinPts neighbors Pn is a branch point, so its
            # neighbors must be searched as well. Otherwise it is a leaf
            # and its neighbors are not queued.
            if len(PnNeighborPts) >= MinPts:
                for candidate in PnNeighborPts:
                    if candidate not in queued:
                        queued.add(candidate)
                        queue.append(candidate)

        # Any other label means the point already belongs to a cluster
        # and is left alone.
|
||||
|
||||
|
||||
def region_query(D, P, eps):
    '''
    Find all boxes in `D` whose vertical centre is within `eps` of box `P`.

    Parameters:
      `D`   - Sequence (or 2-D array) of boxes laid out as (x, y, w, h)
      `P`   - Index into `D` of the reference box
      `eps` - Threshold distance

    Returns a list of indices into `D`, in ascending order. The reference
    box itself is included whenever `eps` is positive. An empty `D`
    yields an empty list.
    '''
    if len(D) == 0:
        return []

    boxes = np.asarray(D)

    # Vertical centre of each (x, y, w, h) box: the same value that
    # rectcenterpt(box)[1] produces.
    ycenters = boxes[:, 1] + boxes[:, 3] / 2

    # Compare the absolute distance. A signed difference would accept every
    # box lying below P no matter how far away it is.
    within = np.abs(ycenters - ycenters[P]) < eps

    return np.flatnonzero(within).tolist()
|
||||
|
||||
def linerectretriever(image):
    '''
    Locate text-line rectangles in a single-channel image.

    Steps:
      1. Dilate then erode with a 3x3 rectangular kernel.
      2. Run Canny and take the external contours of the edge map.
      3. Cluster the contour bounding boxes with `dbscan`, using one third
         of the median box height as `eps` and MinPts = 1.
      4. Merge each cluster into one box via `mf.mergerects` (presumably
         the bounding union -- confirm against myfunctions).
      5. Drop merged boxes that are contained in, or vertically spanned
         by, another merged box, or whose height is below `eps`.

    Returns `(mergedboxes, lineboxes)`: an (n, 4) int array of merged
    (x, y, w, h) boxes and a list of n lists holding the member boxes of
    each merged box, index-aligned with the array rows. When no contour
    is found, an empty (0, 4) array and an empty list are returned.
    '''
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    reducedimage = cv2.morphologyEx(image, cv2.MORPH_DILATE, kernel)
    reducedimage = cv2.morphologyEx(reducedimage, cv2.MORPH_ERODE, kernel)

    canny = cv2.Canny(reducedimage, 0, 500, None, 3)

    # findContours returns (contours, hierarchy) on OpenCV 4.x but
    # (image, contours, hierarchy) on 3.x; contours is second-to-last in both.
    contours = cv2.findContours(canny, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    # Nothing detected: return an empty result instead of letting
    # max() fail on an empty label list further down.
    if len(contours) == 0:
        return np.empty((0, 4), dtype=int), []

    boundingboxes = np.array([cv2.boundingRect(contour) for contour in contours], dtype=int)

    # Column 3 holds the box heights.
    epsilonvalue = np.median(boundingboxes, axis=0)[3]/3

    labels = dbscan(boundingboxes, epsilonvalue, 1)

    # Cluster labels start at 1, so the largest label is the cluster count.
    numclusters = max(labels)
    lineboxes = [[] for _ in range(numclusters)]
    for label, box in zip(labels, boundingboxes):
        lineboxes[label-1].append(box.tolist())

    mergedboxes = np.empty((numclusters, 4), dtype=int)
    for i, members in enumerate(lineboxes):
        mergedboxes[i] = mf.mergerects(members)

    # Compare every ordered pair (j = outer, i = inner) and remove inner
    # boxes that should not stand as their own line. Rows are deleted while
    # scanning, so both indices are adjusted after each removal.
    j = 0
    while (j < len(mergedboxes)):
        i = 0
        while (i < len(mergedboxes)):
            if (i == j):
                i += 1
                continue
            outerbox = mergedboxes[j]
            innerbox = mergedboxes[i]
            if containsamount(outerbox, innerbox, 1) or aboveandbelow(outerbox, innerbox) or innerbox[3] < epsilonvalue:
                mergedboxes = np.delete(mergedboxes, i, axis=0)
                lineboxes.pop(i)
                if (i < j):
                    # The outer box moved up one row.
                    j -= 1
                # Re-examine the row that slid into position i.
                i -= 1
            i += 1
        j += 1

    return mergedboxes, lineboxes
|
||||
|
||||
def lineimagemaker(thresholded):
    '''
    Cut `thresholded` into one image per line box found by
    `linerectretriever`.

    The boxes are processed in ascending order of their y value (lines
    from top to bottom). For each one, pixels inside any of the line's
    member boxes are copied from `thresholded`, every other pixel is set
    to 255, and the result is cropped to the merged box.

    Returns the list of cropped images in top-to-bottom order.
    '''
    mergedboxes, originalboxes = linerectretriever(thresholded)

    # Row order that sorts the merged boxes by y; the same indices select
    # the matching member-box lists.
    toptobottom = (mergedboxes[:, 1]).argsort()

    lineimages = []
    for idx in toptobottom:
        left, top, width, height = mergedboxes[idx]

        # Mask that is 255 inside every member box of this line.
        mask = np.zeros(thresholded.shape, dtype=np.uint8)
        whitebackground = np.full(thresholded.shape, fill_value=255, dtype=np.uint8)
        for lb in originalboxes[idx]:
            mask = cv2.rectangle(mask, (lb[0], lb[1]), (lb[0]+lb[2], lb[1]+lb[3]), (255, 255, 255), thickness=cv2.FILLED)

        # White everywhere outside the member boxes ...
        invertedmask = cv2.bitwise_not(mask)
        whitedscreen = cv2.bitwise_and(whitebackground, whitebackground, mask=invertedmask)
        # ... and the source pixels inside them.
        keptpixels = cv2.bitwise_and(thresholded, thresholded, mask=mask)

        composed = cv2.bitwise_or(whitedscreen, keptpixels)
        lineimages.append(composed[top:top+height, left:left+width])

    return lineimages
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
### actual function
|
||||
def lineisolator(image):
    '''
    Split a BGR image into a list of line images.

    The image is converted to grayscale and binarised with Otsu's
    threshold. `lineimagemaker` is applied to the binarised image, and
    then applied again to each line image it produced. The results of the
    second pass are concatenated in order.

    Returns a list of images (the crops produced by `lineimagemaker`).
    '''
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    thresholded = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

    finallineimages = []
    for lineimage in lineimagemaker(thresholded):
        # Second pass over each first-pass line image.
        finallineimages.extend(lineimagemaker(lineimage))

    return finallineimages
|
||||
@ -2,15 +2,13 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 97,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import cv2\n",
|
||||
"import numpy as np\n",
|
||||
"\n",
|
||||
"import myfunctions as mf\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"import scipy.stats as st\n",
|
||||
"import math\n",
|
||||
@ -20,36 +18,495 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 98,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"img = cv2.imread('./test_images/IMG_7594.jpg')"
|
||||
"import sys\n",
|
||||
"sys.path.insert(0, '../../autocropper')\n",
|
||||
"import myfunctions as mf\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 99,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"out = mf.houghlineprocessing(img)"
|
||||
"def rectcenterpt(rect, xywhrect=True, retint=False):\n",
|
||||
" if (xywhrect):\n",
|
||||
" x = rect[0] + rect[2]/2\n",
|
||||
" y = rect[1] + rect[3]/2\n",
|
||||
" else:\n",
|
||||
" x = (rect[0]+rect[2])/2\n",
|
||||
" y = (rect[1]+rect[3])/2\n",
|
||||
" if (retint):\n",
|
||||
" x = int(x)\n",
|
||||
" y = int(y)\n",
|
||||
" return (x,y)\n",
|
||||
"\n",
|
||||
"def containsamount(outerrect, innerrect, percentage=1):\n",
|
||||
" tinyrect = mf.overlapRect([outerrect, innerrect])\n",
|
||||
" tinyarea = tinyrect[2]*tinyrect[3]\n",
|
||||
" innerrectarea = innerrect[2]*innerrect[3]\n",
|
||||
" if (tinyarea/innerrectarea >= percentage):\n",
|
||||
" return True\n",
|
||||
" return False\n",
|
||||
"\n",
|
||||
"def aboveandbelow(outerrect, innerrect):\n",
|
||||
" if (outerrect[1] < innerrect[1] and outerrect[1]+outerrect[3] > innerrect[1]+innerrect[3]):\n",
|
||||
" return True\n",
|
||||
" return False\n",
|
||||
"\n",
|
||||
"## Below code is an almost direct copy from https://github.com/scrunts23/CS-Data-Science-Build-Week-1/blob/master/model/dbscan.py\n",
|
||||
"\n",
|
||||
"def dbscan(D, eps, MinPts):\n",
|
||||
" '''\n",
|
||||
" Cluster the dataset `D` using the DBSCAN algorithm.\n",
|
||||
" \n",
|
||||
" dbscan takes a dataset `D` (a list of vectors), a threshold distance\n",
|
||||
" `eps`, and a required number of points `MinPts`.\n",
|
||||
" \n",
|
||||
" It will return a list of cluster labels. The label -1 means noise, and then\n",
|
||||
" the clusters are numbered starting from 1.\n",
|
||||
" '''\n",
|
||||
" \n",
|
||||
" # This list will hold the final cluster assignment for each point in D.\n",
|
||||
" # There are two reserved values:\n",
|
||||
" # -1 - Indicates a noise point\n",
|
||||
" # 0 - Means the point hasn't been considered yet.\n",
|
||||
" # Initially all labels are 0. \n",
|
||||
" labels = [0]*len(D)\n",
|
||||
"\n",
|
||||
" # C is the ID of the current cluster. \n",
|
||||
" C = 0\n",
|
||||
" \n",
|
||||
" # This outer loop is just responsible for picking new seed points--a point\n",
|
||||
" # from which to grow a new cluster.\n",
|
||||
" # Once a valid seed point is found, a new cluster is created, and the \n",
|
||||
" # cluster growth is all handled by the 'expandCluster' routine.\n",
|
||||
" \n",
|
||||
" # For each point P in the Dataset D...\n",
|
||||
" # ('P' is the index of the datapoint, rather than the datapoint itself.)\n",
|
||||
" for P in range(0, len(D)):\n",
|
||||
" \n",
|
||||
" # Only points that have not already been claimed can be picked as new \n",
|
||||
" # seed points. \n",
|
||||
" # If the point's label is not 0, continue to the next point.\n",
|
||||
" if not (labels[P] == 0):\n",
|
||||
" continue\n",
|
||||
" \n",
|
||||
" # Find all of P's neighboring points.\n",
|
||||
" NeighborPts = region_query(D, P, eps)\n",
|
||||
" \n",
|
||||
" # If the number is below MinPts, this point is noise. \n",
|
||||
" # This is the only condition under which a point is labeled \n",
|
||||
" # NOISE--when it's not a valid seed point. A NOISE point may later \n",
|
||||
" # be picked up by another cluster as a boundary point (this is the only\n",
|
||||
" # condition under which a cluster label can change--from NOISE to \n",
|
||||
" # something else).\n",
|
||||
" if len(NeighborPts) < MinPts:\n",
|
||||
" labels[P] = -1\n",
|
||||
" # Otherwise, if there are at least MinPts nearby, use this point as the \n",
|
||||
" # seed for a new cluster. \n",
|
||||
" else: \n",
|
||||
" C += 1\n",
|
||||
" grow_cluster(D, labels, P, NeighborPts, C, eps, MinPts)\n",
|
||||
" \n",
|
||||
" # All data has been clustered!\n",
|
||||
" return labels\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def grow_cluster(D, labels, P, NeighborPts, C, eps, MinPts):\n",
|
||||
" '''\n",
|
||||
" Grow a new cluster with label `C` from the seed point `P`.\n",
|
||||
" \n",
|
||||
" This function searches through the dataset to find all points that belong\n",
|
||||
" to this new cluster. When this function returns, cluster `C` is complete.\n",
|
||||
" \n",
|
||||
" Parameters:\n",
|
||||
" `D` - The dataset (a list of vectors)\n",
|
||||
" `labels` - List storing the cluster labels for all dataset points\n",
|
||||
" `P` - Index of the seed point for this new cluster\n",
|
||||
" `NeighborPts` - All of the neighbors of `P`\n",
|
||||
" `C` - The label for this new cluster. \n",
|
||||
" `eps` - Threshold distance\n",
|
||||
" `MinPts` - Minimum required number of neighbors\n",
|
||||
" '''\n",
|
||||
"\n",
|
||||
" # Assign the cluster label to the seed point.\n",
|
||||
" labels[P] = C\n",
|
||||
" \n",
|
||||
" # Look at each neighbor of P (neighbors are referred to as Pn). \n",
|
||||
" # NeighborPts will be used as a FIFO queue of points to search--that is, it\n",
|
||||
" # will grow as we discover new branch points for the cluster. The FIFO\n",
|
||||
" # behavior is accomplished by using a while-loop rather than a for-loop.\n",
|
||||
" # In NeighborPts, the points are represented by their index in the original\n",
|
||||
" # dataset.\n",
|
||||
" i = 0\n",
|
||||
" while i < len(NeighborPts): \n",
|
||||
" \n",
|
||||
" # Get the next point from the queue. \n",
|
||||
" Pn = NeighborPts[i]\n",
|
||||
" \n",
|
||||
" # If Pn was labelled NOISE during the seed search, then we\n",
|
||||
" # know it's not a branch point (it doesn't have enough neighbors), so\n",
|
||||
" # make it a leaf point of cluster C and move on.\n",
|
||||
" if labels[Pn] == -1:\n",
|
||||
" labels[Pn] = C\n",
|
||||
" \n",
|
||||
" # Otherwise, if Pn isn't already claimed, claim it as part of C.\n",
|
||||
" elif labels[Pn] == 0:\n",
|
||||
" # Add Pn to cluster C (Assign cluster label C).\n",
|
||||
" labels[Pn] = C\n",
|
||||
" \n",
|
||||
" # Find all the neighbors of Pn\n",
|
||||
" PnNeighborPts = region_query(D, Pn, eps)\n",
|
||||
" \n",
|
||||
" # If Pn has at least MinPts neighbors, it's a branch point!\n",
|
||||
" # Add all of its neighbors to the FIFO queue to be searched. \n",
|
||||
" if len(PnNeighborPts) >= MinPts:\n",
|
||||
" NeighborPts = NeighborPts + PnNeighborPts\n",
|
||||
" # If Pn *doesn't* have enough neighbors, then it's a leaf point.\n",
|
||||
" # Don't queue up it's neighbors as expansion points.\n",
|
||||
" #else:\n",
|
||||
" # Do nothing \n",
|
||||
" #NeighborPts = NeighborPts \n",
|
||||
" \n",
|
||||
" # Advance to the next point in the FIFO queue.\n",
|
||||
" i += 1 \n",
|
||||
" \n",
|
||||
" # We've finished growing cluster C!\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def region_query(D, P, eps):\n",
|
||||
" '''\n",
|
||||
" Find all points in dataset `D` within distance `eps` of point `P`.\n",
|
||||
" \n",
|
||||
" This function calculates the distance between a point P and every other \n",
|
||||
" point in the dataset, and then returns only those points which are within a\n",
|
||||
" threshold distance `eps`.\n",
|
||||
" '''\n",
|
||||
" neighbors = []\n",
|
||||
" \n",
|
||||
" # For each point in the dataset...\n",
|
||||
" for Pn in range(0, len(D)):\n",
|
||||
" \n",
|
||||
" # If the distance is below the threshold, add it to the neighbors list.\n",
|
||||
" if (rectcenterpt(D[P])[1] - rectcenterpt(D[Pn])[1]) < eps:\n",
|
||||
" neighbors.append(Pn)\n",
|
||||
" \n",
|
||||
" return neighbors"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"execution_count": 100,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"cv2.imshow(\"result2\", mf.ResizeWithAspectRatio(out, height=1000))\n",
|
||||
"def tempfunc(image):\n",
|
||||
" shape = image.shape\n",
|
||||
" \n",
|
||||
"\n",
|
||||
" # blackout = np.zeros(tempout.shape, dtype=np.uint8)\n",
|
||||
" \n",
|
||||
" kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,3))\n",
|
||||
" reducedimage = cv2.morphologyEx(image, cv2.MORPH_DILATE, kernel)\n",
|
||||
" reducedimage = cv2.morphologyEx(reducedimage, cv2.MORPH_ERODE, kernel)\n",
|
||||
" \n",
|
||||
" tempout = cv2.cvtColor(reducedimage, cv2.COLOR_GRAY2BGR)\n",
|
||||
" \n",
|
||||
" \n",
|
||||
" canny = cv2.Canny(reducedimage, 0, 500, None, 3)\n",
|
||||
" \n",
|
||||
" \n",
|
||||
" contours, heirarchy = cv2.findContours(canny,cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)\n",
|
||||
" boundingboxes = np.empty((len(contours), 4), dtype=int)\n",
|
||||
" \n",
|
||||
" for i, contour in enumerate(contours):\n",
|
||||
" boundingboxes[i] = cv2.boundingRect(contour)\n",
|
||||
" \n",
|
||||
" epsilonvalue = np.median(boundingboxes, axis=0)[3]/2\n",
|
||||
" \n",
|
||||
" labels = dbscan(boundingboxes, epsilonvalue, 1)\n",
|
||||
" print(labels)\n",
|
||||
" numclusters = max(labels)\n",
|
||||
" lineboxes = [[] for _ in range(numclusters)]\n",
|
||||
"\n",
|
||||
" for i, item in enumerate(labels):\n",
|
||||
" lineboxes[item-1].append(boundingboxes[i].tolist())\n",
|
||||
" \n",
|
||||
" \n",
|
||||
" mergedboxes = np.empty((numclusters,4), dtype=int)\n",
|
||||
" \n",
|
||||
" \n",
|
||||
" for i in range(numclusters):\n",
|
||||
" b = mf.mergerects(lineboxes[i])\n",
|
||||
" mergedboxes[i] = b\n",
|
||||
" \n",
|
||||
" j = 0\n",
|
||||
" while (j < len(mergedboxes)):\n",
|
||||
" i = 0\n",
|
||||
" while (i < len(mergedboxes)):\n",
|
||||
" if (i == j):\n",
|
||||
" i += 1\n",
|
||||
" continue\n",
|
||||
" outerbox = mergedboxes[j]\n",
|
||||
" innerbox = mergedboxes[i]\n",
|
||||
" if containsamount(outerbox, innerbox, 1) or aboveandbelow(outerbox, innerbox) or innerbox[3] < epsilonvalue:\n",
|
||||
" mergedboxes = np.delete(mergedboxes, i, axis=0)\n",
|
||||
" lineboxes.pop(i)\n",
|
||||
" if (i < j):\n",
|
||||
" j -= 1\n",
|
||||
" i -= 1\n",
|
||||
" i += 1\n",
|
||||
" j += 1\n",
|
||||
" \n",
|
||||
" # return mergedboxes, lineboxes\n",
|
||||
" for i, b in enumerate(mergedboxes):\n",
|
||||
" tempout = cv2.rectangle(tempout, (b[0],b[1]), (b[0]+b[2], b[1]+b[3]), (0,255,0), thickness=1)\n",
|
||||
" for t in lineboxes[i]:\n",
|
||||
" tempout = cv2.rectangle(tempout, (t[0],t[1]), (t[0]+t[2], t[1]+t[3]), (0,0,255), thickness=1)\n",
|
||||
" \n",
|
||||
" print(epsilonvalue)\n",
|
||||
" return tempout"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 101,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def linerectretriever(image):\n",
|
||||
" shape = image.shape\n",
|
||||
" \n",
|
||||
" kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,3))\n",
|
||||
" reducedimage = cv2.morphologyEx(image, cv2.MORPH_DILATE, kernel)\n",
|
||||
" reducedimage = cv2.morphologyEx(reducedimage, cv2.MORPH_ERODE, kernel)\n",
|
||||
" \n",
|
||||
" canny = cv2.Canny(reducedimage, 0, 500, None, 3)\n",
|
||||
" \n",
|
||||
" \n",
|
||||
" contours, heirarchy = cv2.findContours(canny,cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)\n",
|
||||
" boundingboxes = np.empty((len(contours), 4), dtype=int)\n",
|
||||
" \n",
|
||||
" for i, contour in enumerate(contours):\n",
|
||||
" boundingboxes[i] = cv2.boundingRect(contour)\n",
|
||||
" \n",
|
||||
" epsilonvalue = np.median(boundingboxes, axis=0)[3]/3\n",
|
||||
" \n",
|
||||
" labels = dbscan(boundingboxes, epsilonvalue, 1)\n",
|
||||
" # print(labels)\n",
|
||||
" numclusters = max(labels)\n",
|
||||
" lineboxes = [[] for _ in range(numclusters)]\n",
|
||||
"\n",
|
||||
" for i, item in enumerate(labels):\n",
|
||||
" lineboxes[item-1].append(boundingboxes[i].tolist())\n",
|
||||
" \n",
|
||||
" \n",
|
||||
" mergedboxes = np.empty((numclusters,4), dtype=int)\n",
|
||||
" \n",
|
||||
" \n",
|
||||
" for i in range(numclusters):\n",
|
||||
" b = mf.mergerects(lineboxes[i])\n",
|
||||
" mergedboxes[i] = b\n",
|
||||
" \n",
|
||||
" j = 0\n",
|
||||
" while (j < len(mergedboxes)):\n",
|
||||
" i = 0\n",
|
||||
" while (i < len(mergedboxes)):\n",
|
||||
" if (i == j):\n",
|
||||
" i += 1\n",
|
||||
" continue\n",
|
||||
" outerbox = mergedboxes[j]\n",
|
||||
" innerbox = mergedboxes[i]\n",
|
||||
" if containsamount(outerbox, innerbox, 1) or aboveandbelow(outerbox, innerbox) or innerbox[3] < epsilonvalue:\n",
|
||||
" mergedboxes = np.delete(mergedboxes, i, axis=0)\n",
|
||||
" lineboxes.pop(i)\n",
|
||||
" if (i < j):\n",
|
||||
" j -= 1\n",
|
||||
" i -= 1\n",
|
||||
" i += 1\n",
|
||||
" j += 1\n",
|
||||
" \n",
|
||||
" return mergedboxes, lineboxes\n",
|
||||
" \n",
|
||||
"def lineimagemaker(thresholded):\n",
|
||||
" lineimages = []\n",
|
||||
" mergedboxes, originalboxes = linerectretriever(thresholded)\n",
|
||||
" \n",
|
||||
" mergedboxesordering = (mergedboxes[:,1]).argsort() # sorted by y value (aka lines from top to bottom)\n",
|
||||
" mergedboxes = mergedboxes[mergedboxesordering]\n",
|
||||
" originalboxes = [originalboxes[i] for i in mergedboxesordering]\n",
|
||||
" for i, box in enumerate(mergedboxes):\n",
|
||||
" mask = np.zeros(thresholded.shape, dtype=np.uint8)\n",
|
||||
" whitebackground = np.full(thresholded.shape, fill_value=255, dtype=np.uint8)\n",
|
||||
" for lb in originalboxes[i]:\n",
|
||||
" mask = cv2.rectangle(mask, (lb[0],lb[1]), (lb[0]+lb[2], lb[1]+lb[3]), (255,255,255), thickness=cv2.FILLED)\n",
|
||||
"\n",
|
||||
" invertedmask = cv2.bitwise_not(mask)\n",
|
||||
" whitedscreen = cv2.bitwise_and(whitebackground, whitebackground, mask=invertedmask)\n",
|
||||
" lineimage = cv2.bitwise_and(thresholded, thresholded, mask=mask)\n",
|
||||
" lineimage = cv2.bitwise_or(whitedscreen, lineimage)[box[1]:box[1]+box[3], box[0]:box[0]+box[2]]\n",
|
||||
" # lineimage = mf.externaldeskew(lineimage, fill=(255,255,255), alreadygray=True)\n",
|
||||
" # lineimage = thresholded[box[1]:box[1]+box[3], box[0]:box[0]+box[2]]\n",
|
||||
" lineimages.append(lineimage)\n",
|
||||
" # lineimages.append(mask)\n",
|
||||
" return lineimages\n",
|
||||
" \n",
|
||||
"\n",
|
||||
"def lineisolator(image):\n",
|
||||
" imgcopy = image.copy()\n",
|
||||
" gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)\n",
|
||||
" thresholded = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]\n",
|
||||
" \n",
|
||||
" \n",
|
||||
" kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,3))\n",
|
||||
" \n",
|
||||
" \n",
|
||||
" \n",
|
||||
" lineimages = lineimagemaker(thresholded)\n",
|
||||
" \n",
|
||||
" # for i, lineimage in enumerate(lineimages):\n",
|
||||
" # lineimages[i] = cv2.morphologyEx(lineimage, cv2.MORPH_ERODE, kernel)\n",
|
||||
"\n",
|
||||
" \n",
|
||||
" finallineimages = []\n",
|
||||
" for i, lineimage in enumerate(lineimages):\n",
|
||||
" templineimages = lineimagemaker(lineimage)\n",
|
||||
" finallineimages += templineimages\n",
|
||||
" \n",
|
||||
" \n",
|
||||
" # mergedboxes, originalboxes = linerectretriever(thresholded) \n",
|
||||
" # mask = np.zeros(thresholded.shape, dtype=np.uint8)\n",
|
||||
" # for i, box in enumerate(mergedboxes):\n",
|
||||
" # for lb in originalboxes[i]:\n",
|
||||
" # mask = cv2.rectangle(mask, (lb[0],lb[1]), (lb[0]+lb[2], lb[1]+lb[3]), (255,255,255), thickness=cv2.FILLED)\n",
|
||||
"\n",
|
||||
" # return mask\n",
|
||||
" \n",
|
||||
" \n",
|
||||
" # out = tempfunc(thresholded)\n",
|
||||
" # return out\n",
|
||||
" \n",
|
||||
" return finallineimages\n",
|
||||
" \n",
|
||||
" "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 102,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"pathname = \"../adjusted_test_images/\"\n",
|
||||
"filename = \"IMG_7594.jpg\"\n",
|
||||
"\n",
|
||||
"# print(pathname+filename)\n",
|
||||
"img = cv2.imread(pathname+filename)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 103,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"outs = lineisolator(img)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 104,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)\n",
|
||||
"# thresholded = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]\n",
|
||||
"# monke = tempfunc(thresholded)\n",
|
||||
"# cv2.imwrite(\"../temp/monke.jpg\", monke)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 105,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# cv2.imshow(\"test\", mf.ResizeWithAspectRatio(img, height=1000))\n",
|
||||
"# # cv2.imshow(\"test1\", mf.ResizeWithAspectRatio(out, height=1000))\n",
|
||||
"# cv2.waitKey(0)\n",
|
||||
"# cv2.destroyAllWindows()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 106,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# for out in outs:\n",
|
||||
"# if (out.shape[0] > out.shape[1]):\n",
|
||||
"# cv2.imshow(\"test1\", mf.ResizeWithAspectRatio(out, height=1000))\n",
|
||||
"# else:\n",
|
||||
"# cv2.imshow(\"test1\", mf.ResizeWithAspectRatio(out, width=1000))\n",
|
||||
"# key = cv2.waitKey(0)\n",
|
||||
"# cv2.destroyAllWindows()\n",
|
||||
"# if (key == 107):\n",
|
||||
"# break\n",
|
||||
"if (isinstance(outs, np.ndarray)):\n",
|
||||
" if (outs.shape[0] > outs.shape[1]):\n",
|
||||
" cv2.imshow(\"test\", mf.ResizeWithAspectRatio(outs, height=1350))\n",
|
||||
" else:\n",
|
||||
" cv2.imshow(\"test\", mf.ResizeWithAspectRatio(outs, width=1000))\n",
|
||||
"else:\n",
|
||||
" for i, out in enumerate(outs):\n",
|
||||
" if (out.shape[0] > out.shape[1]):\n",
|
||||
" cv2.imshow(\"test\"+str(i), mf.ResizeWithAspectRatio(out, height=1350))\n",
|
||||
" else:\n",
|
||||
" cv2.imshow(\"test\"+str(i), mf.ResizeWithAspectRatio(out, width=1000))\n",
|
||||
"cv2.waitKey(0)\n",
|
||||
"cv2.destroyAllWindows()"
|
||||
"cv2.destroyAllWindows()\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 107,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# cv2.imshow(\"test\", mf.ResizeWithAspectRatio(outs[30], width=1000))\n",
|
||||
"# cv2.waitKey(0)\n",
|
||||
"# cv2.destroyAllWindows()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 108,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# results = tempfunc(outs[30])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 109,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# cv2.imshow(\"test\", mf.ResizeWithAspectRatio(results, width=1000))\n",
|
||||
"# cv2.waitKey(0)\n",
|
||||
"# cv2.destroyAllWindows()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 110,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -59,7 +516,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 111,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -68,7 +525,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 112,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
||||
@ -1,104 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import cv2\n",
|
||||
"import numpy as np\n",
|
||||
"\n",
|
||||
"import myfunctions as mf\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"import scipy.stats as st\n",
|
||||
"import math\n",
|
||||
"\n",
|
||||
"import matplotlib.pyplot as plt"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"img = cv2.imread('./test_images/IMG_7594.jpg')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"out = mf.houghlineprocessing(img)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"cv2.imshow(\"result2\", mf.ResizeWithAspectRatio(out, height=1000))\n",
|
||||
"cv2.waitKey(0)\n",
|
||||
"cv2.destroyAllWindows()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# https://medium.com/@vatvenger/extracting-lines-from-ocr-a8f410448fc\n",
|
||||
"# https://www.width.ai/post/the-best-ways-to-extract-text-from-images-without-tesseract-python"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## Potential Next Steps. Isolate a line of text and then feed that into the OCR Model to extract the text."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# #IDEA:\n",
|
||||
"# 1. Isolate lines into rectangles\n",
|
||||
"# 2. feed that rectangle portion of the image into an OCR model\n",
|
||||
"# 3. append that to the final output string with the end character for nextline\n",
|
||||
"# 4. give the whole final string to a model which gives the outputs"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@ -10,14 +10,14 @@ RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
|
||||
|
||||
# -y answers "yes" automatically when apt asks for confirmation before installing packages
|
||||
RUN apt-get update && \
|
||||
apt-get install -y build-essential cmake git gdb pkg-config valgrind systemd-coredump python3 python3-opencv libopencv-dev python3-pip python3-dev && \
|
||||
apt-get install -y build-essential cmake git gdb pkg-config valgrind systemd-coredump python3-opencv libopencv-dev python3-pip python3-dev && \
|
||||
apt-get -y clean && apt-get -y autoremove
|
||||
|
||||
RUN python3 -m pip install --upgrade pip
|
||||
|
||||
RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu117
|
||||
|
||||
RUN pip3 install datasets && pip3 install jupyter notebook && pip3 install matplotlib
|
||||
RUN pip3 install datasets && pip3 install jupyter notebook && pip3 install matplotlib && pip3 install deskew
|
||||
|
||||
RUN pip3 install easyocr && pip3 uninstall -y opencv-python-headless
|
||||
|
||||
|
||||