Updating textextractor #23

Merged
ewellenr merged 17 commits from textextractor into main 2023-11-13 23:25:17 -05:00
32 changed files with 1436 additions and 468 deletions

Binary file not shown.

Before

Width:  |  Height:  |  Size: 412 KiB

After

Width:  |  Height:  |  Size: 448 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 426 KiB

After

Width:  |  Height:  |  Size: 433 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 550 KiB

After

Width:  |  Height:  |  Size: 634 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 168 KiB

After

Width:  |  Height:  |  Size: 194 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 29 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 17 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 21 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 24 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 27 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 9.4 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 31 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 23 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 18 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 23 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 36 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 21 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 26 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 18 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 21 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 15 KiB

File diff suppressed because one or more lines are too long

View File

@ -23,6 +23,8 @@ def rectcenterpt(rect, xywhrect=True, retint=False):
def containsamount(outerrect, innerrect, percentage=1):
tinyrect = mf.overlapRect([outerrect, innerrect])
tinyarea = tinyrect[2]*tinyrect[3]
if (tinyrect[0] == -1):
tinyarea = 0
innerrectarea = innerrect[2]*innerrect[3]
if (tinyarea/innerrectarea >= percentage):
return True
@ -173,23 +175,218 @@ def region_query(D, P, eps):
return neighbors
def linerectretriever(image):
def padWithColour(img, hpadding=0, vpadding=0, fill=(0,0,0)):
    """Return a copy of *img* surrounded by a constant-colour border.

    hpadding pixels are added on both the left and right edges and
    vpadding pixels on both the top and bottom edges; fill is the border
    colour handed to cv2.copyMakeBorder.
    """
    return cv2.copyMakeBorder(
        img,
        vpadding,  # top
        vpadding,  # bottom
        hpadding,  # left
        hpadding,  # right
        cv2.BORDER_CONSTANT,
        None,
        fill,
    )
def mergecontours(contours):
    """Return the convex hull enclosing the points of every contour given.

    All contour point arrays are stacked into one array and a single hull
    is computed over the combined point set.
    """
    return cv2.convexHull(np.vstack(contours))
def getSkewAngle(cvImage) -> float:
    """Estimate the skew angle of the content of a BGR image.

    The image is padded with a 50 px white border, converted to grayscale,
    blurred, Otsu-thresholded (inverted) and dilated so that neighbouring
    characters fuse into blobs.  The minimum-area rectangle around the
    convex hull of all blobs gives the angle.

    Returns the angle reported by cv2.minAreaRect, shifted by 90 degrees
    when it falls outside [-45, 45].  Returns 0.0 when no contour is found
    (e.g. a blank image) instead of raising.
    """
    padded = padWithColour(cvImage, hpadding=50, vpadding=50, fill=(255,255,255))
    gray = cv2.cvtColor(padded, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (9, 9), 0)
    thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

    # Wider than tall, so characters on the same line merge before
    # vertically separated blocks do.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (10, 5))
    dilate = cv2.dilate(thresh, kernel, iterations=5)

    contours, _ = cv2.findContours(dilate, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    if len(contours) == 0:
        # Nothing to measure: previously this crashed on contours[0].
        return 0.0

    # Sort order is kept so the stacked point sequence fed to the hull is
    # exactly what it was before this cleanup.
    contours = sorted(contours, key=cv2.contourArea, reverse=True)
    minAreaRect = cv2.minAreaRect(mergecontours(contours))

    # Fold the reported angle into the [-45, 45] range.
    angle = minAreaRect[-1]
    if angle > 45:
        angle = angle - 90
    if angle < -45:
        angle = 90 + angle
    return angle
def minboxdeskew(img, fill=(0,0,0)):
    """Pad a single-channel image by 50 px per side and rotate it by its skew.

    The skew is measured by getSkewAngle on a BGR copy of *img*; the padded
    original is then passed to mf.rotate with the same fill colour.
    NOTE(review): the rotation direction/semantics are those of mf.rotate,
    which is not visible here.
    """
    skew = getSkewAngle(cv2.cvtColor(img, cv2.COLOR_GRAY2BGR))
    bordered = padWithColour(img, hpadding=50, vpadding=50, fill=fill)
    return mf.rotate(bordered, skew, fill=fill)
def l1linerectretriever(image, divider=2):
    """Group character boxes of a grayscale page image into first-level lines.

    Character boxes come from the external contours of a Canny edge map of
    the 3x3-dilated image.  Line boxes come from the same procedure applied
    to the image after a wide horizontal kernel (image width // 40) has
    smeared dark content sideways.  Line boxes are clustered with dbscan
    using eps = median character-box height / divider, and every character
    box at least 90 % covered by a line box is assigned to that line box's
    cluster.

    Returns (mergedboxes, letterboxesbyline): an (n, 4) int array holding
    mf.mergerects(...) of each cluster's character boxes, and the list of
    character boxes ([x, y, w, h] lists) per cluster.  Returns an empty
    (0, 4) array and [] when no contours are found, instead of raising.

    NOTE(review): assumes image is 8-bit single-channel (255 - image is
    used as its inverse) and that dbscan labels start at 1 - confirm.
    """
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,3))
    linekernel = cv2.getStructuringElement(cv2.MORPH_RECT, (image.shape[1]//40, 1))

    # Character-level contours.
    reducedimage = cv2.morphologyEx(image, cv2.MORPH_DILATE, kernel, iterations=1)
    charcanny = cv2.Canny(reducedimage, 0, 500, None, 3)
    lettercontours, _ = cv2.findContours(charcanny, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Line-level contours: dilate the inverted image horizontally, then
    # invert back, so dark strokes spread along the row.
    linemade = 255-cv2.morphologyEx(255-image, cv2.MORPH_DILATE, linekernel)
    linecanny = cv2.Canny(linemade, 0, 500, None, 3)
    linecontours, _ = cv2.findContours(linecanny, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    if len(lettercontours) == 0 or len(linecontours) == 0:
        # Blank input: the median / max() below would otherwise fail.
        return np.empty((0, 4), dtype=int), []

    letterboxes = np.array([cv2.boundingRect(c) for c in lettercontours], dtype=int)
    lineboxes = np.array([cv2.boundingRect(c) for c in linecontours], dtype=int)

    # Column 3 of an (x, y, w, h) box is its height.
    epsilonvalue = np.median(letterboxes, axis=0)[3]/divider

    linelabels = dbscan(lineboxes, epsilonvalue, 1)
    numclusters = max(linelabels)

    letterboxesbyline = [[] for _ in range(numclusters)]
    for label, linebox in zip(linelabels, lineboxes):
        for letterbox in letterboxes:
            if containsamount(linebox, letterbox, 0.9):
                letterboxesbyline[label-1].append(letterbox.tolist())

    mergedboxes = np.empty((numclusters,4), dtype=int)
    for i, boxes in enumerate(letterboxesbyline):
        mergedboxes[i] = mf.mergerects(boxes)

    return mergedboxes, letterboxesbyline
def sublinerectretriever(image, divider=2):
shape = image.shape
imgcopy = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
# return imgcopy
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,3))
# reducedimage = image
reducedimage = cv2.morphologyEx(image, cv2.MORPH_DILATE, kernel, iterations=1)
# reducedimage = cv2.morphologyEx(reducedimage, cv2.MORPH_ERODE, kernel)
# return reducedimage
canny = cv2.Canny(reducedimage, 0, 500, None, 3)
# return canny
contours, heirarchy = cv2.findContours(canny,cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# contours, heirarchy = cv2.findContours(255-reducedimage,cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# imgcopy = cv2.drawContours(imgcopy, contours, -1, color=(0,255,0), thickness=1)
# return imgcopy
boundingboxes = np.empty((len(contours), 4), dtype=int)
for i, contour in enumerate(contours):
boundingboxes[i] = cv2.boundingRect(contour)
b = list(cv2.boundingRect(contour))
b[0] -= (kernel.shape[0]-1)
b[1] -= (kernel.shape[1]-1)
b[2] += (2*kernel.shape[0]-1)
b[3] += (2*kernel.shape[1]-1)
boundingboxes[i] = b
# imgcopy = cv2.rectangle(imgcopy, (b[0],b[1]), (b[0]+b[2], b[1]+b[3]), 128, thickness=3)
# return imgcopy
epsilonvalue = np.median(boundingboxes, axis=0)[3]/3
epsilonvalue = np.median(boundingboxes, axis=0)[3]/divider
# print(epsilonvalue)
labels = dbscan(boundingboxes, epsilonvalue, 1)
# print(labels)
@ -199,6 +396,16 @@ def linerectretriever(image):
for i, item in enumerate(labels):
lineboxes[item-1].append(boundingboxes[i].tolist())
# # COLOUR THE RECTANGLES GROUPED
# for i, setofboxes in enumerate(lineboxes):
# k = i+1
# colour = ((k*23123)%255, (k*8654)%255, (k*45242)%255)
# # print(colour)
# for b in setofboxes:
# imgcopy = cv2.rectangle(imgcopy, (b[0],b[1]), (b[0]+b[2], b[1]+b[3]), colour, thickness=3)
# return imgcopy
mergedboxes = np.empty((numclusters,4), dtype=int)
@ -226,26 +433,52 @@ def linerectretriever(image):
j += 1
return mergedboxes, lineboxes
def lineimagemaker(thresholded):
def linerectretriever(image, divider=2, sublines=False):
    """Dispatch to the sub-line or first-level line-box retriever.

    A truthy sublines selects sublinerectretriever; otherwise
    l1linerectretriever is used.  divider is forwarded unchanged and the
    chosen retriever's result is returned as-is.
    """
    retriever = sublinerectretriever if sublines else l1linerectretriever
    return retriever(image, divider=divider)
def lineimagemaker(thresholded, divider=2, sublines=False):
    """Cut a single-channel image into one cropped image per detected line.

    Line boxes and their member boxes are obtained from linerectretriever
    (called once; divider and sublines are forwarded).  Lines are processed
    top to bottom.  For each line, everything outside its member boxes is
    painted white, the result is cropped to the merged line box and a 3x3
    morphological close is applied.

    Returns the list of cropped line images, in top-to-bottom order.
    """
    mergedboxes, originalboxes = linerectretriever(thresholded, divider=divider, sublines=sublines)

    # Order by the y coordinate of the merged boxes (top to bottom).
    mergedboxesordering = (mergedboxes[:,1]).argsort()

    # Drop the leading run of boxes whose x is -1.
    # NOTE(review): -1 is presumably the "empty" sentinel from
    # mf.mergerects, expected to sort first - confirm.
    goodpoint = 0
    for i, item in enumerate(mergedboxesordering):
        if (mergedboxes[item][0] != -1):
            goodpoint = i
            break
    mergedboxesordering = mergedboxesordering[goodpoint:]

    mergedboxes = mergedboxes[mergedboxesordering]
    originalboxes = [originalboxes[i] for i in mergedboxesordering]

    # Loop invariants: neither is modified by the cv2 calls below.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,3))
    whitebackground = np.full(thresholded.shape, fill_value=255, dtype=np.uint8)

    lineimages = []
    for box, memberboxes in zip(mergedboxes, originalboxes):
        # Mask is 255 inside every member box of this line, 0 elsewhere.
        mask = np.zeros(thresholded.shape, dtype=np.uint8)
        for lb in memberboxes:
            mask = cv2.rectangle(mask, (lb[0],lb[1]), (lb[0]+lb[2], lb[1]+lb[3]), (255,255,255), thickness=cv2.FILLED)

        # White outside the mask, original pixels inside it.
        invertedmask = cv2.bitwise_not(mask)
        whitedscreen = cv2.bitwise_and(whitebackground, whitebackground, mask=invertedmask)
        lineimage = cv2.bitwise_and(thresholded, thresholded, mask=mask)
        lineimage = cv2.bitwise_or(whitedscreen, lineimage)[box[1]:box[1]+box[3], box[0]:box[0]+box[2]]

        lineimage = cv2.morphologyEx(lineimage, cv2.MORPH_CLOSE, kernel, iterations=1)
        lineimages.append(lineimage)
    return lineimages
@ -254,27 +487,84 @@ def lineimagemaker(thresholded):
def ismultiline(img):
    """Heuristically decide whether *img* contains more than one text line.

    The image is dilated with a 3x3 kernel, edge-detected with Canny, and
    the bounding boxes of the external contours are measured.  The image is
    considered multi-line when its height exceeds 1.5 times the tallest
    box plus 100 px.

    Returns False when no contours are found (e.g. a blank image) instead
    of raising.
    """
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,3))
    reducedimage = cv2.morphologyEx(img, cv2.MORPH_DILATE, kernel)
    canny = cv2.Canny(reducedimage, 0, 500, None, 3)
    contours, _ = cv2.findContours(canny, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    if len(contours) == 0:
        # No content at all cannot be several lines.
        return False

    # Index 3 of a cv2.boundingRect result (x, y, w, h) is the height.
    heightdetermination = max(cv2.boundingRect(contour)[3] for contour in contours)

    # NOTE(review): the 2*50 term presumably offsets the 50 px top and
    # bottom padding that minboxdeskew adds before this is called - confirm.
    return bool(img.shape[0] > (heightdetermination*1.5) + (2*50))
### actual function
def lineisolator(image):
imgcopy = image.copy()
# imgcopy = image.copy()
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresholded = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
# thresholded = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
# return gray
# return thresholded
thresholded = gray
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,3))
# kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,3))
lineimages = lineimagemaker(thresholded)
lineimages = lineimagemaker(thresholded, 1.5, False)
# for i, lineimage in enumerate(lineimages):
# lineimages[i] = cv2.morphologyEx(lineimage, cv2.MORPH_ERODE, kernel)
finallineimages = []
for i, lineimage in enumerate(lineimages):
templineimages = lineimagemaker(lineimage)
# if (i == 0):
# finallineimages.append(lineimages[0])
# continue
deskewedlineimage = minboxdeskew(lineimage, fill=255)
# finallineimages.append(deskewedlineimage)
# print(deskewedlineimage.shape)
if (ismultiline(deskewedlineimage)):
# print("hi" + str(i))
templineimages = lineimagemaker(deskewedlineimage, 2.5, True)
else:
templineimages = lineimagemaker(deskewedlineimage, 1.5, True)
# templineimages = lineimagemaker(deskewedlineimage, 2)
finallineimages += templineimages
# finallineimages += templineimages[1:]
for i, lineimage in enumerate(finallineimages):
deskewedli = minboxdeskew(lineimage, fill=255)
dim = int((deskewedli.shape[0]-100)//20)
# print(dim)
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (dim, dim))
deskewedli = cv2.morphologyEx(deskewedli, cv2.MORPH_DILATE, kernel,iterations=1)
finallineimages[i] = cv2.morphologyEx(deskewedli, cv2.MORPH_OPEN, kernel)
# mergedboxes, originalboxes = linerectretriever(thresholded)
@ -289,4 +579,7 @@ def lineisolator(image):
# out = tempfunc(thresholded)
# return out
return finallineimages
return finallineimages

View File

@ -0,0 +1,511 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import cv2\n",
"import numpy as np\n",
"\n",
"\n",
"import scipy.stats as st\n",
"import math\n",
"\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.10/dist-packages/torchvision/datapoints/__init__.py:12: UserWarning: The torchvision.datapoints and torchvision.transforms.v2 namespaces are still Beta. While we do not expect major breaking changes, some APIs may still change according to user feedback. Please submit any feedback you may have in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check out https://github.com/pytorch/vision/issues/7319 to learn more about the APIs that we suspect might involve future changes. You can silence this warning by calling torchvision.disable_beta_transforms_warning().\n",
" warnings.warn(_BETA_TRANSFORMS_WARNING)\n",
"/usr/local/lib/python3.10/dist-packages/torchvision/transforms/v2/__init__.py:54: UserWarning: The torchvision.datapoints and torchvision.transforms.v2 namespaces are still Beta. While we do not expect major breaking changes, some APIs may still change according to user feedback. Please submit any feedback you may have in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check out https://github.com/pytorch/vision/issues/7319 to learn more about the APIs that we suspect might involve future changes. You can silence this warning by calling torchvision.disable_beta_transforms_warning().\n",
" warnings.warn(_BETA_TRANSFORMS_WARNING)\n"
]
}
],
"source": [
"import sys\n",
"sys.path.insert(0, '../../autocropper')\n",
"import myfunctions as mf\n",
"\n",
"import extractorfunctions as ef\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"pathname = \"../test_images/\"\n",
"filename = \"IMG_7640.jpg\"\n",
"# pathname = \"../temp/\"\n",
"# filename = \"test.jpg\"\n",
"# pathname = \"../result_images/\"\n",
"# filename = \"13.jpg\"\n",
"\n",
"# print(pathname+filename)\n",
"img = cv2.imread(pathname+filename)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# import easyocr\n",
"# reader = easyocr.Reader(['en'])"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"def l1linerectretriever(image, divider=2):\n",
" shape = image.shape\n",
"\n",
" imgcopy = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)\n",
" # return imgcopy\n",
" \n",
" kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,3))\n",
" linekernel = cv2.getStructuringElement(cv2.MORPH_RECT, (shape[1]//40, 1))\n",
" # reducedimage = image\n",
" reducedimage = cv2.morphologyEx(image, cv2.MORPH_DILATE, kernel, iterations=1)\n",
" # reducedimage = cv2.morphologyEx(reducedimage, cv2.MORPH_ERODE, kernel)\n",
" # return reducedimage\n",
" \n",
" charcanny = cv2.Canny(reducedimage, 0, 500, None, 3)\n",
" # return canny\n",
" \n",
" \n",
" lettercontours, heirarchy = cv2.findContours(charcanny,cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)\n",
" # contours, heirarchy = cv2.findContours(255-reducedimage,cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)\n",
"\n",
" # imgcopy = cv2.drawContours(imgcopy, lettercontours, -1, color=(0,255,0), thickness=1)\n",
" # return imgcopy\n",
"\n",
" letterboxes = np.empty((len(lettercontours), 4), dtype=int)\n",
" \n",
" for i, contour in enumerate(lettercontours):\n",
" b = list(cv2.boundingRect(contour))\n",
" # b[0] -= (kernel.shape[0]-1)\n",
" # b[1] -= (kernel.shape[1]-1)\n",
" # b[2] += (2*kernel.shape[0]-1)\n",
" # b[3] += (2*kernel.shape[1]-1)\n",
" letterboxes[i] = b\n",
" # imgcopy = cv2.rectangle(imgcopy, (b[0],b[1]), (b[0]+b[2], b[1]+b[3]), 128, thickness=3)\n",
" # return imgcopy\n",
" \n",
" epsilonvalue = np.median(letterboxes, axis=0)[3]/divider\n",
" # print(epsilonvalue)\n",
"\n",
"\n",
"\n",
" linemade = 255-cv2.morphologyEx(255-image, cv2.MORPH_DILATE, linekernel)\n",
" # return linemade\n",
"\n",
" linecanny = cv2.Canny(linemade, 0, 500, None, 3)\n",
" linecontours, heirarchy = cv2.findContours(linecanny,cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)\n",
"\n",
" # imgcopy = cv2.drawContours(imgcopy, linecontours, -1, color=(0,255,0), thickness=1)\n",
" # return imgcopy\n",
" # for i, contour in enumerate(linecontours):\n",
" # k = i+1\n",
" # colour = ((k*23123)%255, (k*8654)%255, (k*45242)%255)\n",
" # imgcopy = cv2.drawContours(imgcopy, [contour], -1, colour, thickness=1)\n",
" # return imgcopy\n",
"\n",
"\n",
"\n",
" lineboxes = np.empty((len(linecontours), 4), dtype=int)\n",
" \n",
" for i, contour in enumerate(linecontours):\n",
" b = list(cv2.boundingRect(contour))\n",
" # b[0] -= (kernel.shape[0]-1)\n",
" # b[1] -= (kernel.shape[1]-1)\n",
" # b[2] += (2*kernel.shape[0]-1)\n",
" # b[3] += (2*kernel.shape[1]-1)\n",
" lineboxes[i] = b\n",
" # imgcopy = cv2.rectangle(imgcopy, (b[0],b[1]), (b[0]+b[2], b[1]+b[3]), (0,255,0), thickness=3)\n",
" # return imgcopy\n",
"\n",
" linelabels = ef.dbscan(lineboxes, epsilonvalue, 1)\n",
" # print(linelabels)\n",
" numclusters = max(linelabels)\n",
"\n",
" letterboxesbyline = [[] for _ in range(numclusters)]\n",
"\n",
" for i, linebox in enumerate(lineboxes):\n",
" for j, letterbox in enumerate(letterboxes):\n",
" if ef.containsamount(linebox, letterbox, 0.9):\n",
" letterboxesbyline[linelabels[i]-1].append(letterbox.tolist())\n",
"\n",
" # print(len(letterboxesbyline))\n",
"\n",
"\n",
" # # COLOUR THE RECTANGLES GROUPED\n",
" # for i, setofboxes in enumerate(letterboxesbyline):\n",
" # k = i+1\n",
" # colour = ((k*23123)%255, (k*8654)%255, (k*45242)%255)\n",
" # # print(colour)\n",
" # # b = lineboxes[i]\n",
" # # imgcopy = cv2.rectangle(imgcopy, (b[0],b[1]), (b[0]+b[2], b[1]+b[3]), colour, thickness=3)\n",
" # print(i)\n",
" # for b in setofboxes:\n",
" # print(i)\n",
" # imgcopy = cv2.rectangle(imgcopy, (b[0],b[1]), (b[0]+b[2], b[1]+b[3]), colour, thickness=3)\n",
" # return imgcopy\n",
"\n",
" mergedboxes = np.empty((numclusters,4), dtype=int)\n",
"\n",
" tobedeleted = []\n",
"\n",
" for i in range(numclusters):\n",
" b = mf.mergerects(letterboxesbyline[i])\n",
" # if (b[0] == -1):\n",
" # tobedeleted.append(i)\n",
" mergedboxes[i] = b\n",
"\n",
" # if (tobedeleted != []):\n",
" # # print(\"hi\")\n",
" # mergedboxes = np.delete(mergedboxes, tobedeleted, axis=0)\n",
" # letterboxesbyline = [ele for idx, ele in enumerate(letterboxesbyline) if idx not in tobedeleted]\n",
"\n",
" return mergedboxes, letterboxesbyline\n",
"\n",
"\n",
"\n",
"def sublinerectretriever(image, divider=2):\n",
" shape = image.shape\n",
" \n",
" imgcopy = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)\n",
" # return imgcopy\n",
" \n",
" kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,3))\n",
" # reducedimage = image\n",
" reducedimage = cv2.morphologyEx(image, cv2.MORPH_DILATE, kernel, iterations=1)\n",
" # reducedimage = cv2.morphologyEx(reducedimage, cv2.MORPH_ERODE, kernel)\n",
" # return reducedimage\n",
" \n",
" canny = cv2.Canny(reducedimage, 0, 500, None, 3)\n",
" # return canny\n",
" \n",
" \n",
" contours, heirarchy = cv2.findContours(canny,cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)\n",
" # contours, heirarchy = cv2.findContours(255-reducedimage,cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)\n",
"\n",
" # imgcopy = cv2.drawContours(imgcopy, contours, -1, color=(0,255,0), thickness=1)\n",
" # return imgcopy\n",
"\n",
" boundingboxes = np.empty((len(contours), 4), dtype=int)\n",
" \n",
" for i, contour in enumerate(contours):\n",
" b = list(cv2.boundingRect(contour))\n",
" b[0] -= (kernel.shape[0]-1)\n",
" b[1] -= (kernel.shape[1]-1)\n",
" b[2] += (2*kernel.shape[0]-1)\n",
" b[3] += (2*kernel.shape[1]-1)\n",
" boundingboxes[i] = b\n",
" # imgcopy = cv2.rectangle(imgcopy, (b[0],b[1]), (b[0]+b[2], b[1]+b[3]), 128, thickness=3)\n",
" # return imgcopy\n",
" \n",
" epsilonvalue = np.median(boundingboxes, axis=0)[3]/divider\n",
" # print(epsilonvalue)\n",
" \n",
" labels = ef.dbscan(boundingboxes, epsilonvalue, 1)\n",
" # print(labels)\n",
" numclusters = max(labels)\n",
" lineboxes = [[] for _ in range(numclusters)]\n",
"\n",
" for i, item in enumerate(labels):\n",
" lineboxes[item-1].append(boundingboxes[i].tolist())\n",
" \n",
" \n",
" # # COLOUR THE RECTANGLES GROUPED\n",
" # for i, setofboxes in enumerate(lineboxes):\n",
" # k = i+1\n",
" # colour = ((k*23123)%255, (k*8654)%255, (k*45242)%255)\n",
" # # print(colour)\n",
" # for b in setofboxes:\n",
" # imgcopy = cv2.rectangle(imgcopy, (b[0],b[1]), (b[0]+b[2], b[1]+b[3]), colour, thickness=3)\n",
" # return imgcopy\n",
" \n",
" \n",
" mergedboxes = np.empty((numclusters,4), dtype=int)\n",
" \n",
" \n",
" for i in range(numclusters):\n",
" b = mf.mergerects(lineboxes[i])\n",
" mergedboxes[i] = b\n",
" \n",
" j = 0\n",
" while (j < len(mergedboxes)):\n",
" i = 0\n",
" while (i < len(mergedboxes)):\n",
" if (i == j):\n",
" i += 1\n",
" continue\n",
" outerbox = mergedboxes[j]\n",
" innerbox = mergedboxes[i]\n",
" if ef.containsamount(outerbox, innerbox, 1) or ef.aboveandbelow(outerbox, innerbox) or innerbox[3] < epsilonvalue:\n",
" mergedboxes = np.delete(mergedboxes, i, axis=0)\n",
" lineboxes.pop(i)\n",
" if (i < j):\n",
" j -= 1\n",
" i -= 1\n",
" i += 1\n",
" j += 1\n",
" \n",
" return mergedboxes, lineboxes\n",
"\n",
"def linerectretriever(image, divider=2, sublines=False):\n",
"\n",
" if (sublines):\n",
" return sublinerectretriever(image, divider=divider)\n",
" else:\n",
" return l1linerectretriever(image, divider=divider)\n",
"\n",
"\n",
"def lineimagemaker(thresholded, divider=2, sublines=False):\n",
" lineimages = []\n",
" mergedboxes, originalboxes = linerectretriever(thresholded, divider=divider, sublines=sublines)\n",
" # print(mergedboxes)\n",
" # print(originalboxes)\n",
" # return thresholded\n",
" \n",
" mergedboxesordering = (mergedboxes[:,1]).argsort() # sorted by y value (aka lines from top to bottom)\n",
" # print(mergedboxesordering)\n",
" \n",
" goodpoint = 0\n",
" for i, item in enumerate(mergedboxesordering):\n",
" if (mergedboxes[item][0] != -1):\n",
" goodpoint = i\n",
" break\n",
" mergedboxesordering = mergedboxesordering[goodpoint:]\n",
"\n",
" mergedboxes = mergedboxes[mergedboxesordering]\n",
" originalboxes = [originalboxes[i] for i in mergedboxesordering]\n",
" out = cv2.cvtColor(thresholded.copy(), cv2.COLOR_GRAY2BGR)\n",
" # lineimages.append(out)\n",
" for i, box in enumerate(mergedboxes):\n",
" # print(box)\n",
" mask = np.zeros(thresholded.shape, dtype=np.uint8)\n",
" whitebackground = np.full(thresholded.shape, fill_value=255, dtype=np.uint8)\n",
" # print(originalboxes[i])\n",
" for lb in originalboxes[i]:\n",
" mask = cv2.rectangle(mask, (lb[0],lb[1]), (lb[0]+lb[2], lb[1]+lb[3]), (255,255,255), thickness=cv2.FILLED)\n",
"\n",
" # lineimages[0] = cv2.rectangle(lineimages[0], (box[0],box[1]), (box[0]+box[2], box[1]+box[3]), (0,255,0), thickness=1)\n",
"\n",
" invertedmask = cv2.bitwise_not(mask)\n",
" whitedscreen = cv2.bitwise_and(whitebackground, whitebackground, mask=invertedmask)\n",
" lineimage = cv2.bitwise_and(thresholded, thresholded, mask=mask)\n",
" lineimage = cv2.bitwise_or(whitedscreen, lineimage)[box[1]:box[1]+box[3], box[0]:box[0]+box[2]]\n",
" # lineimage = mf.externaldeskew(lineimage, fill=(255,255,255), alreadygray=True)\n",
" # lineimage = thresholded[box[1]:box[1]+box[3], box[0]:box[0]+box[2]]\n",
" kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,3))\n",
" lineimage = cv2.morphologyEx(lineimage, cv2.MORPH_CLOSE, kernel, iterations=1)\n",
" lineimages.append(lineimage)\n",
" # lineimages.append(mask)\n",
" return lineimages\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"def lineisolator(image):\n",
" # imgcopy = image.copy()\n",
" gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)\n",
" # thresholded = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]\n",
" # return gray\n",
" # return thresholded\n",
" thresholded = gray\n",
" \n",
" \n",
" # kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,3))\n",
" \n",
" \n",
" \n",
" lineimages = lineimagemaker(thresholded, 1.5, False)\n",
" \n",
" # for i, lineimage in enumerate(lineimages):\n",
" # lineimages[i] = cv2.morphologyEx(lineimage, cv2.MORPH_ERODE, kernel)\n",
"\n",
" \n",
" finallineimages = []\n",
" \n",
" for i, lineimage in enumerate(lineimages):\n",
" # if (i == 0):\n",
" # finallineimages.append(lineimages[0])\n",
" # continue\n",
" deskewedlineimage = ef.minboxdeskew(lineimage, fill=255)\n",
"\n",
" # finallineimages.append(deskewedlineimage)\n",
" # print(deskewedlineimage.shape)\n",
"\n",
" if (ef.ismultiline(deskewedlineimage)):\n",
" # print(\"hi\" + str(i))\n",
" templineimages = lineimagemaker(deskewedlineimage, 2.5, True)\n",
" else:\n",
" templineimages = lineimagemaker(deskewedlineimage, 1.5, True)\n",
"\n",
" # templineimages = lineimagemaker(deskewedlineimage, 2)\n",
"\n",
" finallineimages += templineimages\n",
" # finallineimages += templineimages[1:]\n",
"\n",
" for i, lineimage in enumerate(finallineimages):\n",
" deskewedli = ef.minboxdeskew(lineimage, fill=255)\n",
" dim = int((deskewedli.shape[0]-100)//20)\n",
" # print(dim)\n",
" kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (dim, dim))\n",
" deskewedli = cv2.morphologyEx(deskewedli, cv2.MORPH_DILATE, kernel,iterations=1)\n",
" finallineimages[i] = cv2.morphologyEx(deskewedli, cv2.MORPH_OPEN, kernel)\n",
" \n",
" \n",
" # mergedboxes, originalboxes = linerectretriever(thresholded) \n",
" # mask = np.zeros(thresholded.shape, dtype=np.uint8)\n",
" # for i, box in enumerate(mergedboxes):\n",
" # for lb in originalboxes[i]:\n",
" # mask = cv2.rectangle(mask, (lb[0],lb[1]), (lb[0]+lb[2], lb[1]+lb[3]), (255,255,255), thickness=cv2.FILLED)\n",
"\n",
" # return mask\n",
" \n",
" \n",
" # out = tempfunc(thresholded)\n",
" # return out\n",
" \n",
" return finallineimages"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# result = reader.readtext(pathname+filename)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# print(result)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"bing = mf.houghlineprocessing(img)\n",
"# outs = bing\n",
"outs = ef.lineisolator(bing)\n",
"# # gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)\n",
"# # outs = linerectretriever(gray)\n",
"# outs = getSkewAngle(img)\n",
"# outs = minboxdeskew(img, fill=(255,255,255))\n",
"# bing = cv2.cvtColor(bing, cv2.COLOR_BGR2GRAY)\n",
"# bing = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)\n",
"# outs = bing\n",
"# outs = linerectretriever(bing, 1.5, False)\n",
"# outs = lineimagemaker(bing, 1.5, False)\n",
"# for i, _ in enumerate(outs):\n",
"# outs[i] = ef.minboxdeskew(outs[i], fill=255)\n",
"\n",
"# outs = img"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# print(outs)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"# for out in outs:\n",
"# if (out.shape[0] > out.shape[1]):\n",
"# cv2.imshow(\"test1\", mf.ResizeWithAspectRatio(out, height=1000))\n",
"# else:\n",
"# cv2.imshow(\"test1\", mf.ResizeWithAspectRatio(out, width=1000))\n",
"# key = cv2.waitKey(0)\n",
"# cv2.destroyAllWindows()\n",
"# if (key == 107):\n",
"# break\n",
"if (isinstance(outs, np.ndarray)):\n",
" if (outs.shape[0] > outs.shape[1]):\n",
" cv2.imshow(\"test\", mf.ResizeWithAspectRatio(outs, height=1350))\n",
" else:\n",
" cv2.imshow(\"test\", mf.ResizeWithAspectRatio(outs, width=1000))\n",
"else:\n",
" for i, out in enumerate(outs):\n",
" # cv2.imwrite(\"../result_images/\"+str(i)+\".jpg\", out)\n",
" if (out.shape[0] > out.shape[1]):\n",
" cv2.imshow(\"test\"+str(i), mf.ResizeWithAspectRatio(out, height=1350))\n",
" else:\n",
" cv2.imshow(\"test\"+str(i), mf.ResizeWithAspectRatio(out, width=1000))\n",
"cv2.waitKey(0)\n",
"cv2.destroyAllWindows()\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"# cv2.imwrite(\"../temp/test.jpg\", outs[2])"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@ -0,0 +1,260 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 83,
"metadata": {},
"outputs": [],
"source": [
"# https://github.com/NielsRogge/Transformers-Tutorials/blob/master/TrOCR/Inference_with_TrOCR_%2B_Gradio_demo.ipynb\n",
"# https://github.com/NielsRogge/Transformers-Tutorials/tree/master/TrOCR\n",
"# https://huggingface.co/docs/transformers/model_doc/trocr"
]
},
{
"cell_type": "code",
"execution_count": 84,
"metadata": {},
"outputs": [],
"source": [
"from transformers import TrOCRProcessor\n",
"from transformers import VisionEncoderDecoderModel\n",
"\n",
"from PIL import Image\n",
"import torch"
]
},
{
"cell_type": "code",
"execution_count": 85,
"metadata": {},
"outputs": [],
"source": [
"import sys\n",
"sys.path.insert(0, '../../autocropper')\n",
"import myfunctions as mf\n",
"\n",
"import extractorfunctions as ef\n",
"import cv2"
]
},
{
"cell_type": "code",
"execution_count": 86,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Could not find image processor class in the image processor config or the model config. Loading based on pattern matching with the model's feature extractor configuration.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Some weights of VisionEncoderDecoderModel were not initialized from the model checkpoint at microsoft/trocr-small-printed and are newly initialized: ['encoder.pooler.dense.bias', 'encoder.pooler.dense.weight']\n",
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
]
}
],
"source": [
"processor = TrOCRProcessor.from_pretrained('microsoft/trocr-small-printed')\n",
"model = VisionEncoderDecoderModel.from_pretrained('microsoft/trocr-small-printed')"
]
},
{
"cell_type": "code",
"execution_count": 87,
"metadata": {},
"outputs": [],
"source": [
"device = torch.device(\"cpu\")\n",
"if torch.cuda.is_available():\n",
" device = torch.device(\"cuda:0\")\n",
" \n",
"model = model.to(device)\n"
]
},
{
"cell_type": "code",
"execution_count": 88,
"metadata": {},
"outputs": [],
"source": [
"filename = \"IMG_7640.jpg\"\n",
"pathname = \"../test_images/\""
]
},
{
"cell_type": "code",
"execution_count": 89,
"metadata": {},
"outputs": [],
"source": [
"img = cv2.imread(pathname+filename)"
]
},
{
"cell_type": "code",
"execution_count": 90,
"metadata": {},
"outputs": [],
"source": [
"clarified = mf.houghlineprocessing(img)\n",
"lineimages = ef.lineisolator(clarified)"
]
},
{
"cell_type": "code",
"execution_count": 91,
"metadata": {},
"outputs": [],
"source": [
"# print(len(lineimages))"
]
},
{
"cell_type": "code",
"execution_count": 92,
"metadata": {},
"outputs": [],
"source": [
"PILversions = []\n",
"for line in lineimages:\n",
" rgbline = cv2.cvtColor(line, cv2.COLOR_GRAY2RGB)\n",
" PILversions.append(Image.fromarray(rgbline))"
]
},
{
"cell_type": "code",
"execution_count": 100,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAogAAAB8CAIAAABlt9bLAAAI4klEQVR4nO3d23ajuBYF0FCj//+XfR7cx52yMVddtrTnfMqopGwQQksSIJbH4/EDAMTwz+q/Lsvy/EFsA0BLy1v0viJ5lZwGgKr+nPrrZVm2kxsAuONcMD+JZwCo5EowAwCVCGYACEQwA0AgghkAAhHMABCIYAaAQNZX/tplpZEaLLgGwMURs+eYAaCGiyNm7tjt1izL+1KpACQhmJs6PtPw+ZeiGiADN3+1Y/4fgF2CuRGpDMARghkAAhHMwzDmBshAMLcgUwE4SDBXJ5UBOE4wA0AgghkAAhHMABCIYAaAQCzJCfDu9z2bq6vhehcc9QjmYTj/oY23Jyk8WEFjgjmix+OhPw6Qk2AO55nE8hggJzd/xSKPAZITzIFIZQAEMwAE8lcwu/kQAPq6OGI26QoANfwXzIbLNShVAE65MmI2XAaAStz8BQCBzLPAyOqkscE9AGOZJ5hXWdgSgLFMHswvEhqAIbjGXJFbsgE4SzBXsSzLhVQW5ABkmcqu6m2e/E6+mnIHSE4wl2TIC5BZkcGVYL5LGAPw83ccfIuGI4EtmOMyrQ2QkGAOquVA/O279AMAKlmWZbeNFcy8e+b0q+qIbYCCdrPZ41IA0NT2nKgRczoHJ8nd1AbQhWAewJFrEgc/p/iHmNkG+Ck6mDGVnUWlEbCBNZnpmFKDYOYu2cw0ri2mC2WZyqaAtxu5U/ndjn+WgNeET8xxpBLBDNdtj66+/dbSMcAGwXzXatsaZzas8UIlkuYn0tEHRiSYq3g8Hjlb5+TZfPagu8V9m6kFchLMUUzT9GS+3nxT8m7NhreSGeXKvY4F1wjmECqdtzlH7VUp0l6e2bxR/tF6hG+beuddQ2QjmGtJO5vNHZnHWLvny5ETarirA3H6E12KbuOYRiiT4y609hszZIJ5Wne6BcHvaOvFnXT11FsAp30xfu7L2S5F1W1eDeCAZ/fBLkuE6xqXS+9VP9965IKZv2xU6ORzAO33PUk21y7YBkPS+7vwOe9dcIN3H+r71hGPX/12n0j8rdTuvHWhbh791UMvmOd0ra7sVtzjtXCIs5o86l0jCN5bvTD/v/rvZcsteKEdV2NHlmX5p96n00ulVH79WcLaknCXZ5Wqyzhfve2+R202IN1a2d2Pa0w1mipFfVOe/ODT/VW7y56A3U/nVMuYnw7mmI1FngNWw9ljGrMOMKKhz9wGURGqfLpE4/Mbg5RDs834c+rLYrbIQY4ZnxwaNqge9dQr2/bxXOrrBqpvbv4i11W3xg6+b2r3f1GKsi0iYaPRMtfTXWPOVpkOOrK2Q6prPPc9Ho/VyqYGdjR94bc8T0dsDUbZ5lwj5ulPyzsOdoFHqdl9bZfk9rLPamklgxasEzOhXMHMNud2d4OGxwVlH+rb/bQuBdvm2cKOp22c9UQnI5hnI1zHpYG7bDsCe6Xy6leXXVQ5guCbNyLBPBWpHMTxpkqLdtCRZemeP6yuBd1Ysy+tsZqQZqS7RDd/aQFpSevWS8cz/fF/239T6usq1bGxmsrdAr/wl90ZMQOzad/+nvrGvuvaHtnUsK+cevPalwZH/PL9DRe+RTBXEb9CNzBK57Qet8YUFPlC5oUN65XNA3UgNoSqCddKaXsXhp/KjllvmMydhkAV5aZQOfSp8eYFL40jdndh+GCewAT1LIMi2Wydljtilpvzt2WtTlLaprI7S1LPoIh6bwi+oPsGnNJga6teuxmrtG8yYoYWDJQpywh1YoIZjtJIFXS/MPN0dPLs6aqBHnMq5Vww6/VDEc6jHx2dQThM7V25xvz53EKEpXaggYIPkER+/mcUr2OhJLcpn7FcnMpe/vb529sbxmlni925ek38lZsGUqowlSQzqXVX9rfzRBgE8W2F/W2av6ewCy8AE2j9uJTAHprD9FIqm0M9/zM066wxjSh3ZRt/MJyEN4vWULYM3a
BKe8XbgSjBDKSlfxOZjk57gjkpJ1spBcfNmQ9KhHHzQOV/alNbrvSuj1VElGB2OBnakRfxHjFQNhRXqgxfThXmrAk0ynbyW5Rgnqk9GmVfrm3nxmNyv397e+tG9fjl2idkLr2n+A+kSbtViqUUL7FI7ewaF9/CuNwWTeXyndvWHvm9+yrYZBzQXVFGzNxXuylfXUnGObbNuPm+mxV7vpIceo/OXh3P2UMVzNkNfZKTxHxXBwJuEpcVP5qCuafincF6vUvtyGU5u/yVzJHNs95ollONqvXnp8LNkIylRsUK1Q525+QqaI5sPk7lmcyRA/rXzV/Ha8CgVZxvHNBvSr05zfLaBSlMgqg0+XHxruwjn+7MYVzfaq/3DAYhm5lYxceldlsubVwNQRqsoY/pkQIs8qRZWkXO/ef/VbDMp+dzzEO33ZF1z+YkR9bTxtf8rpxerlWbWjqiLHdlq5rUcLADZFRHR3fW+Cu+MRwx/Mpfb4n7WZNEMqecbYx2Z2W1bi/Fi0LZMqXhg/mNGH7qPpvNk6MAnDVbMPPSJZszd4xk8IaD99O9fvbcxwbd7ullucYMY8ncxfk5ELrJk+ls9Tj7BszkxdudEXN5uy/GadbmNu5ZJ8+SgmYqyZs18HMYLTOuqX179kyVtjvBXFf3yvragNrNWfc9ZXoi+aZ62ez0L8tUNgU4LYlPLf3RuRmEYM6iRqs05etPuu/RZOV5mSud2yq9zEOZR2AqO5HPae07E93ygyOC3EI8ZXW9XLZlj8iUZXtQpeotmDP6PJEyn1qret1n5EBwSvd+jxpbg6ls+KpxozNrG9f96sDclO18BDOEMH3zOv0OdtSrbB3TSgQzbJnyBrde5Ec97fcxQ6n24hozHFLpqrPWrapUxdvyenOqgm3PiBn6yDkQb7nLinfob8nMiBlOKPVux8xNW5uBXdoSrl28aQu2JcEMBWitTtldT77Uh+dUL5uVbRumsoF5SI6nSiv9Ff9MVhkxAz0Fae5bLinTeDL/zncFOTrZCGaAf0V7JWuR7bncD5DKvQhmgA7C3qAeYW3z5AQzAP+p2mNYvWRgaP6m1nuzASCD3TmGszn7Px23ddef5EmFAAAAAElFTkSuQmCC",
"text/plain": [
"<PIL.Image.Image image mode=RGB size=648x124>"
]
},
"execution_count": 100,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# PILversions[9]"
]
},
{
"cell_type": "code",
"execution_count": 94,
"metadata": {},
"outputs": [],
"source": [
"# image = Image.open(\"../result_images/6.jpg\").convert(\"RGB\")\n",
"# image"
]
},
{
"cell_type": "code",
"execution_count": 95,
"metadata": {},
"outputs": [],
"source": [
"# pixel_values = processor(image, return_tensors=\"pt\").pixel_values\n",
"# # print(pixel_values.shape)\n",
"# # print(image)\n",
"# # print(pixel_values)"
]
},
{
"cell_type": "code",
"execution_count": 96,
"metadata": {},
"outputs": [],
"source": [
"# pixel_values = processor(image, return_tensors=\"pt\").pixel_values\n",
"# # print(pixel_values.shape)\n",
"# generated_ids = model.generate(pixel_values)\n",
"# generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]\n",
"# print(generated_text)"
]
},
{
"cell_type": "code",
"execution_count": 97,
"metadata": {},
"outputs": [],
"source": [
"finalstring = \"\""
]
},
{
"cell_type": "code",
"execution_count": 98,
"metadata": {},
"outputs": [],
"source": [
"for image in PILversions:\n",
" pixel_values = processor(image, return_tensors=\"pt\").pixel_values\n",
" pixel_values = pixel_values.to(device)\n",
" generated_ids = model.generate(pixel_values)\n",
" generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]\n",
" finalstring = finalstring + generated_text + \"\\n\"\n",
" # print(generated_text)"
]
},
{
"cell_type": "code",
"execution_count": 99,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"WALKER'S\n",
"CHOCOLATES\n",
"NO RETURNS OR EXCHANGES\n",
"ON FOOD ITEMS.\n",
"REG 09-22-2023 12:08\n",
"000021\n",
"1 BAKING NT $14.40\n",
"TL $14.40\n",
"CREDIT : $14.40\n",
"LIFE S SHORT\n",
"EAT CHOCOLATE\n",
"\n"
]
}
],
"source": [
"print(finalstring)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 97,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@ -18,392 +18,26 @@
},
{
"cell_type": "code",
"execution_count": 98,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import sys\n",
"sys.path.insert(0, '../../autocropper')\n",
"import myfunctions as mf\n"
"import myfunctions as mf\n",
"\n",
"import extractorfunctions as ef\n"
]
},
{
"cell_type": "code",
"execution_count": 99,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def rectcenterpt(rect, xywhrect=True, retint=False):\n",
" if (xywhrect):\n",
" x = rect[0] + rect[2]/2\n",
" y = rect[1] + rect[3]/2\n",
" else:\n",
" x = (rect[0]+rect[2])/2\n",
" y = (rect[1]+rect[3])/2\n",
" if (retint):\n",
" x = int(x)\n",
" y = int(y)\n",
" return (x,y)\n",
"\n",
"def containsamount(outerrect, innerrect, percentage=1):\n",
" tinyrect = mf.overlapRect([outerrect, innerrect])\n",
" tinyarea = tinyrect[2]*tinyrect[3]\n",
" innerrectarea = innerrect[2]*innerrect[3]\n",
" if (tinyarea/innerrectarea >= percentage):\n",
" return True\n",
" return False\n",
"\n",
"def aboveandbelow(outerrect, innerrect):\n",
" if (outerrect[1] < innerrect[1] and outerrect[1]+outerrect[3] > innerrect[1]+innerrect[3]):\n",
" return True\n",
" return False\n",
"\n",
"## Below code is an almost direct copy from https://github.com/scrunts23/CS-Data-Science-Build-Week-1/blob/master/model/dbscan.py\n",
"\n",
"def dbscan(D, eps, MinPts):\n",
" '''\n",
" Cluster the dataset `D` using the DBSCAN algorithm.\n",
" \n",
" dbscan takes a dataset `D` (a list of vectors), a threshold distance\n",
" `eps`, and a required number of points `MinPts`.\n",
" \n",
" It will return a list of cluster labels. The label -1 means noise, and then\n",
" the clusters are numbered starting from 1.\n",
" '''\n",
" \n",
" # This list will hold the final cluster assignment for each point in D.\n",
" # There are two reserved values:\n",
" # -1 - Indicates a noise point\n",
" # 0 - Means the point hasn't been considered yet.\n",
" # Initially all labels are 0. \n",
" labels = [0]*len(D)\n",
"\n",
" # C is the ID of the current cluster. \n",
" C = 0\n",
" \n",
" # This outer loop is just responsible for picking new seed points--a point\n",
" # from which to grow a new cluster.\n",
" # Once a valid seed point is found, a new cluster is created, and the \n",
" # cluster growth is all handled by the 'expandCluster' routine.\n",
" \n",
" # For each point P in the Dataset D...\n",
" # ('P' is the index of the datapoint, rather than the datapoint itself.)\n",
" for P in range(0, len(D)):\n",
" \n",
" # Only points that have not already been claimed can be picked as new \n",
" # seed points. \n",
" # If the point's label is not 0, continue to the next point.\n",
" if not (labels[P] == 0):\n",
" continue\n",
" \n",
" # Find all of P's neighboring points.\n",
" NeighborPts = region_query(D, P, eps)\n",
" \n",
" # If the number is below MinPts, this point is noise. \n",
" # This is the only condition under which a point is labeled \n",
" # NOISE--when it's not a valid seed point. A NOISE point may later \n",
" # be picked up by another cluster as a boundary point (this is the only\n",
" # condition under which a cluster label can change--from NOISE to \n",
" # something else).\n",
" if len(NeighborPts) < MinPts:\n",
" labels[P] = -1\n",
" # Otherwise, if there are at least MinPts nearby, use this point as the \n",
" # seed for a new cluster. \n",
" else: \n",
" C += 1\n",
" grow_cluster(D, labels, P, NeighborPts, C, eps, MinPts)\n",
" \n",
" # All data has been clustered!\n",
" return labels\n",
"\n",
"\n",
"def grow_cluster(D, labels, P, NeighborPts, C, eps, MinPts):\n",
" '''\n",
" Grow a new cluster with label `C` from the seed point `P`.\n",
" \n",
" This function searches through the dataset to find all points that belong\n",
" to this new cluster. When this function returns, cluster `C` is complete.\n",
" \n",
" Parameters:\n",
" `D` - The dataset (a list of vectors)\n",
" `labels` - List storing the cluster labels for all dataset points\n",
" `P` - Index of the seed point for this new cluster\n",
" `NeighborPts` - All of the neighbors of `P`\n",
" `C` - The label for this new cluster. \n",
" `eps` - Threshold distance\n",
" `MinPts` - Minimum required number of neighbors\n",
" '''\n",
"\n",
" # Assign the cluster label to the seed point.\n",
" labels[P] = C\n",
" \n",
" # Look at each neighbor of P (neighbors are referred to as Pn). \n",
" # NeighborPts will be used as a FIFO queue of points to search--that is, it\n",
" # will grow as we discover new branch points for the cluster. The FIFO\n",
" # behavior is accomplished by using a while-loop rather than a for-loop.\n",
" # In NeighborPts, the points are represented by their index in the original\n",
" # dataset.\n",
" i = 0\n",
" while i < len(NeighborPts): \n",
" \n",
" # Get the next point from the queue. \n",
" Pn = NeighborPts[i]\n",
" \n",
" # If Pn was labelled NOISE during the seed search, then we\n",
" # know it's not a branch point (it doesn't have enough neighbors), so\n",
" # make it a leaf point of cluster C and move on.\n",
" if labels[Pn] == -1:\n",
" labels[Pn] = C\n",
" \n",
" # Otherwise, if Pn isn't already claimed, claim it as part of C.\n",
" elif labels[Pn] == 0:\n",
" # Add Pn to cluster C (Assign cluster label C).\n",
" labels[Pn] = C\n",
" \n",
" # Find all the neighbors of Pn\n",
" PnNeighborPts = region_query(D, Pn, eps)\n",
" \n",
" # If Pn has at least MinPts neighbors, it's a branch point!\n",
" # Add all of its neighbors to the FIFO queue to be searched. \n",
" if len(PnNeighborPts) >= MinPts:\n",
" NeighborPts = NeighborPts + PnNeighborPts\n",
" # If Pn *doesn't* have enough neighbors, then it's a leaf point.\n",
" # Don't queue up it's neighbors as expansion points.\n",
" #else:\n",
" # Do nothing \n",
" #NeighborPts = NeighborPts \n",
" \n",
" # Advance to the next point in the FIFO queue.\n",
" i += 1 \n",
" \n",
" # We've finished growing cluster C!\n",
"\n",
"\n",
"def region_query(D, P, eps):\n",
" '''\n",
" Find all points in dataset `D` within distance `eps` of point `P`.\n",
" \n",
" This function calculates the distance between a point P and every other \n",
" point in the dataset, and then returns only those points which are within a\n",
" threshold distance `eps`.\n",
" '''\n",
" neighbors = []\n",
" \n",
" # For each point in the dataset...\n",
" for Pn in range(0, len(D)):\n",
" \n",
" # If the distance is below the threshold, add it to the neighbors list.\n",
" if (rectcenterpt(D[P])[1] - rectcenterpt(D[Pn])[1]) < eps:\n",
" neighbors.append(Pn)\n",
" \n",
" return neighbors"
]
},
{
"cell_type": "code",
"execution_count": 100,
"metadata": {},
"outputs": [],
"source": [
"def tempfunc(image):\n",
" shape = image.shape\n",
" \n",
"\n",
" # blackout = np.zeros(tempout.shape, dtype=np.uint8)\n",
" \n",
" kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,3))\n",
" reducedimage = cv2.morphologyEx(image, cv2.MORPH_DILATE, kernel)\n",
" reducedimage = cv2.morphologyEx(reducedimage, cv2.MORPH_ERODE, kernel)\n",
" \n",
" tempout = cv2.cvtColor(reducedimage, cv2.COLOR_GRAY2BGR)\n",
" \n",
" \n",
" canny = cv2.Canny(reducedimage, 0, 500, None, 3)\n",
" \n",
" \n",
" contours, heirarchy = cv2.findContours(canny,cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)\n",
" boundingboxes = np.empty((len(contours), 4), dtype=int)\n",
" \n",
" for i, contour in enumerate(contours):\n",
" boundingboxes[i] = cv2.boundingRect(contour)\n",
" \n",
" epsilonvalue = np.median(boundingboxes, axis=0)[3]/2\n",
" \n",
" labels = dbscan(boundingboxes, epsilonvalue, 1)\n",
" print(labels)\n",
" numclusters = max(labels)\n",
" lineboxes = [[] for _ in range(numclusters)]\n",
"\n",
" for i, item in enumerate(labels):\n",
" lineboxes[item-1].append(boundingboxes[i].tolist())\n",
" \n",
" \n",
" mergedboxes = np.empty((numclusters,4), dtype=int)\n",
" \n",
" \n",
" for i in range(numclusters):\n",
" b = mf.mergerects(lineboxes[i])\n",
" mergedboxes[i] = b\n",
" \n",
" j = 0\n",
" while (j < len(mergedboxes)):\n",
" i = 0\n",
" while (i < len(mergedboxes)):\n",
" if (i == j):\n",
" i += 1\n",
" continue\n",
" outerbox = mergedboxes[j]\n",
" innerbox = mergedboxes[i]\n",
" if containsamount(outerbox, innerbox, 1) or aboveandbelow(outerbox, innerbox) or innerbox[3] < epsilonvalue:\n",
" mergedboxes = np.delete(mergedboxes, i, axis=0)\n",
" lineboxes.pop(i)\n",
" if (i < j):\n",
" j -= 1\n",
" i -= 1\n",
" i += 1\n",
" j += 1\n",
" \n",
" # return mergedboxes, lineboxes\n",
" for i, b in enumerate(mergedboxes):\n",
" tempout = cv2.rectangle(tempout, (b[0],b[1]), (b[0]+b[2], b[1]+b[3]), (0,255,0), thickness=1)\n",
" for t in lineboxes[i]:\n",
" tempout = cv2.rectangle(tempout, (t[0],t[1]), (t[0]+t[2], t[1]+t[3]), (0,0,255), thickness=1)\n",
" \n",
" print(epsilonvalue)\n",
" return tempout"
]
},
{
"cell_type": "code",
"execution_count": 101,
"metadata": {},
"outputs": [],
"source": [
"def linerectretriever(image):\n",
" shape = image.shape\n",
" \n",
" kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,3))\n",
" reducedimage = cv2.morphologyEx(image, cv2.MORPH_DILATE, kernel)\n",
" reducedimage = cv2.morphologyEx(reducedimage, cv2.MORPH_ERODE, kernel)\n",
" \n",
" canny = cv2.Canny(reducedimage, 0, 500, None, 3)\n",
" \n",
" \n",
" contours, heirarchy = cv2.findContours(canny,cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)\n",
" boundingboxes = np.empty((len(contours), 4), dtype=int)\n",
" \n",
" for i, contour in enumerate(contours):\n",
" boundingboxes[i] = cv2.boundingRect(contour)\n",
" \n",
" epsilonvalue = np.median(boundingboxes, axis=0)[3]/3\n",
" \n",
" labels = dbscan(boundingboxes, epsilonvalue, 1)\n",
" # print(labels)\n",
" numclusters = max(labels)\n",
" lineboxes = [[] for _ in range(numclusters)]\n",
"\n",
" for i, item in enumerate(labels):\n",
" lineboxes[item-1].append(boundingboxes[i].tolist())\n",
" \n",
" \n",
" mergedboxes = np.empty((numclusters,4), dtype=int)\n",
" \n",
" \n",
" for i in range(numclusters):\n",
" b = mf.mergerects(lineboxes[i])\n",
" mergedboxes[i] = b\n",
" \n",
" j = 0\n",
" while (j < len(mergedboxes)):\n",
" i = 0\n",
" while (i < len(mergedboxes)):\n",
" if (i == j):\n",
" i += 1\n",
" continue\n",
" outerbox = mergedboxes[j]\n",
" innerbox = mergedboxes[i]\n",
" if containsamount(outerbox, innerbox, 1) or aboveandbelow(outerbox, innerbox) or innerbox[3] < epsilonvalue:\n",
" mergedboxes = np.delete(mergedboxes, i, axis=0)\n",
" lineboxes.pop(i)\n",
" if (i < j):\n",
" j -= 1\n",
" i -= 1\n",
" i += 1\n",
" j += 1\n",
" \n",
" return mergedboxes, lineboxes\n",
" \n",
"def lineimagemaker(thresholded):\n",
" lineimages = []\n",
" mergedboxes, originalboxes = linerectretriever(thresholded)\n",
" \n",
" mergedboxesordering = (mergedboxes[:,1]).argsort() # sorted by y value (aka lines from top to bottom)\n",
" mergedboxes = mergedboxes[mergedboxesordering]\n",
" originalboxes = [originalboxes[i] for i in mergedboxesordering]\n",
" for i, box in enumerate(mergedboxes):\n",
" mask = np.zeros(thresholded.shape, dtype=np.uint8)\n",
" whitebackground = np.full(thresholded.shape, fill_value=255, dtype=np.uint8)\n",
" for lb in originalboxes[i]:\n",
" mask = cv2.rectangle(mask, (lb[0],lb[1]), (lb[0]+lb[2], lb[1]+lb[3]), (255,255,255), thickness=cv2.FILLED)\n",
"\n",
" invertedmask = cv2.bitwise_not(mask)\n",
" whitedscreen = cv2.bitwise_and(whitebackground, whitebackground, mask=invertedmask)\n",
" lineimage = cv2.bitwise_and(thresholded, thresholded, mask=mask)\n",
" lineimage = cv2.bitwise_or(whitedscreen, lineimage)[box[1]:box[1]+box[3], box[0]:box[0]+box[2]]\n",
" # lineimage = mf.externaldeskew(lineimage, fill=(255,255,255), alreadygray=True)\n",
" # lineimage = thresholded[box[1]:box[1]+box[3], box[0]:box[0]+box[2]]\n",
" lineimages.append(lineimage)\n",
" # lineimages.append(mask)\n",
" return lineimages\n",
" \n",
"\n",
"def lineisolator(image):\n",
" imgcopy = image.copy()\n",
" gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)\n",
" thresholded = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]\n",
" \n",
" \n",
" kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,3))\n",
" \n",
" \n",
" \n",
" lineimages = lineimagemaker(thresholded)\n",
" \n",
" # for i, lineimage in enumerate(lineimages):\n",
" # lineimages[i] = cv2.morphologyEx(lineimage, cv2.MORPH_ERODE, kernel)\n",
"\n",
" \n",
" finallineimages = []\n",
" for i, lineimage in enumerate(lineimages):\n",
" templineimages = lineimagemaker(lineimage)\n",
" finallineimages += templineimages\n",
" \n",
" \n",
" # mergedboxes, originalboxes = linerectretriever(thresholded) \n",
" # mask = np.zeros(thresholded.shape, dtype=np.uint8)\n",
" # for i, box in enumerate(mergedboxes):\n",
" # for lb in originalboxes[i]:\n",
" # mask = cv2.rectangle(mask, (lb[0],lb[1]), (lb[0]+lb[2], lb[1]+lb[3]), (255,255,255), thickness=cv2.FILLED)\n",
"\n",
" # return mask\n",
" \n",
" \n",
" # out = tempfunc(thresholded)\n",
" # return out\n",
" \n",
" return finallineimages\n",
" \n",
" "
]
},
{
"cell_type": "code",
"execution_count": 102,
"metadata": {},
"outputs": [],
"source": [
"pathname = \"../adjusted_test_images/\"\n",
"filename = \"IMG_7594.jpg\"\n",
"# pathname = \"../test_images/\"\n",
"pathname = \"../result_images/\"\n",
"filename = \"13.jpg\"\n",
"\n",
"# print(pathname+filename)\n",
"img = cv2.imread(pathname+filename)"
@ -411,40 +45,131 @@
},
{
"cell_type": "code",
"execution_count": 103,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"outs = lineisolator(img)"
"# import easyocr\n",
"# reader = easyocr.Reader(['en'])"
]
},
{
"cell_type": "code",
"execution_count": 104,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)\n",
"# thresholded = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]\n",
"# monke = tempfunc(thresholded)\n",
"# cv2.imwrite(\"../temp/monke.jpg\", monke)"
"def padWithColour(img, hpadding=0, vpadding=0, fill=(0,0,0)):\n",
" borderType = cv2.BORDER_CONSTANT\n",
" out = cv2.copyMakeBorder(img, vpadding, vpadding, hpadding, hpadding, borderType, None, fill)\n",
" return out\n",
"\n",
"def mergecontours(contours):\n",
" cont = np.vstack(contours)\n",
" finalcontour = cv2.convexHull(cont)\n",
" return finalcontour"
]
},
{
"cell_type": "code",
"execution_count": 105,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# cv2.imshow(\"test\", mf.ResizeWithAspectRatio(img, height=1000))\n",
"# # cv2.imshow(\"test1\", mf.ResizeWithAspectRatio(out, height=1000))\n",
"# cv2.waitKey(0)\n",
"# cv2.destroyAllWindows()"
"def getSkewAngle(cvImage) -> float:\n",
" # Prep image, copy, convert to gray scale, blur, and threshold\n",
" newImage = padWithColour(cvImage, hpadding=50, vpadding=50, fill=(255,255,255))\n",
" # return newImage\n",
" gray = cv2.cvtColor(newImage, cv2.COLOR_BGR2GRAY)\n",
" blur = cv2.GaussianBlur(gray, (9, 9), 0)\n",
" thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]\n",
"\n",
" # Apply dilate to merge text into meaningful lines/paragraphs.\n",
" # Use larger kernel on X axis to merge characters into single line, cancelling out any spaces.\n",
" # But use smaller kernel on Y axis to separate between different blocks of text\n",
" kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (10, 5))\n",
" dilate = cv2.dilate(thresh, kernel, iterations=5)\n",
" # return dilate\n",
"\n",
" # Find all contours\n",
" contours, hierarchy = cv2.findContours(dilate, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)\n",
" contours = sorted(contours, key = cv2.contourArea, reverse = True)\n",
"\n",
" # Find largest contour and surround in min area box\n",
" largestContour = contours[0]\n",
"\n",
" mergedcontour = mergecontours(contours)\n",
"\n",
" # return cv2.drawContours(newImage, [mergedcontour], -1, (0,255,0), thickness=3)\n",
" minAreaRect = cv2.minAreaRect(mergedcontour)\n",
" # return cv2.drawContours(newImage, [largestContour], -1, (0,255,0), thickness=3)\n",
" # minAreaRect = cv2.minAreaRect(largestContour)\n",
"\n",
" box = cv2.boxPoints(minAreaRect)\n",
" box = np.intp(box) \n",
" newImage = cv2.drawContours(newImage, [box], -1, (0,255,0), thickness=3)\n",
" # return newImage\n",
"\n",
" # Determine the angle. Convert it to the value that was originally used to obtain skewed image\n",
" angle = minAreaRect[-1]\n",
" print(angle)\n",
" if angle > 45:\n",
" angle = angle - 90\n",
" if angle < -45:\n",
" angle = 90 + angle\n",
" print(angle)\n",
" return angle\n",
"\n",
"def minboxdeskew(img, fill=(0,0,0)):\n",
" angle = getSkewAngle(img)\n",
" rotated = mf.rotate(img, angle, fill=fill)\n",
" return rotated"
]
},
{
"cell_type": "code",
"execution_count": 106,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# result = reader.readtext(pathname+filename)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# print(result)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# outs = ef.lineisolator(img)\n",
"# # gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)\n",
"# # outs = linerectretriever(gray)\n",
"# outs = getSkewAngle(img)\n",
"outs = minboxdeskew(img, fill=(255,255,255))\n",
"# outs = img"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# print(outs)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@ -464,6 +189,7 @@
" cv2.imshow(\"test\", mf.ResizeWithAspectRatio(outs, width=1000))\n",
"else:\n",
" for i, out in enumerate(outs):\n",
" cv2.imwrite(\"../result_images/\"+str(i)+\".jpg\", out)\n",
" if (out.shape[0] > out.shape[1]):\n",
" cv2.imshow(\"test\"+str(i), mf.ResizeWithAspectRatio(out, height=1350))\n",
" else:\n",
@ -472,69 +198,6 @@
"cv2.destroyAllWindows()\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 107,
"metadata": {},
"outputs": [],
"source": [
"# cv2.imshow(\"test\", mf.ResizeWithAspectRatio(outs[30], width=1000))\n",
"# cv2.waitKey(0)\n",
"# cv2.destroyAllWindows()"
]
},
{
"cell_type": "code",
"execution_count": 108,
"metadata": {},
"outputs": [],
"source": [
"# results = tempfunc(outs[30])"
]
},
{
"cell_type": "code",
"execution_count": 109,
"metadata": {},
"outputs": [],
"source": [
"# cv2.imshow(\"test\", mf.ResizeWithAspectRatio(results, width=1000))\n",
"# cv2.waitKey(0)\n",
"# cv2.destroyAllWindows()"
]
},
{
"cell_type": "code",
"execution_count": 110,
"metadata": {},
"outputs": [],
"source": [
"# https://medium.com/@vatvenger/extracting-lines-from-ocr-a8f410448fc\n",
"# https://www.width.ai/post/the-best-ways-to-extract-text-from-images-without-tesseract-python"
]
},
{
"cell_type": "code",
"execution_count": 111,
"metadata": {},
"outputs": [],
"source": [
"## Potential Next Steps. Isolate a line of text and then feed that into the OCR Model to extract the text."
]
},
{
"cell_type": "code",
"execution_count": 112,
"metadata": {},
"outputs": [],
"source": [
"# #IDEA:\n",
"# 1. Isolate lines into rectangles\n",
"# 2. feed that rectangle portion of the image into an OCR model\n",
"# 3. append that to the final output string with the end character for nextline\n",
"# 4. give the whole final string to a model which gives the outputs"
]
}
],
"metadata": {

View File

@ -10,18 +10,20 @@ RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
# -y automatically answers "yes" so apt-get does not stop to ask for confirmation when installing packages
RUN apt-get update && \
apt-get install -y build-essential cmake git gdb pkg-config valgrind systemd-coredump python3-opencv libopencv-dev python3-pip python3-dev && \
apt-get install -y build-essential cmake git gdb pkg-config valgrind systemd-coredump python3 python3-opencv libopencv-dev python3-pip && \
apt-get -y clean && apt-get -y autoremove
RUN python3 -m pip install --upgrade pip
RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu117
RUN pip3 install -q transformers && pip3 install sentencepiece && pip3 install protobuf
RUN pip3 install datasets && pip3 install jupyter notebook && pip3 install matplotlib && pip3 install deskew
RUN pip3 install easyocr && pip3 uninstall -y opencv-python-headless
ENV HF_DATASETS_CACHE="/mnt/code/.cache/datasets"
ENV TORCH_HOME="/mnt/code/.cache/torch"
ENV TRANSFORMERS_CACHE="/mnt/code/.cache/transformers"

View File

@ -81,4 +81,17 @@ https://en.cppreference.com/w/cpp/language/constraints
Models/Ideas:
https://huggingface.co/docs/transformers/model_doc/donut
https://huggingface.co/blog/document-ai
https://huggingface.co/EleutherAI/gpt-neo-125m
https://huggingface.co/EleutherAI/gpt-neo-125m
https://www.width.ai/post/extracting-information-from-unstructured-text-using-algorithms
https://towardsdatascience.com/machine-learning-text-processing-1d5a2d638958
https://towardsdatascience.com/deep-learning-for-specific-information-extraction-from-unstructured-texts-12c5b9dceada
NER:
https://medium.com/mysuperai/what-is-named-entity-recognition-ner-and-how-can-i-use-it-2b68cf6f545d
https://medium.com/@shivamcse17818/bert-model-for-text-extraction-with-code-pytorch-91c13ef82e7b
https://github.com/dayyass/pytorch-ner
https://github.com/senadkurtisi/pytorch-NER/tree/main
https://towardsdatascience.com/named-entity-recognition-with-bert-in-pytorch-a454405e0b6a
https://www.kaggle.com/code/dianalaveena/ner-using-bert-pytorch/notebook
https://wandb.ai/mostafaibrahim17/ml-articles/reports/Named-Entity-Recognition-With-HuggingFace-Using-PyTorch-and-W-B--Vmlldzo0NDgzODA2

2
run.sh
View File

@ -97,6 +97,8 @@ for branch in ${branches[@]}; do
imagename=${branch}"indexerenv"
extrarunflags=""
case ${branch} in
"textextractor")
;&
"autocropper")
if [ "$OS" = "Windows" ]; then
extrarunflags+="--gpus all"