Updated text extractor #16
@ -2,7 +2,7 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 136,
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -15,7 +15,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 137,
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -24,7 +24,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 138,
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -99,7 +99,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 139,
|
||||
"execution_count": 20,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -228,7 +228,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 140,
|
||||
"execution_count": 21,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -250,7 +250,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 141,
|
||||
"execution_count": 22,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -270,6 +270,7 @@
|
||||
" kernel5 = cv2.getStructuringElement(cv2.MORPH_RECT, (8, 8))\n",
|
||||
" kernel6 = cv2.getStructuringElement(cv2.MORPH_RECT, (20, 2))\n",
|
||||
" kernel7 = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 8))\n",
|
||||
" adaptivekernel = None\n",
|
||||
" \n",
|
||||
" # return lab[:,:,2]\n",
|
||||
"\n",
|
||||
@ -283,11 +284,32 @@
|
||||
" # imglist = []\n",
|
||||
"\n",
|
||||
" Bthresh = cv2.adaptiveThreshold(currentimgofatype, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 201, 35)\n",
|
||||
" \n",
|
||||
" # return Bthresh\n",
|
||||
"\n",
|
||||
" contours, heirarchy = cv2.findContours(255-Bthresh,cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)\n",
|
||||
" # imgcopy = cv2.drawContours(imgcopy, contours, -1, color=(0,255,0), thickness=1)\n",
|
||||
" # return imgcopy\n",
|
||||
" \n",
|
||||
" boundingboxes = np.empty((len(contours), 4), dtype=int)\n",
|
||||
"\n",
|
||||
" for i, contour in enumerate(contours):\n",
|
||||
" b = cv2.boundingRect(contour)\n",
|
||||
" boundingboxes[i] = b\n",
|
||||
" # imgcopy = cv2.rectangle(imgcopy, (b[0],b[1]), (b[0]+b[2], b[1]+b[3]), 128, thickness=3)\n",
|
||||
" # return imgcopy\n",
|
||||
" \n",
|
||||
" epsilonvalue = np.median(boundingboxes, axis=0)[3]\n",
|
||||
" \n",
|
||||
" adaptivekernel = cv2.getStructuringElement(cv2.MORPH_RECT, (int(epsilonvalue/15), int(epsilonvalue/15)))\n",
|
||||
" \n",
|
||||
" # imglist.append(Bthresh)\n",
|
||||
" # imglist.append(255-Bthresh)\n",
|
||||
" \n",
|
||||
" morphedBthresh = cv2.morphologyEx(Bthresh, cv2.MORPH_DILATE, kernel3, iterations=2)\n",
|
||||
" # morphedBthresh = cv2.morphologyEx(Bthresh, cv2.MORPH_DILATE, adaptivekernel, iterations=2)\n",
|
||||
" goodmorphBthresh = cv2.morphologyEx(Bthresh, cv2.MORPH_ERODE, kernel4, iterations=2)\n",
|
||||
" # goodmorphBthresh = cv2.morphologyEx(Bthresh, cv2.MORPH_ERODE, adaptivekernel, iterations=3)\n",
|
||||
" # morphedBthresh = cv2.morphologyEx(morphedBthresh, cv2.MORPH_DILATE, kernel7)\n",
|
||||
" # imglist.append(morphedBthresh)\n",
|
||||
" # imglist.append(goodmorphBthresh)\n",
|
||||
@ -319,7 +341,9 @@
|
||||
" mask = cv2.rectangle(mask, (b[0],b[1]), (b[0]+b[2], b[1]+b[3]), 0, thickness=cv2.FILLED)\n",
|
||||
" \n",
|
||||
" bingus = cv2.bitwise_or(goodmorphBthresh, mask)\n",
|
||||
" # bingus = cv2.morphologyEx(bingus, cv2.MORPH_CLOSE, adaptivekernel)\n",
|
||||
" # imglist.append(bingus)\n",
|
||||
" # return imglist\n",
|
||||
" return bingus\n",
|
||||
" \n",
|
||||
" # imglist.append(image)\n",
|
||||
@ -427,7 +451,103 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 142,
|
||||
"execution_count": 23,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# funtion to correct the median-angle to give it to the cv2.warpaffine() function\n",
|
||||
"def anglecorrector(angle):\n",
|
||||
" if 0 <= angle <= 90:\n",
|
||||
" corrected_angle = angle - 90\n",
|
||||
" elif -45 <= angle < 0:\n",
|
||||
" corrected_angle = angle - 90\n",
|
||||
" elif -90 <= angle < -45:\n",
|
||||
" corrected_angle = 90 + angle\n",
|
||||
" return corrected_angle"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def padWithColour(img, hpadding=0, vpadding=0, fill=(0,0,0)):\n",
|
||||
" borderType = cv2.BORDER_CONSTANT\n",
|
||||
" out = cv2.copyMakeBorder(img, vpadding, vpadding, hpadding, hpadding, borderType, None, fill)\n",
|
||||
" return out\n",
|
||||
"\n",
|
||||
"def mergecontours(contours):\n",
|
||||
" cont = np.vstack(contours)\n",
|
||||
" finalcontour = cv2.convexHull(cont)\n",
|
||||
" return finalcontour"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def getSkewAngle(cvImage) -> float:\n",
|
||||
" # Prep image, copy, convert to gray scale, blur, and threshold\n",
|
||||
" newImage = padWithColour(cvImage, hpadding=50, vpadding=50, fill=(255,255,255))\n",
|
||||
" # return newImage\n",
|
||||
" gray = cv2.cvtColor(newImage, cv2.COLOR_BGR2GRAY)\n",
|
||||
" blur = cv2.GaussianBlur(gray, (9, 9), 0)\n",
|
||||
" thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]\n",
|
||||
"\n",
|
||||
" # Apply dilate to merge text into meaningful lines/paragraphs.\n",
|
||||
" # Use larger kernel on X axis to merge characters into single line, cancelling out any spaces.\n",
|
||||
" # But use smaller kernel on Y axis to separate between different blocks of text\n",
|
||||
" kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (10, 5))\n",
|
||||
" dilate = cv2.dilate(thresh, kernel, iterations=5)\n",
|
||||
" # return dilate\n",
|
||||
"\n",
|
||||
" # Find all contours\n",
|
||||
" contours, hierarchy = cv2.findContours(dilate, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)\n",
|
||||
" contours = sorted(contours, key = cv2.contourArea, reverse = True)\n",
|
||||
"\n",
|
||||
" # Find largest contour and surround in min area box\n",
|
||||
" largestContour = contours[0]\n",
|
||||
"\n",
|
||||
" mergedcontour = mergecontours(contours)\n",
|
||||
"\n",
|
||||
" # return cv2.drawContours(newImage, [mergedcontour], -1, (0,255,0), thickness=3)\n",
|
||||
" minAreaRect = cv2.minAreaRect(mergedcontour)\n",
|
||||
" minAreaRect = list(minAreaRect)\n",
|
||||
" minAreaRect[1] = list(minAreaRect[1])\n",
|
||||
" if (minAreaRect[1][0] > minAreaRect[1][1]):\n",
|
||||
" temp = minAreaRect[1][0]\n",
|
||||
" minAreaRect[1][0] = minAreaRect[1][1]\n",
|
||||
" minAreaRect[1][1] = temp\n",
|
||||
" minAreaRect[2] -= 90\n",
|
||||
" # return cv2.drawContours(newImage, [largestContour], -1, (0,255,0), thickness=3)\n",
|
||||
" # minAreaRect = cv2.minAreaRect(largestContour)\n",
|
||||
"\n",
|
||||
" box = cv2.boxPoints(minAreaRect)\n",
|
||||
" box = np.intp(box) \n",
|
||||
" newImage = cv2.drawContours(newImage, [box], -1, (0,255,0), thickness=3)\n",
|
||||
" # return newImage\n",
|
||||
"\n",
|
||||
" # Determine the angle. Convert it to the value that was originally used to obtain skewed image\n",
|
||||
" angle = minAreaRect[-1]\n",
|
||||
" # print(angle)\n",
|
||||
" angle = anglecorrector(angle)+90\n",
|
||||
" # print(angle)\n",
|
||||
" return angle\n",
|
||||
"\n",
|
||||
"def minboxdeskew(img, fill=(0,0,0)):\n",
|
||||
" colourimg = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)\n",
|
||||
" angle = getSkewAngle(colourimg)\n",
|
||||
" padimg = padWithColour(img, hpadding=50, vpadding=50, fill=fill)\n",
|
||||
" rotated = mf.rotate(padimg, angle, fill=fill)\n",
|
||||
" return rotated"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -435,11 +555,11 @@
|
||||
" whitedbackground = mf.whiteoutbackground(image)\n",
|
||||
" # return whitedbackground\n",
|
||||
" \n",
|
||||
" textrefined = mf.textClarifying(whitedbackground)\n",
|
||||
" textrefined = textClarifying(whitedbackground)\n",
|
||||
" # return textrefined\n",
|
||||
" #maybe now is when I put in the line removing function\n",
|
||||
" \n",
|
||||
" lineout = removeLinesFromText(textrefined)\n",
|
||||
" lineout = mf.removeLinesFromText(textrefined)\n",
|
||||
" \n",
|
||||
" return lineout\n",
|
||||
" # implement a function that's called refine text\n",
|
||||
@ -453,9 +573,13 @@
|
||||
" # return postprocessed\n",
|
||||
" postprocessed = mf.croptoblack(postprocessed)\n",
|
||||
" \n",
|
||||
" postprocessed = cv2.cvtColor(postprocessed, cv2.COLOR_GRAY2BGR)\n",
|
||||
" # postprocessed = cv2.cvtColor(postprocessed, cv2.COLOR_GRAY2BGR)\n",
|
||||
" # return postprocessed\n",
|
||||
" \n",
|
||||
" final = mf.externaldeskew(postprocessed, fill=(255,255,255))\n",
|
||||
" # final = mf.externaldeskew(postprocessed, fill=(255,255,255))\n",
|
||||
" final = minboxdeskew(postprocessed, fill=(255,255,255))\n",
|
||||
" \n",
|
||||
" # final = mf.croptoblack(final)\n",
|
||||
" \n",
|
||||
" # cv2.imshow(\"postprocessed\", mf.ResizeWithAspectRatio(postprocessed, 1000))\n",
|
||||
" # cv2.imshow(\"final\", mf.ResizeWithAspectRatio(final, 1000))\n",
|
||||
@ -467,19 +591,19 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 143,
|
||||
"execution_count": 27,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# prepped, scaler, hp, vp = mf.squareandthenresize(img, fill=255, width=1000, returnscalerinfo=True)\n",
|
||||
"outs = houghlineprocessing(img)\n",
|
||||
"outs = mf.houghlineprocessing(img)\n",
|
||||
"# print(croprect)\n",
|
||||
"#need to fix premorphCrop. it removes too much"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 144,
|
||||
"execution_count": 28,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -493,7 +617,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 145,
|
||||
"execution_count": 29,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -505,7 +629,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 146,
|
||||
"execution_count": 35,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
||||
@ -81,6 +81,28 @@ def colourscaler(n, min, max):
|
||||
diff = abs(max - min)
|
||||
return clip((temp/diff)*255, 0, 255)
|
||||
|
||||
def padWithColour(img, hpadding=0, vpadding=0, fill=(0,0,0)):
    """Surround ``img`` with a constant-colour border.

    ``vpadding`` rows are added above and below the image, ``hpadding``
    columns are added to its left and right, and the new pixels are set
    to ``fill``.
    """
    return cv2.copyMakeBorder(
        img,
        vpadding,  # top
        vpadding,  # bottom
        hpadding,  # left
        hpadding,  # right
        cv2.BORDER_CONSTANT,
        value=fill,
    )
|
||||
|
||||
def mergecontours(contours):
    """Return the convex hull enclosing the points of every contour given.

    The contours are stacked into one point array before the hull is
    computed, so the result is a single contour.
    """
    stacked_points = np.vstack(contours)
    return cv2.convexHull(stacked_points)
|
||||
|
||||
|
||||
# function to correct the median-angle to give it to the cv2.warpAffine() function
# specifically, when getting the angle from a minAreaRect rectangle
def anglecorrector(angle):
    """Map a rectangle angle in [-90, 90] onto the corrected rotation angle.

    Angles from -45 up to and including 90 are shifted down by 90;
    angles from -90 up to (but excluding) -45 are shifted up by 90.

    Raises:
        ValueError: if ``angle`` lies outside [-90, 90] (including NaN).
            Previously this case fell through every branch and failed with
            an UnboundLocalError on the return statement.
    """
    if -45 <= angle <= 90:
        return angle - 90
    if -90 <= angle < -45:
        return 90 + angle
    raise ValueError(f"angle must be within [-90, 90], got {angle!r}")
|
||||
|
||||
tensorize = v2.Compose([v2.ToImageTensor(), v2.ConvertImageDtype()]) ## for converting an image (usually PIL image) to a pytorch tensor
|
||||
|
||||
## ------------------------------for selective segmentation search crop------------------------------
|
||||
@ -649,18 +671,19 @@ def textClarifying(image):
|
||||
|
||||
## Try using the LAB colour space???
|
||||
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
|
||||
# autothreshold = np.clip(np.mean(gray)/1.2, 0, 255)
|
||||
autothreshold = np.clip(np.mean(gray)/1.2, 0, 255)
|
||||
|
||||
lab = cv2.cvtColor(image, cv2.COLOR_BGR2LAB)
|
||||
# hls = cv2.cvtColor(image, cv2.COLOR_BGR2HLS)
|
||||
hls = cv2.cvtColor(image, cv2.COLOR_BGR2HLS)
|
||||
|
||||
# kernel1 = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
|
||||
# kernel2 = cv2.getStructuringElement(cv2.MORPH_RECT, (4, 4))
|
||||
kernel1 = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
|
||||
kernel2 = cv2.getStructuringElement(cv2.MORPH_RECT, (4, 4))
|
||||
kernel3 = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
|
||||
kernel4 = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
|
||||
# kernel5 = cv2.getStructuringElement(cv2.MORPH_RECT, (8, 8))
|
||||
kernel5 = cv2.getStructuringElement(cv2.MORPH_RECT, (8, 8))
|
||||
kernel6 = cv2.getStructuringElement(cv2.MORPH_RECT, (20, 2))
|
||||
kernel7 = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 8))
|
||||
adaptivekernel = None
|
||||
|
||||
# return lab[:,:,2]
|
||||
|
||||
@ -674,11 +697,32 @@ def textClarifying(image):
|
||||
# imglist = []
|
||||
|
||||
Bthresh = cv2.adaptiveThreshold(currentimgofatype, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 201, 35)
|
||||
|
||||
# return Bthresh
|
||||
|
||||
contours, heirarchy = cv2.findContours(255-Bthresh,cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
|
||||
# imgcopy = cv2.drawContours(imgcopy, contours, -1, color=(0,255,0), thickness=1)
|
||||
# return imgcopy
|
||||
|
||||
boundingboxes = np.empty((len(contours), 4), dtype=int)
|
||||
|
||||
for i, contour in enumerate(contours):
|
||||
b = cv2.boundingRect(contour)
|
||||
boundingboxes[i] = b
|
||||
# imgcopy = cv2.rectangle(imgcopy, (b[0],b[1]), (b[0]+b[2], b[1]+b[3]), 128, thickness=3)
|
||||
# return imgcopy
|
||||
|
||||
epsilonvalue = np.median(boundingboxes, axis=0)[3]
|
||||
|
||||
adaptivekernel = cv2.getStructuringElement(cv2.MORPH_RECT, (int(epsilonvalue/15), int(epsilonvalue/15)))
|
||||
|
||||
# imglist.append(Bthresh)
|
||||
# imglist.append(255-Bthresh)
|
||||
|
||||
morphedBthresh = cv2.morphologyEx(Bthresh, cv2.MORPH_DILATE, kernel3, iterations=2)
|
||||
# morphedBthresh = cv2.morphologyEx(Bthresh, cv2.MORPH_DILATE, adaptivekernel, iterations=2)
|
||||
goodmorphBthresh = cv2.morphologyEx(Bthresh, cv2.MORPH_ERODE, kernel4, iterations=2)
|
||||
# goodmorphBthresh = cv2.morphologyEx(Bthresh, cv2.MORPH_ERODE, adaptivekernel, iterations=3)
|
||||
# morphedBthresh = cv2.morphologyEx(morphedBthresh, cv2.MORPH_DILATE, kernel7)
|
||||
# imglist.append(morphedBthresh)
|
||||
# imglist.append(goodmorphBthresh)
|
||||
@ -693,7 +737,7 @@ def textClarifying(image):
|
||||
|
||||
|
||||
# imglist.append(morphedthresh)
|
||||
# anded1 = cv2.bitwise_and(255-Bthresh, morphedthresh)
|
||||
anded1 = cv2.bitwise_and(255-Bthresh, morphedthresh)
|
||||
anded2 = cv2.bitwise_and(morphedBthresh, 255-morphedthresh)
|
||||
# imglist.append(anded1)
|
||||
# imglist.append(anded2)
|
||||
@ -710,7 +754,9 @@ def textClarifying(image):
|
||||
mask = cv2.rectangle(mask, (b[0],b[1]), (b[0]+b[2], b[1]+b[3]), 0, thickness=cv2.FILLED)
|
||||
|
||||
bingus = cv2.bitwise_or(goodmorphBthresh, mask)
|
||||
|
||||
# bingus = cv2.morphologyEx(bingus, cv2.MORPH_CLOSE, adaptivekernel)
|
||||
# imglist.append(bingus)
|
||||
# return imglist
|
||||
return bingus
|
||||
|
||||
|
||||
@ -936,7 +982,60 @@ def externaldeskew(image, fill=(0,0,0), alreadygray=False):
|
||||
rotated = rotate(image, angle, fill=fill)
|
||||
return rotated
|
||||
|
||||
def getreceipttextAngle(cvImage) -> float:
    """Estimate the rotation angle of the text in a BGR image.

    The image is padded with a white border, converted to grayscale,
    blurred and binarised with an inverted Otsu threshold. The result is
    dilated with a wide, short kernel so characters merge into line or
    paragraph blobs. All blob contours are merged into one convex hull,
    and the angle of that hull's minimum-area rectangle is passed through
    ``anglecorrector``.

    Returns:
        The angle derived from the minimum-area rectangle, or ``0.0`` when
        no contour is found (the original indexed ``contours[0]`` and
        raised IndexError in that case).
    """
    # Prep image: pad, convert to gray scale, blur, and threshold
    padded = padWithColour(cvImage, hpadding=50, vpadding=50, fill=(255,255,255))
    gray = cv2.cvtColor(padded, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (9, 9), 0)
    thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

    # Apply dilate to merge text into meaningful lines/paragraphs.
    # Use larger kernel on X axis to merge characters into single line, cancelling out any spaces.
    # But use smaller kernel on Y axis to separate between different blocks of text
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (10, 5))
    dilate = cv2.dilate(thresh, kernel, iterations=5)

    contours, _ = cv2.findContours(dilate, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    if len(contours) == 0:
        # Nothing to measure: report "no rotation" instead of crashing.
        return 0.0

    # The hull is built from the union of all contour points, so the
    # contours do not need to be sorted by area first.
    _, (width, height), angle = cv2.minAreaRect(mergecontours(contours))

    # When the rectangle is wider than tall, measure the angle against the
    # other side by turning it a quarter turn.
    if width > height:
        angle -= 90

    return anglecorrector(angle) + 90
|
||||
|
||||
def receipttextdeskew(img, fill=(0,0,0)):
    """Rotate a grayscale image by the angle ``getreceipttextAngle`` reports.

    The angle is measured on a BGR conversion of ``img``; the image itself
    is padded by 50 pixels on every side with ``fill`` before being rotated
    with the same fill colour.
    """
    skew = getreceipttextAngle(cv2.cvtColor(img, cv2.COLOR_GRAY2BGR))
    bordered = padWithColour(img, hpadding=50, vpadding=50, fill=fill)
    return rotate(bordered, skew, fill=fill)
|
||||
|
||||
## ------------------------------Full deskewing and cropping------------------------------
|
||||
def houghlineprocessing(image):
|
||||
@ -947,10 +1046,16 @@ def houghlineprocessing(image):
|
||||
postprocessed = cropclarifying(croppedanddeskewed)
|
||||
# return postprocessed
|
||||
postprocessed = croptoblack(postprocessed)
|
||||
|
||||
# postprocessed = cv2.cvtColor(postprocessed, cv2.COLOR_GRAY2BGR)
|
||||
# return postprocessed
|
||||
|
||||
# final = externaldeskew(postprocessed, fill=(255,255,255))
|
||||
final = receipttextdeskew(postprocessed, fill=(255,255,255))
|
||||
|
||||
final = cv2.cvtColor(final, cv2.COLOR_GRAY2BGR)
|
||||
|
||||
postprocessed = cv2.cvtColor(postprocessed, cv2.COLOR_GRAY2BGR)
|
||||
|
||||
final = externaldeskew(postprocessed, fill=(255,255,255))
|
||||
# final = mf.croptoblack(final)
|
||||
|
||||
# cv2.imshow("postprocessed", mf.ResizeWithAspectRatio(postprocessed, 1000))
|
||||
# cv2.imshow("final", mf.ResizeWithAspectRatio(final, 1000))
|
||||
|
||||
|
Before Width: | Height: | Size: 412 KiB After Width: | Height: | Size: 448 KiB |
|
Before Width: | Height: | Size: 426 KiB After Width: | Height: | Size: 433 KiB |
|
Before Width: | Height: | Size: 550 KiB After Width: | Height: | Size: 634 KiB |
|
Before Width: | Height: | Size: 168 KiB After Width: | Height: | Size: 194 KiB |
BIN
code/textdataretriever/result_images/0.jpg
Normal file
|
After Width: | Height: | Size: 29 KiB |
BIN
code/textdataretriever/result_images/1.jpg
Normal file
|
After Width: | Height: | Size: 17 KiB |
BIN
code/textdataretriever/result_images/10.jpg
Normal file
|
After Width: | Height: | Size: 15 KiB |
BIN
code/textdataretriever/result_images/11.jpg
Normal file
|
After Width: | Height: | Size: 21 KiB |
BIN
code/textdataretriever/result_images/12.jpg
Normal file
|
After Width: | Height: | Size: 24 KiB |
BIN
code/textdataretriever/result_images/13.jpg
Normal file
|
After Width: | Height: | Size: 27 KiB |
BIN
code/textdataretriever/result_images/14.jpg
Normal file
|
After Width: | Height: | Size: 14 KiB |
BIN
code/textdataretriever/result_images/15.jpg
Normal file
|
After Width: | Height: | Size: 9.4 KiB |
BIN
code/textdataretriever/result_images/16.jpg
Normal file
|
After Width: | Height: | Size: 31 KiB |
BIN
code/textdataretriever/result_images/17.jpg
Normal file
|
After Width: | Height: | Size: 23 KiB |
BIN
code/textdataretriever/result_images/18.jpg
Normal file
|
After Width: | Height: | Size: 18 KiB |
BIN
code/textdataretriever/result_images/19.jpg
Normal file
|
After Width: | Height: | Size: 23 KiB |
BIN
code/textdataretriever/result_images/2.jpg
Normal file
|
After Width: | Height: | Size: 14 KiB |
BIN
code/textdataretriever/result_images/3.jpg
Normal file
|
After Width: | Height: | Size: 36 KiB |
BIN
code/textdataretriever/result_images/4.jpg
Normal file
|
After Width: | Height: | Size: 21 KiB |
BIN
code/textdataretriever/result_images/5.jpg
Normal file
|
After Width: | Height: | Size: 26 KiB |
BIN
code/textdataretriever/result_images/6.jpg
Normal file
|
After Width: | Height: | Size: 15 KiB |
BIN
code/textdataretriever/result_images/7.jpg
Normal file
|
After Width: | Height: | Size: 18 KiB |
BIN
code/textdataretriever/result_images/8.jpg
Normal file
|
After Width: | Height: | Size: 21 KiB |
BIN
code/textdataretriever/result_images/9.jpg
Normal file
|
After Width: | Height: | Size: 15 KiB |
224
code/textdataretriever/textextractor/donuttesting.ipynb
Normal file
@ -23,6 +23,8 @@ def rectcenterpt(rect, xywhrect=True, retint=False):
|
||||
def containsamount(outerrect, innerrect, percentage=1):
|
||||
tinyrect = mf.overlapRect([outerrect, innerrect])
|
||||
tinyarea = tinyrect[2]*tinyrect[3]
|
||||
if (tinyrect[0] == -1):
|
||||
tinyarea = 0
|
||||
innerrectarea = innerrect[2]*innerrect[3]
|
||||
if (tinyarea/innerrectarea >= percentage):
|
||||
return True
|
||||
@ -173,23 +175,218 @@ def region_query(D, P, eps):
|
||||
|
||||
return neighbors
|
||||
|
||||
def linerectretriever(image):
|
||||
|
||||
def padWithColour(img, hpadding=0, vpadding=0, fill=(0,0,0)):
    """Surround ``img`` with a constant-colour border.

    ``vpadding`` rows are added above and below the image, ``hpadding``
    columns are added to its left and right, and the new pixels are set
    to ``fill``.
    """
    return cv2.copyMakeBorder(
        img,
        vpadding,  # top
        vpadding,  # bottom
        hpadding,  # left
        hpadding,  # right
        cv2.BORDER_CONSTANT,
        value=fill,
    )
|
||||
|
||||
def mergecontours(contours):
    """Return the convex hull enclosing the points of every contour given.

    The contours are stacked into one point array before the hull is
    computed, so the result is a single contour.
    """
    stacked_points = np.vstack(contours)
    return cv2.convexHull(stacked_points)
|
||||
|
||||
def getSkewAngle(cvImage) -> float:
    """Estimate the skew angle of the text in a BGR image.

    The image is padded with a white border, converted to grayscale,
    blurred and binarised with an inverted Otsu threshold. The result is
    dilated with a wide, short kernel so characters merge into line or
    paragraph blobs. All blob contours are merged into one convex hull,
    and the angle of that hull's minimum-area rectangle is folded into the
    range [-45, 45].

    Returns:
        The folded rectangle angle, or ``0.0`` when no contour is found
        (the original indexed ``contours[0]`` and raised IndexError in
        that case).
    """
    # Prep image: pad, convert to gray scale, blur, and threshold
    padded = padWithColour(cvImage, hpadding=50, vpadding=50, fill=(255,255,255))
    gray = cv2.cvtColor(padded, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (9, 9), 0)
    thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

    # Apply dilate to merge text into meaningful lines/paragraphs.
    # Use larger kernel on X axis to merge characters into single line, cancelling out any spaces.
    # But use smaller kernel on Y axis to separate between different blocks of text
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (10, 5))
    dilate = cv2.dilate(thresh, kernel, iterations=5)

    contours, _ = cv2.findContours(dilate, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    if len(contours) == 0:
        # Nothing to measure: report "no skew" instead of crashing.
        return 0.0

    # The hull is built from the union of all contour points, so the
    # contours do not need to be sorted by area first.
    angle = cv2.minAreaRect(mergecontours(contours))[-1]

    # Fold the angle so the correction is at most an eighth of a turn.
    if angle > 45:
        angle = angle - 90
    elif angle < -45:
        angle = 90 + angle
    return angle
|
||||
|
||||
def minboxdeskew(img, fill=(0,0,0)):
    """Rotate a grayscale image by the angle ``getSkewAngle`` reports.

    The angle is measured on a BGR conversion of ``img``; the image itself
    is padded by 50 pixels on every side with ``fill`` before being rotated
    through ``mf.rotate`` with the same fill colour.
    """
    skew = getSkewAngle(cv2.cvtColor(img, cv2.COLOR_GRAY2BGR))
    bordered = padWithColour(img, hpadding=50, vpadding=50, fill=fill)
    return mf.rotate(bordered, skew, fill=fill)
|
||||
|
||||
|
||||
|
||||
def l1linerectretriever(image, divider=2):
|
||||
shape = image.shape
|
||||
|
||||
imgcopy = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
|
||||
# return imgcopy
|
||||
|
||||
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,3))
|
||||
reducedimage = cv2.morphologyEx(image, cv2.MORPH_DILATE, kernel)
|
||||
reducedimage = cv2.morphologyEx(reducedimage, cv2.MORPH_ERODE, kernel)
|
||||
linekernel = cv2.getStructuringElement(cv2.MORPH_RECT, (shape[1]//40, 1))
|
||||
# reducedimage = image
|
||||
reducedimage = cv2.morphologyEx(image, cv2.MORPH_DILATE, kernel, iterations=1)
|
||||
# reducedimage = cv2.morphologyEx(reducedimage, cv2.MORPH_ERODE, kernel)
|
||||
# return reducedimage
|
||||
|
||||
charcanny = cv2.Canny(reducedimage, 0, 500, None, 3)
|
||||
# return canny
|
||||
|
||||
|
||||
lettercontours, heirarchy = cv2.findContours(charcanny,cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
|
||||
# contours, heirarchy = cv2.findContours(255-reducedimage,cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
|
||||
|
||||
# imgcopy = cv2.drawContours(imgcopy, lettercontours, -1, color=(0,255,0), thickness=1)
|
||||
# return imgcopy
|
||||
|
||||
letterboxes = np.empty((len(lettercontours), 4), dtype=int)
|
||||
|
||||
for i, contour in enumerate(lettercontours):
|
||||
b = list(cv2.boundingRect(contour))
|
||||
# b[0] -= (kernel.shape[0]-1)
|
||||
# b[1] -= (kernel.shape[1]-1)
|
||||
# b[2] += (2*kernel.shape[0]-1)
|
||||
# b[3] += (2*kernel.shape[1]-1)
|
||||
letterboxes[i] = b
|
||||
# imgcopy = cv2.rectangle(imgcopy, (b[0],b[1]), (b[0]+b[2], b[1]+b[3]), 128, thickness=3)
|
||||
# return imgcopy
|
||||
|
||||
epsilonvalue = np.median(letterboxes, axis=0)[3]/divider
|
||||
# print(epsilonvalue)
|
||||
|
||||
|
||||
|
||||
linemade = 255-cv2.morphologyEx(255-image, cv2.MORPH_DILATE, linekernel)
|
||||
# return linemade
|
||||
|
||||
linecanny = cv2.Canny(linemade, 0, 500, None, 3)
|
||||
linecontours, heirarchy = cv2.findContours(linecanny,cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
|
||||
|
||||
# imgcopy = cv2.drawContours(imgcopy, linecontours, -1, color=(0,255,0), thickness=1)
|
||||
# return imgcopy
|
||||
# for i, contour in enumerate(linecontours):
|
||||
# k = i+1
|
||||
# colour = ((k*23123)%255, (k*8654)%255, (k*45242)%255)
|
||||
# imgcopy = cv2.drawContours(imgcopy, [contour], -1, colour, thickness=1)
|
||||
# return imgcopy
|
||||
|
||||
|
||||
|
||||
lineboxes = np.empty((len(linecontours), 4), dtype=int)
|
||||
|
||||
for i, contour in enumerate(linecontours):
|
||||
b = list(cv2.boundingRect(contour))
|
||||
# b[0] -= (kernel.shape[0]-1)
|
||||
# b[1] -= (kernel.shape[1]-1)
|
||||
# b[2] += (2*kernel.shape[0]-1)
|
||||
# b[3] += (2*kernel.shape[1]-1)
|
||||
lineboxes[i] = b
|
||||
# imgcopy = cv2.rectangle(imgcopy, (b[0],b[1]), (b[0]+b[2], b[1]+b[3]), (0,255,0), thickness=3)
|
||||
# return imgcopy
|
||||
|
||||
linelabels = dbscan(lineboxes, epsilonvalue, 1)
|
||||
# print(linelabels)
|
||||
numclusters = max(linelabels)
|
||||
|
||||
letterboxesbyline = [[] for _ in range(numclusters)]
|
||||
|
||||
for i, linebox in enumerate(lineboxes):
|
||||
for j, letterbox in enumerate(letterboxes):
|
||||
if containsamount(linebox, letterbox, 0.9):
|
||||
letterboxesbyline[linelabels[i]-1].append(letterbox.tolist())
|
||||
|
||||
# print(len(letterboxesbyline))
|
||||
|
||||
|
||||
# # COLOUR THE RECTANGLES GROUPED
|
||||
# for i, setofboxes in enumerate(letterboxesbyline):
|
||||
# k = i+1
|
||||
# colour = ((k*23123)%255, (k*8654)%255, (k*45242)%255)
|
||||
# # print(colour)
|
||||
# # b = lineboxes[i]
|
||||
# # imgcopy = cv2.rectangle(imgcopy, (b[0],b[1]), (b[0]+b[2], b[1]+b[3]), colour, thickness=3)
|
||||
# print(i)
|
||||
# for b in setofboxes:
|
||||
# print(i)
|
||||
# imgcopy = cv2.rectangle(imgcopy, (b[0],b[1]), (b[0]+b[2], b[1]+b[3]), colour, thickness=3)
|
||||
# return imgcopy
|
||||
|
||||
mergedboxes = np.empty((numclusters,4), dtype=int)
|
||||
|
||||
tobedeleted = []
|
||||
|
||||
for i in range(numclusters):
|
||||
b = mf.mergerects(letterboxesbyline[i])
|
||||
# if (b[0] == -1):
|
||||
# tobedeleted.append(i)
|
||||
mergedboxes[i] = b
|
||||
|
||||
# if (tobedeleted != []):
|
||||
# # print("hi")
|
||||
# mergedboxes = np.delete(mergedboxes, tobedeleted, axis=0)
|
||||
# letterboxesbyline = [ele for idx, ele in enumerate(letterboxesbyline) if idx not in tobedeleted]
|
||||
|
||||
return mergedboxes, letterboxesbyline
|
||||
|
||||
def sublinerectretriever(image, divider=2):
|
||||
shape = image.shape
|
||||
|
||||
imgcopy = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
|
||||
# return imgcopy
|
||||
|
||||
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,3))
|
||||
# reducedimage = image
|
||||
reducedimage = cv2.morphologyEx(image, cv2.MORPH_DILATE, kernel, iterations=1)
|
||||
# reducedimage = cv2.morphologyEx(reducedimage, cv2.MORPH_ERODE, kernel)
|
||||
# return reducedimage
|
||||
|
||||
canny = cv2.Canny(reducedimage, 0, 500, None, 3)
|
||||
# return canny
|
||||
|
||||
|
||||
contours, heirarchy = cv2.findContours(canny,cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
|
||||
# contours, heirarchy = cv2.findContours(255-reducedimage,cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
|
||||
|
||||
# imgcopy = cv2.drawContours(imgcopy, contours, -1, color=(0,255,0), thickness=1)
|
||||
# return imgcopy
|
||||
|
||||
boundingboxes = np.empty((len(contours), 4), dtype=int)
|
||||
|
||||
for i, contour in enumerate(contours):
|
||||
boundingboxes[i] = cv2.boundingRect(contour)
|
||||
b = list(cv2.boundingRect(contour))
|
||||
b[0] -= (kernel.shape[0]-1)
|
||||
b[1] -= (kernel.shape[1]-1)
|
||||
b[2] += (2*kernel.shape[0]-1)
|
||||
b[3] += (2*kernel.shape[1]-1)
|
||||
boundingboxes[i] = b
|
||||
# imgcopy = cv2.rectangle(imgcopy, (b[0],b[1]), (b[0]+b[2], b[1]+b[3]), 128, thickness=3)
|
||||
# return imgcopy
|
||||
|
||||
epsilonvalue = np.median(boundingboxes, axis=0)[3]/3
|
||||
epsilonvalue = np.median(boundingboxes, axis=0)[3]/divider
|
||||
# print(epsilonvalue)
|
||||
|
||||
labels = dbscan(boundingboxes, epsilonvalue, 1)
|
||||
# print(labels)
|
||||
@ -199,6 +396,16 @@ def linerectretriever(image):
|
||||
for i, item in enumerate(labels):
|
||||
lineboxes[item-1].append(boundingboxes[i].tolist())
|
||||
|
||||
|
||||
# # COLOUR THE RECTANGLES GROUPED
|
||||
# for i, setofboxes in enumerate(lineboxes):
|
||||
# k = i+1
|
||||
# colour = ((k*23123)%255, (k*8654)%255, (k*45242)%255)
|
||||
# # print(colour)
|
||||
# for b in setofboxes:
|
||||
# imgcopy = cv2.rectangle(imgcopy, (b[0],b[1]), (b[0]+b[2], b[1]+b[3]), colour, thickness=3)
|
||||
# return imgcopy
|
||||
|
||||
|
||||
mergedboxes = np.empty((numclusters,4), dtype=int)
|
||||
|
||||
@ -226,26 +433,52 @@ def linerectretriever(image):
|
||||
j += 1
|
||||
|
||||
return mergedboxes, lineboxes
|
||||
|
||||
def lineimagemaker(thresholded):
|
||||
|
||||
def linerectretriever(image, divider=2, sublines=False):
    """Dispatch to the line-rectangle retriever selected by ``sublines``.

    ``sublinerectretriever`` is used when ``sublines`` is truthy, otherwise
    ``l1linerectretriever``; ``divider`` is forwarded unchanged and the
    chosen retriever's result is returned as is.
    """
    retriever = sublinerectretriever if sublines else l1linerectretriever
    return retriever(image, divider=divider)
|
||||
|
||||
def lineimagemaker(thresholded, divider=2, sublines=False):
    """Cut ``thresholded`` into one image per detected text line.

    Line boxes are obtained from ``linerectretriever``. For each line, the
    pixels covered by that line's component boxes are kept, every other
    pixel is painted white (255), the result is cropped to the line's merged
    box and a 3x3 morphological close is applied.

    Args:
        thresholded: Image to split. It is combined with uint8 masks of the
            same shape, so it is assumed to be a single-channel uint8
            array -- TODO confirm against callers.
        divider: Forwarded to ``linerectretriever``.
        sublines: Forwarded to ``linerectretriever``.

    Returns:
        list: Cropped line images, ordered by the y value of their merged
        box (top to bottom).
    """
    # One lookup only; boxes are indexed as (x, y, width, height).
    mergedboxes, originalboxes = linerectretriever(
        thresholded, divider=divider, sublines=sublines
    )

    # Sort lines by the y value of their merged box.
    ordering = mergedboxes[:, 1].argsort()

    # Drop the leading run of boxes whose x equals -1; if no box has a
    # different x, nothing is dropped.
    # NOTE(review): -1 looks like a placeholder for an empty group of boxes;
    # confirm against the helper that merges the rectangles.
    goodpoint = next(
        (
            position
            for position, boxindex in enumerate(ordering)
            if mergedboxes[boxindex][0] != -1
        ),
        0,
    )
    ordering = ordering[goodpoint:]

    mergedboxes = mergedboxes[ordering]
    originalboxes = [originalboxes[i] for i in ordering]

    # Loop invariants: neither is modified by the calls below.
    whitebackground = np.full(thresholded.shape, fill_value=255, dtype=np.uint8)
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))

    lineimages = []
    for box, componentboxes in zip(mergedboxes, originalboxes):
        # Mask is 255 inside every component box of this line, 0 elsewhere.
        mask = np.zeros(thresholded.shape, dtype=np.uint8)
        for lb in componentboxes:
            mask = cv2.rectangle(
                mask,
                (lb[0], lb[1]),
                (lb[0] + lb[2], lb[1] + lb[3]),
                (255, 255, 255),
                thickness=cv2.FILLED,
            )

        invertedmask = cv2.bitwise_not(mask)
        # White wherever the line does not cover, original pixels elsewhere.
        whitedscreen = cv2.bitwise_and(
            whitebackground, whitebackground, mask=invertedmask
        )
        lineimage = cv2.bitwise_and(thresholded, thresholded, mask=mask)
        lineimage = cv2.bitwise_or(whitedscreen, lineimage)

        # Clamp the crop window at 0: a negative start would be read by
        # NumPy as an offset from the far edge and yield a wrong or empty
        # crop for lines touching the top/left border.
        top = max(box[1], 0)
        left = max(box[0], 0)
        bottom = max(box[1] + box[3], 0)
        right = max(box[0] + box[2], 0)
        lineimage = lineimage[top:bottom, left:right]

        lineimage = cv2.morphologyEx(
            lineimage, cv2.MORPH_CLOSE, kernel, iterations=1
        )
        lineimages.append(lineimage)

    return lineimages
|
||||
@ -254,27 +487,84 @@ def lineimagemaker(thresholded):
|
||||
|
||||
|
||||
|
||||
def ismultiline(img):
    """Heuristically decide whether ``img`` contains more than one text line.

    The image is dilated with a 3x3 rectangle, its Canny edges are traced
    into external contours, and the height of the tallest contour bounding
    box is compared with the image height.

    Args:
        img: Image to inspect; it is passed directly to
            ``cv2.morphologyEx`` and ``cv2.Canny`` (assumed single-channel
            uint8 -- TODO confirm against callers).

    Returns:
        bool: True when the image height exceeds 1.5 times the tallest
        bounding-box height plus 100 pixels; False otherwise, including
        when no contour is found at all.
    """
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    reducedimage = cv2.morphologyEx(img, cv2.MORPH_DILATE, kernel)

    canny = cv2.Canny(reducedimage, 0, 500, None, 3)

    contours, _hierarchy = cv2.findContours(
        canny, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )

    # Without contours there is no box height to compare against; taking a
    # maximum over zero boxes would raise instead of answering.
    if not contours:
        return False

    # boundingRect returns (x, y, width, height); only the height matters.
    tallest = max(cv2.boundingRect(contour)[3] for contour in contours)

    # NOTE(review): the 2*50 term presumably accounts for padding added
    # around the line image elsewhere in the pipeline -- confirm.
    return bool(img.shape[0] > (tallest * 1.5) + (2 * 50))
|
||||
|
||||
|
||||
|
||||
### actual function
|
||||
def lineisolator(image):
|
||||
imgcopy = image.copy()
|
||||
# imgcopy = image.copy()
|
||||
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
|
||||
thresholded = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
|
||||
# thresholded = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
|
||||
# return gray
|
||||
# return thresholded
|
||||
thresholded = gray
|
||||
|
||||
|
||||
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,3))
|
||||
# kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,3))
|
||||
|
||||
|
||||
|
||||
lineimages = lineimagemaker(thresholded)
|
||||
lineimages = lineimagemaker(thresholded, 1.5, False)
|
||||
|
||||
# for i, lineimage in enumerate(lineimages):
|
||||
# lineimages[i] = cv2.morphologyEx(lineimage, cv2.MORPH_ERODE, kernel)
|
||||
|
||||
|
||||
finallineimages = []
|
||||
|
||||
for i, lineimage in enumerate(lineimages):
|
||||
templineimages = lineimagemaker(lineimage)
|
||||
# if (i == 0):
|
||||
# finallineimages.append(lineimages[0])
|
||||
# continue
|
||||
deskewedlineimage = minboxdeskew(lineimage, fill=255)
|
||||
|
||||
# finallineimages.append(deskewedlineimage)
|
||||
# print(deskewedlineimage.shape)
|
||||
|
||||
if (ismultiline(deskewedlineimage)):
|
||||
# print("hi" + str(i))
|
||||
templineimages = lineimagemaker(deskewedlineimage, 2.5, True)
|
||||
else:
|
||||
templineimages = lineimagemaker(deskewedlineimage, 1.5, True)
|
||||
|
||||
# templineimages = lineimagemaker(deskewedlineimage, 2)
|
||||
|
||||
finallineimages += templineimages
|
||||
# finallineimages += templineimages[1:]
|
||||
|
||||
for i, lineimage in enumerate(finallineimages):
|
||||
deskewedli = minboxdeskew(lineimage, fill=255)
|
||||
dim = int((deskewedli.shape[0]-100)//20)
|
||||
# print(dim)
|
||||
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (dim, dim))
|
||||
deskewedli = cv2.morphologyEx(deskewedli, cv2.MORPH_DILATE, kernel,iterations=1)
|
||||
finallineimages[i] = cv2.morphologyEx(deskewedli, cv2.MORPH_OPEN, kernel)
|
||||
|
||||
|
||||
# mergedboxes, originalboxes = linerectretriever(thresholded)
|
||||
@ -289,4 +579,7 @@ def lineisolator(image):
|
||||
# out = tempfunc(thresholded)
|
||||
# return out
|
||||
|
||||
return finallineimages
|
||||
return finallineimages
|
||||
|
||||
|
||||
|
||||
511
code/textdataretriever/textextractor/lineisolatortesting.ipynb
Normal file
@ -0,0 +1,511 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import cv2\n",
|
||||
"import numpy as np\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"import scipy.stats as st\n",
|
||||
"import math\n",
|
||||
"\n",
|
||||
"import matplotlib.pyplot as plt"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/usr/local/lib/python3.10/dist-packages/torchvision/datapoints/__init__.py:12: UserWarning: The torchvision.datapoints and torchvision.transforms.v2 namespaces are still Beta. While we do not expect major breaking changes, some APIs may still change according to user feedback. Please submit any feedback you may have in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check out https://github.com/pytorch/vision/issues/7319 to learn more about the APIs that we suspect might involve future changes. You can silence this warning by calling torchvision.disable_beta_transforms_warning().\n",
|
||||
" warnings.warn(_BETA_TRANSFORMS_WARNING)\n",
|
||||
"/usr/local/lib/python3.10/dist-packages/torchvision/transforms/v2/__init__.py:54: UserWarning: The torchvision.datapoints and torchvision.transforms.v2 namespaces are still Beta. While we do not expect major breaking changes, some APIs may still change according to user feedback. Please submit any feedback you may have in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check out https://github.com/pytorch/vision/issues/7319 to learn more about the APIs that we suspect might involve future changes. You can silence this warning by calling torchvision.disable_beta_transforms_warning().\n",
|
||||
" warnings.warn(_BETA_TRANSFORMS_WARNING)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import sys\n",
|
||||
"sys.path.insert(0, '../../autocropper')\n",
|
||||
"import myfunctions as mf\n",
|
||||
"\n",
|
||||
"import extractorfunctions as ef\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"pathname = \"../test_images/\"\n",
|
||||
"filename = \"IMG_7640.jpg\"\n",
|
||||
"# pathname = \"../temp/\"\n",
|
||||
"# filename = \"test.jpg\"\n",
|
||||
"# pathname = \"../result_images/\"\n",
|
||||
"# filename = \"13.jpg\"\n",
|
||||
"\n",
|
||||
"# print(pathname+filename)\n",
|
||||
"img = cv2.imread(pathname+filename)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# import easyocr\n",
|
||||
"# reader = easyocr.Reader(['en'])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def l1linerectretriever(image, divider=2):\n",
|
||||
" shape = image.shape\n",
|
||||
"\n",
|
||||
" imgcopy = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)\n",
|
||||
" # return imgcopy\n",
|
||||
" \n",
|
||||
" kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,3))\n",
|
||||
" linekernel = cv2.getStructuringElement(cv2.MORPH_RECT, (shape[1]//40, 1))\n",
|
||||
" # reducedimage = image\n",
|
||||
" reducedimage = cv2.morphologyEx(image, cv2.MORPH_DILATE, kernel, iterations=1)\n",
|
||||
" # reducedimage = cv2.morphologyEx(reducedimage, cv2.MORPH_ERODE, kernel)\n",
|
||||
" # return reducedimage\n",
|
||||
" \n",
|
||||
" charcanny = cv2.Canny(reducedimage, 0, 500, None, 3)\n",
|
||||
" # return canny\n",
|
||||
" \n",
|
||||
" \n",
|
||||
" lettercontours, heirarchy = cv2.findContours(charcanny,cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)\n",
|
||||
" # contours, heirarchy = cv2.findContours(255-reducedimage,cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)\n",
|
||||
"\n",
|
||||
" # imgcopy = cv2.drawContours(imgcopy, lettercontours, -1, color=(0,255,0), thickness=1)\n",
|
||||
" # return imgcopy\n",
|
||||
"\n",
|
||||
" letterboxes = np.empty((len(lettercontours), 4), dtype=int)\n",
|
||||
" \n",
|
||||
" for i, contour in enumerate(lettercontours):\n",
|
||||
" b = list(cv2.boundingRect(contour))\n",
|
||||
" # b[0] -= (kernel.shape[0]-1)\n",
|
||||
" # b[1] -= (kernel.shape[1]-1)\n",
|
||||
" # b[2] += (2*kernel.shape[0]-1)\n",
|
||||
" # b[3] += (2*kernel.shape[1]-1)\n",
|
||||
" letterboxes[i] = b\n",
|
||||
" # imgcopy = cv2.rectangle(imgcopy, (b[0],b[1]), (b[0]+b[2], b[1]+b[3]), 128, thickness=3)\n",
|
||||
" # return imgcopy\n",
|
||||
" \n",
|
||||
" epsilonvalue = np.median(letterboxes, axis=0)[3]/divider\n",
|
||||
" # print(epsilonvalue)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" linemade = 255-cv2.morphologyEx(255-image, cv2.MORPH_DILATE, linekernel)\n",
|
||||
" # return linemade\n",
|
||||
"\n",
|
||||
" linecanny = cv2.Canny(linemade, 0, 500, None, 3)\n",
|
||||
" linecontours, heirarchy = cv2.findContours(linecanny,cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)\n",
|
||||
"\n",
|
||||
" # imgcopy = cv2.drawContours(imgcopy, linecontours, -1, color=(0,255,0), thickness=1)\n",
|
||||
" # return imgcopy\n",
|
||||
" # for i, contour in enumerate(linecontours):\n",
|
||||
" # k = i+1\n",
|
||||
" # colour = ((k*23123)%255, (k*8654)%255, (k*45242)%255)\n",
|
||||
" # imgcopy = cv2.drawContours(imgcopy, [contour], -1, colour, thickness=1)\n",
|
||||
" # return imgcopy\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" lineboxes = np.empty((len(linecontours), 4), dtype=int)\n",
|
||||
" \n",
|
||||
" for i, contour in enumerate(linecontours):\n",
|
||||
" b = list(cv2.boundingRect(contour))\n",
|
||||
" # b[0] -= (kernel.shape[0]-1)\n",
|
||||
" # b[1] -= (kernel.shape[1]-1)\n",
|
||||
" # b[2] += (2*kernel.shape[0]-1)\n",
|
||||
" # b[3] += (2*kernel.shape[1]-1)\n",
|
||||
" lineboxes[i] = b\n",
|
||||
" # imgcopy = cv2.rectangle(imgcopy, (b[0],b[1]), (b[0]+b[2], b[1]+b[3]), (0,255,0), thickness=3)\n",
|
||||
" # return imgcopy\n",
|
||||
"\n",
|
||||
" linelabels = ef.dbscan(lineboxes, epsilonvalue, 1)\n",
|
||||
" # print(linelabels)\n",
|
||||
" numclusters = max(linelabels)\n",
|
||||
"\n",
|
||||
" letterboxesbyline = [[] for _ in range(numclusters)]\n",
|
||||
"\n",
|
||||
" for i, linebox in enumerate(lineboxes):\n",
|
||||
" for j, letterbox in enumerate(letterboxes):\n",
|
||||
" if ef.containsamount(linebox, letterbox, 0.9):\n",
|
||||
" letterboxesbyline[linelabels[i]-1].append(letterbox.tolist())\n",
|
||||
"\n",
|
||||
" # print(len(letterboxesbyline))\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" # # COLOUR THE RECTANGLES GROUPED\n",
|
||||
" # for i, setofboxes in enumerate(letterboxesbyline):\n",
|
||||
" # k = i+1\n",
|
||||
" # colour = ((k*23123)%255, (k*8654)%255, (k*45242)%255)\n",
|
||||
" # # print(colour)\n",
|
||||
" # # b = lineboxes[i]\n",
|
||||
" # # imgcopy = cv2.rectangle(imgcopy, (b[0],b[1]), (b[0]+b[2], b[1]+b[3]), colour, thickness=3)\n",
|
||||
" # print(i)\n",
|
||||
" # for b in setofboxes:\n",
|
||||
" # print(i)\n",
|
||||
" # imgcopy = cv2.rectangle(imgcopy, (b[0],b[1]), (b[0]+b[2], b[1]+b[3]), colour, thickness=3)\n",
|
||||
" # return imgcopy\n",
|
||||
"\n",
|
||||
" mergedboxes = np.empty((numclusters,4), dtype=int)\n",
|
||||
"\n",
|
||||
" tobedeleted = []\n",
|
||||
"\n",
|
||||
" for i in range(numclusters):\n",
|
||||
" b = mf.mergerects(letterboxesbyline[i])\n",
|
||||
" # if (b[0] == -1):\n",
|
||||
" # tobedeleted.append(i)\n",
|
||||
" mergedboxes[i] = b\n",
|
||||
"\n",
|
||||
" # if (tobedeleted != []):\n",
|
||||
" # # print(\"hi\")\n",
|
||||
" # mergedboxes = np.delete(mergedboxes, tobedeleted, axis=0)\n",
|
||||
" # letterboxesbyline = [ele for idx, ele in enumerate(letterboxesbyline) if idx not in tobedeleted]\n",
|
||||
"\n",
|
||||
" return mergedboxes, letterboxesbyline\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def sublinerectretriever(image, divider=2):\n",
|
||||
" shape = image.shape\n",
|
||||
" \n",
|
||||
" imgcopy = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)\n",
|
||||
" # return imgcopy\n",
|
||||
" \n",
|
||||
" kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,3))\n",
|
||||
" # reducedimage = image\n",
|
||||
" reducedimage = cv2.morphologyEx(image, cv2.MORPH_DILATE, kernel, iterations=1)\n",
|
||||
" # reducedimage = cv2.morphologyEx(reducedimage, cv2.MORPH_ERODE, kernel)\n",
|
||||
" # return reducedimage\n",
|
||||
" \n",
|
||||
" canny = cv2.Canny(reducedimage, 0, 500, None, 3)\n",
|
||||
" # return canny\n",
|
||||
" \n",
|
||||
" \n",
|
||||
" contours, heirarchy = cv2.findContours(canny,cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)\n",
|
||||
" # contours, heirarchy = cv2.findContours(255-reducedimage,cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)\n",
|
||||
"\n",
|
||||
" # imgcopy = cv2.drawContours(imgcopy, contours, -1, color=(0,255,0), thickness=1)\n",
|
||||
" # return imgcopy\n",
|
||||
"\n",
|
||||
" boundingboxes = np.empty((len(contours), 4), dtype=int)\n",
|
||||
" \n",
|
||||
" for i, contour in enumerate(contours):\n",
|
||||
" b = list(cv2.boundingRect(contour))\n",
|
||||
" b[0] -= (kernel.shape[0]-1)\n",
|
||||
" b[1] -= (kernel.shape[1]-1)\n",
|
||||
" b[2] += (2*kernel.shape[0]-1)\n",
|
||||
" b[3] += (2*kernel.shape[1]-1)\n",
|
||||
" boundingboxes[i] = b\n",
|
||||
" # imgcopy = cv2.rectangle(imgcopy, (b[0],b[1]), (b[0]+b[2], b[1]+b[3]), 128, thickness=3)\n",
|
||||
" # return imgcopy\n",
|
||||
" \n",
|
||||
" epsilonvalue = np.median(boundingboxes, axis=0)[3]/divider\n",
|
||||
" # print(epsilonvalue)\n",
|
||||
" \n",
|
||||
" labels = ef.dbscan(boundingboxes, epsilonvalue, 1)\n",
|
||||
" # print(labels)\n",
|
||||
" numclusters = max(labels)\n",
|
||||
" lineboxes = [[] for _ in range(numclusters)]\n",
|
||||
"\n",
|
||||
" for i, item in enumerate(labels):\n",
|
||||
" lineboxes[item-1].append(boundingboxes[i].tolist())\n",
|
||||
" \n",
|
||||
" \n",
|
||||
" # # COLOUR THE RECTANGLES GROUPED\n",
|
||||
" # for i, setofboxes in enumerate(lineboxes):\n",
|
||||
" # k = i+1\n",
|
||||
" # colour = ((k*23123)%255, (k*8654)%255, (k*45242)%255)\n",
|
||||
" # # print(colour)\n",
|
||||
" # for b in setofboxes:\n",
|
||||
" # imgcopy = cv2.rectangle(imgcopy, (b[0],b[1]), (b[0]+b[2], b[1]+b[3]), colour, thickness=3)\n",
|
||||
" # return imgcopy\n",
|
||||
" \n",
|
||||
" \n",
|
||||
" mergedboxes = np.empty((numclusters,4), dtype=int)\n",
|
||||
" \n",
|
||||
" \n",
|
||||
" for i in range(numclusters):\n",
|
||||
" b = mf.mergerects(lineboxes[i])\n",
|
||||
" mergedboxes[i] = b\n",
|
||||
" \n",
|
||||
" j = 0\n",
|
||||
" while (j < len(mergedboxes)):\n",
|
||||
" i = 0\n",
|
||||
" while (i < len(mergedboxes)):\n",
|
||||
" if (i == j):\n",
|
||||
" i += 1\n",
|
||||
" continue\n",
|
||||
" outerbox = mergedboxes[j]\n",
|
||||
" innerbox = mergedboxes[i]\n",
|
||||
" if ef.containsamount(outerbox, innerbox, 1) or ef.aboveandbelow(outerbox, innerbox) or innerbox[3] < epsilonvalue:\n",
|
||||
" mergedboxes = np.delete(mergedboxes, i, axis=0)\n",
|
||||
" lineboxes.pop(i)\n",
|
||||
" if (i < j):\n",
|
||||
" j -= 1\n",
|
||||
" i -= 1\n",
|
||||
" i += 1\n",
|
||||
" j += 1\n",
|
||||
" \n",
|
||||
" return mergedboxes, lineboxes\n",
|
||||
"\n",
|
||||
"def linerectretriever(image, divider=2, sublines=False):\n",
|
||||
"\n",
|
||||
" if (sublines):\n",
|
||||
" return sublinerectretriever(image, divider=divider)\n",
|
||||
" else:\n",
|
||||
" return l1linerectretriever(image, divider=divider)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def lineimagemaker(thresholded, divider=2, sublines=False):\n",
|
||||
" lineimages = []\n",
|
||||
" mergedboxes, originalboxes = linerectretriever(thresholded, divider=divider, sublines=sublines)\n",
|
||||
" # print(mergedboxes)\n",
|
||||
" # print(originalboxes)\n",
|
||||
" # return thresholded\n",
|
||||
" \n",
|
||||
" mergedboxesordering = (mergedboxes[:,1]).argsort() # sorted by y value (aka lines from top to bottom)\n",
|
||||
" # print(mergedboxesordering)\n",
|
||||
" \n",
|
||||
" goodpoint = 0\n",
|
||||
" for i, item in enumerate(mergedboxesordering):\n",
|
||||
" if (mergedboxes[item][0] != -1):\n",
|
||||
" goodpoint = i\n",
|
||||
" break\n",
|
||||
" mergedboxesordering = mergedboxesordering[goodpoint:]\n",
|
||||
"\n",
|
||||
" mergedboxes = mergedboxes[mergedboxesordering]\n",
|
||||
" originalboxes = [originalboxes[i] for i in mergedboxesordering]\n",
|
||||
" out = cv2.cvtColor(thresholded.copy(), cv2.COLOR_GRAY2BGR)\n",
|
||||
" # lineimages.append(out)\n",
|
||||
" for i, box in enumerate(mergedboxes):\n",
|
||||
" # print(box)\n",
|
||||
" mask = np.zeros(thresholded.shape, dtype=np.uint8)\n",
|
||||
" whitebackground = np.full(thresholded.shape, fill_value=255, dtype=np.uint8)\n",
|
||||
" # print(originalboxes[i])\n",
|
||||
" for lb in originalboxes[i]:\n",
|
||||
" mask = cv2.rectangle(mask, (lb[0],lb[1]), (lb[0]+lb[2], lb[1]+lb[3]), (255,255,255), thickness=cv2.FILLED)\n",
|
||||
"\n",
|
||||
" # lineimages[0] = cv2.rectangle(lineimages[0], (box[0],box[1]), (box[0]+box[2], box[1]+box[3]), (0,255,0), thickness=1)\n",
|
||||
"\n",
|
||||
" invertedmask = cv2.bitwise_not(mask)\n",
|
||||
" whitedscreen = cv2.bitwise_and(whitebackground, whitebackground, mask=invertedmask)\n",
|
||||
" lineimage = cv2.bitwise_and(thresholded, thresholded, mask=mask)\n",
|
||||
" lineimage = cv2.bitwise_or(whitedscreen, lineimage)[box[1]:box[1]+box[3], box[0]:box[0]+box[2]]\n",
|
||||
" # lineimage = mf.externaldeskew(lineimage, fill=(255,255,255), alreadygray=True)\n",
|
||||
" # lineimage = thresholded[box[1]:box[1]+box[3], box[0]:box[0]+box[2]]\n",
|
||||
" kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,3))\n",
|
||||
" lineimage = cv2.morphologyEx(lineimage, cv2.MORPH_CLOSE, kernel, iterations=1)\n",
|
||||
" lineimages.append(lineimage)\n",
|
||||
" # lineimages.append(mask)\n",
|
||||
" return lineimages\n",
|
||||
" "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def lineisolator(image):\n",
|
||||
" # imgcopy = image.copy()\n",
|
||||
" gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)\n",
|
||||
" # thresholded = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]\n",
|
||||
" # return gray\n",
|
||||
" # return thresholded\n",
|
||||
" thresholded = gray\n",
|
||||
" \n",
|
||||
" \n",
|
||||
" # kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,3))\n",
|
||||
" \n",
|
||||
" \n",
|
||||
" \n",
|
||||
" lineimages = lineimagemaker(thresholded, 1.5, False)\n",
|
||||
" \n",
|
||||
" # for i, lineimage in enumerate(lineimages):\n",
|
||||
" # lineimages[i] = cv2.morphologyEx(lineimage, cv2.MORPH_ERODE, kernel)\n",
|
||||
"\n",
|
||||
" \n",
|
||||
" finallineimages = []\n",
|
||||
" \n",
|
||||
" for i, lineimage in enumerate(lineimages):\n",
|
||||
" # if (i == 0):\n",
|
||||
" # finallineimages.append(lineimages[0])\n",
|
||||
" # continue\n",
|
||||
" deskewedlineimage = ef.minboxdeskew(lineimage, fill=255)\n",
|
||||
"\n",
|
||||
" # finallineimages.append(deskewedlineimage)\n",
|
||||
" # print(deskewedlineimage.shape)\n",
|
||||
"\n",
|
||||
" if (ef.ismultiline(deskewedlineimage)):\n",
|
||||
" # print(\"hi\" + str(i))\n",
|
||||
" templineimages = lineimagemaker(deskewedlineimage, 2.5, True)\n",
|
||||
" else:\n",
|
||||
" templineimages = lineimagemaker(deskewedlineimage, 1.5, True)\n",
|
||||
"\n",
|
||||
" # templineimages = lineimagemaker(deskewedlineimage, 2)\n",
|
||||
"\n",
|
||||
" finallineimages += templineimages\n",
|
||||
" # finallineimages += templineimages[1:]\n",
|
||||
"\n",
|
||||
" for i, lineimage in enumerate(finallineimages):\n",
|
||||
" deskewedli = ef.minboxdeskew(lineimage, fill=255)\n",
|
||||
" dim = int((deskewedli.shape[0]-100)//20)\n",
|
||||
" # print(dim)\n",
|
||||
" kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (dim, dim))\n",
|
||||
" deskewedli = cv2.morphologyEx(deskewedli, cv2.MORPH_DILATE, kernel,iterations=1)\n",
|
||||
" finallineimages[i] = cv2.morphologyEx(deskewedli, cv2.MORPH_OPEN, kernel)\n",
|
||||
" \n",
|
||||
" \n",
|
||||
" # mergedboxes, originalboxes = linerectretriever(thresholded) \n",
|
||||
" # mask = np.zeros(thresholded.shape, dtype=np.uint8)\n",
|
||||
" # for i, box in enumerate(mergedboxes):\n",
|
||||
" # for lb in originalboxes[i]:\n",
|
||||
" # mask = cv2.rectangle(mask, (lb[0],lb[1]), (lb[0]+lb[2], lb[1]+lb[3]), (255,255,255), thickness=cv2.FILLED)\n",
|
||||
"\n",
|
||||
" # return mask\n",
|
||||
" \n",
|
||||
" \n",
|
||||
" # out = tempfunc(thresholded)\n",
|
||||
" # return out\n",
|
||||
" \n",
|
||||
" return finallineimages"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# result = reader.readtext(pathname+filename)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# print(result)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"bing = mf.houghlineprocessing(img)\n",
|
||||
"# outs = bing\n",
|
||||
"outs = ef.lineisolator(bing)\n",
|
||||
"# # gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)\n",
|
||||
"# # outs = linerectretriever(gray)\n",
|
||||
"# outs = getSkewAngle(img)\n",
|
||||
"# outs = minboxdeskew(img, fill=(255,255,255))\n",
|
||||
"# bing = cv2.cvtColor(bing, cv2.COLOR_BGR2GRAY)\n",
|
||||
"# bing = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)\n",
|
||||
"# outs = bing\n",
|
||||
"# outs = linerectretriever(bing, 1.5, False)\n",
|
||||
"# outs = lineimagemaker(bing, 1.5, False)\n",
|
||||
"# for i, _ in enumerate(outs):\n",
|
||||
"# outs[i] = ef.minboxdeskew(outs[i], fill=255)\n",
|
||||
"\n",
|
||||
"# outs = img"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# print(outs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# for out in outs:\n",
|
||||
"# if (out.shape[0] > out.shape[1]):\n",
|
||||
"# cv2.imshow(\"test1\", mf.ResizeWithAspectRatio(out, height=1000))\n",
|
||||
"# else:\n",
|
||||
"# cv2.imshow(\"test1\", mf.ResizeWithAspectRatio(out, width=1000))\n",
|
||||
"# key = cv2.waitKey(0)\n",
|
||||
"# cv2.destroyAllWindows()\n",
|
||||
"# if (key == 107):\n",
|
||||
"# break\n",
|
||||
"if (isinstance(outs, np.ndarray)):\n",
|
||||
" if (outs.shape[0] > outs.shape[1]):\n",
|
||||
" cv2.imshow(\"test\", mf.ResizeWithAspectRatio(outs, height=1350))\n",
|
||||
" else:\n",
|
||||
" cv2.imshow(\"test\", mf.ResizeWithAspectRatio(outs, width=1000))\n",
|
||||
"else:\n",
|
||||
" for i, out in enumerate(outs):\n",
|
||||
" # cv2.imwrite(\"../result_images/\"+str(i)+\".jpg\", out)\n",
|
||||
" if (out.shape[0] > out.shape[1]):\n",
|
||||
" cv2.imshow(\"test\"+str(i), mf.ResizeWithAspectRatio(out, height=1350))\n",
|
||||
" else:\n",
|
||||
" cv2.imshow(\"test\"+str(i), mf.ResizeWithAspectRatio(out, width=1000))\n",
|
||||
"cv2.waitKey(0)\n",
|
||||
"cv2.destroyAllWindows()\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# cv2.imwrite(\"../temp/test.jpg\", outs[2])"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
260
code/textdataretriever/textextractor/modelimp.ipynb
Normal file
@ -0,0 +1,260 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 83,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# https://github.com/NielsRogge/Transformers-Tutorials/blob/master/TrOCR/Inference_with_TrOCR_%2B_Gradio_demo.ipynb\n",
|
||||
"# https://github.com/NielsRogge/Transformers-Tutorials/tree/master/TrOCR\n",
|
||||
"# https://huggingface.co/docs/transformers/model_doc/trocr"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 84,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from transformers import TrOCRProcessor\n",
|
||||
"from transformers import VisionEncoderDecoderModel\n",
|
||||
"\n",
|
||||
"from PIL import Image\n",
|
||||
"import torch"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 85,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import sys\n",
|
||||
"sys.path.insert(0, '../../autocropper')\n",
|
||||
"import myfunctions as mf\n",
|
||||
"\n",
|
||||
"import extractorfunctions as ef\n",
|
||||
"import cv2"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 86,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Could not find image processor class in the image processor config or the model config. Loading based on pattern matching with the model's feature extractor configuration.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Some weights of VisionEncoderDecoderModel were not initialized from the model checkpoint at microsoft/trocr-small-printed and are newly initialized: ['encoder.pooler.dense.bias', 'encoder.pooler.dense.weight']\n",
|
||||
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"processor = TrOCRProcessor.from_pretrained('microsoft/trocr-small-printed')\n",
|
||||
"model = VisionEncoderDecoderModel.from_pretrained('microsoft/trocr-small-printed')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 87,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"device = torch.device(\"cpu\")\n",
|
||||
"if torch.cuda.is_available():\n",
|
||||
" device = torch.device(\"cuda:0\")\n",
|
||||
" \n",
|
||||
"model = model.to(device)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 88,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"filename = \"IMG_7640.jpg\"\n",
|
||||
"pathname = \"../test_images/\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 89,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"img = cv2.imread(pathname+filename)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 90,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"clarified = mf.houghlineprocessing(img)\n",
|
||||
"lineimages = ef.lineisolator(clarified)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 91,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# print(len(lineimages))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 92,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"PILversions = []\n",
|
||||
"for line in lineimages:\n",
|
||||
" rgbline = cv2.cvtColor(line, cv2.COLOR_GRAY2RGB)\n",
|
||||
" PILversions.append(Image.fromarray(rgbline))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 100,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAogAAAB8CAIAAABlt9bLAAAI4klEQVR4nO3d23ajuBYF0FCj//+XfR7cx52yMVddtrTnfMqopGwQQksSIJbH4/EDAMTwz+q/Lsvy/EFsA0BLy1v0viJ5lZwGgKr+nPrrZVm2kxsAuONcMD+JZwCo5EowAwCVCGYACEQwA0AgghkAAhHMABCIYAaAQNZX/tplpZEaLLgGwMURs+eYAaCGiyNm7tjt1izL+1KpACQhmJs6PtPw+ZeiGiADN3+1Y/4fgF2CuRGpDMARghkAAhHMwzDmBshAMLcgUwE4SDBXJ5UBOE4wA0AgghkAAhHMABCIYAaAQCzJCfDu9z2bq6vhehcc9QjmYTj/oY23Jyk8WEFjgjmix+OhPw6Qk2AO55nE8hggJzd/xSKPAZITzIFIZQAEMwAE8lcwu/kQAPq6OGI26QoANfwXzIbLNShVAE65MmI2XAaAStz8BQCBzLPAyOqkscE9AGOZJ5hXWdgSgLFMHswvEhqAIbjGXJFbsgE4SzBXsSzLhVQW5ABkmcqu6m2e/E6+mnIHSE4wl2TIC5BZkcGVYL5LGAPw83ccfIuGI4EtmOMyrQ2QkGAOquVA/O279AMAKlmWZbeNFcy8e+b0q+qIbYCCdrPZ41IA0NT2nKgRczoHJ8nd1AbQhWAewJFrEgc/p/iHmNkG+Ck6mDGVnUWlEbCBNZnpmFKDYOYu2cw0ri2mC2WZyqaAtxu5U/ndjn+WgNeET8xxpBLBDNdtj66+/dbSMcAGwXzXatsaZzas8UIlkuYn0tEHRiSYq3g8Hjlb5+TZfPagu8V9m6kFchLMUUzT9GS+3nxT8m7NhreSGeXKvY4F1wjmECqdtzlH7VUp0l6e2bxR/tF6hG+beuddQ2QjmGtJO5vNHZnHWLvny5ETarirA3H6E12KbuOYRiiT4y609hszZIJ5Wne6BcHvaOvFnXT11FsAp30xfu7L2S5F1W1eDeCAZ/fBLkuE6xqXS+9VP9965IKZv2xU6ORzAO33PUk21y7YBkPS+7vwOe9dcIN3H+r71hGPX/12n0j8rdTuvHWhbh791UMvmOd0ra7sVtzjtXCIs5o86l0jCN5bvTD/v/rvZcsteKEdV2NHlmX5p96n00ulVH79WcLaknCXZ5Wqyzhfve2+R202IN1a2d2Pa0w1mipFfVOe/ODT/VW7y56A3U/nVMuYnw7mmI1FngNWw9ljGrMOMKKhz9wGURGqfLpE4/Mbg5RDs834c+rLYrbIQY4ZnxwaNqge9dQr2/bxXOrrBqpvbv4i11W3xg6+b2r3f1GKsi0iYaPRMtfTXWPOVpkOOrK2Q6prPPc9Ho/VyqYGdjR94bc8T0dsDUbZ5lwj5ulPyzsOdoFHqdl9bZfk9rLPamklgxasEzOhXMHMNud2d4OGxwVlH+rb/bQuBdvm2cKOp22c9UQnI5hnI1zHpYG7bDsCe6Xy6leXXVQ5guCbNyLBPBWpHMTxpkqLdtCRZemeP6yuBd1Ysy+tsZqQZqS7RDd/aQFpSevWS8cz/fF/239T6usq1bGxmsrdAr/wl90ZMQOzad/+nvrGvuvaHtnUsK+cevPalwZH/PL9DRe+RTBXEb9CNzBK57Qet8YUFPlC5oUN65XNA3UgNoSqCddKaXsXhp/KjllvmMydhkAV5aZQOfSp8eYFL40jdndh+GCewAT1LIMi2Wydljtilpvzt2WtTlLaprI7S1LPoIh6bwi+oPsGnNJga6teuxmrtG8yYoYWDJQpywh1YoIZjtJIFXS/MPN0dPLs6aqBHnMq5Vww6/VDEc6jHx2dQThM7V25xvz53EKEpXaggYIPkER+/mcUr2OhJLcpn7FcnMpe/vb529sbxmlni925ek38lZsGUqowlSQzqXVX9rfzRBgE8W2F/W2av6ewCy8AE2j9uJTAHprD9FIqm0M9/zM066wxjSh3ZRt/MJyEN4vWULYM3a
BKe8XbgSjBDKSlfxOZjk57gjkpJ1spBcfNmQ9KhHHzQOV/alNbrvSuj1VElGB2OBnakRfxHjFQNhRXqgxfThXmrAk0ynbyW5Rgnqk9GmVfrm3nxmNyv397e+tG9fjl2idkLr2n+A+kSbtViqUUL7FI7ewaF9/CuNwWTeXyndvWHvm9+yrYZBzQXVFGzNxXuylfXUnGObbNuPm+mxV7vpIceo/OXh3P2UMVzNkNfZKTxHxXBwJuEpcVP5qCuafincF6vUvtyGU5u/yVzJHNs95ollONqvXnp8LNkIylRsUK1Q525+QqaI5sPk7lmcyRA/rXzV/Ha8CgVZxvHNBvSr05zfLaBSlMgqg0+XHxruwjn+7MYVzfaq/3DAYhm5lYxceldlsubVwNQRqsoY/pkQIs8qRZWkXO/ef/VbDMp+dzzEO33ZF1z+YkR9bTxtf8rpxerlWbWjqiLHdlq5rUcLADZFRHR3fW+Cu+MRwx/Mpfb4n7WZNEMqecbYx2Z2W1bi/Fi0LZMqXhg/mNGH7qPpvNk6MAnDVbMPPSJZszd4xk8IaD99O9fvbcxwbd7ullucYMY8ncxfk5ELrJk+ls9Tj7BszkxdudEXN5uy/GadbmNu5ZJ8+SgmYqyZs18HMYLTOuqX179kyVtjvBXFf3yvragNrNWfc9ZXoi+aZ62ez0L8tUNgU4LYlPLf3RuRmEYM6iRqs05etPuu/RZOV5mSud2yq9zEOZR2AqO5HPae07E93ygyOC3EI8ZXW9XLZlj8iUZXtQpeotmDP6PJEyn1qret1n5EBwSvd+jxpbg6ls+KpxozNrG9f96sDclO18BDOEMH3zOv0OdtSrbB3TSgQzbJnyBrde5Ec97fcxQ6n24hozHFLpqrPWrapUxdvyenOqgm3PiBn6yDkQb7nLinfob8nMiBlOKPVux8xNW5uBXdoSrl28aQu2JcEMBWitTtldT77Uh+dUL5uVbRumsoF5SI6nSiv9Ff9MVhkxAz0Fae5bLinTeDL/zncFOTrZCGaAf0V7JWuR7bncD5DKvQhmgA7C3qAeYW3z5AQzAP+p2mNYvWRgaP6m1nuzASCD3TmGszn7Px23ddef5EmFAAAAAElFTkSuQmCC",
|
||||
"text/plain": [
|
||||
"<PIL.Image.Image image mode=RGB size=648x124>"
|
||||
]
|
||||
},
|
||||
"execution_count": 100,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# PILversions[9]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 94,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# image = Image.open(\"../result_images/6.jpg\").convert(\"RGB\")\n",
|
||||
"# image"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 95,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# pixel_values = processor(image, return_tensors=\"pt\").pixel_values\n",
|
||||
"# # print(pixel_values.shape)\n",
|
||||
"# # print(image)\n",
|
||||
"# # print(pixel_values)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 96,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# pixel_values = processor(image, return_tensors=\"pt\").pixel_values\n",
|
||||
"# # print(pixel_values.shape)\n",
|
||||
"# generated_ids = model.generate(pixel_values)\n",
|
||||
"# generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]\n",
|
||||
"# print(generated_text)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 97,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"finalstring = \"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 98,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"for image in PILversions:\n",
|
||||
" pixel_values = processor(image, return_tensors=\"pt\").pixel_values\n",
|
||||
" pixel_values = pixel_values.to(device)\n",
|
||||
" generated_ids = model.generate(pixel_values)\n",
|
||||
" generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]\n",
|
||||
" finalstring = finalstring + generated_text + \"\\n\"\n",
|
||||
" # print(generated_text)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 99,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"WALKER'S\n",
|
||||
"CHOCOLATES\n",
|
||||
"NO RETURNS OR EXCHANGES\n",
|
||||
"ON FOOD ITEMS.\n",
|
||||
"REG 09-22-2023 12:08\n",
|
||||
"000021\n",
|
||||
"1 BAKING NT $14.40\n",
|
||||
"TL $14.40\n",
|
||||
"CREDIT : $14.40\n",
|
||||
"LIFE S SHORT\n",
|
||||
"EAT CHOCOLATE\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(finalstring)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@ -2,7 +2,7 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 97,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -18,392 +18,26 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 98,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import sys\n",
|
||||
"sys.path.insert(0, '../../autocropper')\n",
|
||||
"import myfunctions as mf\n"
|
||||
"import myfunctions as mf\n",
|
||||
"\n",
|
||||
"import extractorfunctions as ef\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 99,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def rectcenterpt(rect, xywhrect=True, retint=False):\n",
|
||||
" if (xywhrect):\n",
|
||||
" x = rect[0] + rect[2]/2\n",
|
||||
" y = rect[1] + rect[3]/2\n",
|
||||
" else:\n",
|
||||
" x = (rect[0]+rect[2])/2\n",
|
||||
" y = (rect[1]+rect[3])/2\n",
|
||||
" if (retint):\n",
|
||||
" x = int(x)\n",
|
||||
" y = int(y)\n",
|
||||
" return (x,y)\n",
|
||||
"\n",
|
||||
"def containsamount(outerrect, innerrect, percentage=1):\n",
|
||||
" tinyrect = mf.overlapRect([outerrect, innerrect])\n",
|
||||
" tinyarea = tinyrect[2]*tinyrect[3]\n",
|
||||
" innerrectarea = innerrect[2]*innerrect[3]\n",
|
||||
" if (tinyarea/innerrectarea >= percentage):\n",
|
||||
" return True\n",
|
||||
" return False\n",
|
||||
"\n",
|
||||
"def aboveandbelow(outerrect, innerrect):\n",
|
||||
" if (outerrect[1] < innerrect[1] and outerrect[1]+outerrect[3] > innerrect[1]+innerrect[3]):\n",
|
||||
" return True\n",
|
||||
" return False\n",
|
||||
"\n",
|
||||
"## Below code is an almost direct copy from https://github.com/scrunts23/CS-Data-Science-Build-Week-1/blob/master/model/dbscan.py\n",
|
||||
"\n",
|
||||
"def dbscan(D, eps, MinPts):\n",
|
||||
" '''\n",
|
||||
" Cluster the dataset `D` using the DBSCAN algorithm.\n",
|
||||
" \n",
|
||||
" dbscan takes a dataset `D` (a list of vectors), a threshold distance\n",
|
||||
" `eps`, and a required number of points `MinPts`.\n",
|
||||
" \n",
|
||||
" It will return a list of cluster labels. The label -1 means noise, and then\n",
|
||||
" the clusters are numbered starting from 1.\n",
|
||||
" '''\n",
|
||||
" \n",
|
||||
" # This list will hold the final cluster assignment for each point in D.\n",
|
||||
" # There are two reserved values:\n",
|
||||
" # -1 - Indicates a noise point\n",
|
||||
" # 0 - Means the point hasn't been considered yet.\n",
|
||||
" # Initially all labels are 0. \n",
|
||||
" labels = [0]*len(D)\n",
|
||||
"\n",
|
||||
" # C is the ID of the current cluster. \n",
|
||||
" C = 0\n",
|
||||
" \n",
|
||||
" # This outer loop is just responsible for picking new seed points--a point\n",
|
||||
" # from which to grow a new cluster.\n",
|
||||
" # Once a valid seed point is found, a new cluster is created, and the \n",
|
||||
" # cluster growth is all handled by the 'expandCluster' routine.\n",
|
||||
" \n",
|
||||
" # For each point P in the Dataset D...\n",
|
||||
" # ('P' is the index of the datapoint, rather than the datapoint itself.)\n",
|
||||
" for P in range(0, len(D)):\n",
|
||||
" \n",
|
||||
" # Only points that have not already been claimed can be picked as new \n",
|
||||
" # seed points. \n",
|
||||
" # If the point's label is not 0, continue to the next point.\n",
|
||||
" if not (labels[P] == 0):\n",
|
||||
" continue\n",
|
||||
" \n",
|
||||
" # Find all of P's neighboring points.\n",
|
||||
" NeighborPts = region_query(D, P, eps)\n",
|
||||
" \n",
|
||||
" # If the number is below MinPts, this point is noise. \n",
|
||||
" # This is the only condition under which a point is labeled \n",
|
||||
" # NOISE--when it's not a valid seed point. A NOISE point may later \n",
|
||||
" # be picked up by another cluster as a boundary point (this is the only\n",
|
||||
" # condition under which a cluster label can change--from NOISE to \n",
|
||||
" # something else).\n",
|
||||
" if len(NeighborPts) < MinPts:\n",
|
||||
" labels[P] = -1\n",
|
||||
" # Otherwise, if there are at least MinPts nearby, use this point as the \n",
|
||||
" # seed for a new cluster. \n",
|
||||
" else: \n",
|
||||
" C += 1\n",
|
||||
" grow_cluster(D, labels, P, NeighborPts, C, eps, MinPts)\n",
|
||||
" \n",
|
||||
" # All data has been clustered!\n",
|
||||
" return labels\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def grow_cluster(D, labels, P, NeighborPts, C, eps, MinPts):\n",
|
||||
" '''\n",
|
||||
" Grow a new cluster with label `C` from the seed point `P`.\n",
|
||||
" \n",
|
||||
" This function searches through the dataset to find all points that belong\n",
|
||||
" to this new cluster. When this function returns, cluster `C` is complete.\n",
|
||||
" \n",
|
||||
" Parameters:\n",
|
||||
" `D` - The dataset (a list of vectors)\n",
|
||||
" `labels` - List storing the cluster labels for all dataset points\n",
|
||||
" `P` - Index of the seed point for this new cluster\n",
|
||||
" `NeighborPts` - All of the neighbors of `P`\n",
|
||||
" `C` - The label for this new cluster. \n",
|
||||
" `eps` - Threshold distance\n",
|
||||
" `MinPts` - Minimum required number of neighbors\n",
|
||||
" '''\n",
|
||||
"\n",
|
||||
" # Assign the cluster label to the seed point.\n",
|
||||
" labels[P] = C\n",
|
||||
" \n",
|
||||
" # Look at each neighbor of P (neighbors are referred to as Pn). \n",
|
||||
" # NeighborPts will be used as a FIFO queue of points to search--that is, it\n",
|
||||
" # will grow as we discover new branch points for the cluster. The FIFO\n",
|
||||
" # behavior is accomplished by using a while-loop rather than a for-loop.\n",
|
||||
" # In NeighborPts, the points are represented by their index in the original\n",
|
||||
" # dataset.\n",
|
||||
" i = 0\n",
|
||||
" while i < len(NeighborPts): \n",
|
||||
" \n",
|
||||
" # Get the next point from the queue. \n",
|
||||
" Pn = NeighborPts[i]\n",
|
||||
" \n",
|
||||
" # If Pn was labelled NOISE during the seed search, then we\n",
|
||||
" # know it's not a branch point (it doesn't have enough neighbors), so\n",
|
||||
" # make it a leaf point of cluster C and move on.\n",
|
||||
" if labels[Pn] == -1:\n",
|
||||
" labels[Pn] = C\n",
|
||||
" \n",
|
||||
" # Otherwise, if Pn isn't already claimed, claim it as part of C.\n",
|
||||
" elif labels[Pn] == 0:\n",
|
||||
" # Add Pn to cluster C (Assign cluster label C).\n",
|
||||
" labels[Pn] = C\n",
|
||||
" \n",
|
||||
" # Find all the neighbors of Pn\n",
|
||||
" PnNeighborPts = region_query(D, Pn, eps)\n",
|
||||
" \n",
|
||||
" # If Pn has at least MinPts neighbors, it's a branch point!\n",
|
||||
" # Add all of its neighbors to the FIFO queue to be searched. \n",
|
||||
" if len(PnNeighborPts) >= MinPts:\n",
|
||||
" NeighborPts = NeighborPts + PnNeighborPts\n",
|
||||
" # If Pn *doesn't* have enough neighbors, then it's a leaf point.\n",
|
||||
" # Don't queue up it's neighbors as expansion points.\n",
|
||||
" #else:\n",
|
||||
" # Do nothing \n",
|
||||
" #NeighborPts = NeighborPts \n",
|
||||
" \n",
|
||||
" # Advance to the next point in the FIFO queue.\n",
|
||||
" i += 1 \n",
|
||||
" \n",
|
||||
" # We've finished growing cluster C!\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def region_query(D, P, eps):\n",
|
||||
" '''\n",
|
||||
" Find all points in dataset `D` within distance `eps` of point `P`.\n",
|
||||
" \n",
|
||||
" This function calculates the distance between a point P and every other \n",
|
||||
" point in the dataset, and then returns only those points which are within a\n",
|
||||
" threshold distance `eps`.\n",
|
||||
" '''\n",
|
||||
" neighbors = []\n",
|
||||
" \n",
|
||||
" # For each point in the dataset...\n",
|
||||
" for Pn in range(0, len(D)):\n",
|
||||
" \n",
|
||||
" # If the distance is below the threshold, add it to the neighbors list.\n",
|
||||
" if (rectcenterpt(D[P])[1] - rectcenterpt(D[Pn])[1]) < eps:\n",
|
||||
" neighbors.append(Pn)\n",
|
||||
" \n",
|
||||
" return neighbors"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 100,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def tempfunc(image):\n",
|
||||
" shape = image.shape\n",
|
||||
" \n",
|
||||
"\n",
|
||||
" # blackout = np.zeros(tempout.shape, dtype=np.uint8)\n",
|
||||
" \n",
|
||||
" kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,3))\n",
|
||||
" reducedimage = cv2.morphologyEx(image, cv2.MORPH_DILATE, kernel)\n",
|
||||
" reducedimage = cv2.morphologyEx(reducedimage, cv2.MORPH_ERODE, kernel)\n",
|
||||
" \n",
|
||||
" tempout = cv2.cvtColor(reducedimage, cv2.COLOR_GRAY2BGR)\n",
|
||||
" \n",
|
||||
" \n",
|
||||
" canny = cv2.Canny(reducedimage, 0, 500, None, 3)\n",
|
||||
" \n",
|
||||
" \n",
|
||||
" contours, heirarchy = cv2.findContours(canny,cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)\n",
|
||||
" boundingboxes = np.empty((len(contours), 4), dtype=int)\n",
|
||||
" \n",
|
||||
" for i, contour in enumerate(contours):\n",
|
||||
" boundingboxes[i] = cv2.boundingRect(contour)\n",
|
||||
" \n",
|
||||
" epsilonvalue = np.median(boundingboxes, axis=0)[3]/2\n",
|
||||
" \n",
|
||||
" labels = dbscan(boundingboxes, epsilonvalue, 1)\n",
|
||||
" print(labels)\n",
|
||||
" numclusters = max(labels)\n",
|
||||
" lineboxes = [[] for _ in range(numclusters)]\n",
|
||||
"\n",
|
||||
" for i, item in enumerate(labels):\n",
|
||||
" lineboxes[item-1].append(boundingboxes[i].tolist())\n",
|
||||
" \n",
|
||||
" \n",
|
||||
" mergedboxes = np.empty((numclusters,4), dtype=int)\n",
|
||||
" \n",
|
||||
" \n",
|
||||
" for i in range(numclusters):\n",
|
||||
" b = mf.mergerects(lineboxes[i])\n",
|
||||
" mergedboxes[i] = b\n",
|
||||
" \n",
|
||||
" j = 0\n",
|
||||
" while (j < len(mergedboxes)):\n",
|
||||
" i = 0\n",
|
||||
" while (i < len(mergedboxes)):\n",
|
||||
" if (i == j):\n",
|
||||
" i += 1\n",
|
||||
" continue\n",
|
||||
" outerbox = mergedboxes[j]\n",
|
||||
" innerbox = mergedboxes[i]\n",
|
||||
" if containsamount(outerbox, innerbox, 1) or aboveandbelow(outerbox, innerbox) or innerbox[3] < epsilonvalue:\n",
|
||||
" mergedboxes = np.delete(mergedboxes, i, axis=0)\n",
|
||||
" lineboxes.pop(i)\n",
|
||||
" if (i < j):\n",
|
||||
" j -= 1\n",
|
||||
" i -= 1\n",
|
||||
" i += 1\n",
|
||||
" j += 1\n",
|
||||
" \n",
|
||||
" # return mergedboxes, lineboxes\n",
|
||||
" for i, b in enumerate(mergedboxes):\n",
|
||||
" tempout = cv2.rectangle(tempout, (b[0],b[1]), (b[0]+b[2], b[1]+b[3]), (0,255,0), thickness=1)\n",
|
||||
" for t in lineboxes[i]:\n",
|
||||
" tempout = cv2.rectangle(tempout, (t[0],t[1]), (t[0]+t[2], t[1]+t[3]), (0,0,255), thickness=1)\n",
|
||||
" \n",
|
||||
" print(epsilonvalue)\n",
|
||||
" return tempout"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 101,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def linerectretriever(image):\n",
|
||||
" shape = image.shape\n",
|
||||
" \n",
|
||||
" kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,3))\n",
|
||||
" reducedimage = cv2.morphologyEx(image, cv2.MORPH_DILATE, kernel)\n",
|
||||
" reducedimage = cv2.morphologyEx(reducedimage, cv2.MORPH_ERODE, kernel)\n",
|
||||
" \n",
|
||||
" canny = cv2.Canny(reducedimage, 0, 500, None, 3)\n",
|
||||
" \n",
|
||||
" \n",
|
||||
" contours, heirarchy = cv2.findContours(canny,cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)\n",
|
||||
" boundingboxes = np.empty((len(contours), 4), dtype=int)\n",
|
||||
" \n",
|
||||
" for i, contour in enumerate(contours):\n",
|
||||
" boundingboxes[i] = cv2.boundingRect(contour)\n",
|
||||
" \n",
|
||||
" epsilonvalue = np.median(boundingboxes, axis=0)[3]/3\n",
|
||||
" \n",
|
||||
" labels = dbscan(boundingboxes, epsilonvalue, 1)\n",
|
||||
" # print(labels)\n",
|
||||
" numclusters = max(labels)\n",
|
||||
" lineboxes = [[] for _ in range(numclusters)]\n",
|
||||
"\n",
|
||||
" for i, item in enumerate(labels):\n",
|
||||
" lineboxes[item-1].append(boundingboxes[i].tolist())\n",
|
||||
" \n",
|
||||
" \n",
|
||||
" mergedboxes = np.empty((numclusters,4), dtype=int)\n",
|
||||
" \n",
|
||||
" \n",
|
||||
" for i in range(numclusters):\n",
|
||||
" b = mf.mergerects(lineboxes[i])\n",
|
||||
" mergedboxes[i] = b\n",
|
||||
" \n",
|
||||
" j = 0\n",
|
||||
" while (j < len(mergedboxes)):\n",
|
||||
" i = 0\n",
|
||||
" while (i < len(mergedboxes)):\n",
|
||||
" if (i == j):\n",
|
||||
" i += 1\n",
|
||||
" continue\n",
|
||||
" outerbox = mergedboxes[j]\n",
|
||||
" innerbox = mergedboxes[i]\n",
|
||||
" if containsamount(outerbox, innerbox, 1) or aboveandbelow(outerbox, innerbox) or innerbox[3] < epsilonvalue:\n",
|
||||
" mergedboxes = np.delete(mergedboxes, i, axis=0)\n",
|
||||
" lineboxes.pop(i)\n",
|
||||
" if (i < j):\n",
|
||||
" j -= 1\n",
|
||||
" i -= 1\n",
|
||||
" i += 1\n",
|
||||
" j += 1\n",
|
||||
" \n",
|
||||
" return mergedboxes, lineboxes\n",
|
||||
" \n",
|
||||
"def lineimagemaker(thresholded):\n",
|
||||
" lineimages = []\n",
|
||||
" mergedboxes, originalboxes = linerectretriever(thresholded)\n",
|
||||
" \n",
|
||||
" mergedboxesordering = (mergedboxes[:,1]).argsort() # sorted by y value (aka lines from top to bottom)\n",
|
||||
" mergedboxes = mergedboxes[mergedboxesordering]\n",
|
||||
" originalboxes = [originalboxes[i] for i in mergedboxesordering]\n",
|
||||
" for i, box in enumerate(mergedboxes):\n",
|
||||
" mask = np.zeros(thresholded.shape, dtype=np.uint8)\n",
|
||||
" whitebackground = np.full(thresholded.shape, fill_value=255, dtype=np.uint8)\n",
|
||||
" for lb in originalboxes[i]:\n",
|
||||
" mask = cv2.rectangle(mask, (lb[0],lb[1]), (lb[0]+lb[2], lb[1]+lb[3]), (255,255,255), thickness=cv2.FILLED)\n",
|
||||
"\n",
|
||||
" invertedmask = cv2.bitwise_not(mask)\n",
|
||||
" whitedscreen = cv2.bitwise_and(whitebackground, whitebackground, mask=invertedmask)\n",
|
||||
" lineimage = cv2.bitwise_and(thresholded, thresholded, mask=mask)\n",
|
||||
" lineimage = cv2.bitwise_or(whitedscreen, lineimage)[box[1]:box[1]+box[3], box[0]:box[0]+box[2]]\n",
|
||||
" # lineimage = mf.externaldeskew(lineimage, fill=(255,255,255), alreadygray=True)\n",
|
||||
" # lineimage = thresholded[box[1]:box[1]+box[3], box[0]:box[0]+box[2]]\n",
|
||||
" lineimages.append(lineimage)\n",
|
||||
" # lineimages.append(mask)\n",
|
||||
" return lineimages\n",
|
||||
" \n",
|
||||
"\n",
|
||||
"def lineisolator(image):\n",
|
||||
" imgcopy = image.copy()\n",
|
||||
" gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)\n",
|
||||
" thresholded = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]\n",
|
||||
" \n",
|
||||
" \n",
|
||||
" kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,3))\n",
|
||||
" \n",
|
||||
" \n",
|
||||
" \n",
|
||||
" lineimages = lineimagemaker(thresholded)\n",
|
||||
" \n",
|
||||
" # for i, lineimage in enumerate(lineimages):\n",
|
||||
" # lineimages[i] = cv2.morphologyEx(lineimage, cv2.MORPH_ERODE, kernel)\n",
|
||||
"\n",
|
||||
" \n",
|
||||
" finallineimages = []\n",
|
||||
" for i, lineimage in enumerate(lineimages):\n",
|
||||
" templineimages = lineimagemaker(lineimage)\n",
|
||||
" finallineimages += templineimages\n",
|
||||
" \n",
|
||||
" \n",
|
||||
" # mergedboxes, originalboxes = linerectretriever(thresholded) \n",
|
||||
" # mask = np.zeros(thresholded.shape, dtype=np.uint8)\n",
|
||||
" # for i, box in enumerate(mergedboxes):\n",
|
||||
" # for lb in originalboxes[i]:\n",
|
||||
" # mask = cv2.rectangle(mask, (lb[0],lb[1]), (lb[0]+lb[2], lb[1]+lb[3]), (255,255,255), thickness=cv2.FILLED)\n",
|
||||
"\n",
|
||||
" # return mask\n",
|
||||
" \n",
|
||||
" \n",
|
||||
" # out = tempfunc(thresholded)\n",
|
||||
" # return out\n",
|
||||
" \n",
|
||||
" return finallineimages\n",
|
||||
" \n",
|
||||
" "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 102,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"pathname = \"../adjusted_test_images/\"\n",
|
||||
"filename = \"IMG_7594.jpg\"\n",
|
||||
"# pathname = \"../test_images/\"\n",
|
||||
"pathname = \"../result_images/\"\n",
|
||||
"filename = \"13.jpg\"\n",
|
||||
"\n",
|
||||
"# print(pathname+filename)\n",
|
||||
"img = cv2.imread(pathname+filename)"
|
||||
@ -411,40 +45,131 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 103,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"outs = lineisolator(img)"
|
||||
"# import easyocr\n",
|
||||
"# reader = easyocr.Reader(['en'])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 104,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)\n",
|
||||
"# thresholded = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]\n",
|
||||
"# monke = tempfunc(thresholded)\n",
|
||||
"# cv2.imwrite(\"../temp/monke.jpg\", monke)"
|
||||
"def padWithColour(img, hpadding=0, vpadding=0, fill=(0,0,0)):\n",
|
||||
" borderType = cv2.BORDER_CONSTANT\n",
|
||||
" out = cv2.copyMakeBorder(img, vpadding, vpadding, hpadding, hpadding, borderType, None, fill)\n",
|
||||
" return out\n",
|
||||
"\n",
|
||||
"def mergecontours(contours):\n",
|
||||
" cont = np.vstack(contours)\n",
|
||||
" finalcontour = cv2.convexHull(cont)\n",
|
||||
" return finalcontour"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 105,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# cv2.imshow(\"test\", mf.ResizeWithAspectRatio(img, height=1000))\n",
|
||||
"# # cv2.imshow(\"test1\", mf.ResizeWithAspectRatio(out, height=1000))\n",
|
||||
"# cv2.waitKey(0)\n",
|
||||
"# cv2.destroyAllWindows()"
|
||||
"def getSkewAngle(cvImage) -> float:\n",
|
||||
" # Prep image, copy, convert to gray scale, blur, and threshold\n",
|
||||
" newImage = padWithColour(cvImage, hpadding=50, vpadding=50, fill=(255,255,255))\n",
|
||||
" # return newImage\n",
|
||||
" gray = cv2.cvtColor(newImage, cv2.COLOR_BGR2GRAY)\n",
|
||||
" blur = cv2.GaussianBlur(gray, (9, 9), 0)\n",
|
||||
" thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]\n",
|
||||
"\n",
|
||||
" # Apply dilate to merge text into meaningful lines/paragraphs.\n",
|
||||
" # Use larger kernel on X axis to merge characters into single line, cancelling out any spaces.\n",
|
||||
" # But use smaller kernel on Y axis to separate between different blocks of text\n",
|
||||
" kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (10, 5))\n",
|
||||
" dilate = cv2.dilate(thresh, kernel, iterations=5)\n",
|
||||
" # return dilate\n",
|
||||
"\n",
|
||||
" # Find all contours\n",
|
||||
" contours, hierarchy = cv2.findContours(dilate, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)\n",
|
||||
" contours = sorted(contours, key = cv2.contourArea, reverse = True)\n",
|
||||
"\n",
|
||||
" # Find largest contour and surround in min area box\n",
|
||||
" largestContour = contours[0]\n",
|
||||
"\n",
|
||||
" mergedcontour = mergecontours(contours)\n",
|
||||
"\n",
|
||||
" # return cv2.drawContours(newImage, [mergedcontour], -1, (0,255,0), thickness=3)\n",
|
||||
" minAreaRect = cv2.minAreaRect(mergedcontour)\n",
|
||||
" # return cv2.drawContours(newImage, [largestContour], -1, (0,255,0), thickness=3)\n",
|
||||
" # minAreaRect = cv2.minAreaRect(largestContour)\n",
|
||||
"\n",
|
||||
" box = cv2.boxPoints(minAreaRect)\n",
|
||||
" box = np.intp(box) \n",
|
||||
" newImage = cv2.drawContours(newImage, [box], -1, (0,255,0), thickness=3)\n",
|
||||
" # return newImage\n",
|
||||
"\n",
|
||||
" # Determine the angle. Convert it to the value that was originally used to obtain skewed image\n",
|
||||
" angle = minAreaRect[-1]\n",
|
||||
" print(angle)\n",
|
||||
" if angle > 45:\n",
|
||||
" angle = angle - 90\n",
|
||||
" if angle < -45:\n",
|
||||
" angle = 90 + angle\n",
|
||||
" print(angle)\n",
|
||||
" return angle\n",
|
||||
"\n",
|
||||
"def minboxdeskew(img, fill=(0,0,0)):\n",
|
||||
" angle = getSkewAngle(img)\n",
|
||||
" rotated = mf.rotate(img, angle, fill=fill)\n",
|
||||
" return rotated"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 106,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# result = reader.readtext(pathname+filename)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# print(result)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# outs = ef.lineisolator(img)\n",
|
||||
"# # gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)\n",
|
||||
"# # outs = linerectretriever(gray)\n",
|
||||
"# outs = getSkewAngle(img)\n",
|
||||
"outs = minboxdeskew(img, fill=(255,255,255))\n",
|
||||
"# outs = img"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# print(outs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -464,6 +189,7 @@
|
||||
" cv2.imshow(\"test\", mf.ResizeWithAspectRatio(outs, width=1000))\n",
|
||||
"else:\n",
|
||||
" for i, out in enumerate(outs):\n",
|
||||
" cv2.imwrite(\"../result_images/\"+str(i)+\".jpg\", out)\n",
|
||||
" if (out.shape[0] > out.shape[1]):\n",
|
||||
" cv2.imshow(\"test\"+str(i), mf.ResizeWithAspectRatio(out, height=1350))\n",
|
||||
" else:\n",
|
||||
@ -472,69 +198,6 @@
|
||||
"cv2.destroyAllWindows()\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 107,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# cv2.imshow(\"test\", mf.ResizeWithAspectRatio(outs[30], width=1000))\n",
|
||||
"# cv2.waitKey(0)\n",
|
||||
"# cv2.destroyAllWindows()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 108,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# results = tempfunc(outs[30])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 109,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# cv2.imshow(\"test\", mf.ResizeWithAspectRatio(results, width=1000))\n",
|
||||
"# cv2.waitKey(0)\n",
|
||||
"# cv2.destroyAllWindows()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 110,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# https://medium.com/@vatvenger/extracting-lines-from-ocr-a8f410448fc\n",
|
||||
"# https://www.width.ai/post/the-best-ways-to-extract-text-from-images-without-tesseract-python"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 111,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## Potential Next Steps. Isolate a line of text and then feed that into the OCR Model to extract the text."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 112,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# #IDEA:\n",
|
||||
"# 1. Isolate lines into rectangles\n",
|
||||
"# 2. feed that rectangle portion of the image into an OCR model\n",
|
||||
"# 3. append that to the final output string with the end character for nextline\n",
|
||||
"# 4. give the whole final string to a model which gives the outputs"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
@ -10,18 +10,20 @@ RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
|
||||
|
||||
#-y is for accepting yes when the system asked us for installing the package
|
||||
RUN apt-get update && \
|
||||
apt-get install -y build-essential cmake git gdb pkg-config valgrind systemd-coredump python3-opencv libopencv-dev python3-pip python3-dev && \
|
||||
apt-get install -y build-essential cmake git gdb pkg-config valgrind systemd-coredump python3 python3-opencv libopencv-dev python3-pip && \
|
||||
apt-get -y clean && apt-get -y autoremove
|
||||
|
||||
RUN python3 -m pip install --upgrade pip
|
||||
|
||||
RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu117
|
||||
|
||||
RUN pip3 install -q transformers && pip3 install sentencepiece && pip3 install protobuf
|
||||
|
||||
RUN pip3 install datasets && pip3 install jupyter notebook && pip3 install matplotlib && pip3 install deskew
|
||||
|
||||
RUN pip3 install easyocr && pip3 uninstall -y opencv-python-headless
|
||||
|
||||
ENV HF_DATASETS_CACHE="/mnt/code/.cache/datasets"
|
||||
ENV TORCH_HOME="/mnt/code/.cache/torch"
|
||||
|
||||
ENV TRANSFORMERS_CACHE="/mnt/code/.cache/transformers"
|
||||
|
||||
|
||||
97
helpful_links.md
Normal file
@ -0,0 +1,97 @@
|
||||
OpenCV Modifiers:
|
||||
https://docs.opencv.org/3.4/d4/d1b/tutorial_histogram_equalization.html
|
||||
https://stackoverflow.com/questions/39308030/how-do-i-increase-the-contrast-of-an-image-in-python-opencv
|
||||
https://docs.opencv.org/4.x/d7/d4d/tutorial_py_thresholding.html
|
||||
https://docs.opencv.org/4.x/d7/d1b/group__imgproc__misc.html#ggaa9e58d2860d4afa658ef70a9b1115576a0e50a338a4b711a8c48f06a6b105dd98
|
||||
https://docs.opencv.org/4.x/d7/d4d/tutorial_py_thresholding.html
|
||||
https://docs.opencv.org/4.x/d9/d61/tutorial_py_morphological_ops.html
|
||||
https://docs.opencv.org/4.x/d9/d8b/tutorial_py_contours_hierarchy.html
|
||||
https://stackoverflow.com/questions/4292249/automatic-calculation-of-low-and-high-thresholds-for-the-canny-operation-in-open
|
||||
https://stackabuse.com/opencv-thresholding-in-python-with-cv2threshold/
|
||||
https://stackoverflow.com/questions/70300189/how-to-keep-only-black-color-text-in-the-image-using-opencv-python
|
||||
https://stackoverflow.com/questions/50210304/change-the-colors-within-certain-range-to-another-color-using-opencv
|
||||
https://answers.opencv.org/question/231191/detect-all-black-pixels-inside-a-surrounded-closed-white-area/
|
||||
|
||||
OpenCV removing straight lines:
|
||||
https://www.appsloveworld.com/opencv/100/83/remove-straight-lines-from-an-image
|
||||
https://docs.opencv.org/3.4/df/d3d/tutorial_py_inpainting.html
|
||||
https://stackoverflow.com/questions/22081908/how-to-determine-the-width-of-the-lines
|
||||
https://www.google.com/search?q=determine+the+thickness+of+a+line+opencv&rlz=1C1CHBF_enCA1042CA1042&oq=determine+the+thickness+of+a+line+opencv&gs_lcrp=EgZjaHJvbWUyBggAEEUYOdIBCTEzNzY3ajBqMagCALACAA&sourceid=chrome&ie=UTF-8
|
||||
|
||||
numpy array sorting:
|
||||
https://stackoverflow.com/questions/12877764/numpy-sort-by-key-function
|
||||
|
||||
|
||||
Random OCR/OpenCV idea link:
|
||||
https://pyimagesearch.com/2020/09/21/opencv-automatic-license-number-plate-recognition-anpr-with-python/
|
||||
|
||||
|
||||
DBScan from scratch:
|
||||
https://scrunts23.medium.com/dbscan-algorithm-from-scratch-in-python-475b82e0571c
|
||||
https://github.com/scrunts23/CS-Data-Science-Build-Week-1/blob/master/model/dbscan.py
|
||||
|
||||
|
||||
Dewarping:
|
||||
https://www.google.com/search?q=ocr+preprocessing+dewarping+technique&sca_esv=575939874&rlz=1C1CHBF_enCA1042CA1042&ei=fwU3Zc_1DbzG0PEP8JqS2AY&ved=0ahUKEwiPsYL9rI2CAxU8IzQIHXCNBGsQ4dUDCBA&uact=5&oq=ocr+preprocessing+dewarping+technique&gs_lp=Egxnd3Mtd2l6LXNlcnAiJW9jciBwcmVwcm9jZXNzaW5nIGRld2FycGluZyB0ZWNobmlxdWUyBRAhGKABSNQ_UABYuT5wAXgAkAEBmAH0AaABqRuqAQYzMy40LjG4AQPIAQD4AQHCAg4QABiKBRixAxiDARiRAsICCBAAGIoFGJECwgIXEC4YgAQYsQMYgwEYxwEY0QMYqAMY0gPCAg4QLhiABBjUAhioAxikA8ICCxAAGIAEGLEDGIMBwgIREC4YigUYsQMYgwEYpAMYqAPCAgsQABiKBRixAxiDAcICERAuGIoFGLEDGIMBGJsDGKgDwgIXEC4YgAQYsQMYgwEYxwEY0QMY0gMYqAPCAhEQLhiABBixAxiDARidAxioA8ICBRAAGIAEwgIOEC4YgAQYsQMYqAMYngPCAhEQLhiABBjHARivARioAximA8ICFBAuGIAEGLEDGIMBGKgDGKIFGJ0DwgIREC4YgAQYxwEYrwEYpgMYqAPCAiAQLhiABBjHARivARimAxioAxiXBRjcBBjeBBjgBNgBAcICBhAAGBYYHsICBxAAGA0YgATCAgYQABgeGA3CAggQABgFGB4YDcICCBAAGAgYHhgNwgIEECEYFcICBxAhGKABGAriAwQYACBBiAYBugYGCAEQARgU&sclient=gws-wiz-serp
|
||||
https://www.sciencedirect.com/science/article/pii/S1877050914013787?ref=pdf_download&fr=RR-2&rr=81ade81d3b44a22f
|
||||
https://www.sciencedirect.com/science/article/pii/S187705091000373X
|
||||
Picking middle points of characters and fitting them to a curve
|
||||
https://mzucker.github.io/2016/08/15/page-dewarping.html
|
||||
https://github.com/mzucker/page_dewarp/blob/master/page_dewarp.py#L344
|
||||
https://www.researchgate.net/publication/322780192_Robust_Document_Image_Dewarping_Method_Using_Text-Lines_and_Line_Segments
|
||||
https://github.com/taeho-kil/Document-Image-Dewarping
|
||||
|
||||
|
||||
OCR:
|
||||
https://github.com/zacharywhitley/awesome-ocr
|
||||
https://eng-mhasan.medium.com/ocr-with-deep-learning-in-pytorch-b8a481c604fc
|
||||
https://huggingface.co/docs/transformers/model_doc/trocr
|
||||
https://deepayan137.github.io/blog/markdown/2020/08/29/building-ocr.html
|
||||
https://www.width.ai/post/the-best-ways-to-extract-text-from-images-without-tesseract-python
|
||||
https://eng-mhasan.medium.com/ocr-with-deep-learning-in-python-e443970d09e4
|
||||
https://github.com/watersink/Character-Segmentation
|
||||
https://github.com/githubharald/WordDetector/blob/master/word_detector/__init__.py
|
||||
|
||||
|
||||
Line Extraction:
|
||||
https://medium.com/@vatvenger/extracting-lines-from-ocr-a8f410448fc
|
||||
https://stackoverflow.com/questions/34981144/split-text-lines-in-scanned-document
|
||||
|
||||
|
||||
|
||||
CMake and shared/dynamic libraries:
|
||||
https://www.digitalocean.com/community/tutorials/calling-c-functions-from-python
|
||||
https://www.tutorialspoint.com/how-to-call-a-c-function-in-python
|
||||
https://stackoverflow.com/questions/43387112/wrapping-c-code-with-python-manually
|
||||
https://docs.python.org/3/library/ctypes.html
|
||||
https://stackoverflow.com/questions/38661635/ctypes-struct-returned-from-library
|
||||
https://www.youtube.com/watch?v=Slfwk28vhws
|
||||
https://stackoverflow.com/questions/17511496/how-to-create-a-shared-library-with-cmake
|
||||
|
||||
|
||||
C++ templates and stuff:
|
||||
https://stackoverflow.com/questions/115703/storing-c-template-function-definitions-in-a-cpp-file
|
||||
https://stackoverflow.com/questions/44848011/c-limit-template-type-to-numbers
|
||||
https://stackoverflow.com/questions/4021981/use-static-assert-to-check-types-passed-to-macro/60769143#60769143
|
||||
https://softwareengineering.stackexchange.com/questions/333447/template-restrictions-in-c
|
||||
https://stackoverflow.com/questions/10442404/invoke-a-c-class-method-without-a-class-instance
|
||||
https://en.cppreference.com/w/cpp/language/constraints
|
||||
|
||||
|
||||
Models/Ideas:
|
||||
https://huggingface.co/docs/transformers/model_doc/donut
|
||||
https://huggingface.co/blog/document-ai
|
||||
https://huggingface.co/EleutherAI/gpt-neo-125m
|
||||
https://www.width.ai/post/extracting-information-from-unstructured-text-using-algorithms
|
||||
https://towardsdatascience.com/machine-learning-text-processing-1d5a2d638958
|
||||
https://towardsdatascience.com/deep-learning-for-specific-information-extraction-from-unstructured-texts-12c5b9dceada
|
||||
|
||||
|
||||
NER:
|
||||
https://medium.com/mysuperai/what-is-named-entity-recognition-ner-and-how-can-i-use-it-2b68cf6f545d
|
||||
https://medium.com/@shivamcse17818/bert-model-for-text-extraction-with-code-pytorch-91c13ef82e7b
|
||||
https://github.com/dayyass/pytorch-ner
|
||||
https://github.com/senadkurtisi/pytorch-NER/tree/main
|
||||
https://towardsdatascience.com/named-entity-recognition-with-bert-in-pytorch-a454405e0b6a
|
||||
https://www.kaggle.com/code/dianalaveena/ner-using-bert-pytorch/notebook
|
||||
https://wandb.ai/mostafaibrahim17/ml-articles/reports/Named-Entity-Recognition-With-HuggingFace-Using-PyTorch-and-W-B--Vmlldzo0NDgzODA2
|
||||