2023-11-13 23:24:04 -05:00
2 changed files with 255 additions and 32 deletions
--- a/code/autocropper/houghlinedevspace.ipynb
+++ b/code/autocropper/houghlinedevspace.ipynb
@ -2,7 +2,7 @@
 "cells": [
  {
   "cell_type": "code",
-   "execution_count": 136,
+   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
@ -15,7 +15,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 137,
+   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
@ -24,7 +24,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 138,
+   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
@ -99,7 +99,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 139,
+   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
@ -228,7 +228,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 140,
+   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
@ -250,7 +250,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 141,
+   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
@ -270,6 +270,7 @@
    "    kernel5 = cv2.getStructuringElement(cv2.MORPH_RECT, (8, 8))\n",
    "    kernel6 = cv2.getStructuringElement(cv2.MORPH_RECT, (20, 2))\n",
    "    kernel7 = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 8))\n",
+    "    adaptivekernel = None\n",
    "    \n",
    "    # return lab[:,:,2]\n",
    "\n",
@ -283,11 +284,32 @@
    "    # imglist = []\n",
    "\n",
    "    Bthresh = cv2.adaptiveThreshold(currentimgofatype, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 201, 35)\n",
+    "    \n",
+    "    # return Bthresh\n",
+    "\n",
+    "    contours, heirarchy = cv2.findContours(255-Bthresh,cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)\n",
+    "    # imgcopy = cv2.drawContours(imgcopy, contours, -1, color=(0,255,0), thickness=1)\n",
+    "    # return imgcopy\n",
+    "    \n",
+    "    boundingboxes = np.empty((len(contours), 4), dtype=int)\n",
+    "\n",
+    "    for i, contour in enumerate(contours):\n",
+    "        b = cv2.boundingRect(contour)\n",
+    "        boundingboxes[i] = b\n",
+    "    #     imgcopy = cv2.rectangle(imgcopy, (b[0],b[1]), (b[0]+b[2], b[1]+b[3]), 128, thickness=3)\n",
+    "    # return imgcopy\n",
+    "                         \n",
+    "    epsilonvalue = np.median(boundingboxes, axis=0)[3]\n",
+    "    \n",
+    "    adaptivekernel = cv2.getStructuringElement(cv2.MORPH_RECT, (int(epsilonvalue/15), int(epsilonvalue/15)))\n",
+    "    \n",
    "    # imglist.append(Bthresh)\n",
    "    # imglist.append(255-Bthresh)\n",
    "    \n",
    "    morphedBthresh = cv2.morphologyEx(Bthresh, cv2.MORPH_DILATE, kernel3, iterations=2)\n",
+    "    # morphedBthresh = cv2.morphologyEx(Bthresh, cv2.MORPH_DILATE, adaptivekernel, iterations=2)\n",
    "    goodmorphBthresh = cv2.morphologyEx(Bthresh, cv2.MORPH_ERODE, kernel4, iterations=2)\n",
+    "    # goodmorphBthresh = cv2.morphologyEx(Bthresh, cv2.MORPH_ERODE, adaptivekernel, iterations=3)\n",
    "    # morphedBthresh = cv2.morphologyEx(morphedBthresh, cv2.MORPH_DILATE, kernel7)\n",
    "    # imglist.append(morphedBthresh)\n",
    "    # imglist.append(goodmorphBthresh)\n",
@ -319,7 +341,9 @@
    "            mask = cv2.rectangle(mask, (b[0],b[1]), (b[0]+b[2], b[1]+b[3]), 0, thickness=cv2.FILLED)\n",
    "            \n",
    "    bingus = cv2.bitwise_or(goodmorphBthresh, mask)\n",
+    "    # bingus = cv2.morphologyEx(bingus, cv2.MORPH_CLOSE, adaptivekernel)\n",
    "    # imglist.append(bingus)\n",
+    "    # return imglist\n",
    "    return bingus\n",
    "        \n",
    "    # imglist.append(image)\n",
@ -427,7 +451,103 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 142,
+   "execution_count": 23,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# function to correct the median-angle to give it to the cv2.warpAffine() function\n",
+    "def anglecorrector(angle):\n",
+    "        if 0 <= angle <= 90:\n",
+    "            corrected_angle = angle - 90\n",
+    "        elif -45 <= angle < 0:\n",
+    "            corrected_angle = angle - 90\n",
+    "        elif -90 <= angle < -45:\n",
+    "            corrected_angle = 90 + angle\n",
+    "        return corrected_angle"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def padWithColour(img, hpadding=0, vpadding=0, fill=(0,0,0)):\n",
+    "    borderType = cv2.BORDER_CONSTANT\n",
+    "    out = cv2.copyMakeBorder(img, vpadding, vpadding, hpadding, hpadding, borderType, None, fill)\n",
+    "    return out\n",
+    "\n",
+    "def mergecontours(contours):\n",
+    "    cont = np.vstack(contours)\n",
+    "    finalcontour = cv2.convexHull(cont)\n",
+    "    return finalcontour"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def getSkewAngle(cvImage) -> float:\n",
+    "    # Prep image, copy, convert to gray scale, blur, and threshold\n",
+    "    newImage = padWithColour(cvImage, hpadding=50, vpadding=50, fill=(255,255,255))\n",
+    "    # return newImage\n",
+    "    gray = cv2.cvtColor(newImage, cv2.COLOR_BGR2GRAY)\n",
+    "    blur = cv2.GaussianBlur(gray, (9, 9), 0)\n",
+    "    thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]\n",
+    "\n",
+    "    # Apply dilate to merge text into meaningful lines/paragraphs.\n",
+    "    # Use larger kernel on X axis to merge characters into single line, cancelling out any spaces.\n",
+    "    # But use smaller kernel on Y axis to separate between different blocks of text\n",
+    "    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (10, 5))\n",
+    "    dilate = cv2.dilate(thresh, kernel, iterations=5)\n",
+    "    # return dilate\n",
+    "\n",
+    "    # Find all contours\n",
+    "    contours, hierarchy = cv2.findContours(dilate, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)\n",
+    "    contours = sorted(contours, key = cv2.contourArea, reverse = True)\n",
+    "\n",
+    "    # Take the largest contour (only referenced by the commented-out lines below; the min area box is fitted to the merged contour)\n",
+    "    largestContour = contours[0]\n",
+    "\n",
+    "    mergedcontour = mergecontours(contours)\n",
+    "\n",
+    "    # return cv2.drawContours(newImage, [mergedcontour], -1, (0,255,0), thickness=3)\n",
+    "    minAreaRect = cv2.minAreaRect(mergedcontour)\n",
+    "    minAreaRect = list(minAreaRect)\n",
+    "    minAreaRect[1] = list(minAreaRect[1])\n",
+    "    if (minAreaRect[1][0] > minAreaRect[1][1]):\n",
+    "        temp = minAreaRect[1][0]\n",
+    "        minAreaRect[1][0] = minAreaRect[1][1]\n",
+    "        minAreaRect[1][1] = temp\n",
+    "        minAreaRect[2] -= 90\n",
+    "    # return cv2.drawContours(newImage, [largestContour], -1, (0,255,0), thickness=3)\n",
+    "    # minAreaRect = cv2.minAreaRect(largestContour)\n",
+    "\n",
+    "    box = cv2.boxPoints(minAreaRect)\n",
+    "    box = np.intp(box)       \n",
+    "    newImage = cv2.drawContours(newImage, [box], -1, (0,255,0), thickness=3)\n",
+    "    # return newImage\n",
+    "\n",
+    "    # Determine the angle. Convert it to the value that was originally used to obtain skewed image\n",
+    "    angle = minAreaRect[-1]\n",
+    "    # print(angle)\n",
+    "    angle = anglecorrector(angle)+90\n",
+    "    # print(angle)\n",
+    "    return angle\n",
+    "\n",
+    "def minboxdeskew(img, fill=(0,0,0)):\n",
+    "    colourimg = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)\n",
+    "    angle = getSkewAngle(colourimg)\n",
+    "    padimg = padWithColour(img, hpadding=50, vpadding=50, fill=fill)\n",
+    "    rotated = mf.rotate(padimg, angle, fill=fill)\n",
+    "    return rotated"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
@ -435,11 +555,11 @@
    "    whitedbackground = mf.whiteoutbackground(image)\n",
    "    # return whitedbackground\n",
    "    \n",
-    "    textrefined = mf.textClarifying(whitedbackground)\n",
+    "    textrefined = textClarifying(whitedbackground)\n",
    "    # return textrefined\n",
    "    #maybe now is when I put in the line removing function\n",
    "    \n",
-    "    lineout = removeLinesFromText(textrefined)\n",
+    "    lineout = mf.removeLinesFromText(textrefined)\n",
    "    \n",
    "    return lineout\n",
    "    # implement a function that's called refine text\n",
@ -453,9 +573,13 @@
    "    # return postprocessed\n",
    "    postprocessed = mf.croptoblack(postprocessed)\n",
    "    \n",
-    "    postprocessed = cv2.cvtColor(postprocessed, cv2.COLOR_GRAY2BGR)\n",
+    "    # postprocessed = cv2.cvtColor(postprocessed, cv2.COLOR_GRAY2BGR)\n",
+    "    # return postprocessed\n",
    "    \n",
-    "    final = mf.externaldeskew(postprocessed, fill=(255,255,255))\n",
+    "    # final = mf.externaldeskew(postprocessed, fill=(255,255,255))\n",
+    "    final = minboxdeskew(postprocessed, fill=(255,255,255))\n",
+    "    \n",
+    "    # final = mf.croptoblack(final)\n",
    "    \n",
    "    # cv2.imshow(\"postprocessed\", mf.ResizeWithAspectRatio(postprocessed, 1000))\n",
    "    # cv2.imshow(\"final\", mf.ResizeWithAspectRatio(final, 1000))\n",
@ -467,19 +591,19 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 143,
+   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "# prepped, scaler, hp, vp = mf.squareandthenresize(img, fill=255, width=1000, returnscalerinfo=True)\n",
-    "outs = houghlineprocessing(img)\n",
+    "outs = mf.houghlineprocessing(img)\n",
    "# print(croprect)\n",
    "#need to fix premorphCrop. it removes too much"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 144,
+   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
@ -493,7 +617,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 145,
+   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
@ -505,7 +629,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 146,
+   "execution_count": 35,
   "metadata": {},
   "outputs": [],
   "source": [
--- a/code/autocropper/myfunctions.py
+++ b/code/autocropper/myfunctions.py
@ -81,6 +81,28 @@ def colourscaler(n, min, max):
    diff = abs(max - min)
    return clip((temp/diff)*255, 0, 255)

+def padWithColour(img, hpadding=0, vpadding=0, fill=(0,0,0)):
+    borderType = cv2.BORDER_CONSTANT
+    out = cv2.copyMakeBorder(img, vpadding, vpadding, hpadding, hpadding, borderType, None, fill)
+    return out
+
+def mergecontours(contours):
+    cont = np.vstack(contours)
+    finalcontour = cv2.convexHull(cont)
+    return finalcontour
+
+
+# function to correct the median-angle to give it to the cv2.warpAffine() function
+# specifically, when getting the angle from a minAreaRect rectangle
+def anglecorrector(angle):
+        if 0 <= angle <= 90:
+            corrected_angle = angle - 90
+        elif -45 <= angle < 0:
+            corrected_angle = angle - 90
+        elif -90 <= angle < -45:
+            corrected_angle = 90 + angle
+        return corrected_angle
+
 tensorize = v2.Compose([v2.ToImageTensor(), v2.ConvertImageDtype()]) ## for converting an image (usually PIL image) to a pytorch tensor

 ## ------------------------------for selective segmentation search crop------------------------------
@ -649,23 +671,19 @@ def textClarifying(image):
    
    ## Try using the LAB colour space???
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
-    # autothreshold = np.clip(np.mean(gray)/1.2, 0, 255)
+    autothreshold = np.clip(np.mean(gray)/1.2, 0, 255)
    
    lab = cv2.cvtColor(image, cv2.COLOR_BGR2LAB)
-    # hls = cv2.cvtColor(image, cv2.COLOR_BGR2HLS)
+    hls = cv2.cvtColor(image, cv2.COLOR_BGR2HLS)
    
-    # kernel1 = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
-    # kernel2 = cv2.getStructuringElement(cv2.MORPH_RECT, (4, 4))
+    kernel1 = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
+    kernel2 = cv2.getStructuringElement(cv2.MORPH_RECT, (4, 4))
    kernel3 = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    kernel4 = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
-    # kernel5 = cv2.getStructuringElement(cv2.MORPH_RECT, (8, 8))
+    kernel5 = cv2.getStructuringElement(cv2.MORPH_RECT, (8, 8))
    kernel6 = cv2.getStructuringElement(cv2.MORPH_RECT, (20, 2))
    kernel7 = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 8))
-    
-    #This will probably need fixing later because it should be in proportion to the character/word height vs actual picture dimension
-    kdim = 5-(max(gray.shape[0], gray.shape[1])//1000)
-    print(kdim)
-    kernel8 = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (kdim, kdim))
+    adaptivekernel = None
    
    # return lab[:,:,2]

@ -679,11 +697,32 @@ def textClarifying(image):
    # imglist = []

    Bthresh = cv2.adaptiveThreshold(currentimgofatype, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 201, 35)
+    
+    # return Bthresh
+
+    contours, heirarchy = cv2.findContours(255-Bthresh,cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+    # imgcopy = cv2.drawContours(imgcopy, contours, -1, color=(0,255,0), thickness=1)
+    # return imgcopy
+    
+    boundingboxes = np.empty((len(contours), 4), dtype=int)
+
+    for i, contour in enumerate(contours):
+        b = cv2.boundingRect(contour)
+        boundingboxes[i] = b
+    #     imgcopy = cv2.rectangle(imgcopy, (b[0],b[1]), (b[0]+b[2], b[1]+b[3]), 128, thickness=3)
+    # return imgcopy
+                         
+    epsilonvalue = np.median(boundingboxes, axis=0)[3]
+    
+    adaptivekernel = cv2.getStructuringElement(cv2.MORPH_RECT, (int(epsilonvalue/15), int(epsilonvalue/15)))
+    
    # imglist.append(Bthresh)
    # imglist.append(255-Bthresh)
    
    morphedBthresh = cv2.morphologyEx(Bthresh, cv2.MORPH_DILATE, kernel3, iterations=2)
+    # morphedBthresh = cv2.morphologyEx(Bthresh, cv2.MORPH_DILATE, adaptivekernel, iterations=2)
    goodmorphBthresh = cv2.morphologyEx(Bthresh, cv2.MORPH_ERODE, kernel4, iterations=2)
+    # goodmorphBthresh = cv2.morphologyEx(Bthresh, cv2.MORPH_ERODE, adaptivekernel, iterations=3)
    # morphedBthresh = cv2.morphologyEx(morphedBthresh, cv2.MORPH_DILATE, kernel7)
    # imglist.append(morphedBthresh)
    # imglist.append(goodmorphBthresh)
@ -698,7 +737,7 @@ def textClarifying(image):
    
    
    # imglist.append(morphedthresh)
-    # anded1 = cv2.bitwise_and(255-Bthresh, morphedthresh)
+    anded1 = cv2.bitwise_and(255-Bthresh, morphedthresh)
    anded2 = cv2.bitwise_and(morphedBthresh, 255-morphedthresh)
    # imglist.append(anded1)
    # imglist.append(anded2)
@ -715,8 +754,9 @@ def textClarifying(image):
            mask = cv2.rectangle(mask, (b[0],b[1]), (b[0]+b[2], b[1]+b[3]), 0, thickness=cv2.FILLED)
            
    bingus = cv2.bitwise_or(goodmorphBthresh, mask)
-    bingus = cv2.morphologyEx(bingus, cv2.MORPH_ERODE, kernel8)
-
+    # bingus = cv2.morphologyEx(bingus, cv2.MORPH_CLOSE, adaptivekernel)
+    # imglist.append(bingus)
+    # return imglist
    return bingus


@ -942,7 +982,60 @@ def externaldeskew(image, fill=(0,0,0), alreadygray=False):
    rotated = rotate(image, angle, fill=fill)
    return rotated

+def getreceipttextAngle(cvImage) -> float:
+    # Prep image, copy, convert to gray scale, blur, and threshold
+    newImage = padWithColour(cvImage, hpadding=50, vpadding=50, fill=(255,255,255))
+    # return newImage
+    gray = cv2.cvtColor(newImage, cv2.COLOR_BGR2GRAY)
+    blur = cv2.GaussianBlur(gray, (9, 9), 0)
+    thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

+    # Apply dilate to merge text into meaningful lines/paragraphs.
+    # Use larger kernel on X axis to merge characters into single line, cancelling out any spaces.
+    # But use smaller kernel on Y axis to separate between different blocks of text
+    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (10, 5))
+    dilate = cv2.dilate(thresh, kernel, iterations=5)
+    # return dilate
+
+    # Find all contours
+    contours, hierarchy = cv2.findContours(dilate, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
+    contours = sorted(contours, key = cv2.contourArea, reverse = True)
+
+    # Take the largest contour (only referenced by the commented-out lines below; the min area box is fitted to the merged contour)
+    largestContour = contours[0]
+
+    mergedcontour = mergecontours(contours)
+
+    # return cv2.drawContours(newImage, [mergedcontour], -1, (0,255,0), thickness=3)
+    minAreaRect = cv2.minAreaRect(mergedcontour)
+    minAreaRect = list(minAreaRect)
+    minAreaRect[1] = list(minAreaRect[1])
+    if (minAreaRect[1][0] > minAreaRect[1][1]):
+        temp = minAreaRect[1][0]
+        minAreaRect[1][0] = minAreaRect[1][1]
+        minAreaRect[1][1] = temp
+        minAreaRect[2] -= 90
+    # return cv2.drawContours(newImage, [largestContour], -1, (0,255,0), thickness=3)
+    # minAreaRect = cv2.minAreaRect(largestContour)
+
+    box = cv2.boxPoints(minAreaRect)
+    box = np.intp(box)       
+    newImage = cv2.drawContours(newImage, [box], -1, (0,255,0), thickness=3)
+    # return newImage
+
+    # Determine the angle. Convert it to the value that was originally used to obtain skewed image
+    angle = minAreaRect[-1]
+    # print(angle)
+    angle = anglecorrector(angle)+90
+    # print(angle)
+    return angle
+
+def receipttextdeskew(img, fill=(0,0,0)):
+    colourimg = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
+    angle = getreceipttextAngle(colourimg)
+    padimg = padWithColour(img, hpadding=50, vpadding=50, fill=fill)
+    rotated = rotate(padimg, angle, fill=fill)
+    return rotated

 ## ------------------------------Full deskewing and cropping------------------------------
 def houghlineprocessing(image):
@ -953,10 +1046,16 @@ def houghlineprocessing(image):
    postprocessed = cropclarifying(croppedanddeskewed)
    # return postprocessed
    postprocessed = croptoblack(postprocessed)
+    
+    # postprocessed = cv2.cvtColor(postprocessed, cv2.COLOR_GRAY2BGR)
+    # return postprocessed
+    
+    # final = externaldeskew(postprocessed, fill=(255,255,255))
+    final = receipttextdeskew(postprocessed, fill=(255,255,255))
+    
+    final = cv2.cvtColor(final, cv2.COLOR_GRAY2BGR)

-    postprocessed = cv2.cvtColor(postprocessed, cv2.COLOR_GRAY2BGR)
-
-    final = externaldeskew(postprocessed, fill=(255,255,255))
+    # final = mf.croptoblack(final)

    # cv2.imshow("postprocessed", mf.ResizeWithAspectRatio(postprocessed, 1000))
    # cv2.imshow("final", mf.ResizeWithAspectRatio(final, 1000))