Merge pull request 'Checkpoint in autocropping/binarization' (#26) from autocropper into main

Reviewed-on: #26
2023-12-05 17:06:10 -05:00 · 2023-12-05 17:06:10 -05:00 · 233c84e61e
commit 233c84e61e
parent a46015c778 839d987055
2 changed files with 794 additions and 276 deletions
--- a/code/autocropper/houghlinedevspace.ipynb
+++ b/code/autocropper/houghlinedevspace.ipynb
@ -2,9 +2,20 @@
 "cells": [
  {
   "cell_type": "code",
-   "execution_count": 2312,
+   "execution_count": 1,
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/usr/local/lib/python3.10/dist-packages/torchvision/datapoints/__init__.py:12: UserWarning: The torchvision.datapoints and torchvision.transforms.v2 namespaces are still Beta. While we do not expect major breaking changes, some APIs may still change according to user feedback. Please submit any feedback you may have in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check out https://github.com/pytorch/vision/issues/7319 to learn more about the APIs that we suspect might involve future changes. You can silence this warning by calling torchvision.disable_beta_transforms_warning().\n",
+      "  warnings.warn(_BETA_TRANSFORMS_WARNING)\n",
+      "/usr/local/lib/python3.10/dist-packages/torchvision/transforms/v2/__init__.py:54: UserWarning: The torchvision.datapoints and torchvision.transforms.v2 namespaces are still Beta. While we do not expect major breaking changes, some APIs may still change according to user feedback. Please submit any feedback you may have in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check out https://github.com/pytorch/vision/issues/7319 to learn more about the APIs that we suspect might involve future changes. You can silence this warning by calling torchvision.disable_beta_transforms_warning().\n",
+      "  warnings.warn(_BETA_TRANSFORMS_WARNING)\n"
+     ]
+    }
+   ],
   "source": [
    "import cv2\n",
    "import myfunctions as mf\n",
@ -15,200 +26,47 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 2313,
+   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
-    "# img = cv2.imread('/mnt/dataset/baseimages/1.jpg')\n",
-    "img = cv2.imread('/mnt/code/autocropper/test_images/IMG_7594.jpg')"
+    "import os\n",
+    "import pathlib\n",
+    "import time\n",
+    "\n",
+    "def removeextensionandnumeric(filename):\n",
+    "    suffix = pathlib.Path(filename).suffix\n",
+    "    num = filename[:-len(suffix)]\n",
+    "    numint = int(num)\n",
+    "    return numint\n",
+    "    \n",
+    "\n",
+    "def testondataset(pathtodataset, function):\n",
+    "    imagefileextensions = [\".jpg\", \".png\"]\n",
+    "    filenames = next(os.walk(pathtodataset), (None, None, []))[2]\n",
+    "    \n",
+    "    filenames.sort(key=removeextensionandnumeric)\n",
+    "    # print(filenames)\n",
+    "    outs = []\n",
+    "    tdiffs = []\n",
+    "    for filename in filenames:\n",
+    "        suffix = pathlib.Path(filename).suffix\n",
+    "        if (suffix not in imagefileextensions):\n",
+    "            print(\"Not a valid image \"+filename)\n",
+    "            continue\n",
+    "        img = cv2.imread(pathtodataset+filename)\n",
+    "        t1 = time.time()\n",
+    "        outs.append(function(img))\n",
+    "        tdiffs.append(time.time() - t1)\n",
+    "    tdiffs = np.array(tdiffs)\n",
+    "    print(\"average time: \" + str(np.mean(tdiffs))+\"(s)\")\n",
+    "    return outs\n",
+    "    "
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 2314,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def whiteoutbackground(image):\n",
-    "    imagecpy = image.copy()\n",
-    "    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)\n",
-    "    \n",
-    "    # blur = cv2.blur(gray, (7,7))\n",
-    "    \n",
-    "    # window = 51\n",
-    "    window = min(gray.shape[1], gray.shape[0])//20\n",
-    "    if window % 2 == 0:\n",
-    "        window += 1\n",
-    "    thresh1 = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, window, 5)\n",
-    "    thresh2 = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU)[1]\n",
-    "    thresh = cv2.bitwise_and(thresh1, thresh2)\n",
-    "    # return thresh\n",
-    "\n",
-    "    # dim = int(min(thresh.shape[0], thresh.shape[1])/400)\n",
-    "    dim = 3\n",
-    "    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (dim, dim))\n",
-    "    morphedthresh = cv2.morphologyEx(thresh, cv2.MORPH_ERODE, kernel)\n",
-    "    # return morphedthresh\n",
-    "    \n",
-    "    \n",
-    "    \n",
-    "    # contours1, heirarchy = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)\n",
-    "    contours2, heirarchy = cv2.findContours(morphedthresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)\n",
-    "    \n",
-    "    # biggestcontour1 = max(contours1, key=cv2.contourArea)\n",
-    "    biggestcontour2 = max(contours2, key=cv2.contourArea)\n",
-    "    # imagecpy = cv2.drawContours(imagecpy, [biggestcontour1], -1, (0,255,0), thickness=3)\n",
-    "    # imagecpy = cv2.drawContours(imagecpy, [biggestcontour2], -1, (0,0,255), thickness=3)\n",
-    "    # return imagecpy\n",
-    "    \n",
-    "    blank = np.full(thresh.shape, 255, dtype=np.uint8)\n",
-    "    mask = blank.copy()\n",
-    "    # mask = cv2.drawContours(mask, [biggestcontour1], -1, (0,0,0), thickness=cv2.FILLED)\n",
-    "    mask = cv2.drawContours(mask, [biggestcontour2], -1, (0,0,0), thickness=cv2.FILLED)\n",
-    "\n",
-    "    # return mask\n",
-    "\n",
-    "    invertmask = 255 - mask\n",
-    "    \n",
-    "    \n",
-    "    dim = int(min(invertmask.shape[0], invertmask.shape[1])/200)\n",
-    "    # # dim = 21\n",
-    "    # print(dim)\n",
-    "    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (dim, dim))\n",
-    "    # invertmask = cv2.morphologyEx(invertmask, cv2.MORPH_DILATE, kernel)\n",
-    "    mask = 255 - cv2.morphologyEx(invertmask, cv2.MORPH_ERODE, kernel, iterations=1)\n",
-    "    # return mask\n",
-    "    \n",
-    "    mask = cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR)\n",
-    "    whitedbackground = cv2.bitwise_or(image, mask)\n",
-    "    # return whitedbackground\n",
-    "    \n",
-    "    gray2 = cv2.cvtColor(whitedbackground, cv2.COLOR_BGR2GRAY)\n",
-    "\n",
-    "    canny = cv2.Canny(gray2, 0, 500, None, 3)\n",
-    "    \n",
-    "    maskgray = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)\n",
-    "    \n",
-    "    \n",
-    "\n",
-    "    invert = 255-maskgray\n",
-    "    # dim = 21\n",
-    "    # print(dim)\n",
-    "    dim = int(min(maskgray.shape[0], maskgray.shape[1])/200)\n",
-    "    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (dim, dim))\n",
-    "    morphedmask = cv2.morphologyEx(invert, cv2.MORPH_ERODE, kernel)\n",
-    "    dim = int(min(maskgray.shape[0], maskgray.shape[1])/50)\n",
-    "    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (dim, dim))\n",
-    "    morphedmask = cv2.morphologyEx(morphedmask, cv2.MORPH_OPEN, kernel, iterations=7)\n",
-    "    # return 255 - morphedmask\n",
-    "    morphedmask = 255 - morphedmask\n",
-    "\n",
-    "    finalmask = cv2.bitwise_or(morphedmask, maskgray)\n",
-    "    # return finalmask\n",
-    "    \n",
-    "    # edgecontours, _ = cv2.findContours(255-morphedmask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)\n",
-    "    finalmaskbgr = cv2.cvtColor(finalmask, cv2.COLOR_GRAY2BGR)\n",
-    "    # return finalmaskbgr\n",
-    "\n",
-    "    whitedbackground = cv2.bitwise_or(whitedbackground, finalmaskbgr)\n",
-    "    # return whitedbackground\n",
-    "    \n",
-    "    test = cv2.inpaint(whitedbackground, finalmask, 3, cv2.INPAINT_TELEA)\n",
-    "    return test"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2315,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def cropclarifying(image):\n",
-    "    whitedbackground = whiteoutbackground(image)\n",
-    "    # return whitedbackground\n",
-    "\n",
-    "    textrefined = mf.textClarifying(whitedbackground)\n",
-    "    # return textrefined\n",
-    "    #maybe now is when I put in the line removing function\n",
-    "\n",
-    "    lineout = mf.removeLinesFromText(textrefined)\n",
-    "\n",
-    "    return lineout\n",
-    "    # implement a function that's called refine text"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2316,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def houghlineprocessing(image):\n",
-    "    croppedanddeskewed, _ = mf.houghlinedeskewandcrop(image)\n",
-    "    \n",
-    "    postprocessed = cropclarifying(croppedanddeskewed)\n",
-    "    # return postprocessed\n",
-    "    postprocessed = mf.croptoblack(postprocessed)\n",
-    "    \n",
-    "    # postprocessed = cv2.cvtColor(postprocessed, cv2.COLOR_GRAY2BGR)\n",
-    "    # return postprocessed\n",
-    "    \n",
-    "    # final = mf.externaldeskew(postprocessed, fill=(255,255,255))\n",
-    "    final = mf.receipttextdeskew(postprocessed, fill=(255,255,255))\n",
-    "    \n",
-    "    # final = mf.croptoblack(final)\n",
-    "    \n",
-    "    # cv2.imshow(\"postprocessed\", mf.ResizeWithAspectRatio(postprocessed, 1000))\n",
-    "    # cv2.imshow(\"final\", mf.ResizeWithAspectRatio(final, 1000))\n",
-    "    # cv2.waitKey(0)\n",
-    "    # cv2.destroyAllWindows()\n",
-    "    \n",
-    "    return final"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2317,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# prepped, scaler, hp, vp = mf.squareandthenresize(img, fill=255, width=1000, returnscalerinfo=True)\n",
-    "outs = mf.houghlineprocessing(img)\n",
-    "# outs = houghlinedeskewandcrop(img)\n",
-    "# outs = outs[0]\n",
-    "# print(croprect)\n",
-    "#need to fix premorphCrop. it removes too much"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2318,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# shrunk, scaler, hp, vp = mf.squareandthenresize(img, fill=255, width=1000, returnscalerinfo=True)\n",
-    "# shrunk1, croprect = mf.premorphCrop(shrunk)\n",
-    "# print(croprect)\n",
-    "# print(int(30*4.032 - 0))\n",
-    "# # temp = img[100:, :, :]\n",
-    "# temp = shrunk[croprect[1]:croprect[1]+croprect[3], croprect[0]:croprect[0]+croprect[2], :]\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2319,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# cv2.imshow(\"temp\", mf.ResizeWithAspectRatio(out, height=1000))\n",
-    "# # cv2.imshow(\"shrunk1\", mf.ResizeWithAspectRatio(shrunk1, height=1000))\n",
-    "# cv2.waitKey(0)\n",
-    "# cv2.destroyAllWindows()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2320,
+   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
@ -230,16 +88,524 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 2321,
+   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
-    "showimgs(outs)"
+    "def writeimgs(directorypath, imgs):\n",
+    "    if (isinstance(imgs, np.ndarray)):\n",
+    "        cv2.imwrite(directorypath+\"test.png\", imgs)\n",
+    "    else:\n",
+    "        for i, out in enumerate(imgs):\n",
+    "            cv2.imwrite(directorypath+\"test\"+str(i)+\".png\", out)"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 2322,
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "img = cv2.imread('/mnt/dataset/baseimages/12.jpg')\n",
+    "# img = cv2.imread('/mnt/code/autocropper/test_images/IMG_7605.jpg')\n",
+    "testall = False"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## NEED TO FIX THE EARLIER PARTS SO THAT IT DOESN'T HAVE THOSE BLACK SECTIONS AFTER THE ROTATION\n",
+    "\n",
+    "\n",
+    "def whiteoutbackground(image):\n",
+    "    ogshape = image.shape\n",
+    "    shrunkdim=1000\n",
+    "    if (image.shape[1] > image.shape[0]):\n",
+    "        shrunkimg, scaler = mf.ResizeWithAspectRatio(image, width=shrunkdim, retscale=True)\n",
+    "    else:\n",
+    "        shrunkimg, scaler = mf.ResizeWithAspectRatio(image, height=shrunkdim, retscale=True)\n",
+    "        \n",
+    "    mainimage = shrunkimg\n",
+    "    \n",
+    "    sdim = int(min(mainimage.shape[0], mainimage.shape[1])/5)\n",
+    "    srkernel = cv2.getStructuringElement(cv2.MORPH_RECT, (sdim, sdim))\n",
+    "    skernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (sdim, sdim))\n",
+    "    \n",
+    "    \n",
+    "    lab = cv2.cvtColor(mainimage, cv2.COLOR_BGR2LAB)\n",
+    "    \n",
+    "    imglist = []\n",
+    "    # imglist.append(mainimage)\n",
+    "    \n",
+    "    labl = lab[:,:,0]\n",
+    "    # imglist.append(labl)\n",
+    "    # imglist.append(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY))\n",
+    "    laba = lab[:,:,1]\n",
+    "    # imglist.append(laba)\n",
+    "    labb = lab[:,:,2]\n",
+    "    # imglist.append(labb)\n",
+    "    \n",
+    "    \n",
+    "    # canny = cv2.Canny(labl, 0, 500)\n",
+    "    threshl = cv2.threshold(labl, 0, 255, cv2.THRESH_OTSU)[1]\n",
+    "    # return threshl\n",
+    "    \n",
+    "    \n",
+    "    dim = int(min(mainimage.shape[0], mainimage.shape[1])/100)\n",
+    "    # dim = 2\n",
+    "    # dim = dotsize\n",
+    "    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (dim, dim))\n",
+    "    kernelell = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (dim, dim))\n",
+    "    \n",
+    "    paddedl = mf.padWithColour(threshl, sdim*2, sdim*2, fill=0)\n",
+    "    # return paddedl\n",
+    "    \n",
+    "    \n",
+    "    # morphedl = 255-cv2.morphologyEx(255-threshl, cv2.MORPH_OPEN, kernel, iterations=3)\n",
+    "    morphedl = paddedl\n",
+    "    # morphedl = cv2.morphologyEx(morphedl, cv2.MORPH_ERODE, kernel, iterations=1)\n",
+    "    morphed1l = cv2.morphologyEx(morphedl, cv2.MORPH_ERODE, kernelell, iterations=1)\n",
+    "\n",
+    "    # return morphedl\n",
+    "    \n",
+    "    contours, heirarchy = cv2.findContours(morphed1l, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)\n",
+    "    biggestcontour = max(contours, key=cv2.contourArea)\n",
+    "    \n",
+    "    \n",
+    "    blank = np.full(labl.shape, 255, dtype=np.uint8)\n",
+    "    mask1 = blank.copy()\n",
+    "    mask1 = mf.padWithColour(mask1, sdim*2, sdim*2, fill=255)\n",
+    "    mask1 = cv2.drawContours(mask1, [biggestcontour], -1, 0, thickness=cv2.FILLED)\n",
+    "    \n",
+    "    \n",
+    "    mask1 = cv2.morphologyEx(mask1, cv2.MORPH_DILATE, kernelell, iterations=2)\n",
+    "    \n",
+    "    \n",
+    "    # mask1 = mask1[(sdim*2):-(sdim*2), (sdim*2):-(sdim*2)]\n",
+    "    # return mask1\n",
+    "    \n",
+    "    # morphed2l = mf.padWithColour(morphedl, sdim*2, sdim*2, fill=255)\n",
+    "    morphed2l = cv2.morphologyEx(morphedl, cv2.MORPH_OPEN, kernel, iterations=1)\n",
+    "    # morphed2l = morphed2l[(sdim*2):-(sdim*2), (sdim*2):-(sdim*2)]\n",
+    "    \n",
+    "    # return morphed2l\n",
+    "    # print(mask1.shape)\n",
+    "    # print(morphed2l.shape)\n",
+    "    morphed2l = cv2.bitwise_or(morphed2l, 255-mask1)\n",
+    "    # return morphed2l\n",
+    "    \n",
+    "    morphed2l = morphed2l[(sdim*2):-(sdim*2), (sdim*2):-(sdim*2)]\n",
+    "    temp_final = cv2.bitwise_or(threshl, 255-morphed2l)\n",
+    "    return temp_final\n",
+    "    \n",
+    "    canny = cv2.Canny(morphed2l, 0, 500)\n",
+    "    # return canny\n",
+    "\n",
+    "    vminlength = mainimage.shape[0]//10\n",
+    "    vmaxgap = mainimage.shape[0]//50\n",
+    "    vlinesP = cv2.HoughLinesP(canny, 1, np.pi / 180, 10, None, vminlength, vmaxgap)\n",
+    "    \n",
+    "    hminlength = mainimage.shape[1]//15\n",
+    "    hmaxgap = mainimage.shape[1]//40\n",
+    "    hlinesP = cv2.HoughLinesP(canny, 1, np.pi / 180, 10, None, hminlength, hmaxgap)\n",
+    "    # print(linesP)\n",
+    "    \n",
+    "    vmarginlines = mf.WithinXDegrees(vlinesP, 15)\n",
+    "    hmarginlines = mf.WithinXDegrees(hlinesP, 15, baseangle=90)\n",
+    "    \n",
+    "    marginlines = np.append(vmarginlines, hmarginlines, axis=0)\n",
+    "    # marginlines = marginlines.astype(int)\n",
+    "    # # print(marginlines)\n",
+    "    # reshaped = np.reshape(marginlines, (-1,1, 2))\n",
+    "    # # reshaped = cv2.convexHull(reshaped)\n",
+    "    # # print(reshaped)\n",
+    "    \n",
+    "    \n",
+    "    \n",
+    "    colourdst = cv2.cvtColor(morphedl, cv2.COLOR_GRAY2BGR)\n",
+    "    # out = cv2.drawContours(colourdst, [reshaped], -1, (0,255,0), thickness=3)\n",
+    "    # return out\n",
+    "    \n",
+    "    \n",
+    "    #### NEW IDEA: MERGE THE WHITEOUT-BACKGROUND AND TEXT-CLARIFICATION STEPS. THE OTSU THRESHOLD SEEMS TO WORK PRETTY WELL, SO IF I JUST WHITE OUT THE OUTER AREA (ACTUALLY WHITE)\n",
+    "    # THEN ONLY THE TEXT IS LEFT\n",
+    "    \n",
+    "\n",
+    "    if marginlines is not None:\n",
+    "        for l in marginlines:\n",
+    "            cv2.line(colourdst, (int(l[0]), int(l[1])), (int(l[2]), int(l[3])), (0,0,255), 3, cv2.LINE_AA)\n",
+    "    return colourdst\n",
+    "\n",
+    "\n",
+    "\n",
+    "\n",
+    "\n",
+    "    ## IDEA:\n",
+    "    # MASK OUT THE WORDS USING THE MASKS BUILT BY THE CODE BELOW. THEN, WHEN CANNY IS RUN ON THE RESULT, IT SHOULDN'T HAVE A WHOLE BUNCH OF CLUTTER IN THE CENTER. STILL NEED TO FIGURE OUT HOW TO LINK THE HOUGH LINES AROUND THE RECEIPT\n",
+    "\n",
+    "\n",
+    "\n",
+    "\n",
+    "\n",
+    "    # morphedl = 255-cv2.morphologyEx(255-threshl, cv2.MORPH_OPEN, kernel, iterations=3)\n",
+    "    morphedl = paddedl\n",
+    "    morphedl = cv2.morphologyEx(morphedl, cv2.MORPH_ERODE, kernel, iterations=1)\n",
+    "    morphedl = cv2.morphologyEx(morphedl, cv2.MORPH_ERODE, kernelell, iterations=1)\n",
+    "\n",
+    "    # return morphedl\n",
+    "    \n",
+    "    contours, heirarchy = cv2.findContours(morphedl, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)\n",
+    "    # print(contours[0].shape)\n",
+    "    print(contours[0])\n",
+    "    biggestcontour = max(contours, key=cv2.contourArea)\n",
+    "    return canny\n",
+    "    \n",
+    "    \n",
+    "    blank = np.full(labl.shape, 255, dtype=np.uint8)\n",
+    "    mask1 = blank.copy()\n",
+    "    mask1 = mf.padWithColour(mask1, sdim*2, sdim*2, fill=255)\n",
+    "    mask1 = cv2.drawContours(mask1, [biggestcontour], -1, 0, thickness=cv2.FILLED)\n",
+    "    \n",
+    "    \n",
+    "    mask1 = mask1[(sdim*2):-(sdim*2), (sdim*2):-(sdim*2)]\n",
+    "    \n",
+    "    \n",
+    "    # resizemask = cv2.resize(mask1, (ogshape[1], ogshape[0]))\n",
+    "    # return resizemask\n",
+    "    maskc = cv2.cvtColor(mask1, cv2.COLOR_GRAY2BGR)\n",
+    "    # print(maskc.shape)\n",
+    "    # print(image.shape)\n",
+    "    whitedbackground = cv2.bitwise_or(mainimage, maskc)\n",
+    "    # return whitedbackground\n",
+    "    \n",
+    "    \n",
+    "    lab2 = cv2.cvtColor(whitedbackground, cv2.COLOR_BGR2LAB)\n",
+    "    \n",
+    "    lab2l = lab2[:,:,0]\n",
+    "    \n",
+    "    \n",
+    "    otsu2 = cv2.threshold(lab2l, 0, 255, cv2.THRESH_OTSU)[1]\n",
+    "    \n",
+    "    expandedmask1 = cv2.morphologyEx(mask1, cv2.MORPH_DILATE, kernel, iterations=1)\n",
+    "    expandedmask1 = cv2.morphologyEx(expandedmask1, cv2.MORPH_DILATE, kernelell, iterations=1)\n",
+    "    # return expandedmask1\n",
+    "    \n",
+    "    maskmerge = cv2.bitwise_and(otsu2, 255-expandedmask1)\n",
+    "    return mask1\n",
+    "    return maskmerge\n",
+    "    \n",
+    "    # return otsu2\n",
+    "    \n",
+    "    mpad = mf.padWithColour(maskmerge, sdim*2, sdim*2, fill=0)\n",
+    "    return mpad\n",
+    "    \n",
+    "    #MORPHOLOGIES   \n",
+    "    morphed2 = cv2.morphologyEx(mpad, cv2.MORPH_ERODE, kernel, iterations=1)\n",
+    "    morphed2 = cv2.morphologyEx(morphed2, cv2.MORPH_ERODE, kernelell, iterations=1)\n",
+    "    return morphed2\n",
+    "    \n",
+    "    contours, heirarchy = cv2.findContours(morphed2, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)\n",
+    "    biggestcontour = max(contours, key=cv2.contourArea)\n",
+    "    \n",
+    "    \n",
+    "    mask2 = blank.copy()\n",
+    "    mask2 = mf.padWithColour(mask2, sdim*2, sdim*2, fill=255)\n",
+    "    mask2 = cv2.drawContours(mask2, [biggestcontour], -1, 0, thickness=cv2.FILLED)\n",
+    "    \n",
+    "    \n",
+    "    mask2 = mask2[(sdim*2):-(sdim*2), (sdim*2):-(sdim*2)]\n",
+    "    \n",
+    "    return mask2\n",
+    "    \n",
+    "    test = cv2.inpaint(whitedbackground, resizemask, 3, cv2.INPAINT_TELEA)\n",
+    "    \n",
+    "    return test\n",
+    "    \n",
+    "    contours, heirarchy = cv2.findContours(255-labl, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)\n",
+    "    \n",
+    "    imgout = cv2.drawContours(mainimage, contours, -1, (0,255,0), thickness=3)\n",
+    "    return imgout\n",
+    "    \n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def textleaver(image):\n",
+    "    ogshape = image.shape\n",
+    "    shrunkdim=1000\n",
+    "    if (image.shape[1] > image.shape[0]):\n",
+    "        shrunkimg, scaler = mf.ResizeWithAspectRatio(image, width=shrunkdim, retscale=True)\n",
+    "    else:\n",
+    "        shrunkimg, scaler = mf.ResizeWithAspectRatio(image, height=shrunkdim, retscale=True)\n",
+    "        \n",
+    "    mainimage = shrunkimg\n",
+    "    \n",
+    "    sdim = int(min(mainimage.shape[0], mainimage.shape[1])/5)\n",
+    "    srkernel = cv2.getStructuringElement(cv2.MORPH_RECT, (sdim, sdim))\n",
+    "    skernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (sdim, sdim))\n",
+    "    \n",
+    "    oglab = cv2.cvtColor(image, cv2.COLOR_BGR2LAB)\n",
+    "    lab = cv2.cvtColor(mainimage, cv2.COLOR_BGR2LAB)\n",
+    "    \n",
+    "    imglist = []\n",
+    "    # imglist.append(mainimage)\n",
+    "    \n",
+    "    labl = lab[:,:,0]\n",
+    "    oglabl = oglab[:,:,0]\n",
+    "    # # imglist.append(labl)\n",
+    "    # # imglist.append(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY))\n",
+    "    # laba = lab[:,:,1]\n",
+    "    # # imglist.append(laba)\n",
+    "    # labb = lab[:,:,2]\n",
+    "    # # imglist.append(labb)\n",
+    "    \n",
+    "    divisor = 1.5\n",
+    "    window = int(min(labl.shape)/divisor)\n",
+    "    window = window if window%2 == 1 else window + 1\n",
+    "    # canny = cv2.Canny(labl, 0, 500)\n",
+    "    ethreshl = cv2.threshold(labl, 0, 255, cv2.THRESH_OTSU)[1]\n",
+    "    threshl = cv2.threshold(labl, 0, 255, cv2.THRESH_OTSU)[1]\n",
+    "    # threshl = cv2.adaptiveThreshold(labl, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, window, 35)\n",
+    "    \n",
+    "    \n",
+    "    ogwindow = int(min(oglabl.shape)/divisor)\n",
+    "    ogwindow = window if window%2 == 1 else window + 1\n",
+    "    print(ogwindow)\n",
+    "    ogthreshl = cv2.threshold(oglabl, 0, 255, cv2.THRESH_TRIANGLE)[1]\n",
+    "    return ogthreshl\n",
+    "    # ogthreshl = cv2.adaptiveThreshold(oglabl, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, ogwindow, 35)\n",
+    "    # return threshl\n",
+    "    \n",
+    "    colourthresh = cv2.cvtColor(threshl, cv2.COLOR_GRAY2BGR)\n",
+    "    \n",
+    "    dim = int(min(mainimage.shape[0], mainimage.shape[1])/100)\n",
+    "    # dim = 2\n",
+    "    # dim = dotsize\n",
+    "    dim = max(3,dim)\n",
+    "    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (dim, dim))\n",
+    "    kernelell = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (dim, dim))\n",
+    "    \n",
+    "    # paddedl = mf.padWithColour(threshl, sdim*2, sdim*2, fill=0)\n",
+    "    paddedl = threshl\n",
+    "    # return paddedl\n",
+    "    \n",
+    "    \n",
+    "    # morphedl = 255-cv2.morphologyEx(255-threshl, cv2.MORPH_OPEN, kernel, iterations=3)\n",
+    "    morphedl = paddedl\n",
+    "    morphed1l = cv2.morphologyEx(morphedl, cv2.MORPH_ERODE, kernel, iterations=1)\n",
+    "    # morphed1l = cv2.morphologyEx(morphed1l, cv2.MORPH_OPEN, kernel, iterations=1)\n",
+    "    # morphed1l = cv2.morphologyEx(morphed1l, cv2.MORPH_OPEN, kernel, iterations=1)\n",
+    "    # morphed1l = cv2.morphologyEx(morphedl, cv2.MORPH_ERODE, kernelell, iterations=2)\n",
+    "    \n",
+    "    emorphed1l = cv2.morphologyEx(ethreshl, cv2.MORPH_ERODE, kernel, iterations=1)\n",
+    "\n",
+    "    # return morphedl\n",
+    "    \n",
+    "    contours, heirarchy = cv2.findContours(morphed1l, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)\n",
+    "    biggestcontour = max(contours, key=cv2.contourArea)\n",
+    "    \n",
+    "    # temp = cv2.drawContours(colourthresh, [biggestcontour], -1, (0,255,0), thickness=1)\n",
+    "    # return temp\n",
+    "    \n",
+    "    \n",
+    "    blank = np.full(labl.shape, 255, dtype=np.uint8)\n",
+    "    mask1 = blank.copy()\n",
+    "    # mask1 = mf.padWithColour(mask1, sdim*2, sdim*2, fill=255)\n",
+    "    mask1 = cv2.drawContours(mask1, [biggestcontour], -1, 0, thickness=cv2.FILLED)\n",
+    "    ## TODO: change the erosion so that, if the paper reaches the image edge, the mask doesn't get eroded inward there (the paper is right at the edge, so writing may be close to it)\n",
+    "    \n",
+    "    contours, heirarchy = cv2.findContours(morphed1l, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)\n",
+    "    biggestcontour = max(contours, key=cv2.contourArea)\n",
+    "    \n",
+    "    emask1 = blank.copy()\n",
+    "    emask1 = cv2.drawContours(emask1, [biggestcontour], -1, 0, thickness=cv2.FILLED)\n",
+    "    \n",
+    "    mask1 = 255-cv2.morphologyEx(255-mask1, cv2.MORPH_ERODE, kernel, iterations=2)\n",
+    "    \n",
+    "    emask1 = 255-cv2.morphologyEx(255-emask1, cv2.MORPH_ERODE, kernel, iterations=2)\n",
+    "    \n",
+    "    \n",
+    "    # mask1 = mask1[(sdim*2):-(sdim*2), (sdim*2):-(sdim*2)]\n",
+    "    # return mask1\n",
+    "    \n",
+    "    # morphed2l = mf.padWithColour(morphedl, sdim*2, sdim*2, fill=255)\n",
+    "    morphed2l = cv2.morphologyEx(morphedl, cv2.MORPH_OPEN, kernel, iterations=1)\n",
+    "    morphed2l = cv2.morphologyEx(morphedl, cv2.MORPH_ERODE, kernel, iterations=1)\n",
+    "    # morphed2l = morphed2l[(sdim*2):-(sdim*2), (sdim*2):-(sdim*2)]\n",
+    "    \n",
+    "    # return morphed2l\n",
+    "    # print(mask1.shape)\n",
+    "    # print(morphed2l.shape)\n",
+    "    morphed2l = cv2.bitwise_or(morphed2l, 255-mask1)\n",
+    "    # return morphed2l\n",
+    "\n",
+    "    # paddedthreshl = mf.padWithColour(morphed2l, sdim*2, sdim*2, fill=255)\n",
+    "    # temp = cv2.drawContours(colourthresh, [biggestcontour], -1, (0,255,0), thickness=1)\n",
+    "    # return temp\n",
+    "\n",
+    "\n",
+    "    morphed2l = cv2.morphologyEx(morphed2l, cv2.MORPH_ERODE, kernel, iterations=1)\n",
+    "    morphed2l = cv2.morphologyEx(morphed2l, cv2.MORPH_ERODE, kernelell, iterations=1)\n",
+    "    # return morphed2l\n",
+    "    # morphed2l = cv2.bitwise_or(morphed2l, 255-emask1)\n",
+    "    \n",
+    "    # morphed2l = morphed2l[(sdim*2):-(sdim*2), (sdim*2):-(sdim*2)]\n",
+    "    \n",
+    "    resizedmask = cv2.resize(255-morphed2l, (ogshape[1], ogshape[0]))\n",
+    "    temp_final = cv2.bitwise_or(ogthreshl, resizedmask)\n",
+    "    \n",
+    "    dim=3\n",
+    "    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (dim, dim))\n",
+    "    temp_final = cv2.morphologyEx(temp_final, cv2.MORPH_OPEN, kernel)\n",
+    "    temp_final = cv2.morphologyEx(temp_final, cv2.MORPH_OPEN, kernel)\n",
+    "    # temp_final = cv2.morphologyEx(temp_final, cv2.MORPH_CLOSE, kernel)\n",
+    "    # temp_final = cv2.morphologyEx(temp_final, cv2.MORPH_OPEN, kernel)\n",
+    "    return temp_final"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def cropclarifying(image):\n",
+    "    # whitedbackground = whiteoutbackground(image)\n",
+    "    # return whitedbackground\n",
+    "\n",
+    "    # textrefined = mf.textClarifying(whitedbackground)\n",
+    "    textrefined = textleaver(image)\n",
+    "    return textrefined\n",
+    "    #maybe now is when I put in the line removing function\n",
+    "\n",
+    "    lineout = mf.removeLinesFromText(textrefined)\n",
+    "\n",
+    "    return lineout\n",
+    "    # implement a function that's called refine text"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def houghlineprocessing(image):\n",
+    "    croppedanddeskewed, angle = mf.houghlinedeskewandcrop(image)\n",
+    "    # return croppedanddeskewed\n",
+    "    \n",
+    "    \n",
+    "    # postprocessed = cropclarifying(croppedanddeskewed)\n",
+    "    postprocessed = croppedanddeskewed\n",
+    "    # return postprocessed\n",
+    "    # postprocessed = mf.croptoblack(postprocessed)\n",
+    "    \n",
+    "    # postprocessed = cv2.cvtColor(postprocessed, cv2.COLOR_GRAY2BGR)\n",
+    "    # return postprocessed\n",
+    "    \n",
+    "    # final = mf.externaldeskew(postprocessed, fill=(255,255,255))\n",
+    "    # rotangle = mf.receipttextdeskew(postprocessed, fill=(255,255,255), returnangle=True)\n",
+    "    final = postprocessed\n",
+    "    \n",
+    "    \n",
+    "    # final = mf.croptoblack(final)\n",
+    "    \n",
+    "    # cv2.imshow(\"postprocessed\", mf.ResizeWithAspectRatio(postprocessed, 1000))\n",
+    "    # cv2.imshow(\"final\", mf.ResizeWithAspectRatio(final, 1000))\n",
+    "    # cv2.waitKey(0)\n",
+    "    # cv2.destroyAllWindows()\n",
+    "    \n",
+    "    return final"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# print(img.shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0.0\n"
+     ]
+    }
+   ],
+   "source": [
+    "# prepped, scaler, hp, vp = mf.squareandthenresize(img, fill=255, width=1000, returnscalerinfo=True)\n",
+    "outs = houghlineprocessing(img)\n",
+    "# outs = prepimageforhoughline(img, returnrect=True)\n",
+    "# print(img.shape)\n",
+    "# outs = houghlinedeskewandcrop(img)\n",
+    "# outs = outs[0]\n",
+    "# print(croprect)\n",
+    "#need to fix premorphCrop. it removes too much"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# shrunk, scaler, hp, vp = mf.squareandthenresize(img, fill=255, width=1000, returnscalerinfo=True)\n",
+    "# shrunk1, croprect = mf.premorphCrop(shrunk)\n",
+    "# print(croprect)\n",
+    "# print(int(30*4.032 - 0))\n",
+    "# # temp = img[100:, :, :]\n",
+    "# temp = shrunk[croprect[1]:croprect[1]+croprect[3], croprect[0]:croprect[0]+croprect[2], :]\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# cv2.imshow(\"temp\", mf.ResizeWithAspectRatio(out, height=1000))\n",
+    "# # cv2.imshow(\"shrunk1\", mf.ResizeWithAspectRatio(shrunk1, height=1000))\n",
+    "# cv2.waitKey(0)\n",
+    "# cv2.destroyAllWindows()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "testall = True"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "if not testall:\n",
+    "    showimgs(outs)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
@ -266,6 +632,69 @@
    "# cv2.waitKey(0)\n",
    "# cv2.destroyAllWindows()"
   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0.9740282517223996\n",
+      "-2.0053522829578814\n",
+      "-0.9740282517223996\n",
+      "0.0\n",
+      "0.9740282517223996\n",
+      "-0.9740282517223996\n",
+      "-0.011669615052326776\n",
+      "2.0053522829578814\n",
+      "0.0\n",
+      "0.0\n",
+      "0.0\n",
+      "-2.979380534680281\n",
+      "0.0\n",
+      "0.0\n",
+      "-2.0053522829578814\n",
+      "-11.000789666511807\n",
+      "average time: 0.19967518746852875(s)\n"
+     ]
+    }
+   ],
+   "source": [
+    "if testall:\n",
+    "    results = testondataset(\"/mnt/dataset/baseimages/\", houghlineprocessing)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# if testall:\n",
+    "#     showimgs(results)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# print(results[0])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "if testall:\n",
+    "    writeimgs(\"/mnt/code/autocropper/result_images/\", results)"
+   ]
  }
 ],
 "metadata": {
--- a/code/autocropper/myfunctions.py
+++ b/code/autocropper/myfunctions.py
@ -92,6 +92,7 @@ def mergecontours(contours):
    return finalcontour


+
 # function to correct the median angle so it can be passed to cv2.warpAffine()
 # specifically, when getting the angle from a minAreaRect rectangle
 def anglecorrector(angle):
@ -341,10 +342,14 @@ def rotateLine(img, line, angle, returnint=True):
    pt2 = rotatePoint(img, (line[2],line[3]), angle, returnint)
    return (pt1[0], pt1[1], pt2[0], pt2[1])

-def prepimageforhoughline(image):
+def prepimageforhoughline(image, returnrect=True):
    prepped, scaler, hp, vp = squareandthenresize(image, fill=255, width=1000, returnscalerinfo=True)
+    ogpreppedshape = prepped.shape
    prepped, croprect = premorphCrop(prepped)
-    prepped = squareandthenresize(prepped, fill=255, width=1000)
+    if (prepped.shape[1] > prepped.shape[0]):
+        prepped, preppedscaler = ResizeWithAspectRatio(prepped, width=1000, retscale=True)
+    else:
+        prepped, preppedscaler = ResizeWithAspectRatio(prepped, height=1000, retscale=True)
    finalcroprect = (int(croprect[0]*scaler - hp), int(croprect[1]*scaler - vp), int(croprect[2]*scaler), int(croprect[3]*scaler))
    gray1 = cv2.cvtColor(prepped, cv2.COLOR_BGR2GRAY)

@ -361,8 +366,17 @@ def prepimageforhoughline(image):
    dst1 = cv2.Canny(dst1, 0, 500, None, 3)
    # return dst1
    accompaniedimage = image[finalcroprect[1]:finalcroprect[1]+finalcroprect[3], finalcroprect[0]:finalcroprect[0]+finalcroprect[2], :]
-    accompaniedimage = squarepad(accompaniedimage, fill=255)
-    return dst1, accompaniedimage
+    if returnrect:
+        borderType = cv2.BORDER_CONSTANT
+        preppadding = [croprect[0], croprect[1], ogpreppedshape[1]-(croprect[0]+croprect[2]), ogpreppedshape[0]-(croprect[1]+croprect[3])]
+        preppadding = [int(s/preppedscaler) for s in preppadding]
+        paddedprepped = cv2.copyMakeBorder(dst1, preppadding[1], preppadding[3], preppadding[0], preppadding[2], borderType, 0)
+        
+        squaredimage = squarepad(image, fill=0)
+        
+        return dst1, accompaniedimage, paddedprepped, squaredimage, finalcroprect
+    else:
+        return dst1, accompaniedimage

 def houghlinedeskewangle(image):
    lines = cv2.HoughLines(image, 1, np.pi/180, int(max(image.shape[0], image.shape[1])/6), None, 0, 0)
@ -391,43 +405,73 @@ def houghlinedeskewangle(image):
    rotationangle = np.rad2deg(mode)
    return rotationangle

-def houghlinedeskewthencrop(baseimage, preppedimage, rotationangle):
-    rotateddst1 = rotate(preppedimage, rotationangle)
-    rotatedbaseimage = rotate(baseimage, rotationangle)
-    sizemultiplier = rotatedbaseimage.shape[0]/rotateddst1.shape[0]
-    # print(sizemultiplier)
-
-
-    linesP = cv2.HoughLinesP(preppedimage, 1, np.pi / 180, 30, None, 90, 30)
-
-
-    rotatedlines = [rotateLine(rotateddst1, line[0], rotationangle) for line in linesP]
-    rotatedlines = np.reshape(rotatedlines, (len(rotatedlines),1,4))
+def determineextrapadding(h,w, angle):
+    radangle = abs(np.deg2rad(angle))
+    # Returns (hpad, vpad): how many pixels to add on each horizontal / vertical
+    # side of an h x w image so that its extent after rotating by `angle`
+    # degrees fits on the padded canvas. Only the magnitude of `angle` is used.
+    totalheightrot = w*np.sin(radangle) + h*np.cos(radangle)
+    # ^ vertical extent of the rotated h x w rectangle (formula holds for |angle| <= 90 degrees)
+    totalwidthrot = h*np.sin(radangle) + w*np.cos(radangle)
+    # ^ horizontal extent of the rotated rectangle, same caveat
+    vpad = int(max(0,math.ceil((totalheightrot - h)/2)))
+    hpad = int(max(0,math.ceil((totalwidthrot-w)/2)))
+    # Half of the growth per side, rounded up; max(0, ...) keeps the padding non-negative.
+    return hpad, vpad

+def rotatewithexactpadding(img, angle, fill=(0,0,0)):
+    h, w = img.shape[0], img.shape[1]
+    hpad, vpad = determineextrapadding(h=h,w=w, angle=angle)
+    # Pad `img` by the amounts computed for this angle, then rotate the padded
+    # image; `fill` is passed both to the padding and to the rotation.
+    baseimage = padWithColour(img, hpad, vpad, fill=fill)
+    # presumably rotate() keeps the canvas size, hence the padding beforehand -- TODO confirm
+    rotatedimg = rotate(baseimage, angle,fill=fill)
+    return rotatedimg

+def houghlinepcrop(baseimage, preppedimage, scalingmultiplier):
+    rotatedlines = cv2.HoughLinesP(preppedimage, 1, np.pi / 180, 30, None, 90, 30)  # NOTE(review): may be None when no segments are found -- confirm WithinXDegrees copes
+    # Crop baseimage to the bounding rect of the Hough segments kept below (presumably those within 7 degrees of vertical / horizontal -- verify WithinXDegrees).
    vmarginlines = WithinXDegrees(rotatedlines, 7)
    hmarginlines = WithinXDegrees(rotatedlines, 7, baseangle=90)
-    vrect = lineBoundingRect(vmarginlines,asRect=False, returnint=True)
-    hmarginlines = lineswithinrange(hmarginlines, (vrect[0], vrect[1]), (vrect[2],vrect[3]), x=True, y=False)
-
-    if (hmarginlines != []):
-        marginlines = np.append(vmarginlines, hmarginlines, axis=0)
-    else:
-        marginlines = vmarginlines
-
-    # print(marginlines)
-    rect = lineBoundingRect(marginlines,asRect=False, returnint=True)
-    # print(rect)
-    scaledrect = (int(rect[0]*sizemultiplier), int(rect[1]*sizemultiplier), int(rect[2]*sizemultiplier), int(rect[3]*sizemultiplier))
-
-    croppedbaseimage = rotatedbaseimage[scaledrect[1]:scaledrect[3], scaledrect[0]:scaledrect[2], :]
+    # vrect = lineBoundingRect(vmarginlines,asRect=False, returnint=True)
+    # hmarginlines = lineswithinrange(hmarginlines, (vrect[0], vrect[1]), (vrect[2],vrect[3]), x=True, y=False)
+    marginlines = np.append(vmarginlines, hmarginlines, axis=0)  # NOTE(review): the earlier guard for an empty hmarginlines was dropped -- confirm np.append copes with an empty result
    
+    # colourdst = cv2.cvtColor(preppedimage, cv2.COLOR_GRAY2BGR)
+    # if marginlines is not None:
+    #     for l in marginlines:
+    #         cv2.line(colourdst, (int(l[0]), int(l[1])), (int(l[2]), int(l[3])), (0,0,255), 3, cv2.LINE_AA)
+    # return colourdst
+    # rect is indexed below as (x1, y1, x2, y2) in preppedimage pixels; scalingmultiplier maps it onto baseimage.
+    rect = lineBoundingRect(marginlines,asRect=False, returnint=True)
+    scaledrect = (int(rect[0]*scalingmultiplier), int(rect[1]*scalingmultiplier), int(rect[2]*scalingmultiplier), int(rect[3]*scalingmultiplier))
+    croppedbaseimage = baseimage[scaledrect[1]:scaledrect[3], scaledrect[0]:scaledrect[2], :]
+    return croppedbaseimage

-    # print(croppedbaseimage.shape)
-    shrunkencbi, sizemultiplier = ResizeWithAspectRatio(croppedbaseimage, width=1000, retscale=True)
+def contourcrop(baseimage):
+    shrunkencbi, sizemultiplier = ResizeWithAspectRatio(baseimage, width=1000, retscale=True)
    gray = cv2.cvtColor(shrunkencbi, cv2.COLOR_BGR2GRAY)
-    thresh = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)[1]
+    # thresh = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)[1]
+    thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_TRIANGLE)[1]
+    # window = gray.shape[1]//7
+    # if window % 2 == 0:
+    #     window += 1
+    # thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, window, 10)
+
+    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5,5))
+    # thresh = cv2.morphologyEx(thresh, cv2.MORPH_ERODE, kernel, iterations=2)
+    thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)
+    # return thresh
+    
    contours, heirarchy = cv2.findContours(thresh,cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
+    
+    # temp = cv2.drawContours(shrunkencbi, contours, -1, (0,255,0), thickness=3)
+    # biggestcontour = max(contours, key=cv2.contourArea)
+    # temp = cv2.drawContours(shrunkencbi, [biggestcontour], -1, (0,255,0), thickness=3)
+    
+    # return temp
+    
    mx = (0,0,0,0)
    mx_area = 0

@ -440,25 +484,52 @@ def houghlinedeskewthencrop(baseimage, preppedimage, rotationangle):

    
    scaledmx = (int(mx[0]*sizemultiplier), int(mx[1]*sizemultiplier), int(mx[2]*sizemultiplier), int(mx[3]*sizemultiplier))
-    finalbaseimage = croppedbaseimage[scaledmx[1]:scaledmx[1]+scaledmx[3], scaledmx[0]:scaledmx[0]+scaledmx[2], :]
+    finalbaseimage = baseimage[scaledmx[1]:scaledmx[1]+scaledmx[3], scaledmx[0]:scaledmx[0]+scaledmx[2], :]
+    return finalbaseimage

+def houghlinedeskewthencrop(baseimage, preppedimage, rotationangle, croprect):  # NOTE: croprect is accepted but not used in this body
+    rotatedbaseimage = rotatewithexactpadding(baseimage, rotationangle, fill=(0,0,0))
+    rotateddst1 = rotatewithexactpadding(preppedimage, rotationangle, fill=(0,0,0))
+    sizemultiplier = rotatedbaseimage.shape[0]/rotateddst1.shape[0]
+    # Both images are rotated by the same angle with black fill; sizemultiplier is the
+    # height ratio passed on so coordinates found on the prepped image can be scaled to the base image.
+    croppedbaseimage = houghlinepcrop(rotatedbaseimage, rotateddst1, sizemultiplier)
+    # First crop comes from the Hough line segments; contourcrop then crops that result again.
+    finalbaseimage = contourcrop(croppedbaseimage)
+    # The rotation angle is handed back unchanged alongside the cropped image.
    return finalbaseimage, rotationangle

 def houghlinedeskewandcrop(image):
-    canny, croppedogimage = prepimageforhoughline(image)  ## scaling and cropping occurs. need to also return the changes done
-    # return canny, croppedogimage
+    croppedcanny, croppedimage, canny, ogimage, rect = prepimageforhoughline(image, returnrect=True)  ## scaling and cropping occurs. need to also return the changes done
+    # croppedcanny / croppedimage: edge map and source crop of the pre-cropped region; canny / ogimage: that edge map with a border added back and the square-padded source; croppedimage is not used below.
    # print(canny.shape)
    # print(croppedogimage.shape)

    ## -----------------finding angle to deskew-----------------
-    rotationangle = houghlinedeskewangle(canny)
+    rotationangle = houghlinedeskewangle(croppedcanny)
+    # When the cropped edge map is taller than wide, an angle beyond +/-45 degrees
+    # is shifted by 90 so the correction stays within [-45, 45].
+    if (croppedcanny.shape[0] > croppedcanny.shape[1]):
+        if (rotationangle > 45):
+            rotationangle -= 90
+        elif rotationangle < -45:
+            rotationangle += 90
    # print(rotationangle)
+    # elif (croppedcanny.shape[1] > croppedcanny.shape[0]):
+        # if (rotationangle > 45):
+        #     rotationangle -= 90
+        # elif rotationangle < -45:
+        #     rotationangle += 90
+    # print(rotationangle)
+
+    
+    # rotatorrect = findcroprectforangle(rect, angle)

    # -----------------end of finding angle to deskew-----------------

    ## -----------------deskewing and then cropping-----------------
-
-    return houghlinedeskewthencrop(croppedogimage, canny, rotationangle)
+    outimg, angle = houghlinedeskewthencrop(ogimage, canny, rotationangle, rect)
+    return outimg, angle

 def bruteforceprocessrects(greaterrects, lesserrects):
    # squaredgrects = np.array([mf.xywhrectto2prect(rect) for rect in greaterrects])
@ -504,35 +575,49 @@ def whiteoutbackground(image):
    # blur = cv2.blur(gray, (7,7))
    
    # window = 51
-    window = min(gray.shape[1], gray.shape[0])//20
+    window = gray.shape[1]//8
    if window % 2 == 0:
        window += 1
-    thresh1 = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, window, 5)
-    thresh2 = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU)[1]
-    thresh = cv2.bitwise_and(thresh1, thresh2)
+    thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, window, 2)
+    # thresh2 = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU)[1]
+    # thresh = cv2.bitwise_and(thresh1, thresh2)
    # return thresh

    # dim = int(min(thresh.shape[0], thresh.shape[1])/400)
-    dim = 3
-    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (dim, dim))
-    morphedthresh = cv2.morphologyEx(thresh, cv2.MORPH_ERODE, kernel)
+    # dim = 3
+    # kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (dim, dim))
+    # morphedthresh = cv2.morphologyEx(thresh, cv2.MORPH_ERODE, kernel)
    # return morphedthresh
    
    
    
-    # contours1, heirarchy = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-    contours2, heirarchy = cv2.findContours(morphedthresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+    contours1, heirarchy = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+    # contours2, heirarchy = cv2.findContours(morphedthresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+    
+
+
+    biggestcontour1 = max(contours1, key=cv2.contourArea)
+    # biggestcontour2 = max(contours2, key=cv2.contourArea)
+    
+    epsilon = 0.0005*cv2.arcLength(biggestcontour1,True)
+    approx = cv2.approxPolyDP(biggestcontour1,epsilon,True)
+    # approx = cv2.convexHull(approx)
+    epsilon = 0.001*cv2.arcLength(approx,True)
+    approx = cv2.approxPolyDP(approx,epsilon,True)
+    # approx = cv2.convexHull(biggestcontour1)
+    # print(approx)
    
-    # biggestcontour1 = max(contours1, key=cv2.contourArea)
-    biggestcontour2 = max(contours2, key=cv2.contourArea)
    # imagecpy = cv2.drawContours(imagecpy, [biggestcontour1], -1, (0,255,0), thickness=3)
    # imagecpy = cv2.drawContours(imagecpy, [biggestcontour2], -1, (0,0,255), thickness=3)
+    
+    # imagecpy = cv2.drawContours(imagecpy, [approx], -1, (0,255,0), thickness=3)
    # return imagecpy
    
    blank = np.full(thresh.shape, 255, dtype=np.uint8)
    mask = blank.copy()
-    # mask = cv2.drawContours(mask, [biggestcontour1], -1, (0,0,0), thickness=cv2.FILLED)
-    mask = cv2.drawContours(mask, [biggestcontour2], -1, (0,0,0), thickness=cv2.FILLED)
+    mask = cv2.drawContours(mask, [biggestcontour1], -1, (0,0,0), thickness=cv2.FILLED)
+    # mask = cv2.drawContours(mask, [approx], -1, (0,0,0), thickness=cv2.FILLED)
+    # mask = cv2.drawContours(mask, [biggestcontour2], -1, (0,0,0), thickness=cv2.FILLED)

    # return mask

@ -547,34 +632,21 @@ def whiteoutbackground(image):
    mask = 255 - cv2.morphologyEx(invertmask, cv2.MORPH_ERODE, kernel, iterations=1)
    # return mask
    
-    mask = cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR)
-    whitedbackground = cv2.bitwise_or(image, mask)
+    mask1 = cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR)
+    whitedbackground = cv2.bitwise_or(image, mask1)
    # return whitedbackground
    
-    gray2 = cv2.cvtColor(whitedbackground, cv2.COLOR_BGR2GRAY)
-
-    canny = cv2.Canny(gray2, 0, 500, None, 3)
+    mask2 = blank.copy()
+    mask2 = 255-cv2.drawContours(mask2, [approx], -1, (0,0,0), thickness=cv2.FILLED)
    
-    maskgray = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)
-    
-    
-
-    invert = 255-maskgray
-    # dim = 21
-    # print(dim)
-    dim = int(min(maskgray.shape[0], maskgray.shape[1])/200)
+    dim = int(min(mask2.shape[0], mask2.shape[1])/50)
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (dim, dim))
-    morphedmask = cv2.morphologyEx(invert, cv2.MORPH_ERODE, kernel)
-    dim = int(min(maskgray.shape[0], maskgray.shape[1])/50)
-    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (dim, dim))
-    morphedmask = cv2.morphologyEx(morphedmask, cv2.MORPH_OPEN, kernel, iterations=7)
-    # return 255 - morphedmask
-    morphedmask = 255 - morphedmask
-
-    finalmask = cv2.bitwise_or(morphedmask, maskgray)
-    # return finalmask
+    morphedmask = 255-cv2.morphologyEx(mask2, cv2.MORPH_OPEN, kernel, iterations=3)
+    # return morphedmask
+    
+    finalmask = cv2.bitwise_or(mask, morphedmask)
+    
    
-    # edgecontours, _ = cv2.findContours(255-morphedmask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    finalmaskbgr = cv2.cvtColor(finalmask, cv2.COLOR_GRAY2BGR)
    # return finalmaskbgr

@ -668,7 +740,7 @@ def cropclarifying(image):
    return lineout
    # implement a function that's called refine text
    
-def croptoblack(image, extraborder=10):
+def croptoblack(image, extraborder=10, returnrect=False):
    invertedimage = cv2.bitwise_not(image)
    blackpixels = cv2.findNonZero(invertedimage)
    mins = np.min(blackpixels, axis=0)
@ -678,6 +750,8 @@ def croptoblack(image, extraborder=10):
    maxx = min(maxs[0][0]+extraborder, image.shape[1])
    maxy = min(maxs[0][1]+extraborder, image.shape[0])
    # print(blackpixels)
+    if (returnrect):
+        return [minx,miny,maxx-minx,maxy-miny]
    return image[miny:maxy, minx:maxx]

 def reduceColours(x, centering=127):
@ -702,7 +776,7 @@ def textClarifying(image):
    autothreshold = np.clip(np.mean(gray)/1.2, 0, 255)
    
    lab = cv2.cvtColor(image, cv2.COLOR_BGR2LAB)
-    hls = cv2.cvtColor(image, cv2.COLOR_BGR2HLS)
+    # hls = cv2.cvtColor(image, cv2.COLOR_BGR2HLS)
    
    kernel1 = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
    kernel2 = cv2.getStructuringElement(cv2.MORPH_RECT, (4, 4))
@ -1056,26 +1130,41 @@ def getreceipttextAngle(cvImage) -> float:
    # print(angle)
    return angle

-def receipttextdeskew(img, fill=(0,0,0)):
+def receipttextdeskew(img, fill=(0,0,0), returnangle=False):
    colourimg = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
    angle = getreceipttextAngle(colourimg)
-    padimg = padWithColour(img, hpadding=50, vpadding=50, fill=fill)
-    rotated = rotate(padimg, angle, fill=fill)
+    if returnangle:
+        return angle
+    # With returnangle=True only the detected angle is returned (no image).
+    # Otherwise the BGR copy of `img` is rotated with exact padding, cropped, then re-padded.
+    # NOTE(review): the result is 3-channel, whereas the removed version rotated the single-channel `img` -- confirm callers expect this.
+    rotated = rotatewithexactpadding(colourimg, angle, fill=fill)
+    grayrotated = cv2.cvtColor(rotated, cv2.COLOR_BGR2GRAY)
+    # croptoblack(..., returnrect=True) returns the crop as [x, y, width, height] instead of a cropped image.
+    croprect = croptoblack(grayrotated, returnrect=True)
+    # Apply that rect to the colour image, then add a fixed 50 px border in `fill` on every side.
+    rotated = rotated[croprect[1]:croprect[1]+croprect[3], croprect[0]:croprect[0]+croprect[2], :]
+    rotated = padWithColour(rotated, hpadding=50, vpadding=50, fill=fill)
    return rotated

 ## ------------------------------Full deskewing and cropping------------------------------
 def houghlineprocessing(image):
-    croppedanddeskewed, _ = houghlinedeskewandcrop(image)
+    croppedanddeskewed, angle = mf.houghlinedeskewandcrop(image)
+    # return croppedanddeskewed
    
-    postprocessed = cropclarifying(croppedanddeskewed)
+    
+    # postprocessed = cropclarifying(croppedanddeskewed)
+    postprocessed = croppedanddeskewed
    # return postprocessed
-    postprocessed = croptoblack(postprocessed)
+    # postprocessed = mf.croptoblack(postprocessed)
    
    # postprocessed = cv2.cvtColor(postprocessed, cv2.COLOR_GRAY2BGR)
    # return postprocessed
    
    # final = mf.externaldeskew(postprocessed, fill=(255,255,255))
-    final = receipttextdeskew(postprocessed, fill=(255,255,255))
+    # rotangle = mf.receipttextdeskew(postprocessed, fill=(255,255,255), returnangle=True)
+    final = postprocessed
+    
    
    # final = mf.croptoblack(final)