receipt_indexer/code/autocropper/notebooks/oldnotebooks/testcropper.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Original document for the morphology crop; it can probably be deleted."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "import cv2\n",
    "import numpy as np\n",
    "\n",
    "import torch\n",
    "from torch.utils.data import DataLoader\n",
    "import torch.nn as nn\n",
    "import torch.nn.functional as fn\n",
    "import torch.optim as optim\n",
    "import torchvision.transforms.functional as tvf\n",
    "import torchvision.transforms.v2 as v2\n",
    "import torchvision.models as models\n",
    "import torchvision.transforms as t\n",
    "\n",
    "import myfunctions as mf\n",
    "\n",
    "from PIL import Image"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the image in colour (no imread flag is passed); the grayscale conversion happens in a later cell.\n",
    "image_path = './test_images/IMG_7640.jpg'\n",
    "img = cv2.imread(image_path)\n",
    "# cv2.imread signals a missing or unreadable file by returning None instead of raising.\n",
    "if img is None:\n",
    "    raise FileNotFoundError(f\"Could not read image: {image_path}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "# def ResizeWithAspectRatio(image, width=None, height=None, inter=cv2.INTER_AREA):\n",
    "#     dim = None\n",
    "#     (h, w) = image.shape[:2]\n",
    "\n",
    "#     if width is None and height is None:\n",
    "#         return image\n",
    "#     if width is None:\n",
    "#         r = height / float(h)\n",
    "#         dim = (int(w * r), height)\n",
    "#     else:\n",
    "#         r = width / float(w)\n",
    "#         dim = (width, int(h * r))\n",
    "\n",
    "#     return cv2.resize(image, dim, interpolation=inter)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Convert to grayscale.\n",
    "gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)\n",
    "\n",
    "# Binary threshold at 190 with a maximum value of 255.\n",
    "thresh = cv2.threshold(gray, 190, 255, cv2.THRESH_BINARY)[1]\n",
    "\n",
    "# Apply morphology: close with a 7x7 kernel, then erode with a 9x9 kernel.\n",
    "kernel = np.ones((7, 7), np.uint8)\n",
    "morph = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)\n",
    "kernel = np.ones((9, 9), np.uint8)\n",
    "morph = cv2.morphologyEx(morph, cv2.MORPH_ERODE, kernel)\n",
    "\n",
    "# Get the external contours. findContours returns either 2 or 3 values depending on\n",
    "# the OpenCV version; the contour list is the first of 2 or the second of 3.\n",
    "contours = cv2.findContours(morph, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)\n",
    "contours = contours[0] if len(contours) == 2 else contours[1]\n",
    "\n",
    "# Fail loudly when nothing was detected; previously big_contour stayed undefined\n",
    "# (or silently kept a value from an earlier run of this cell).\n",
    "if len(contours) == 0:\n",
    "    raise ValueError(\"No contours found; adjust the threshold or morphology settings.\")\n",
    "\n",
    "# Keep the contour with the largest area.\n",
    "big_contour = max(contours, key=cv2.contourArea)\n",
    "\n",
    "# Get the bounding box of that contour.\n",
    "x, y, w, h = cv2.boundingRect(big_contour)\n",
    "\n",
    "# Draw the filled contour in white on a black 3-channel background.\n",
    "mask = np.zeros_like(gray)\n",
    "mask = cv2.merge([mask, mask, mask])\n",
    "cv2.drawContours(mask, [big_contour], -1, (255, 255, 255), cv2.FILLED)\n",
    "\n",
    "# Apply the mask to the input; bitwise_and returns a new array, so img is not modified.\n",
    "result1 = cv2.bitwise_and(img, mask)\n",
    "\n",
    "# Crop the masked image to the bounding box.\n",
    "result2 = result1[y:y+h, x:x+w]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "# view result\n",
    "# cv2.imshow(\"threshold\", thresh)\n",
    "# cv2.imshow(\"morph\", morph)\n",
    "# cv2.imshow(\"mask\", mask)\n",
    "# cv2.imshow(\"result1\", result1)\n",
    "# NOTE(review): mf is project-local; 1000 is presumably the target width - confirm in myfunctions.\n",
    "resizedresult2 = mf.ResizeWithAspectRatio(result2, 1000)\n",
    "# cv2.imwrite reports failure through its boolean return value rather than raising,\n",
    "# so surface it instead of silently continuing without the file.\n",
    "if not cv2.imwrite(\"./testing_space/cropped1.jpg\", resizedresult2):\n",
    "    print(\"Warning: could not write ./testing_space/cropped1.jpg\")\n",
    "cv2.imshow(\"result2\", resizedresult2)\n",
    "# Block until a key is pressed in the preview window, then close it.\n",
    "cv2.waitKey(0)\n",
    "cv2.destroyAllWindows()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "class RotationDeterminer(nn.Module):\n",
    "    \"\"\"Predict one rotation value per image with a ResNet-18 backbone.\n",
    "\n",
    "    The input is preprocessed by ``self.imageprep``, passed through ResNet-18\n",
    "    (1000 outputs) and then through a fully connected head that emits a\n",
    "    single scalar per image.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, new=False):\n",
    "        \"\"\"Build the preprocessing pipeline, the backbone and the regression head.\n",
    "\n",
    "        Args:\n",
    "            new: when truthy, load ImageNet-pretrained ResNet-18 weights;\n",
    "                otherwise the backbone starts from randomly initialised weights.\n",
    "        \"\"\"\n",
    "        super().__init__()\n",
    "\n",
    "        torch.cuda.empty_cache()\n",
    "\n",
    "        # is_available must be *called*: the bare function object is always truthy,\n",
    "        # which used to select cuda:0 even on machines without a GPU.\n",
    "        self.device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n",
    "\n",
    "        # Augmentations, each wrapped so it is applied with probability 0.25.\n",
    "        # forward() does not use them; the list is kept as a public attribute.\n",
    "        self.appliers = [\n",
    "            v2.RandomApply(transforms=[v2.RandomPosterize(bits=1)], p=0.25),\n",
    "            v2.RandomApply(transforms=[v2.ElasticTransform(alpha=25.0)], p=0.25),  # maybe add fill=appliedFill\n",
    "            v2.RandomApply(transforms=[v2.GaussianBlur(kernel_size=(5, 9), sigma=(0.1, 2.))], p=0.25),\n",
    "            v2.RandomApply(transforms=[v2.RandomEqualize()], p=0.25),\n",
    "        ]\n",
    "\n",
    "        # Earlier hand-written convolutional stack, superseded by the ResNet-18 backbone below.\n",
    "        # self.conv = nn.Sequential(nn.Conv2d(3, 9, kernel_size=11,stride=3), # 1100 x 1100 => 201 x 201\n",
    "        #                              nn.ReLU(inplace=True),\n",
    "        #                              nn.Conv2d(9, 18, kernel_size=5,stride=1),\n",
    "        #                              nn.ReLU(inplace=True),\n",
    "        #                              nn.MaxPool2d(kernel_size=4, stride=2),\n",
    "        #                              nn.Conv2d(18, 36, kernel_size=3,stride=2),\n",
    "        #                              nn.BatchNorm2d(36),\n",
    "        #                              nn.ReLU(inplace=True),\n",
    "        #                              nn.Conv2d(36, 72, kernel_size=3,stride=2),\n",
    "        #                              nn.ReLU(inplace=True),\n",
    "        #                              nn.AvgPool2d(kernel_size=5, stride=3),\n",
    "        #                              nn.Conv2d(72, 144, kernel_size=3,stride=1),\n",
    "        #                              nn.ReLU(inplace=True),\n",
    "        #                              nn.Conv2d(144, 288, kernel_size=5,stride=1),\n",
    "        #                              nn.ReLU(inplace=True),\n",
    "        #                              nn.MaxPool2d(kernel_size=4, stride=1),\n",
    "        #                              nn.Conv2d(288, 192, kernel_size=3,stride=1),\n",
    "        #                              nn.ReLU(inplace=True),\n",
    "        #                              nn.Conv2d(192, 192, kernel_size=3,stride=1), # => 1\n",
    "        #                              nn.ReLU(inplace=True))\n",
    "\n",
    "        # `weights=` replaces the deprecated `pretrained=` flag (torchvision >= 0.13);\n",
    "        # IMAGENET1K_V1 is the checkpoint that `pretrained=True` used to load.\n",
    "        weights = models.ResNet18_Weights.IMAGENET1K_V1 if new else None\n",
    "        self.conv = models.resnet18(weights=weights)\n",
    "\n",
    "        # Regression head: the 1000 ResNet outputs are reduced to one scalar.\n",
    "        self.classifier = nn.Sequential(\n",
    "            nn.Linear(1000, 4096),\n",
    "            nn.ReLU(inplace=True),\n",
    "            nn.Linear(4096, 1),\n",
    "        )\n",
    "\n",
    "        self.lossfunc = nn.MSELoss()\n",
    "\n",
    "        # Preprocessing: pad to a square, resize to 512, make a 3-channel grayscale\n",
    "        # image, centre-crop to 512, then convert to an image tensor and convert its dtype.\n",
    "        self.imageprep = v2.Compose([\n",
    "            self.SquarePad(),\n",
    "            v2.Resize(512),\n",
    "            v2.Grayscale(num_output_channels=3),\n",
    "            v2.CenterCrop(512),\n",
    "            v2.ToImageTensor(),\n",
    "            v2.ConvertImageDtype(),\n",
    "        ])\n",
    "\n",
    "    class SquarePad:\n",
    "        \"\"\"Pad a tensor image to a square by replicating its edge pixels.\"\"\"\n",
    "\n",
    "        def __call__(self, image):\n",
    "            # Tensor images are laid out (..., H, W): height is second to last and\n",
    "            # width is last. The previous code read them the other way round and\n",
    "            # therefore padded the already-longer side.\n",
    "            height, width = image.size()[-2:]\n",
    "            side = max(width, height)\n",
    "            # Split each deficit in two so that odd differences still give an exact square.\n",
    "            left = (side - width) // 2\n",
    "            right = side - width - left\n",
    "            top = (side - height) // 2\n",
    "            bottom = side - height - top\n",
    "            # tvf.pad takes the padding as (left, top, right, bottom).\n",
    "            return tvf.pad(image, (left, top, right, bottom), 0, 'edge')\n",
    "\n",
    "    def forward(self, image):\n",
    "        \"\"\"Return the predicted rotation(s) as a 1-D tensor.\n",
    "\n",
    "        Args:\n",
    "            image: a 3-D (single image) or 4-D (batch of images) tensor.\n",
    "\n",
    "        Returns:\n",
    "            A flattened tensor holding one predicted value per image.\n",
    "\n",
    "        Raises:\n",
    "            ValueError: if the preprocessed tensor is neither 3-D nor 4-D.\n",
    "        \"\"\"\n",
    "        transformedimage = self.imageprep(image).to(self.device)\n",
    "\n",
    "        if transformedimage.dim() not in (3, 4):\n",
    "            raise ValueError(\n",
    "                f\"Sorry, Dimension of image is incorrect ({transformedimage.dim()}). \"\n",
    "                \"Expected a 3D (single image) or 4D (batch of images) tensor\"\n",
    "            )\n",
    "\n",
    "        # The backbone expects a batch dimension; add one for a single image.\n",
    "        x = transformedimage.unsqueeze(0) if transformedimage.dim() == 3 else transformedimage\n",
    "\n",
    "        x = self.conv(x)\n",
    "        x = self.classifier(x)\n",
    "        # Collapse the (batch, 1) head output into a 1-D tensor.\n",
    "        return x.flatten()\n",
    "\n",
    "    def loss(self, guess, trueAnswer):\n",
    "        \"\"\"Return the mean-squared error between ``guess`` and ``trueAnswer``.\"\"\"\n",
    "        return self.lossfunc(guess, trueAnswer)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/local/lib/python3.10/dist-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.\n",
      "  warnings.warn(\n",
      "/usr/local/lib/python3.10/dist-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=ResNet18_Weights.IMAGENET1K_V1`. You can also use `weights=ResNet18_Weights.DEFAULT` to get the most up-to-date weights.\n",
      "  warnings.warn(msg)\n"
     ]
    }
   ],
   "source": [
    "# Build the model with pretrained backbone weights.\n",
    "model = RotationDeterminer(new=True)\n",
    "# is_available must be *called*: the bare function object is always truthy, so the\n",
    "# old check picked cuda:0 (and failed) on machines without a GPU.\n",
    "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n",
    "model = model.to(device)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "torch.Size([1, 1174, 1000])\n",
      "torch.Size([3, 1174, 1000])\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/local/lib/python3.10/dist-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True).\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-0.1470905989408493\n"
     ]
    }
   ],
   "source": [
    "tensorize = v2.Compose([v2.ToImageTensor(), v2.ConvertImageDtype()])\n",
    "grayscaler = v2.Grayscale(num_output_channels=3)\n",
    "\n",
    "# Convert the cropped image to a single-channel grayscale array.\n",
    "imagetobeprocessed = cv2.cvtColor(resizedresult2, cv2.COLOR_BGR2GRAY)\n",
    "\n",
    "# Add a leading channel dimension so ToPILImage accepts the tensor.\n",
    "tensorizedimage = torch.unsqueeze(torch.from_numpy(imagetobeprocessed), 0)\n",
    "print(tensorizedimage.shape)\n",
    "# Round-trip through PIL to obtain a 3-channel grayscale image tensor.\n",
    "adjustedtensorizedimage = tensorize(grayscaler(t.ToPILImage()(tensorizedimage)))\n",
    "print(adjustedtensorizedimage.shape)\n",
    "\n",
    "# Inference only: eval() makes the BatchNorm layers use their running statistics instead\n",
    "# of the statistics of this single image, and no_grad() skips building the autograd graph.\n",
    "model.eval()\n",
    "with torch.no_grad():\n",
    "    rotation = model(adjustedtensorizedimage).item()\n",
    "print(rotation)\n",
    "\n",
    "# Rotate by the predicted value and resize for display.\n",
    "rotatedimage = t.Resize(size=1000)(tvf.rotate(adjustedtensorizedimage, rotation))\n",
    "# imS = mf.ResizeWithAspectRatio(filereadimage, 1000)\n",
    "# imS = cv2.resize(filereadimage, (960, 540)) \n",
    "open_cv_image = np.array(t.ToPILImage()(rotatedimage))\n",
    "cv2.imshow('image', open_cv_image)\n",
    "# Block until a key is pressed in the preview window, then close it.\n",
    "cv2.waitKey(0)\n",
    "cv2.destroyAllWindows()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# # save result\n",
    "# cv2.imwrite(\"paper_thresh.jpg\", thresh)\n",
    "# cv2.imwrite(\"paper_morph.jpg\", morph)\n",
    "# cv2.imwrite(\"paper_mask.jpg\", mask)\n",
    "# cv2.imwrite(\"paper_result1.jpg\", result1)\n",
    "# cv2.imwrite(\"paper_result2.jpg\", result2)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}