receipt_indexer/code/autocropper/notebooks/oldnotebooks/manualrotationchecker.ipynb
Ethan Wellenreiter 423b511dd9 Cleanup commit
Moving around the testing notebooks. Autocropping is about done
with exception to any new versions or converting the stuff to C
code.

Signed-off-by: Ethan Wellenreiter <ewellenreiter@gmail.com>
2023-10-18 22:48:24 -04:00

317 lines
9.1 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Original prototype for the rowsumdeskew code; this notebook can probably be deleted or archived."
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [],
"source": [
"import cv2\n",
"import numpy as np\n"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [],
"source": [
"\n",
"\n",
"# Load the test image as grayscale and invert it (255 - pixel).\n",
"image_path = './testing_space/cropped1.jpg'\n",
"gray = cv2.imread(image_path, 0)\n",
"if gray is None:\n",
"    # cv2.imread reports a missing/unreadable file by returning None rather\n",
"    # than raising, which would otherwise surface as a TypeError below.\n",
"    raise FileNotFoundError(f'could not read image: {image_path}')\n",
"src = 255 - gray\n",
"scores = []\n",
"\n",
"# Crop to a square anchored at the top-left corner, with the side equal to\n",
"# the shorter image dimension.\n",
"h, w = src.shape\n",
"small_dimention = min(h, w)\n",
"src = src[:small_dimention, :small_dimention]\n",
"\n",
"# MJPG debug video at 15 fps; display_data() appends 320x320 frames to it.\n",
"out = cv2.VideoWriter('./temp/video.avi',\n",
"                      cv2.VideoWriter_fourcc('M','J','P','G'),\n",
"                      15, (320,320))\n",
"\n",
"# Binarise: pixels above 100 become 255, all others 0.\n",
"src = cv2.threshold(src, 100, 255, cv2.THRESH_BINARY)[1]"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [],
"source": [
"def rotate(img, angle):\n",
"    \"\"\"Rotate a single-channel image about its centre by `angle` degrees.\n",
"\n",
"    The output has the same width and height as the input and no scaling\n",
"    is applied (scale factor 1).\n",
"    \"\"\"\n",
"    height, width = img.shape\n",
"    centre = (width/2, height/2)\n",
"    transform = cv2.getRotationMatrix2D(centre, angle, 1)\n",
"    return cv2.warpAffine(img, transform, (width, height))\n",
"\n",
"def sum_rows(img):\n",
"    \"\"\"Return the per-row pixel sums of a 2-D image, scaled so the peak is 255.\n",
"\n",
"    Every row is included (one value per row of `img`). Sums are accumulated\n",
"    in float64 so that uint8 input cannot overflow.\n",
"\n",
"    Returns a 1-D float64 array. For an empty image, or an image whose rows\n",
"    all sum to zero, the unscaled sums are returned (empty / all zeros)\n",
"    instead of dividing by zero.\n",
"    \"\"\"\n",
"    # One vectorised reduction along each row replaces the Python-level loop\n",
"    row_sums = np.asarray(img).sum(axis=1, dtype=np.float64)\n",
"    if row_sums.size == 0:\n",
"        return row_sums\n",
"    peak = row_sums.max()\n",
"    if peak == 0:\n",
"        # Nothing to normalise; avoids producing NaNs from 0/0\n",
"        return row_sums\n",
"    # Normalize range to (0,255)\n",
"    return (row_sums / peak) * 255\n",
"\n",
"def display_data(roi, row_sums, buffer):\n",
"    \"\"\"Show the row-sum profile beside the ROI and append the frame to the video.\n",
"\n",
"    A (2*buffer, 2*buffer) canvas is filled row by row with the values of\n",
"    `row_sums` (the last ROI row is not painted); everything right of the\n",
"    first third of `buffer` is then overwritten with the ROI pixels, so\n",
"    `roi` must have the same shape as the canvas. The canvas is shown in\n",
"    the 'bg1' window and written, resized to 320x320 BGR, to the global\n",
"    video writer `out`.\n",
"\n",
"    Returns the key code reported by cv2.waitKey(1).\n",
"    \"\"\"\n",
"    side = buffer*2\n",
"    canvas = np.zeros((side, side), np.uint8)\n",
"    # Paint each row of the canvas with that row's normalised sum\n",
"    painted_rows = roi.shape[0]-1\n",
"    for idx in range(painted_rows):\n",
"        canvas[idx:idx+1, :] = row_sums[idx]\n",
"    # Keep the profile visible on the left, put the ROI on the right\n",
"    split = int(buffer/3)\n",
"    canvas[:, split:] = roi[:, split:]\n",
"    cv2.imshow('bg1', canvas)\n",
"    key = cv2.waitKey(1)\n",
"    frame = cv2.resize(canvas, (320,320))\n",
"    out.write(cv2.cvtColor(frame, cv2.COLOR_GRAY2BGR))\n",
"    return key\n"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [],
"source": [
"# Bookkeeping for the rotation search: number of times a new best was found,\n",
"# number of angles tried, and the best angle seen so far.\n",
"count = othercount = goodangle = 0"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [],
"source": [
"# cv2.imshow('bg1', src)\n",
"# cv2.waitKey(0)\n",
"# cv2.destroyAllWindows()"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"found optimal rotation\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"found optimal rotation\n",
"found optimal rotation\n",
"found optimal rotation\n",
"found optimal rotation\n",
"found optimal rotation\n",
"found optimal rotation\n",
"found optimal rotation\n",
"found optimal rotation\n",
"found optimal rotation\n",
"found optimal rotation\n",
"found optimal rotation\n",
"found optimal rotation\n",
"found optimal rotation\n",
"found optimal rotation\n",
"found optimal rotation\n",
"found optimal rotation\n",
"found optimal rotation\n",
"found optimal rotation\n",
"found optimal rotation\n",
"found optimal rotation\n",
"found optimal rotation\n",
"found optimal rotation\n",
"found optimal rotation\n",
"found optimal rotation\n"
]
}
],
"source": [
"# Sweep the image through a full turn and keep the rotation whose central\n",
"# region contains the fewest non-empty rows.\n",
"ANGLE_STEP = .75\n",
"# Running minimum for this sweep only, so stale entries left in `scores` by\n",
"# an earlier run of this cell cannot influence the result.\n",
"best_score = None\n",
"angle = 0\n",
"while angle <= 360:\n",
"    # Rotate the source image\n",
"    img = rotate(src, angle)\n",
"    # Crop a centred square of side 2*buffer, i.e. roughly the middle two\n",
"    # thirds of the shorter side (roi is filled with text)\n",
"    h,w = img.shape\n",
"    buffer = min(h, w) - int(min(h,w)/1.5)\n",
"    roi = img[int(h/2-buffer):int(h/2+buffer), int(w/2-buffer):int(w/2+buffer)]\n",
"    # Compute the sums of the rows\n",
"    row_sums = sum_rows(roi)\n",
"    # Low score --> many empty rows between the lines (zebra stripes)\n",
"    score = np.count_nonzero(row_sums)\n",
"    scores.append(score)\n",
"    othercount = othercount + 1\n",
"    # Ties count as an improvement, so the latest equally good angle wins\n",
"    if best_score is None or score <= best_score:\n",
"        best_score = score\n",
"        count = count + 1\n",
"        # Save the rotated image\n",
"        print('found optimal rotation')\n",
"        best_rotation = img.copy()\n",
"        goodangle = angle\n",
"    k = display_data(roi, row_sums, buffer)\n",
"    # Esc aborts the sweep\n",
"    if k == 27: break\n",
"    # Increment angle and try again\n",
"    angle += ANGLE_STEP\n",
"cv2.destroyAllWindows()"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"25\n",
"481\n",
"349.5\n"
]
}
],
"source": [
"# Report how often a new best was found, how many angles were tried and\n",
"# the winning angle, one value per line.\n",
"for value in (count, othercount, goodangle):\n",
"    print(value)\n",
"# Show the best rotation until a key is pressed\n",
"cv2.imshow('bg1', best_rotation)\n",
"cv2.waitKey(0)\n",
"cv2.destroyAllWindows()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(\"start\")\n",
"\n",
"# Angles at which the central region is almost empty. A dedicated list is\n",
"# used because `scores` may already hold row counts from the search above,\n",
"# which must not be interpreted as angles.\n",
"candidate_angles = []\n",
"\n",
"# Rotate the image around in a circle\n",
"angle = 0\n",
"while angle <= 360:\n",
"    # Rotate the source image\n",
"    img = rotate(src, angle)\n",
"    # Crop a centred square of side 2*buffer (roi is filled with text)\n",
"    h,w = img.shape\n",
"    buffer = min(h, w) - int(min(h,w)/1.5)\n",
"    roi = img[int(h/2-buffer):int(h/2+buffer), int(w/2-buffer):int(w/2+buffer)]\n",
"    # Threshold image\n",
"    _, roi = cv2.threshold(roi, 140, 255, cv2.THRESH_BINARY)\n",
"    # Compute the sums of the rows\n",
"    row_sums = sum_rows(roi)\n",
"    # Keep the angle when the total of the normalised row sums is small\n",
"    if np.sum(row_sums) < 100000: candidate_angles.append(angle)\n",
"    k = display_data(roi, row_sums, buffer)\n",
"    # Esc aborts the sweep\n",
"    if k == 27: break\n",
"    # Increment angle and try again\n",
"    angle += .5\n",
"    print(\"loop\")\n",
"cv2.destroyAllWindows()\n",
"# Finalise the debug video now that the last frame has been written\n",
"out.release()\n",
"\n",
"print(\"endofrotate\")\n",
"\n",
"# Create images for display purposes\n",
"h, w = src.shape\n",
"display = src.copy()\n",
"# Create an image that contains bins. float32 is used so that many\n",
"# overlapping lines accumulate instead of wrapping around at 255.\n",
"bins_image = np.zeros(src.shape, np.float32)\n",
"for angle in candidate_angles:\n",
"    # Draw a horizontal centre line on a blank overlay and rotate only the\n",
"    # overlay back into the source frame; the accumulated images are never\n",
"    # resampled, so they do not blur or lose their corners.\n",
"    overlay = np.zeros(src.shape, np.uint8)\n",
"    cv2.line(overlay, (0,int(h/2)), (w,int(h/2)), 255, 1)\n",
"    overlay = rotate(overlay, -angle)\n",
"    # Burn the line into the display image\n",
"    display = np.maximum(display, overlay)\n",
"    # 'Fill' up the bins: each line contributes up to 50\n",
"    bins_image += overlay.astype(np.float32) * (50 / 255)\n",
"\n",
"print(\"endofbins\")\n",
"\n",
"# Find the most filled bin: row (y) and column (x) of the maximum\n",
"y, x = np.unravel_index(np.argmax(bins_image), bins_image.shape)\n",
"# Draw circles showing the most filled bin\n",
"cv2.circle(display, (int(x),int(y)), 560, 255, 5)\n",
"\n",
"print(\"plotting\")\n",
"\n",
"# Plot with Matplotlib\n",
"import matplotlib.pyplot as plt\n",
"import matplotlib.image as mpimg\n",
"f, axarr = plt.subplots(1,3, sharex=True)\n",
"panels = ((src, 'Source Image'), (display, 'Output'), (bins_image, 'Bins Image'))\n",
"for axis, (picture, title) in zip(axarr, panels):\n",
"    axis.imshow(picture)\n",
"    axis.set_title(title)\n",
"    axis.axis('off')\n",
"plt.show()\n",
"\n",
"cv2.destroyAllWindows()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}