{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"mount_file_id":"1FNIJvnGw3Cy7v-E4KPSol9T7BPO6sqtG","authorship_tag":"ABX9TyObVzIUPKCdPvZRkEKouvoq"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"markdown","source":["# Feature Extraction"],"metadata":{"id":"ZwcjPKTRf88I"}},{"cell_type":"code","execution_count":null,"metadata":{"id":"TU2H_jK-2tUV"},"outputs":[],"source":["import pandas as pd\n","from glob import glob\n","import matplotlib.pyplot as plt\n","import cv2\n","import numpy as np\n","import os\n","from tqdm.auto import tqdm\n","import shutil\n","\n","# root of the project folder; every other path below is derived from it,\n","# so moving the project only requires changing this one line\n","BASE_PATH = \"/content/drive/MyDrive/AI_DM_ECOLOGY\"\n","\n","# path for current files\n","DATA_PATH = f\"{BASE_PATH}/shared/data\"\n","\n","# paths for future data\n","DATA_MASK_PATH = f\"{BASE_PATH}/data_mask\"\n","DATA_CROPPED_PATH = f\"{BASE_PATH}/data_cropped\"\n","DATA_CROPPED_WB_PATH = f\"{BASE_PATH}/data_cropped_wb\"\n","FEATURES_PATH = f\"{BASE_PATH}/features\"\n","FEATURES_WB_PATH = f\"{BASE_PATH}/features_wb\"\n","\n","! 
cp /content/drive/MyDrive/AI_DM_ECOLOGY/shared/code/utils.py ."]},{"cell_type":"code","source":["df = pd.read_csv(DATA_PATH+\"/info.csv\")\n","df"],"metadata":{"id":"1nGSJKJT8zFx"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["First of all, let's add a column to the dataframe with the full path of each image in the data folder."],"metadata":{"id":"UtD9wnsGgLR3"}},{"cell_type":"code","source":["paths = []\n","for name in df.img_name:\n","    path = f\"{DATA_PATH}/{name}\"\n","    paths.append(path)\n","df[\"path\"] = paths\n","df"],"metadata":{"id":"kNJEMdMS9Nsp"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["We will then focus on just a couple of images.\n","\n","Our objective is to look at them and understand why they belong to different classes."],"metadata":{"id":"i2EbjV3ugXcY"}},{"cell_type":"code","source":["path0 = paths[1465]\n","path1 = paths[1507]\n","path2 = paths[394]\n","\n","image0 = plt.imread(path0)\n","image1 = plt.imread(path1)\n","image2 = plt.imread(path2)\n","\n","plt.imshow(image0)\n","plt.title(\"Healthy leaf\")\n","plt.show()\n","\n","plt.imshow(image1)\n","plt.title(\"Symptomatic leaf\")\n","plt.show()\n","\n","plt.imshow(image2)\n","plt.title(\"Very symptomatic leaf\")\n","plt.show()"],"metadata":{"id":"LU7ik7fE39A-"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["Now, let's calculate the color histogram of the images.\n","\n","Let's write a function calc_histogram which will help us visualize the histograms of future images too."],"metadata":{"id":"jC2zWhdvhKbe"}},{"cell_type":"code","source":["def calc_histogram(image, plot=True, histogram_only=False):\n","\n","    # ... TODO\n","\n","    if plot:\n","\n","        fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(10,4))\n","        ax0 = axs[0]\n","        ax1 = axs[1]\n","\n","        if not histogram_only:\n","            ax0.imshow(image)\n","        ax0.set_xticks([])\n","        ax0.set_yticks([])\n","\n","        # ... 
TODO\n","\n","        ax1.set_yscale(\"log\")\n","\n","        plt.show()\n","\n","    return feature_df"],"metadata":{"id":"8ziwjLpscs_v"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["feature_df = calc_histogram(image0)\n","print()\n","feature_df = calc_histogram(image1)\n","print()\n","feature_df = calc_histogram(image2)"],"metadata":{"id":"6tXGJ2rcfd5_"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["But there are other colorspaces too!\n","\n","For example the **Lab** colorspace, which has separate axes for \"blue-yellow\" and \"green-red\"\n","\n","<img src=\"https://drive.google.com/uc?export=view&id=1rIblfGWoFdhEC0tUsA3qNjctdhYrOTX2\">"],"metadata":{"id":"emqfnaRbkD1A"}},{"cell_type":"code","source":["image0_cvt = # ... TODO\n","image1_cvt = # ... TODO\n","image2_cvt = # ... TODO\n","\n","feature_df = calc_histogram(image0_cvt)\n","print()\n","feature_df = calc_histogram(image1_cvt)\n","print()\n","feature_df = calc_histogram(image2_cvt)"],"metadata":{"id":"7xRDQB7MkB6J"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["conversion_fun = {\n","    \"RGB\": lambda x: x,\n","    \"YCC\": lambda x: cv2.cvtColor(x, cv2.COLOR_RGB2YCrCb),\n","    \"Lab\": lambda x: cv2.cvtColor(x, cv2.COLOR_RGB2Lab),\n","    \"YUV\": lambda x: cv2.cvtColor(x, cv2.COLOR_RGB2YUV),\n","}"],"metadata":{"id":"xOFp17AWzyn0"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["# Cropping images"],"metadata":{"id":"lxr8hi8_z7v7"}},{"cell_type":"markdown","source":["Focus on one"],"metadata":{"id":"-9kx7HpF0LEM"}},{"cell_type":"code","source":["image = image0\n","image = plt.imread(paths[0])\n","\n","image_thresh = # ... 
TODO\n","print(image_thresh.shape)\n","plt.imshow(image_thresh)\n","plt.show()"],"metadata":{"id":"HTjfGIi48AvA"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# sum over the last axis (presumably the colour channels);\n","# the result gets its own name so that this cell can be re-run safely\n","image_sum = image_thresh.sum(-1)\n","print(image_sum.shape)\n","plt.imshow(image_sum, cmap=\"binary\")\n","plt.show()"],"metadata":{"id":"uuDxtn5t8B2g"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# 1 where the summed value is below the value of the top-left pixel, 0 elsewhere\n","image_mask = (image_sum<image_sum[0,0]).astype(np.uint8)\n","print(image_mask.shape)\n","plt.imshow(image_mask, cmap=\"binary\")\n","plt.show()"],"metadata":{"id":"5vWt-SFO4TmZ"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["contours, hierarchy = cv2.findContours(image_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)\n","\n","out_img = image.copy()\n","cv2.drawContours(\n","    image=out_img,\n","    contours=contours,\n","    contourIdx=-1,\n","    color=(255,0,0),\n","    thickness=5,\n",")\n","plt.imshow(out_img)\n","plt.show()"],"metadata":{"id":"zio__KO8454H"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# calculate the bounding box (rectangle) for each contour\n","bboxes = []\n","for contour in contours:\n","    bbox = cv2.boundingRect(contour)\n","    print(\"bbox (x, y, w, h):\", bbox)\n","    bboxes.append(bbox)\n","\n","# plot the contours in red\n","out_img = image.copy()\n","cv2.drawContours(\n","    image=out_img,\n","    contours=contours,\n","    contourIdx=-1,\n","    color=(255,0,0),\n","    thickness=5,\n",")\n","\n","# plot the bboxes as blue rectangles\n","for bbox in bboxes:\n","    x,y,w,h = bbox\n","    cv2.rectangle(\n","        out_img,\n","        (x,y),\n","        (x+w,y+h),\n","        color=(0,0,255),\n","        thickness=5,\n","    )\n","\n","# calculate the center coordinates of the image\n","img_center_x = image.shape[1]//2\n","img_center_y = image.shape[0]//2\n","print(\"img_center:\", img_center_x, img_center_y)\n","\n","# plot the center of the image as a black 
dot\n","plt.plot(img_center_x, img_center_y, marker=\"o\", color=\"k\")\n","\n","plt.imshow(out_img)\n","plt.show()"],"metadata":{"id":"priaUUkn8vFh"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# check if the bboxes contain the center of the image\n","\n","okay_contours = []\n","\n","for (bbox, contour) in zip(bboxes, contours):\n","    x,y,w,h = bbox\n","    x_min = x\n","    x_max = x+w\n","    y_min = y\n","    y_max = y+h\n","\n","    print(x_min, img_center_x, x_max)\n","    print(y_min, img_center_y, y_max)\n","\n","    if (x_min<=img_center_x<=x_max) and (y_min<=img_center_y<=y_max):\n","            okay_contours.append(contour)\n","\n","\n","# plot the contours in red\n","out_img = image.copy()\n","cv2.drawContours(\n","    image=out_img,\n","    contours=okay_contours,\n","    contourIdx=-1,\n","    color=(255,0,0),\n","    thickness=5,\n",")\n","\n","plt.imshow(out_img)\n","plt.show()\n","\n","\n","# create binary mask\n","out_img = np.zeros_like(image)\n","cv2.drawContours(\n","    image=out_img,\n","    contours=okay_contours,\n","    contourIdx=-1,\n","    color=(255,255,255),\n","    thickness=cv2.FILLED,\n",")\n","\n","plt.imshow(out_img)\n","plt.show()"],"metadata":{"id":"O6bCET6t-oVX"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# saving the image as test.jpg just to test it\n","# in the temporary space, not on google drive (BASE_PATH)\n","plt.imsave(\"test.jpg\", out_img)"],"metadata":{"id":"Ub4BWRLUIKJ0"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["def get_contour_and_rect(image):\n","\n","    # ... TODO\n","\n","    return contour, rect, mask\n","\n","def plot_image_contour(image, contour, rect):\n","\n","    # plot the contours in red\n","    # ... TODO\n","\n","    # plot the bbox as blue rectangles\n","    # ... 
TODO\n","\n","    plt.imshow(out_img)\n","    plt.show()"],"metadata":{"id":"CvWbXYc1CJpA"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["contour, rect, mask = get_contour_and_rect(image0)\n","# ... TODO"],"metadata":{"id":"9UP89G0QEOfi"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["contour, rect, mask = get_contour_and_rect(image1)\n","# ... TODO"],"metadata":{"id":"JEMuZeRbFuCK"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["contour, rect, mask = get_contour_and_rect(image2)\n","# ... TODO"],"metadata":{"id":"4XnXWIjGGBv7"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["image3 = plt.imread(paths[1124])\n","# ... TODO"],"metadata":{"id":"KSAcj8ZpGE7X"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["image4 = plt.imread(paths[1152])\n","# ... TODO"],"metadata":{"id":"IFlBzJ41Zkrk"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["def crop_image(image, rect):\n","    # ... TODO"],"metadata":{"id":"4Vhb_oPUGZsg"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["contour, rect, mask = get_contour_and_rect(image4)\n","plt.imshow(image4)\n","plt.show()\n","cropped_image = crop_image(image4, rect)\n","plt.imshow(cropped_image)\n","plt.show()"],"metadata":{"id":"jNmHLiClG0rV"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["from utils import plot_hist"],"metadata":{"id":"C49Njr87n2AV"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["fig, axs = plt.subplots(nrows=4, ncols=5, figsize=(20,10))\n","\n","axs[0,0].set_title(\"Full\")\n","axs[0,1].set_title(\"Cropped\")\n","axs[0,2].set_title(\"Full\")\n","axs[0,3].set_title(\"Cropped\")\n","axs[0,4].set_title(\"Cropped Masked\")\n","\n","for row_idx, image in enumerate([image0, image1, image2, image4]):\n","\n","    row = axs[row_idx]\n","\n","    contour, rect, mask = # ... TODO\n","    cropped_image = # ... TODO\n","    masked_image = # ... 
TODO\n","    masked_cropped_image = # ... TODO\n","\n","    # plot original image on row[0]\n","    row[0]. # ... TODO\n","\n","    # plot cropped image on row[1]\n","    row[1]. # ... TODO\n","\n","    # plot full img hist on row[2]\n","    # ... TODO calculate features\n","    # ... TODO plot the features\n","\n","    # plot cropped img hist  on row[3]\n","    # ... TODO calculate features\n","    # ... TODO plot the features\n","\n","    # plot cropped img masked hist  on row[4]\n","    # ... TODO calculate features\n","    # ... TODO plot the features\n","\n","for ax in axs.flatten():\n","    ax.xaxis.set_visible(False)\n","    ax.yaxis.set_visible(False)\n","\n","\n","plt.tight_layout()\n","plt.show()"],"metadata":{"id":"8snNRBqaZxpc"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["fig, axs = plt.subplots(nrows=4, ncols=5, figsize=(20,10))\n","\n","axs[0,0].set_title(\"Full\")\n","axs[0,1].set_title(\"Cropped\")\n","axs[0,2].set_title(\"Full\")\n","axs[0,3].set_title(\"Cropped\")\n","axs[0,4].set_title(\"Cropped Masked\")\n","\n","for row_idx, image in enumerate([image0, image1, image2, image4]):\n","\n","    row = axs[row_idx]\n","\n","    contour, rect, mask = # ... TODO\n","    cropped_image = crop_image(image, rect)\n","\n","    conversion_function = # ... TODO\n","\n","    image_cvt = # ... TODO\n","    cropped_image_cvt = # ... TODO\n","    masked_image_cvt = # ... TODO\n","    masked_cropped_image_cvt = # ... TODO\n","\n","    # plot original image on row[0]\n","    row[0]. # ... TODO\n","\n","    # plot cropped image on row[1]\n","    row[1]. # ... TODO\n","\n","    # plot full img hist on row[2]\n","    # ... TODO calculate features\n","    # ... TODO plot the features\n","\n","    # plot cropped img hist  on row[3]\n","    # ... TODO calculate features\n","    # ... TODO plot the features\n","\n","    # plot cropped img masked hist  on row[4]\n","    # ... TODO calculate features\n","    # ... 
TODO plot the features\n","\n","for ax in axs.flatten():\n","    ax.xaxis.set_visible(False)\n","    ax.yaxis.set_visible(False)\n","\n","\n","plt.tight_layout()\n","plt.show()"],"metadata":{"id":"I7mh3TW_iKPe"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["print(DATA_CROPPED_PATH)\n","print(DATA_MASK_PATH)\n","\n","# ... TODO check if the folders exist, create them if they don't\n","\n","cropped_paths = []\n","# ... TODO\n","df[\"cropped_path\"] = cropped_paths\n","\n","mask_paths = []\n","# ... TODO\n","df[\"mask_path\"] = mask_paths\n","\n","# iterrows() yields (index, row) pairs, so unpack to get the row itself\n","for _, row in tqdm(df.iterrows(), total=len(df)):\n","\n","    # ... TODO\n","\n","    plt.imsave(cropped_path, cropped_image)\n","    plt.imsave(mask_path, cropped_mask)"],"metadata":{"id":"BL34rRlBqyp8"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["print(FEATURES_PATH)\n","\n","# ... TODO check if the folders exist, create them if they don't\n","\n","for colorspace in [\"RGB\", \"Lab\"]:#, \"YCC\", \"YUV\"]:\n","    print(colorspace)\n","    out_path = f\"{FEATURES_PATH}/{colorspace}.pkl\"\n","    # ... TODO"],"metadata":{"id":"YuDKtfsuwe_i"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["def whitemask_balancing(\n","    image,\n","    mask,\n","):\n","    # ... TODO\n","\n","    return image_max"],"metadata":{"id":"1RqzKv5KzBd6"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["print(DATA_CROPPED_WB_PATH)\n","\n","# ... TODO check if the folders exist, create them if they don't\n","\n","\n","cropped_wb_paths = []\n","# ... TODO\n","df[\"cropped_wb_path\"] = cropped_wb_paths\n","\n","# iterrows() yields (index, row) pairs; without unpacking, `row` would be a tuple\n","# and the attribute accesses below would raise AttributeError\n","for _, row in tqdm(df.iterrows(), total=len(df)):\n","    input_path = row.path\n","    cropped_wb_path = row.cropped_wb_path\n","    mask_path = row.mask_path\n","\n","    # ... TODO"],"metadata":{"id":"ZLHkERxf4W98"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["print(FEATURES_WB_PATH)\n","\n","# ... 
TODO check if the folders exist, create them if they don't\n","\n","for colorspace in [\"RGB\", \"Lab\"]:#, \"YCC\", \"YUV\"]:\n","    print(colorspace)\n","    out_path = f\"{FEATURES_WB_PATH}/{colorspace}.pkl\"\n","    # ... TODO"],"metadata":{"id":"ngC9NbP387Vd"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":[],"metadata":{"id":"S53Zw4XP9LFh"},"execution_count":null,"outputs":[]}]}