{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "2fe4c392-fd2c-4dc5-8ed2-866a26447165",
   "metadata": {},
   "source": [
    "# Classification in Python using pyjeo and sklearn"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cbe6356c-b543-4886-a321-f3e2b71495c2",
   "metadata": {},
   "outputs": [],
   "source": [
    "from IPython.display import display\n",
    "import geopandas as gpd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "633dfbb3-d00a-45d2-8ebf-2ad9d0984b6a",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.ensemble import RandomForestClassifier\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.metrics import confusion_matrix\n",
    "from sklearn.metrics import accuracy_score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0a414281-01c7-45b7-b303-2df17a488653",
   "metadata": {},
   "outputs": [],
   "source": [
    "from datetime import datetime\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pyjeo as pj\n",
    "import pandas as pd\n",
    "import geopandas as gpd"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a5d851ff-d154-4481-8872-63e67e491766",
   "metadata": {},
   "source": [
    "## Create reference data for training"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "64fea62b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ! wget -P /media/sf_LVM_shared/my_SE_data/exercise https://github.com/ec-jrc/jeolib-pyjeo/blob/master/tests/data/modis_ndvi_2010.tif\n",
    "# ! wget -P /media/sf_LVM_shared/my_SE_data/exercise https://github.com/ec-jrc/jeolib-pyjeo/blob/master/tests/data/modis_ndvi_training.sqlite\n",
    "\n",
    "\n",
    "! curl -H 'Accept: application/vnd.github.v3.raw' -O -L 'https://github.com/ec-jrc/jeolib-pyjeo/raw/master/tests/data/modis_ndvi_training.sqlite'\n",
    "! curl -H 'Accept: application/vnd.github.v3.raw' -O -L 'https://github.com/ec-jrc/jeolib-pyjeo/raw/master/tests/data/modis_ndvi_2010.tif'\n",
    "\n",
    "\n",
    "!pwd\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ba2afdd4-03a1-4862-b360-476c6c46c0fa",
   "metadata": {},
   "outputs": [],
   "source": [
    "reference = pj.JimVect('modis_ndvi_training.sqlite')\n",
    "jim = pj.Jim('modis_ndvi_2010.tif', band2plane=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7d372258-7e66-4582-9e03-42678d5cf07e",
   "metadata": {},
   "outputs": [],
   "source": [
    "dates = [datetime.strptime('01-' + str(month) + '-2010', \"%d-%m-%Y\") for month in range(1, 13)]\n",
    "jim.properties.setDimension({'band': ['NDVI'], 'plane': dates})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d9b362c0-d50a-48ec-9004-bc852d7164e7",
   "metadata": {},
   "outputs": [],
   "source": [
    "jim.xr()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d8fbc58f-61f3-4f57-9b0a-49bd491f02f0",
   "metadata": {},
   "outputs": [],
   "source": [
    "jim.xr().NDVI.plot(col='time', col_wrap=6)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "63f8de4a-0780-4f35-9ffe-301dd55bde46",
   "metadata": {},
   "outputs": [],
   "source": [
    "pd.DataFrame(reference.dict())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0b61a3c4-1289-400a-bb71-3ce6b03c5ac9",
   "metadata": {},
   "outputs": [],
   "source": [
    "featurevect = pj.geometry.extract(reference, jim, rule=['allpoints'],\n",
    "                                  output='/vsimem/features.sqlite',\n",
    "                                  oformat='SQLite',\n",
    "                                  co=['OVERWRITE=YES'],\n",
    "                                  classes=[1, 2],\n",
    "                                  copy='label')\n",
    "gdf = gpd.read_file('/vsimem/features.sqlite')\n",
    "gdf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "39f069fa-399d-43a9-817d-dab7f4886011",
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.figure(figsize=(16, 8))\n",
    "ax = plt.subplot()\n",
    "jim.xr().NDVI.isel(time = 0).plot(ax = ax)\n",
    "gdf.plot(column = 'label', ax = ax, legend = True, categorical=True, cmap='Set1')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "89f69fb9-f9d3-4161-878e-cdb1d2938993",
   "metadata": {},
   "outputs": [],
   "source": [
    "pd.DataFrame(featurevect.dict())"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f199150c-ec9e-47e9-a921-da6ce3b4eca2",
   "metadata": {},
   "source": [
    "## Train the model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "428e7974-5a8b-4884-b415-e69dc5e3c98d",
   "metadata": {},
   "outputs": [],
   "source": [
    "x = featurevect.np()[:, 1:]\n",
    "y = featurevect.np()[:, 0:1]\n",
    "x_train, x_test, y_train, y_test = train_test_split(x, y,\n",
    "                                                    test_size=0.33,\n",
    "                                                    random_state=42)\n",
    "rfModel = RandomForestClassifier(n_estimators=100,\n",
    "                                 max_depth=9,\n",
    "                                 min_samples_leaf=5,\n",
    "                                 min_samples_split=3,\n",
    "                                 criterion='gini')\n",
    "rfModel.fit(x_train, y_train.ravel())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "87c1348f-1cb9-49e7-8863-3e19c70c93af",
   "metadata": {},
   "outputs": [],
   "source": [
    "y_predict = rfModel.predict(x_test)\n",
    "print(confusion_matrix(y_test, y_predict))\n",
    "print('accuracy score: {}'.format(accuracy_score(y_test, y_predict)))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ba85ab47-957a-4321-a349-6aa282620710",
   "metadata": {},
   "source": [
    "## Prediction"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d71fbc73-2972-4c8f-a09f-69f1dfdec7fd",
   "metadata": {},
   "outputs": [],
   "source": [
    "x = jim.np()\n",
    "x = x.reshape(jim.properties.nrOfPlane(), jim.properties.nrOfRow() * \\\n",
    "                      jim.properties.nrOfCol()).T\n",
    "\n",
    "jim_class = pj.Jim(ncol=jim.properties.nrOfCol(),\n",
    "                   nrow=jim.properties.nrOfRow(),\n",
    "                   otype='Byte')\n",
    "jim_class.properties.copyGeoReference(jim)\n",
    "jim_class.np()[:] = rfModel.predict(x).astype(np.dtype(np.uint8)).\\\n",
    "            reshape(jim.properties.nrOfRow(), jim.properties.nrOfCol())\n",
    "jim_class.properties.setDimension(['water'], 'band')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0abc73c2-e3f9-4a6a-8782-7d45fbdd256c",
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.figure(figsize=(20, 10))\n",
    "ax1 = plt.subplot(121)\n",
    "jim.xr().NDVI.isel(time = 0).plot(ax = ax1)\n",
    "ax2 = plt.subplot(122)\n",
    "jim_class.xr().water.plot(cmap = 'Set2', levels = [1, 2], ax = ax2)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0bc990f9-4c84-4881-85d7-6e92557bfbae",
   "metadata": {
    "tags": []
   },
   "source": [
    "## Exercise 1\n",
    "Use a single feature to train the classifier (e.g., month of June only)  \n",
    "Check the accuracy"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2c11a3e7-882d-4a30-bb84-a96f2fdb7cac",
   "metadata": {},
   "source": [
    "What is the accuracy?"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3a7c5ec4-fcc5-4feb-a87f-24811e644873",
   "metadata": {
    "tags": []
   },
   "source": [
    "## Exercise 2\n",
    "Replace the Random Forest with a Support Vector Machine\n",
    "(hint: use the `preprocessing.MinMaxScaler` to scale the input data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d7b8afb3-15ab-434c-9293-44895a8b6cdc",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.svm import SVC\n",
    "from sklearn import preprocessing"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a46d633b-9c7b-4cb3-a221-19915b75699b",
   "metadata": {},
   "source": [
    "What is the accuracy?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "76b1e8d6-05df-4d47-aff8-d4dcabaec18e",
   "metadata": {},
   "outputs": [],
   "source": [
    "y_predict = svmModel.predict(preprocessing.MinMaxScaler().fit_transform(x_test))\n",
    "print(confusion_matrix(y_test, y_predict))\n",
    "print('accuracy score: {}'.format(accuracy_score(y_test, y_predict)))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "96e45eb6-b556-4449-9e16-ac50d9fab2a7",
   "metadata": {},
   "source": [
    "Prediction"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "655ca3c4-d2d5-4db7-b3a4-bc9d1d931756",
   "metadata": {},
   "outputs": [],
   "source": [
    "x = jim.np()\n",
    "x = x.reshape(jim.properties.nrOfPlane(), jim.properties.nrOfRow() * \\\n",
    "              jim.properties.nrOfCol()).T\n",
    "\n",
    "\n",
    "jim_class = pj.Jim(ncol=jim.properties.nrOfCol(),\n",
    "                   nrow=jim.properties.nrOfRow(), otype='Byte')\n",
    "jim_class.properties.copyGeoReference(jim)\n",
    "jim_class.np()[:] = \n",
    "jim_class.properties.setDimension(['water'], 'band')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f3440c6c-ff2d-46b2-b2de-934e16a3c495",
   "metadata": {},
   "source": [
    "Plot"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "653c0ae1-ec5c-487d-a3f7-bc0c7a1d64f7",
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.figure(figsize=(20, 10))\n",
    "ax1 = plt.subplot(121)\n",
    "jim.xr().NDVI.isel(time = 0).plot(ax = ax1)\n",
    "ax2 = plt.subplot(122)\n",
    "jim_class.xr().water.plot(cmap = 'Set2', levels = [1, 2], ax = ax2)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}