{ "cells": [ { "cell_type": "markdown", "id": "2008dbe7-d283-4355-99d5-a56df86adb35", "metadata": {}, "source": [ "# Hyeyoung Sim: Clustering Electric Vehicle Charging Pattern with DTW and EV Charging energy demand Prediction with LSTM\n", "[Presentation](http://spatial-ecology.net/docs/source/STUDENTSPROJECTS/Proj_2022_Matera/Clustering_Electric_Vehicle_Charging_Pattern_with_DTW_and_EV_Charging_energy_demand_Prediction_with_LSTM_Hyeyoung_Sim.pdf) \n", "[Video recording](https://youtu.be/-PGvRM6hVtM)" ] }, { "cell_type": "code", "execution_count": 246, "id": "7960f670", "metadata": {}, "outputs": [], "source": [ "# !pip install dtw-python\n", "# !pip install numpy\n", "# !pip install pandas\n", "# !pip install time\n", "# !pip install datetime\n", "# !pip install openpyxl\n", "# !pip install tensorflow\n", "# !pip install seaborn\n", "# !pip install sklearn\n", "# !pip install keras\n", "# !pip install shap" ] }, { "cell_type": "code", "execution_count": 1, "id": "250514d0-79df-4fe8-b447-49575355ba92", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Importing the dtw module. When using in academic works please cite:\n", " T. Giorgino. Computing and Visualizing Dynamic Time Warping Alignments in R: The dtw Package.\n", " J. Stat. Soft., doi:10.18637/jss.v031.i07.\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "2022-06-17 03:50:12.753342: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n", "2022-06-17 03:50:12.753381: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n" ] } ], "source": [ "import time\n", "from datetime import datetime\n", "import numpy as np\n", "import dtw\n", "import pandas as pd\n", "import os\n", "\n", "import matplotlib.pyplot as plt\n", "import tensorflow as tf\n", "import seaborn as sns\n", "from sklearn.metrics import r2_score\n", "from sklearn.metrics import mean_absolute_error , mean_squared_error\n", "from sklearn.preprocessing import MinMaxScaler\n", "from sklearn.preprocessing import LabelEncoder\n", "from keras.callbacks import EarlyStopping\n", "from keras.models import load_model\n", "from keras.callbacks import ModelCheckpoint\n", "\n", "from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization\n", "from tensorflow.keras.models import Sequential" ] }, { "cell_type": "code", "execution_count": 2, "id": "020d726a-8258-4e0b-8d8b-ec300e922550", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/media/sf_LVM_shared/tmpp\n" ] } ], "source": [ "print(os.getcwd())" ] }, { "cell_type": "code", "execution_count": 3, "id": "db08cffd-e455-4f0e-a75b-2fbd30b966e0", "metadata": { "jupyter": { "source_hidden": true }, "tags": [] }, "outputs": [], "source": [ "# print(os.path.realpath('21_keco.xlsx'))\n", "# base_dir = '/media/sf_LVM_shared/tmpp'\n", "# excel_file = '21_keco.xlsx'\n", "# excel_dir = os.path.join(base_dir, excel_file)\n", "\n", "# a = pd.read_excel(excel_dir, sheet_name = '21_1')\n", "# a2 = pd.read_excel(excel_dir, sheet_name = '21_2')\n", "# a3 = pd.read_excel(excel_dir, sheet_name = '21_3')\n", "# a4 = pd.read_excel(excel_dir, sheet_name = '21_4')\n", "# a5 = pd.read_excel(excel_dir, sheet_name = '21_5')\n", "# a6 = pd.read_excel(excel_dir, sheet_name = '21_6')\n", "# a7 = pd.read_excel(excel_dir, sheet_name = '21_7')\n", "# a8 = pd.read_excel(excel_dir, sheet_name = '21_8')\n", "# a9 = pd.read_excel(excel_dir, sheet_name = '21_9')\n", "# a10 = pd.read_excel(excel_dir, sheet_name = '21_10')\n", "# a11 = pd.read_excel(excel_dir, sheet_name = '21_11')\n", "# a12 = pd.read_excel(excel_dir, sheet_name = '21_12')" ] }, { "cell_type": "code", "execution_count": 4, "id": "214c99c6-d84d-48aa-9923-6135c8fa3d41", "metadata": {}, "outputs": [], "source": [ "a1 = pd.read_table('/media/sf_LVM_shared/tmpp/18_keco.txt' , sep = \"/\")\n", "a2 = pd.read_table('/media/sf_LVM_shared/tmpp/19_keco.txt' , sep = \"/\")\n", "a3 = pd.read_table('/media/sf_LVM_shared/tmpp/20_keco.txt' , sep = \"/\")\n", "a4 = pd.read_table('/media/sf_LVM_shared/tmpp/21_keco.txt' , sep = \":\")\n", "a5 = pd.read_table('/media/sf_LVM_shared/tmpp/22_keco.txt' , sep = \"/\")" ] }, { "cell_type": "code", "execution_count": 5, "id": "14ed9f89-1a9d-4732-a559-8b2df4ea030b", "metadata": { "collapsed": true, "jupyter": { "outputs_hidden": true, "source_hidden": true }, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Index(['dc', 'CNM', 'add', 'Max.vol', 'acvol', 'diffhour', 'starttm', 'endtm'], dtype='object')\n", "Index(['dc', 'CNM', 'add', 'Max.vol', 'acvol', 'diffhour', 'starttm', 'endtm'], dtype='object')\n", "Index(['dc', 'CNM', 'add', 'Max.vol', 'acvol', 'diffhour', 'starttm', 'endtm'], dtype='object')\n", "Index(['type', 'dc', 'CNM', 'add', 'Max.vol', 'acvol', 'diffhour', 'diffmin',\n", " 'starttm', 'endtm'],\n", " dtype='object')\n", "Index(['type', 'dc', 'CNM', 'add', 'Max.vol', 'acvol', 'diffhour', 'diffmin',\n", " 'starttm', 'endtm'],\n", " dtype='object')\n" ] } ], "source": [ "print(a1.columns)\n", "print(a2.columns)\n", "print(a3.columns)\n", "print(a4.columns)\n", "print(a5.columns)" ] }, { "cell_type": "code", "execution_count": 6, "id": "73df4f01-94b3-43c4-ac56-35e32841ddd1", "metadata": {}, "outputs": [], "source": [ "a1 = a1[['CNM', 'add', 'Max.vol', 'acvol', 'starttm', 'endtm']]\n", "a2 = a2[['CNM', 'add', 'Max.vol', 'acvol', 'starttm', 'endtm']]\n", "a3 = a3[['CNM', 'add', 'Max.vol', 'acvol', 'starttm', 'endtm']]\n", "a4 = a4[['CNM', 'add', 'Max.vol', 'acvol', 'starttm', 'endtm']]\n", "a5 = a5[['CNM', 'add', 'Max.vol', 'acvol', 'starttm', 'endtm']]" ] }, { "cell_type": "code", "execution_count": 7, "id": "496c2356", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(855137, 6)" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "c = pd.concat([a1,a2,a3,a4,a5])\n", "c.shape" ] }, { "cell_type": "code", "execution_count": 8, "id": "5a7685d6-06ae-4fe9-b562-c1612f523c2a", "metadata": { "collapsed": true, "jupyter": { "outputs_hidden": true, "source_hidden": true }, "tags": [] }, "outputs": [ { "data": { "text/plain": [ "CNM object\n", "add object\n", "Max.vol int64\n", "acvol float64\n", "starttm int64\n", "endtm int64\n", "dtype: object" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "c.dtypes" ] }, { "cell_type": "code", "execution_count": 11, "id": "5191a817-f779-4b3e-84c9-6457f3baa899", "metadata": {}, "outputs": [], "source": [ "c['start'] = c['starttm'].astype(str)\n", "c['end'] = c['endtm'].astype(str)" ] }, { "cell_type": "code", "execution_count": 13, "id": "9b0cdc64-8d8f-4dc6-8b6d-6b3f44aa07e7", "metadata": { "collapsed": true, "jupyter": { "outputs_hidden": true }, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1 2018-01-01 00:17:16+09:00\n", "2 2018-01-01 00:35:24+09:00\n", "3 2018-01-01 00:59:13+09:00\n", "4 2018-01-01 01:06:31+09:00\n", "5 2018-01-01 01:24:24+09:00\n", " ... \n", "125453 2022-03-30 16:22:47+09:00\n", "125454 2022-03-30 16:32:43+09:00\n", "125455 2022-03-30 16:38:09+09:00\n", "125456 2022-03-30 16:30:22+09:00\n", "125457 2022-03-30 16:27:55+09:00\n", "Name: start, Length: 855137, dtype: datetime64[ns, Asia/Seoul]\n" ] } ], "source": [ "c['start'] = pd.to_datetime(c['start'])\n", "c['start'] = c['start'].dt.tz_localize('Asia/Seoul')\n", "print(c['start'])" ] }, { "cell_type": "code", "execution_count": 14, "id": "312dc23c-e27d-4360-b5d6-7cae3b4c6f13", "metadata": {}, "outputs": [], "source": [ "c['endsec'] =c['end'].str.slice(12,14)" ] }, { "cell_type": "code", "execution_count": 15, "id": "b5c3b0a2-882b-40a1-bf7d-23fc602c0934", "metadata": {}, "outputs": [], "source": [ "c['endsec'] = pd.to_numeric(c['endsec'], errors = 'coerce')\n", "c = c[c.endsec <=59]" ] }, { "cell_type": "code", "execution_count": 16, "id": "ad68c489-9c6a-4715-a82d-d6bf803847b7", "metadata": { "collapsed": true, "jupyter": { "outputs_hidden": true }, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1 2018-01-01 00:54:56+09:00\n", "2 2018-01-01 01:09:26+09:00\n", "3 2018-01-01 01:22:13+09:00\n", "4 2018-01-01 01:36:11+09:00\n", "5 2018-01-01 01:35:49+09:00\n", " ... \n", "125453 2022-03-30 16:42:32+09:00\n", "125454 2022-03-30 17:13:06+09:00\n", "125455 2022-03-30 17:13:47+09:00\n", "125456 2022-03-30 17:10:22+09:00\n", "125457 2022-03-30 17:07:55+09:00\n", "Name: end, Length: 851558, dtype: datetime64[ns, Asia/Seoul]\n" ] } ], "source": [ "c['endmin'] = c['end'].str.slice(10,12) #min <=59\n", "c['endmin'] = pd.to_numeric(c['endmin'], errors = 'coerce')\n", "c = c[c.endmin <=59]\n", "\n", "c['end'] = pd.to_datetime(c['end'])\n", "c['end'] = c['end'].dt.tz_localize('Asia/Seoul')\n", "print(c['end'])" ] }, { "cell_type": "code", "execution_count": 17, "id": "51c87f62-45d6-4bfe-a118-72acc246928f", "metadata": { "collapsed": true, "jupyter": { "outputs_hidden": true }, "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/user/SE_data/juplab_env/lib/python3.8/site-packages/pandas/core/arrays/datetimes.py:1162: UserWarning: Converting to PeriodArray/Index representation will drop timezone information.\n", " warnings.warn(\n" ] } ], "source": [ "c['date_time']= c.start.dt.to_period('H')" ] }, { "cell_type": "code", "execution_count": 18, "id": "4aebcea2-f3ca-41f2-ac5b-ba76cc05fe77", "metadata": {}, "outputs": [], "source": [ "c['SIG'] = c['add'].str.slice(6,9)" ] }, { "cell_type": "code", "execution_count": 19, "id": "10fa60c9-66c7-4e31-95d6-3e238dfaa30d", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "200\n", "50\n" ] } ], "source": [ "# print(c.isnull().sum())\n", "print(c['Max.vol'].max())\n", "print(c['Max.vol'].min())" ] }, { "cell_type": "code", "execution_count": 20, "id": "a0aea93f-e230-4823-90cf-8c5d023b5ce1", "metadata": { "collapsed": true, "jupyter": { "outputs_hidden": true }, "tags": [] }, "outputs": [ { "data": { "text/plain": [ "1 0 days 00:37:40\n", "2 0 days 00:34:02\n", "3 0 days 00:23:00\n", "4 0 days 00:29:40\n", "5 0 days 00:11:25\n", "Name: diff, dtype: timedelta64[ns]" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from datetime import datetime\n", "c['diff'] = (c['end'] - c['start'])\n", "c['difhour'] = c['diff'].dt.seconds\n", "c['diff'].head()" ] }, { "cell_type": "code", "execution_count": 21, "id": "0771c171-bc86-4d26-a14d-a90e3cd55c1f", "metadata": { "collapsed": true, "jupyter": { "outputs_hidden": true }, "tags": [] }, "outputs": [ { "data": { "text/plain": [ "1 2260\n", "2 2042\n", "3 1380\n", "4 1780\n", "5 685\n", "Name: difhour, dtype: int64" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "c['difhour'].head()" ] }, { "cell_type": "code", "execution_count": 22, "id": "7cbecf38-1f4a-4653-877f-21c543bf5254", "metadata": { "collapsed": true, "jupyter": { "outputs_hidden": true }, "tags": [] }, "outputs": [ { "data": { "text/plain": [ "1 0.627778\n", "2 0.567222\n", "3 0.383333\n", "4 0.494444\n", "5 0.190278\n", "Name: difhour, dtype: float64" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "c['difhour'] = c['difhour']/3600\n", "c['difhour'].head()" ] }, { "cell_type": "code", "execution_count": 23, "id": "62f21b6e-31d4-4591-81b3-4f3bf2b15a90", "metadata": {}, "outputs": [], "source": [ "c['log_date'] = c['start'].dt.date\n", "c['log_hour']=c['start'].dt.hour\n", "# # print(c['start'].strftime(\"%c\"))\n", "# print(c['log_date'])\n", "# print(c['log_hour'])\n", "# # c['date_time'] = " ] }, { "cell_type": "code", "execution_count": 24, "id": "76db180a-3b5a-4e5a-beed-bd3639a79d63", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "851558\n", "850664\n" ] } ], "source": [ "c['volh'] = c['acvol']/c['difhour']\n", "c['outlier'] = np.where( c['volh']<= c['Max.vol'], 'ok', 'no')\n", "c2 = c[c['outlier'] == 'ok']\n", "print(len(c))\n", "print(len(c2))" ] }, { "cell_type": "code", "execution_count": 25, "id": "9a8c6151-9d88-4ef0-8775-717fbba62013", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "827489\n", "23.988333333333333\n", "0.08333333333333333\n" ] } ], "source": [ "# min hour/ max hour\n", "c2 = c2[(c2['difhour'] >= 0.083333)&(c2['difhour'] <= 24) ] #5/60 = 0.08333\n", "print(len(c2))\n", "print(c2['difhour'].max())\n", "print(c2['difhour'].min())" ] }, { "cell_type": "code", "execution_count": 26, "id": "017e8a84-2e59-429b-aec0-b99887765fe1", "metadata": {}, "outputs": [], "source": [ "c2['year'] = c2['start'].dt.year\n", "c2['month'] = c2['start'].dt.month\n", "c2['day'] = c2['start'].dt.day" ] }, { "cell_type": "code", "execution_count": 27, "id": "b801599f-cf6f-4a4f-8afd-bbc8a7b9b1ef", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1 2.018010e+07\n", "2 2.018010e+07\n", "3 2.018010e+07\n", "4 2.018010e+07\n", "5 2.018010e+07\n", "Name: dateint, dtype: float64\n" ] } ], "source": [ "c2['dateint'] = c2['year']*10000 + c2['month']*100 + c2['day'] + c2['log_hour']/24\n", "print(c2.dateint.head())" ] }, { "cell_type": "code", "execution_count": 28, "id": "9815aa41-3601-4efe-8b12-c82d9b190f41", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['CNM', 'add', 'Max.vol', 'acvol', 'starttm', 'endtm', 'start', 'end',\n", " 'endsec', 'endmin', 'date_time', 'SIG', 'diff', 'difhour', 'log_date',\n", " 'log_hour', 'volh', 'outlier', 'year', 'month', 'day', 'dateint'],\n", " dtype='object')" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "c2.columns" ] }, { "cell_type": "code", "execution_count": 29, "id": "a0aa4b42-57f1-4d73-97f0-da4d9a085d4d", "metadata": {}, "outputs": [], "source": [ "df= c2.drop(['add','Max.vol', 'acvol', 'starttm', 'endtm', 'end', 'diff', 'outlier'], axis=1)" ] }, { "cell_type": "code", "execution_count": 30, "id": "12e42914-8a7e-44e0-abac-017859b6ac51", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "25\n", " CNM start endsec endmin date_time \\\n", "1 볏골공원 지하공영주차장 2018-01-01 00:17:16+09:00 56.0 54 2018-01-01 00:00 \n", "2 노보텔 2018-01-01 00:35:24+09:00 26.0 9 2018-01-01 00:00 \n", "3 종묘 공영주차장 2018-01-01 00:59:13+09:00 13.0 22 2018-01-01 00:00 \n", "4 현대블루핸즈 대치북부점 2018-01-01 01:06:31+09:00 11.0 36 2018-01-01 01:00 \n", "5 노보텔 2018-01-01 01:24:24+09:00 49.0 35 2018-01-01 01:00 \n", "\n", " SIG difhour log_date log_hour volh year month day \\\n", "1 강서구 0.627778 2018-01-01 0 37.529204 2018 1 1 \n", "2 강남구 0.567222 2018-01-01 0 26.515181 2018 1 1 \n", "3 종로구 0.383333 2018-01-01 0 36.652174 2018 1 1 \n", "4 강남구 0.494444 2018-01-01 1 41.420225 2018 1 1 \n", "5 강남구 0.190278 2018-01-01 1 36.367883 2018 1 1 \n", "\n", " dateint \n", "1 2.018010e+07 \n", "2 2.018010e+07 \n", "3 2.018010e+07 \n", "4 2.018010e+07 \n", "5 2.018010e+07 \n" ] } ], "source": [ "print(len(df['SIG'].unique()))\n", "print(df.head()) #FINAL DATA" ] }, { "cell_type": "code", "execution_count": 31, "id": "31bd3a8c-6686-417d-a571-8a37a8aca8f0", "metadata": { "collapsed": true, "jupyter": { "outputs_hidden": true }, "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", " | SIG | \n", "date_time | \n", "volh | \n", "
---|---|---|---|
0 | \n", "강남구 | \n", "2018-01-01 00:00 | \n", "26.515181 | \n", "
1 | \n", "강남구 | \n", "2018-01-01 01:00 | \n", "32.737450 | \n", "
2 | \n", "강남구 | \n", "2018-01-01 02:00 | \n", "28.364020 | \n", "
3 | \n", "강남구 | \n", "2018-01-01 05:00 | \n", "30.652220 | \n", "
4 | \n", "강남구 | \n", "2018-01-01 06:00 | \n", "32.791086 | \n", "