From 9d143399edeebc17ba362ca8bc17925d084d7205 Mon Sep 17 00:00:00 2001
From: Hanzhang Ma <hanzhang@plunder.dbs.ifi.lmu.de>
Date: Mon, 13 May 2024 15:24:44 +0200
Subject: [PATCH] get new intensity file

---
 read_data/convert_data.ipynb | 372 +++++++++++++++++++++++++++++++++++
 1 file changed, 372 insertions(+)
 create mode 100644 read_data/convert_data.ipynb

diff --git a/read_data/convert_data.ipynb b/read_data/convert_data.ipynb
new file mode 100644
index 0000000..1c0e2aa
--- /dev/null
+++ b/read_data/convert_data.ipynb
@@ -0,0 +1,372 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 85,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import matplotlib.pyplot as plt\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import os\n",
+    "import csv"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 86,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def read_csv(filename):\n",
+    "    \"\"\"Read a ';'-separated file, keeping line 0 as the header and skipping lines 1-16.\"\"\"\n",
+    "    # low_memory=False infers each column's dtype from the whole file (no mixed-type DtypeWarning).\n",
+    "    return pd.read_csv(filename, sep=';', skiprows=list(range(1, 17)), low_memory=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 87,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/tmp/ipykernel_3075037/3659192646.py:3: DtypeWarning: Columns (32,33,35) have mixed types. Specify dtype option on import or set low_memory=False.\n",
+      "  data = pd.read_csv(filename, sep=';', skiprows=skip_rows)\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "Index(['Time', 'Irradiance onto horizontal plane ',\n",
+       "       'Diffuse Irradiation onto Horizontal Plane ', 'Outside Temperature ',\n",
+       "       'Module Area 1: Height of Sun ',\n",
+       "       'Module Area 1: Irradiance onto tilted surface ',\n",
+       "       'Module Area 1: Module Temperature ', 'Grid Export ',\n",
+       "       'Energy from Grid ', 'Global radiation - horizontal ',\n",
+       "       'Deviation from standard spectrum ', 'Ground Reflection (Albedo) ',\n",
+       "       'Orientation and inclination of the module surface ', 'Shading ',\n",
+       "       'Reflection on the Module Surface ',\n",
+       "       'Irradiance on the rear side of the module ',\n",
+       "       'Global Radiation at the Module ',\n",
+       "       'Module Area 1: Reflection on the Module Surface ',\n",
+       "       'Module Area 1: Global Radiation at the Module ',\n",
+       "       'Global PV Radiation ', 'Bifaciality ', 'Soiling ',\n",
+       "       'STC Conversion (Rated Efficiency of Module) ', 'Rated PV Energy ',\n",
+       "       'Low-light performance ', 'Module-specific Partial Shading ',\n",
+       "       'Deviation from the nominal module temperature ', 'Diodes ',\n",
+       "       'Mismatch (Manufacturer Information) ',\n",
+       "       'Mismatch (Configuration/Shading) ',\n",
+       "       'Power optimizer (DC conversion/clipping) ',\n",
+       "       'PV Energy (DC) without inverter clipping ',\n",
+       "       'Failing to reach the DC start output ',\n",
+       "       'Clipping on account of the MPP Voltage Range ',\n",
+       "       'Clipping on account of the max. DC Current ',\n",
+       "       'Clipping on account of the max. DC Power ',\n",
+       "       'Clipping on account of the max. AC Power/cos phi ', 'MPP Matching ',\n",
+       "       'PV energy (DC) ',\n",
+       "       'Inverter 1 - MPP 1 - to Module Area 1: PV energy (DC) ',\n",
+       "       'Inverter 1 - MPP 2 - to Module Area 1: PV energy (DC) ',\n",
+       "       'Inverter 1 - MPP 3 - to Module Area 1: PV energy (DC) ',\n",
+       "       'Inverter 1 - MPP 4 - to Module Area 1: PV energy (DC) ',\n",
+       "       'Inverter 1 - MPP 5 - to Module Area 1: PV energy (DC) ',\n",
+       "       'Inverter 1 - MPP 6 - to Module Area 1: PV energy (DC) ',\n",
+       "       'Inverter 2 - MPP 1 - to Module Area 1: PV energy (DC) ',\n",
+       "       'Inverter 2 - MPP 2 - to Module Area 1: PV energy (DC) ',\n",
+       "       'Energy at the Inverter Input ',\n",
+       "       'Input voltage deviates from rated voltage ', 'DC/AC Conversion ',\n",
+       "       'Own Consumption (Standby or Night) ', 'Total Cable Losses ',\n",
+       "       'PV energy (AC) minus standby use ', 'Feed-in energy ',\n",
+       "       'Inverter 1 to Module Area 1: Own Consumption (Standby or Night) ',\n",
+       "       'Inverter 1 to Module Area 1: PV energy (AC) minus standby use ',\n",
+       "       'Inverter 2 to Module Area 1: Own Consumption (Standby or Night) ',\n",
+       "       'Inverter 2 to Module Area 1: PV energy (AC) minus standby use ',\n",
+       "       'Unnamed: 58'],\n",
+       "      dtype='object')"
+      ]
+     },
+     "execution_count": 87,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "\n",
+    "file_name = 'Riyahd_raw.csv'\n",
+    "df = read_csv(file_name)\n",
+    "df.columns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 88,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "remain_column = ['Time','PV energy (AC) minus standby use ']\n",
+    "energy_row_name = remain_column[1]\n",
+    "\n",
+    "df = df[remain_column]\n",
+    "df[energy_row_name] = df[energy_row_name].str.replace(',','.').astype(float)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 89,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "770594.226863267"
+      ]
+     },
+     "execution_count": 89,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "sum_energy = df[energy_row_name].sum()\n",
+    "sum_energy"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 90,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "1975.882632982736"
+      ]
+     },
+     "execution_count": 90,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "sum_energy / 390"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 91,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "group_size = 15\n",
+    "df['group_id'] = df.index // group_size\n",
+    "\n",
+    "sums = df.groupby('group_id')[energy_row_name].sum()\n",
+    "sums_df = sums.reset_index(drop=True).to_frame(name = 'Energy')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 92,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<bound method NDFrame.head of        Energy\n",
+       "0         0.0\n",
+       "1         0.0\n",
+       "2         0.0\n",
+       "3         0.0\n",
+       "4         0.0\n",
+       "...       ...\n",
+       "35035     0.0\n",
+       "35036     0.0\n",
+       "35037     0.0\n",
+       "35038     0.0\n",
+       "35039     0.0\n",
+       "\n",
+       "[35040 rows x 1 columns]>"
+      ]
+     },
+     "execution_count": 92,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "sums_df.head()  # preview the first aggregated rows"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 93,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "                 Time\n",
+      "0 2023-01-01 00:00:00\n",
+      "1 2023-01-01 00:15:00\n",
+      "2 2023-01-01 00:30:00\n",
+      "3 2023-01-01 00:45:00\n",
+      "4 2023-01-01 01:00:00\n",
+      "                     Time\n",
+      "35035 2023-12-31 22:45:00\n",
+      "35036 2023-12-31 23:00:00\n",
+      "35037 2023-12-31 23:15:00\n",
+      "35038 2023-12-31 23:30:00\n",
+      "35039 2023-12-31 23:45:00\n"
+     ]
+    }
+   ],
+   "source": [
+    "\n",
+    "start_date = '2023-01-01'\n",
+    "end_date = '2023-12-31'\n",
+    "\n",
+    "# Generate the days and the 15-minute intervals within each day\n",
+    "all_dates = pd.date_range(start=start_date, end=end_date, freq='D')\n",
+    "all_times = pd.timedelta_range(start='0 min', end='1435 min', freq='15 min')\n",
+    "\n",
+    "# Build the complete timestamps\n",
+    "date_times = [pd.Timestamp(date) + time for date in all_dates for time in all_times]\n",
+    "\n",
+    "# Create the DataFrame\n",
+    "time_frame = pd.DataFrame({\n",
+    "    'Time': date_times\n",
+    "})\n",
+    "\n",
+    "# Inspect the generated DataFrame\n",
+    "print(time_frame.head())\n",
+    "print(time_frame.tail())\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 94,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(35040, 1)\n",
+      "(35040, 1)\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(sums_df.shape)\n",
+    "print(time_frame.shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 95,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# sums_df['Time'] = time_frame['Time']\n",
+    "sums_df = pd.concat([time_frame, sums_df], axis=1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 96,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "                     Energy\n",
+      "Time                       \n",
+      "2023-01-01 00:00:00     0.0\n",
+      "2023-01-01 00:15:00     0.0\n",
+      "2023-01-01 00:30:00     0.0\n",
+      "2023-01-01 00:45:00     0.0\n",
+      "2023-01-01 01:00:00     0.0\n"
+     ]
+    }
+   ],
+   "source": [
+    "sums_df.set_index('Time', inplace=True)\n",
+    "print(sums_df.head())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 97,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "max_value = sums_df['Energy'].max()\n",
+    "sums_df['Energy'] = sums_df['Energy'] / max_value\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 98,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def save_csv(df, filename, columns):\n",
+    "    \"\"\"Write df to filename as CSV: a header row, then one (HH:MM, value) row per index entry.\n",
+    "\n",
+    "    columns[1] names the value column, rounded to 4 decimals; df itself is not modified.\n",
+    "    Only the time of day of each index entry is written; columns[0] is used for the header only.\n",
+    "    \"\"\"\n",
+    "    values = df[columns[1]].round(4)\n",
+    "    with open(filename, 'w', newline='') as file:\n",
+    "        writer = csv.writer(file)\n",
+    "        writer.writerow(columns)\n",
+    "        # Pair each timestamp with its value directly; iterrows() would build a Series per row.\n",
+    "        writer.writerows((stamp.strftime('%H:%M'), value) for stamp, value in zip(values.index, values))\n",
+    "    print(f'The file is written to {filename}')\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 99,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "The file is written to Riyahd.csv\n"
+     ]
+    }
+   ],
+   "source": [
+    "save_csv(sums_df, 'Riyahd.csv', ['Time', 'Energy'])"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "pv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.9"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}