def read_csv(filename, skip_rows=None, sep=';'):
    """Load a delimited export file into a DataFrame.

    Args:
        filename: Path or file-like object accepted by ``pandas.read_csv``.
        skip_rows: Zero-based row numbers to drop before parsing. When
            omitted, rows 1-16 are skipped, i.e. the 16 rows directly
            below the header row (presumably unit/metadata rows of the
            export -- TODO confirm against the raw file).
        sep: Field delimiter. Defaults to ``';'``.

    Returns:
        pandas.DataFrame: The parsed table. No decimal-separator
        conversion is applied, so values written with a decimal comma
        are returned as strings.
    """
    if skip_rows is None:
        skip_rows = list(range(1, 17))

    # low_memory=False makes pandas infer each column's dtype from the
    # whole file instead of chunk by chunk, which avoids the
    # "Columns (...) have mixed types" DtypeWarning on large exports.
    return pd.read_csv(
        filename,
        sep=sep,
        skiprows=skip_rows,
        low_memory=False,
    )
# --- Load the raw export ---------------------------------------------------
file_name = 'Riyahd_raw.csv'
df = read_csv(file_name)
df.columns  # notebook cell display: list the available columns

# --- Keep only the timestamp and the AC energy column ----------------------
remain_column = ['Time', 'PV energy (AC) minus standby use ']
energy_row_name = remain_column[1]

# .copy() gives an independent frame, so the column assignment below does
# not write into a slice of the original frame (SettingWithCopyWarning).
df = df[remain_column].copy()
# The export uses a decimal comma; swap it for a dot before casting to
# float. regex=False makes the replacement an explicit literal one.
df[energy_row_name] = (
    df[energy_row_name].str.replace(',', '.', regex=False).astype(float)
)

# --- Totals ----------------------------------------------------------------
sum_energy = df[energy_row_name].sum()
sum_energy  # notebook cell display

# NOTE(review): 390 is an unexplained constant (possibly the installed
# capacity used to get a specific yield) -- confirm and name it.
sum_energy / 390  # notebook cell display

# --- Aggregate every `group_size` consecutive rows -------------------------
# Relies on the default 0..n-1 integer index from read_csv; presumably the
# raw data is 1-minute resolution, so 15 rows make one 15-minute bin.
group_size = 15
df['group_id'] = df.index // group_size

sums = df.groupby('group_id')[energy_row_name].sum()
sums_df = sums.reset_index(drop=True).to_frame(name='Energy')

# The original evaluated `sums_df.head` without parentheses, which only
# shows the bound method; call it to preview the first rows.
sums_df.head()

# --- Build the 15-minute time axis for the whole year ----------------------
start_date = '2023-01-01'
end_date = '2023-12-31'

# Generate the 15-minute offsets within one day. With a 15-minute step the
# last offset produced before the '1435 min' end is 1425 min (23:45).
all_dates = pd.date_range(start=start_date, end=end_date, freq='D')
all_times = pd.timedelta_range(start='0 min', end='1435 min', freq='15 min')

# Generate the complete list of timestamps (every day x every offset).
date_times = [pd.Timestamp(date) + time for date in all_dates for time in all_times]

# Create the DataFrame holding the time axis.
time_frame = pd.DataFrame({
    'Time': date_times
})

# Inspect the generated DataFrame.
print(time_frame.head())
def save_csv(df, filename, columns, time_format='%H:%M'):
    """Write one column of a time-indexed frame to a two-column CSV file.

    The header row is ``columns`` as given. Each data row holds the index
    entry formatted with ``time_format`` followed by the value of
    ``columns[1]`` rounded to 4 decimal places. ``df`` is not modified.

    Args:
        df: DataFrame whose index entries provide ``strftime`` (for
            example a ``DatetimeIndex``) and which has a column named
            ``columns[1]``.
        filename: Destination path; an existing file is overwritten.
        columns: Header labels. ``columns[1]`` also selects the value
            column to export; ``columns[0]`` is only used as a label.
        time_format: ``strftime`` pattern for the first CSV field.
            Defaults to ``'%H:%M'``, which drops the date part.

    Raises:
        IndexError: If ``columns`` has fewer than two entries.
        KeyError: If ``columns[1]`` is not a column of ``df``.
    """
    value_column = columns[1]

    # Round only the exported column. Iterating the column itself (rather
    # than DataFrame.iterrows) keeps its dtype: iterrows casts every row
    # to one common dtype, so an integer column next to float columns
    # would be written as 1.0 instead of 1.
    rounded = df[value_column].round(4)

    # Format entry by entry so any index whose elements have strftime works.
    stamps = [stamp.strftime(time_format) for stamp in df.index]

    # newline='' lets the csv module control line endings itself.
    with open(filename, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(columns)
        writer.writerows(zip(stamps, rounded))

    print(f'The file is written to {filename}')
'Riyahd.csv', ['Time', 'Energy'])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "pv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}