#!/usr/bin/env python
# coding: utf-8
"""Convert raw per-city PV export files into time-of-day yield CSV files.

For every city the script reads ``<city>_raw.csv`` (semicolon separated,
decimal commas), sums the energy column over blocks of consecutive rows,
pairs each sum with a generated timestamp, scales and rounds the values and
writes ``<city>.csv`` with the columns ``Time`` and ``PV yield[kW/kWp]``.
"""
# Source: read_data/convert_data.py as created by the patch
# "done with convert data" (commit 33871fb).

import csv
import os

import numpy as np
import pandas as pd

try:
    # Not referenced by the conversion code below; imported optionally so the
    # script still runs on machines where matplotlib is not installed.
    import matplotlib.pyplot as plt
except ImportError:
    plt = None

# Name of the energy column in the raw export (the trailing space is part of
# the header as it appears in the file).
ENERGY_COLUMN = 'PV energy (AC) minus standby use '

# Number of consecutive raw rows summed into one output value.
ROWS_PER_INTERVAL = 15

# Spacing, in minutes, of the generated output timestamps.
# NOTE(review): presumably the raw export has one row per minute, so that
# ROWS_PER_INTERVAL rows span exactly INTERVAL_MINUTES -- confirm.
INTERVAL_MINUTES = 15

# Divisor applied to every aggregated energy value before rounding.
# NOTE(review): presumably combines the unit conversion and the installed
# kWp of the simulated plant (the output header says kW/kWp) -- confirm.
DEFAULT_SCALE = 390.0


def generate_min_df(mins=15, start_date='2023-01-01', end_date='2023-12-31'):
    """Return a frame of evenly spaced timestamps for every day in a range.

    Each day starts again at 00:00 and contains every multiple of ``mins``
    minutes that lies strictly before 24:00.

    Args:
        mins: Step between two timestamps of the same day, in minutes.
        start_date: First day to generate (inclusive).
        end_date: Last day to generate (inclusive).

    Returns:
        A DataFrame with a single ``'Time'`` column, ordered day by day.

    Raises:
        ValueError: If ``mins`` is not positive.
    """
    if mins <= 0:
        raise ValueError(f'mins must be positive, got {mins!r}')

    all_dates = pd.date_range(start=start_date, end=end_date, freq='D')

    # Generate offsets up to one full day and drop the 24:00 endpoint, so the
    # last offset is correct for every step size (a fixed end of 1435 minutes
    # would lose the final minutes of the day for steps shorter than 5 min).
    all_times = pd.timedelta_range(start='0 min', end='1 day', freq=f'{mins} min')
    all_times = all_times[all_times < pd.Timedelta(days=1)]

    # Broadcast dates (rows) against offsets (columns); ravel() walks the
    # result row by row, i.e. all times of day 1, then all times of day 2, ...
    date_times = (all_dates.to_numpy()[:, None] + all_times.to_numpy()[None, :]).ravel()

    return pd.DataFrame({'Time': date_times})


def save_csv(df, filename, columns):
    """Write one column of ``df`` to a CSV file keyed by time of day.

    The header row is always ``Time, PV yield[kW/kWp]``. Each following row
    holds the index entry formatted as ``HH:MM`` (the date part is not
    written) and the matching value.

    Args:
        df: Frame whose index entries support ``strftime``.
        filename: Destination path; an existing file is overwritten.
        columns: Sequence whose second item names the column of ``df`` to
            export. The first item is not used.
    """
    values = df[columns[1]]
    with open(filename, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Time', 'PV yield[kW/kWp]'])
        writer.writerows(
            (stamp.strftime('%H:%M'), value)
            for stamp, value in zip(df.index, values)
        )

    print(f'The file is written to {filename}')


def read_csv(filename):
    """Read a raw semicolon-separated export.

    The first line of the file is used as the header; the 16 lines after it
    are skipped before the data rows start.

    Args:
        filename: Path of the raw export.

    Returns:
        The parsed DataFrame.
    """
    skip_rows = list(range(1, 17))
    return pd.read_csv(filename, sep=';', skiprows=skip_rows)


def process(file_name, scale=DEFAULT_SCALE):
    """Convert one raw export into ``<city>.csv`` next to the input file.

    The city name is the part of the file's base name before the first
    underscore (``Berlin_raw.csv`` -> ``Berlin``).

    Args:
        file_name: Path of the raw export, e.g. ``'Berlin_raw.csv'``.
        scale: Divisor applied to every aggregated value before rounding to
            four decimals.

    Returns:
        The path of the CSV file that was written.

    Raises:
        ValueError: If the raw file yields more aggregated rows than there
            are generated timestamps.
    """
    df = read_csv(file_name)

    # Derive the city from the base name only, so an underscore in a parent
    # directory cannot truncate the output path.
    directory, base_name = os.path.split(file_name)
    city = base_name.split('_')[0]
    output_name = os.path.join(directory, f'{city}.csv')

    # Values use a decimal comma. astype(str) first, because pandas parses a
    # column without any comma as numeric and .str would then fail.
    energy = (
        df[ENERGY_COLUMN]
        .astype(str)
        .str.replace(',', '.', regex=False)
        .astype(float)
    )

    # Sum every block of ROWS_PER_INTERVAL consecutive rows.
    group_ids = np.arange(len(energy)) // ROWS_PER_INTERVAL
    sums = energy.groupby(group_ids).sum()
    sums_df = sums.reset_index(drop=True).to_frame(name='Energy')

    time_frame = generate_min_df(INTERVAL_MINUTES)
    if len(sums_df) > len(time_frame):
        # Surplus rows would get a NaT timestamp and fail later with an
        # unrelated-looking error while formatting the time.
        raise ValueError(
            f'{file_name}: {len(sums_df)} aggregated rows but only '
            f'{len(time_frame)} timestamps'
        )

    # Positional pairing: row i of the sums belongs to timestamp i. When the
    # raw file is shorter than the time frame the remaining values are NaN.
    result = pd.concat([time_frame, sums_df], axis=1).set_index('Time')
    result['Energy'] = (result['Energy'] / scale).round(4)

    # NOTE(review): the original called result['Energy'].replace(0.0, -0.0)
    # here and discarded the returned Series, so it never changed the data.
    # It was dropped to keep the output identical; if zero normalisation is
    # wanted it has to be assigned back explicitly.

    save_csv(result, output_name, ['Time', 'Energy'])
    return output_name


def main():
    """Convert the raw export of every configured city."""
    city_list = ['Riyahd', 'Cambodge', 'Berlin', 'Serbia']
    for city in city_list:
        print(f'Processing {city}')
        file_name = f'{city}_raw.csv'
        process(file_name)
        print(f'Processing {city} is done\n')


if __name__ == '__main__':
    main()