done with convert data
This commit is contained in:
		
							
								
								
									
										75
									
								
								read_data/convert_data.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										75
									
								
								read_data/convert_data.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,75 @@ | |||||||
|  | #!/usr/bin/env python | ||||||
|  | # coding: utf-8 | ||||||
|  |  | ||||||
|  |  | ||||||
|  | import matplotlib.pyplot as plt | ||||||
|  | import pandas as pd | ||||||
|  | import numpy as np | ||||||
|  | import os | ||||||
|  | import csv | ||||||
|  |  | ||||||
def generate_min_df(mins=15):
    """Build a frame of evenly spaced timestamps covering every day of 2023.

    Each day from 2023-01-01 through 2023-12-31 contributes the offsets
    ``0, mins, 2 * mins, ...`` (in minutes) that are strictly below 24 hours,
    so every day starts again at 00:00.

    Args:
        mins: Step between consecutive timestamps within a day, in minutes.
            Must be positive.

    Returns:
        A ``pandas.DataFrame`` with a single ``'Time'`` column, ordered by
        day and then by time of day.

    Raises:
        ValueError: If ``mins`` is not positive.
    """
    if mins <= 0:
        raise ValueError(f'mins must be positive, got {mins!r}')

    start_date = '2023-01-01'
    end_date = '2023-12-31'

    all_dates = pd.date_range(start=start_date, end=end_date, freq='D')
    # Derive the last offset from ``mins`` instead of hard-coding 1435 minutes,
    # which silently dropped the final slots of each day for steps below 5.
    all_times = pd.to_timedelta(np.arange(0, 24 * 60, mins), unit='min')

    # Broadcast days (rows) against intra-day offsets (columns); the row-major
    # ravel keeps the "for each date, for each time" ordering.
    date_times = (all_dates.to_numpy()[:, None] + all_times.to_numpy()[None, :]).ravel()

    return pd.DataFrame({'Time': date_times})
|  |  | ||||||
def save_csv(df, filename, columns):
    """Write one column of ``df`` to ``filename`` as a two-column CSV.

    The file starts with the fixed header ``Time,PV yield[kW/kWp]``. It is
    followed by one line per row of ``df``: the index entry formatted as
    ``HH:MM`` (the date part is not written) and the value taken from the
    column named ``columns[1]``.

    Args:
        df: Frame whose index entries provide ``strftime`` (for example a
            ``DatetimeIndex``).
        filename: Path of the file to create or overwrite.
        columns: Sequence whose second item names the value column of ``df``.
            The first item is not used.
    """
    values = df[columns[1]]
    # Iterate the value column directly: ``iterrows`` builds a Series per row
    # and upcasts mixed dtypes, which would turn integer values into floats.
    rows = (
        (stamp.strftime('%H:%M'), value)
        for stamp, value in zip(df.index, values)
    )

    with open(filename, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Time', 'PV yield[kW/kWp]'])
        writer.writerows(rows)

    # Report only after the file has been closed.
    print(f'The file is written to {filename}')
|  |  | ||||||
def read_csv(filename):
    """Load a semicolon-separated file, dropping the 16 lines after the header.

    Line 0 of the file is used as the header; file lines 1 through 16 are
    skipped (presumably a metadata preamble in the export -- confirm against
    the raw files) and everything after that is returned as data.

    Args:
        filename: Path of the file to read.

    Returns:
        The parsed ``pandas.DataFrame``.
    """
    first_skipped = 1
    last_skipped = 16
    rows_to_skip = list(range(first_skipped, last_skipped + 1))
    return pd.read_csv(filename, skiprows=rows_to_skip, sep=';')
|  |  | ||||||
def process(file_name):
    """Aggregate one raw export into 15-row sums and write ``<city>.csv``.

    Steps:
      1. Read ``file_name`` with ``read_csv`` and keep the ``'Time'`` and
         ``'PV energy (AC) minus standby use '`` columns.
      2. Convert the energy column from decimal-comma text to float.
      3. Sum the energy over consecutive blocks of 15 rows.
      4. Pair the sums, in order, with the 15-minute timestamps from
         ``generate_min_df(15)``.
      5. Divide by 390, round to 4 decimals, and write the result with
         ``save_csv`` to ``f'{city}.csv'``, where ``city`` is the part of
         ``file_name`` before its first underscore.

    Args:
        file_name: Path of the raw file, e.g. ``'Berlin_raw.csv'``.

    Raises:
        ValueError: If the input yields more 15-row groups than there are
            15-minute slots in the generated time frame.
    """
    df = read_csv(file_name)
    city = file_name.split('_')[0]

    remain_column = ['Time', 'PV energy (AC) minus standby use ']
    energy_row_name = remain_column[1]

    # Work on an independent copy so the column assignment below does not
    # write into a slice of the frame returned by read_csv.
    df = df[remain_column].copy()

    energy = df[energy_row_name]
    if not pd.api.types.is_numeric_dtype(energy):
        # Values use a decimal comma; ``.str`` is only valid on text columns,
        # so skip this when pandas already parsed the column as numbers.
        energy = energy.str.replace(',', '.', regex=False)
    df[energy_row_name] = energy.astype(float)

    # Sum each run of ``group_size`` consecutive rows, by row position.
    # NOTE(review): this matches the 15-minute grid below only if the input
    # has one row per minute -- confirm against the raw files.
    group_size = 15
    group_ids = np.arange(len(df)) // group_size
    sums = df.groupby(group_ids)[energy_row_name].sum()
    sums_df = sums.reset_index(drop=True).to_frame(name='Energy')

    time_frame = generate_min_df(15)
    if len(sums_df) > len(time_frame):
        # Extra groups would get a missing timestamp and fail later while the
        # output file is being written; stop with a clear message instead.
        raise ValueError(
            f'{file_name}: {len(sums_df)} groups of {group_size} rows exceed '
            f'the {len(time_frame)} available time slots'
        )

    sums_df = pd.concat([time_frame, sums_df], axis=1)
    sums_df = sums_df.set_index('Time')

    # NOTE(review): 390 is presumably the normalisation behind the
    # 'PV yield[kW/kWp]' output header -- confirm its meaning.
    sums_df['Energy'] = (sums_df['Energy'] / 390.).round(4)

    # NOTE(review): the original called ``sums_df['Energy'].replace(0.0, -0.0)``
    # here and discarded the result, so it had no effect. It is omitted rather
    # than guessing which zero normalisation was intended.

    save_csv(sums_df, f'{city}.csv', ['Time', 'Energy'])
|  |  | ||||||
if __name__ == '__main__':
    # Convert each city's '<city>_raw.csv'; the relative file names are
    # resolved against the current working directory.
    for city in ('Riyahd', 'Cambodge', 'Berlin', 'Serbia'):
        print(f'Processing {city}')
        process(f'{city}_raw.csv')
        print(f'Processing {city} is done\n')
|  |  | ||||||
		Reference in New Issue
	
	Block a user