-
Notifications
You must be signed in to change notification settings - Fork 0
/
eier_production_price_month_BO_v1.py
107 lines (92 loc) · 3.89 KB
/
eier_production_price_month_BO_v1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
"""
Read csv-File with new values.
Read excel-File with old values.
Compare corresponding values according to Product_Name and Date.
Output results in txt-File.
"""
import pandas as pd
from datetime import datetime
""" set parameters """
# Enter NEW_FILENAME. An abbreviation of it will also be used to
# create the output-file name with the results.
NEW_FILENAME = "F_MARS_708_FACT_VW401_Public_Eggs_Production__Price_Month.csv"
# VERSION will be used to create the output-file name with the results.
VERSION = "v1"
# round to number of decimals.
# There will be as many iterations, as values are given.
ACCURACY = [2, 4]
""" read new file and create a dataframe for each Product_Name """
new_file = pd.read_csv(
f"new/{NEW_FILENAME}", header=0, sep=";"
)
list_of_new_dataframes = []
for product in new_file["Product_Name"].unique():
new_dataframe = new_file.loc[new_file["Product_Name"] == product]
new_dataframe.set_index("YearMonthCode", inplace=True)
new_dataframe = new_dataframe.sort_index()
new_dataframe.Name = product
list_of_new_dataframes.append(new_dataframe)
""" prepare old file
read in the old file. Names of columns had to be changed manually so that
the correspond to the Product_Name in the new files """
old_file = pd.read_excel(
"old/Eggs_Production_Price_Month_Year_OLD.xlsx",
sheet_name="Produzentenpreise Monat Eier",
names=[
"Year",
"Bio, <50g",
"Bio, 50-53g",
"Bio, >53g",
"Bio, 2. Klasse",
"Bio, gewichteter Mittelwert",
"Bodenhaltung, <50g",
"Bodenhaltung, 50-53g",
"Bodenhaltung, >53g",
"Bodenhaltung, 2. Klasse",
"Bodenhaltung, gewichteter Mittelwert",
"Freiland-/Auslaufhaltung, <50g",
"Freiland-/Auslaufhaltung, 50-53g",
"Freiland-/Auslaufhaltung, >53g",
"Freiland-/Auslaufhaltung, 2. Klasse",
"Freiland-/Auslaufhaltung, gewichteter Mittelwert",
"alle Produktionsformen, gewichteter Mittelwert", # "Alle Produktionsformen, CH"
],
header=None,
skiprows=1,
)
old_file["Year"] = pd.to_datetime(old_file["Year"]).dt.strftime("%Y%m")
old_file.set_index("Year", inplace=True)
""" parameters for output-file name """
output_name = NEW_FILENAME[22:-4]
now = datetime.now().replace(microsecond=0).strftime('%Y%m%d%H%M%S')
""" compare values in new and old files with writing result in an output-file """
iter_over = list_of_new_dataframes[0].index.to_list()
with open(f"output/{output_name}_{VERSION}_{now}.txt", "a") as f:
for round_to in ACCURACY:
f.write(
f'{"#"*100}\n\nValues accuracy: Values rounded to {round_to}\n\n{"#"*100}\n\n')
for _, df in enumerate(list_of_new_dataframes):
f.write(f'{"="*20}\n{df.Name}\n\n')
total = 0 # counter to keep track of total entries
correct = 0 # counter to keep track of correct entries
for date in df.index:
date_str = str(date)
try:
total += 1
old = old_file[df.Name].loc[date_str]
old = round(old, round_to)
new = df["KeyIndicator"].loc[date]
new = round(new, round_to)
differenz = (old-new)*100
if old != new:
f.write(
f"{date} : test passed: {old == new}. "
f"Old value: {old}, new value: {new}. "
f"Differenz <old - new> in "
f"Rappen = {differenz}\n"
)
else:
correct += 1
except KeyError as e:
f.write(f"{e} : No Value found.\n")
f.write(f"\nnumber correct entries: {correct} / {total} \n\n")