"""wucc_chemistry.py: scrape WUCC 2022 results from results.wfdf.sport.

Builds CSV files with the list of teams, each team's total goals, and how
often each pair of players connected in the endzone ("power duos").
"""
import requests
from bs4 import BeautifulSoup
import pandas as pd
############### GET LIST OF TEAMS ###############
def get_teams_from_rows(rows):
    """Collect the first-cell text of every row that has data cells.

    Args:
        rows: Iterable of row elements exposing ``find_all('td')``, e.g. the
            ``<tr>`` tags of a BeautifulSoup table.

    Returns:
        A ``pandas.Series`` holding, in row order, the ``.text`` of the first
        ``<td>`` of each row. Rows with no ``<td>`` cells are skipped.
    """
    cells_per_row = (row.find_all('td') for row in rows)
    # An empty cell list is falsy, so rows without <td> cells drop out here.
    return pd.Series([cells[0].text for cells in cells_per_row if cells])
def list_of_teams():
    """Scrape the WUCC 2022 team list and save it as ``team_list.csv``.

    Downloads the "all teams" page, reads the team names out of three of its
    tables and writes them side by side, one column per division, to
    ``team_list.csv`` in the current working directory.

    Raises:
        requests.RequestException: If the download fails, times out, or the
            server answers with an HTTP error status.
        IndexError: If the page has fewer tables than expected.
    """
    page_url = 'https://results.wfdf.sport/wucc/?view=teams&season=WUCC2022&list=allteams'
    # The timeout stops a stalled connection from hanging the script forever;
    # raise_for_status reports an HTTP error as such instead of letting it
    # show up later as a missing table.
    response = requests.get(page_url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    tables = soup.find_all('table')

    # Output column name -> position of that division's table on the page.
    # NOTE(review): the positions are tied to the site's page layout; confirm
    # them if the output looks wrong.
    table_positions = {
        'open teams': 7,
        'women\'s teams': 8,
        'mixed teams': 9,
    }
    team_columns = [
        get_teams_from_rows(tables[position].find_all('tr'))
        for position in table_positions.values()
    ]

    df = pd.concat(team_columns, axis=1)
    df.columns = list(table_positions)
    df.to_csv('team_list.csv')
############### GET TEAM TOTAL GOALS ###############
def get_team_total(team_num):
    """Fetch one team card and add up the numbers in its fourth column.

    Args:
        team_num: Team id placed in the ``team=`` query parameter.

    Returns:
        A ``(name, total_goals)`` tuple. ``name`` is the page's ``<h1>`` text
        with its last space-separated word removed. ``total_goals`` is the
        sum of the fourth ``<td>`` of every row with more than three cells in
        the page's eighth table.

    Raises:
        requests.RequestException: If the download fails, times out, or the
            server answers with an HTTP error status.
        ValueError: If a fourth cell does not hold an integer.
    """
    page_url = f'https://results.wfdf.sport/wucc/?view=teamcard&team={team_num}'
    # Bound the wait and surface HTTP errors explicitly rather than as a
    # confusing parse failure further down.
    response = requests.get(page_url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    # NOTE(review): table index 7 and cell index 3 (presumably the per-player
    # goals column) are tied to the site's page layout; confirm on a live page.
    table = soup.find_all('table')[7]
    name = ' '.join(soup.find('h1').text.split(' ')[:-1])

    cells_per_row = (row.find_all('td') for row in table.find_all('tr'))
    total_goals = sum(
        int(cells[3].text) for cells in cells_per_row if len(cells) > 3
    )
    return name, total_goals
def get_all_team_totals(first_team=1, last_team=129, out_path='team_goals.csv'):
    """Collect the goal total of every team id in a range and save a CSV.

    Args:
        first_team: First team id to fetch (inclusive).
        last_team: Last team id to fetch (inclusive).
        out_path: Destination of the CSV with columns ``team`` and
            ``total goals``.

    With the default arguments this fetches ids 1 through 129 and writes
    ``team_goals.csv``.
    """
    # NOTE(review): totals are keyed by team name, so two ids that resolve to
    # the same name keep only the later total. Confirm names are unique.
    master_dict = {}
    for i in range(first_team, last_team + 1):
        print(f'getting goals {i} of {last_team}')
        name, goals = get_team_total(i)
        master_dict[name] = goals

    # Passing the column names to the constructor also works for an empty
    # result, where assigning two names afterwards would raise ValueError.
    df = pd.DataFrame(list(master_dict.items()), columns=['team', 'total goals'])
    df.to_csv(out_path)
############### GET TEAM GIVEN PEOPLE ###############
def get_team(team1, team2, team1_table, team2_table, person1, person2):
    """Work out which team a pair of people belongs to.

    A person matches a team when every whitespace-separated part of their
    name except the first occurs in that team's table text. ``person1`` is
    tried before ``person2``, and ``team1`` before ``team2``; the first match
    wins. A person whose name has fewer than two parts is never matched.

    Args:
        team1: Value returned when a person matches ``team1_table``.
        team2: Value returned when a person matches ``team2_table``.
        team1_table: Text searched with ``in`` for the first team
            (``get_pairings`` passes the roster table's HTML as a string).
        team2_table: Same, for the second team.
        person1: Name of the first person.
        person2: Name of the second person.

    Returns:
        ``team1``, ``team2``, or the string ``"Unknown"`` when neither person
        can be matched to either table.
    """
    candidates = ((team1, team1_table), (team2, team2_table))
    for person in (person1, person2):
        # The first name part is ignored; only the remaining parts are looked up.
        later_names = person.split()[1:]
        if not later_names:
            continue
        for team, table_text in candidates:
            if all(part in table_text for part in later_names):
                return team
    return "Unknown"
############### GET ENDZONE PAIRINGS ###############
def _split_match_title(title):
    """Return ``(team1, team2)`` from a 'team1 - team2<gap>...' page title."""
    # Splitting the title on a single plain space can never leave a piece that
    # still contains ' - ', so that approach always failed to find the second
    # team. Cut at the first gap instead, where a gap is any whitespace other
    # than one plain space (non-breaking space, tab, newline) or a run of two
    # or more spaces.
    # NOTE(review): assumes the site separates the team names from the rest of
    # the title with such a gap; confirm against a live game page.
    widened = ''.join(
        '  ' if ch.isspace() and ch != ' ' else ch for ch in title
    )
    names = widened.strip().split('  ')[0]
    team1, dash, team2 = names.partition(' - ')
    if not dash:
        raise ValueError(f'cannot find two team names in page title {title!r}')
    return team1, team2


def get_pairings(page_url):
    """Scrape one game page and list the pairs of people in its event rows.

    Args:
        page_url: URL of a game page.

    Returns:
        A list of ``(person_a, person_b, team)`` tuples, one per row of the
        second-to-last table that has at least three cells. The two names
        are put in ascending string order, and ``team`` is whatever
        ``get_team`` returns for them (a team name or ``"Unknown"``).

    Raises:
        requests.RequestException: If the download fails, times out, or the
            server answers with an HTTP error status.
        ValueError: If the page title does not contain two team names.
        IndexError: If the page has fewer tables than expected.
    """
    # Bound the wait and surface HTTP errors as such.
    response = requests.get(page_url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    team1, team2 = _split_match_title(soup.find('h1').text)

    # NOTE(review): the table positions below are tied to the site's page
    # layout; confirm them on a live page.
    tables = soup.find_all('table')
    rows = tables[-2].find_all('tr')
    nested_tables = tables[7].find_all('table')
    team1_table = str(nested_tables[1])
    team2_table = str(nested_tables[3])

    pairings = []
    for row in rows:
        entries = row.find_all('td')
        if len(entries) < 3:
            continue
        # [:-1] drops the last character of each cell's text (presumably a
        # trailing separator; verify).
        person1 = entries[1].text[:-1]
        person2 = entries[2].text[:-1]
        team = get_team(team1, team2, team1_table, team2_table, person1, person2)
        # Order the names so (a, b) and (b, a) count as the same pairing.
        first, second = sorted((person1, person2))
        pairings.append((first, second, team))
    return pairings
############### GET CSV OF POWER DUOS ###############
def generate_duos_csv(first_game=1, last_game=652, out_path='wucc_duos.csv'):
    """Count how often each pairing occurs across games and save a CSV.

    Every game id in the range is fetched through ``get_pairings``. A game
    that raises an error or yields no pairings is recorded as failed and
    skipped. The CSV has the columns ``person 1``, ``person 2``, ``team`` and
    ``# connections``, sorted by connections in descending order. The list of
    failed game ids is printed at the end.

    Args:
        first_game: First game id to fetch (inclusive).
        last_game: Last game id to fetch (inclusive).
        out_path: Destination of the CSV.

    With the default arguments this fetches games 1 through 652 and writes
    ``wucc_duos.csv``.
    """
    master_dict = {}
    failed_games = []
    for i in range(first_game, last_game + 1):
        print(f'working on {i} out of {last_game}...')
        page_url = f'https://results.wfdf.sport/wucc/?view=gameplay&game={i}'
        try:
            pairings = get_pairings(page_url)
        except Exception as exc:
            # Best effort: one bad page must not abort the whole run. Catching
            # Exception rather than everything keeps Ctrl-C working.
            print(f'game {i} failed: {exc!r}')
            pairings = []
        if not pairings:
            failed_games.append(i)
        for pairing in pairings:
            master_dict[pairing] = master_dict.get(pairing, 0) + 1

    # Building the rows explicitly keeps the four columns even when no
    # pairings were collected at all.
    records = [pairing + (count,) for pairing, count in master_dict.items()]
    df = pd.DataFrame(
        records, columns=['person 1', 'person 2', 'team', '# connections']
    )
    df = df.sort_values(by='# connections', ascending=False)
    df.to_csv(out_path)
    print(failed_games)
############### MAIN ###############
# Script entry point: running this file directly builds the power-duos CSV.
if __name__ == '__main__':
    generate_duos_csv()