# import libraries
import numpy as np
import matplotlib as mpl
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Load the World Cup match records from the CSV file into a DataFrame.
df = pd.read_csv(filepath_or_buffer="WCD.csv")
# Preview the first 50 rows (shown as the cell output when run in a notebook).
df.head(n=50)
Year | Datetime | Stage | Stadium | City | Home Team Name | Home Team Goals | Away Team Goals | Away Team Name | Win conditions | Attendance | Half-time Home Goals | Half-time Away Goals | Referee | Assistant 1 | Assistant 2 | RoundID | MatchID | Home Team Initials | Away Team Initials | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1930 | 13 Jul 1930 - 15:00 | Group 1 | Pocitos | Montevideo | France | 4 | 1 | Mexico | 4444.0 | 3 | 0 | LOMBARDI Domingo (URU) | CRISTOPHE Henry (BEL) | REGO Gilberto (BRA) | 201 | 1096 | FRA | MEX | |
1 | 1930 | 13 Jul 1930 - 15:00 | Group 4 | Parque Central | Montevideo | USA | 3 | 0 | Belgium | 18346.0 | 2 | 0 | MACIAS Jose (ARG) | MATEUCCI Francisco (URU) | WARNKEN Alberto (CHI) | 201 | 1090 | USA | BEL | |
2 | 1930 | 14 Jul 1930 - 12:45 | Group 2 | Parque Central | Montevideo | Yugoslavia | 2 | 1 | Brazil | 24059.0 | 2 | 0 | TEJADA Anibal (URU) | VALLARINO Ricardo (URU) | BALWAY Thomas (FRA) | 201 | 1093 | YUG | BRA | |
3 | 1930 | 14 Jul 1930 - 14:50 | Group 3 | Pocitos | Montevideo | Romania | 3 | 1 | Peru | 2549.0 | 1 | 0 | WARNKEN Alberto (CHI) | LANGENUS Jean (BEL) | MATEUCCI Francisco (URU) | 201 | 1098 | ROU | PER | |
4 | 1930 | 15 Jul 1930 - 16:00 | Group 1 | Parque Central | Montevideo | Argentina | 1 | 0 | France | 23409.0 | 0 | 0 | REGO Gilberto (BRA) | SAUCEDO Ulises (BOL) | RADULESCU Constantin (ROU) | 201 | 1085 | ARG | FRA | |
5 | 1930 | 16 Jul 1930 - 14:45 | Group 1 | Parque Central | Montevideo | Chile | 3 | 0 | Mexico | 9249.0 | 1 | 0 | CRISTOPHE Henry (BEL) | APHESTEGUY Martin (URU) | LANGENUS Jean (BEL) | 201 | 1095 | CHI | MEX | |
6 | 1930 | 17 Jul 1930 - 12:45 | Group 2 | Parque Central | Montevideo | Yugoslavia | 4 | 0 | Bolivia | 18306.0 | 0 | 0 | MATEUCCI Francisco (URU) | LOMBARDI Domingo (URU) | WARNKEN Alberto (CHI) | 201 | 1092 | YUG | BOL | |
7 | 1930 | 17 Jul 1930 - 14:45 | Group 4 | Parque Central | Montevideo | USA | 3 | 0 | Paraguay | 18306.0 | 2 | 0 | MACIAS Jose (ARG) | APHESTEGUY Martin (URU) | TEJADA Anibal (URU) | 201 | 1097 | USA | PAR | |
8 | 1930 | 18 Jul 1930 - 14:30 | Group 3 | Estadio Centenario | Montevideo | Uruguay | 1 | 0 | Peru | 57735.0 | 0 | 0 | LANGENUS Jean (BEL) | BALWAY Thomas (FRA) | CRISTOPHE Henry (BEL) | 201 | 1099 | URU | PER | |
9 | 1930 | 19 Jul 1930 - 12:50 | Group 1 | Estadio Centenario | Montevideo | Chile | 1 | 0 | France | 2000.0 | 0 | 0 | TEJADA Anibal (URU) | LOMBARDI Domingo (URU) | REGO Gilberto (BRA) | 201 | 1094 | CHI | FRA | |
10 | 1930 | 19 Jul 1930 - 15:00 | Group 1 | Estadio Centenario | Montevideo | Argentina | 6 | 3 | Mexico | 42100.0 | 3 | 1 | SAUCEDO Ulises (BOL) | ALONSO Gualberto (URU) | RADULESCU Constantin (ROU) | 201 | 1086 | ARG | MEX | |
11 | 1930 | 20 Jul 1930 - 13:00 | Group 2 | Estadio Centenario | Montevideo | Brazil | 4 | 0 | Bolivia | 25466.0 | 1 | 0 | BALWAY Thomas (FRA) | MATEUCCI Francisco (URU) | VALLEJO Gaspar (MEX) | 201 | 1091 | BRA | BOL | |
12 | 1930 | 20 Jul 1930 - 15:00 | Group 4 | Estadio Centenario | Montevideo | Paraguay | 1 | 0 | Belgium | 12000.0 | 1 | 0 | VALLARINO Ricardo (URU) | MACIAS Jose (ARG) | LOMBARDI Domingo (URU) | 201 | 1089 | PAR | BEL | |
13 | 1930 | 21 Jul 1930 - 14:50 | Group 3 | Estadio Centenario | Montevideo | Uruguay | 4 | 0 | Romania | 70022.0 | 4 | 0 | REGO Gilberto (BRA) | WARNKEN Alberto (CHI) | SAUCEDO Ulises (BOL) | 201 | 1100 | URU | ROU | |
14 | 1930 | 22 Jul 1930 - 14:45 | Group 1 | Estadio Centenario | Montevideo | Argentina | 3 | 1 | Chile | 41459.0 | 2 | 1 | LANGENUS Jean (BEL) | CRISTOPHE Henry (BEL) | SAUCEDO Ulises (BOL) | 201 | 1084 | ARG | CHI | |
15 | 1930 | 26 Jul 1930 - 14:45 | Semi-finals | Estadio Centenario | Montevideo | Argentina | 6 | 1 | USA | 72886.0 | 1 | 0 | LANGENUS Jean (BEL) | VALLEJO Gaspar (MEX) | WARNKEN Alberto (CHI) | 202 | 1088 | ARG | USA | |
16 | 1930 | 27 Jul 1930 - 14:45 | Semi-finals | Estadio Centenario | Montevideo | Uruguay | 6 | 1 | Yugoslavia | 79867.0 | 3 | 1 | REGO Gilberto (BRA) | SAUCEDO Ulises (BOL) | BALWAY Thomas (FRA) | 202 | 1101 | URU | YUG | |
17 | 1930 | 30 Jul 1930 - 14:15 | Final | Estadio Centenario | Montevideo | Uruguay | 4 | 2 | Argentina | 68346.0 | 1 | 2 | LANGENUS Jean (BEL) | SAUCEDO Ulises (BOL) | CRISTOPHE Henry (BEL) | 405 | 1087 | URU | ARG | |
18 | 1934 | 27 May 1934 - 16:30 | Preliminary round | Stadio Benito Mussolini | Turin | Austria | 3 | 2 | France | Austria win after extra time | 16000.0 | 0 | 0 | VAN MOORSEL Johannes (NED) | CAIRONI Camillo (ITA) | BAERT Louis (BEL) | 204 | 1104 | AUT | FRA |
19 | 1934 | 27 May 1934 - 16:30 | Preliminary round | Giorgio Ascarelli | Naples | Hungary | 4 | 2 | Egypt | 9000.0 | 2 | 2 | BARLASSINA Rinaldo (ITA) | DATTILO Generoso (ITA) | SASSI Otello (ITA) | 204 | 1119 | HUN | EGY | |
20 | 1934 | 27 May 1934 - 16:30 | Preliminary round | San Siro | Milan | Switzerland | 3 | 2 | Netherlands | 33000.0 | 2 | 1 | EKLIND Ivan (SWE) | BERANEK Alois (AUT) | BONIVENTO Ferruccio (ITA) | 204 | 1133 | SUI | NED | |
21 | 1934 | 27 May 1934 - 16:30 | Preliminary round | Littorale | Bologna | Sweden | 3 | 2 | Argentina | 14000.0 | 1 | 1 | BRAUN Eugen (AUT) | CARRARO Albino (ITA) | TURBIANI Giuseppe (ITA) | 204 | 1102 | SWE | ARG | |
22 | 1934 | 27 May 1934 - 16:30 | Preliminary round | Giovanni Berta | Florence | Germany | 5 | 2 | Belgium | 8000.0 | 1 | 2 | MATTEA Francesco (ITA) | MELANDRI Ermenegildo (ITA) | BAERT Jacques (FRA) | 204 | 1108 | GER | BEL | |
23 | 1934 | 27 May 1934 - 16:30 | Preliminary round | Luigi Ferraris | Genoa | Spain | 3 | 1 | Brazil | 21000.0 | 3 | 0 | BIRLEM Alfred (GER) | CARMINATI Ettore (ITA) | IVANCSICS Mihaly (HUN) | 204 | 1111 | ESP | BRA | |
24 | 1934 | 27 May 1934 - 16:30 | Preliminary round | Nazionale PNF | Rome | Italy | 7 | 1 | USA | 25000.0 | 3 | 0 | MERCET Rene (SUI) | ESCARTIN Pedro (ESP) | ZENISEK Bohumil (TCH) | 204 | 1135 | ITA | USA | |
25 | 1934 | 27 May 1934 - 16:30 | Preliminary round | Littorio | Trieste | Czechoslovakia | 2 | 1 | Romania | 9000.0 | 0 | 1 | LANGENUS Jean (BEL) | SCARPI Giuseppe (ITA) | SCORZONI Raffaele (ITA) | 204 | 1141 | TCH | ROU | |
26 | 1934 | 31 May 1934 - 16:30 | Quarter-finals | Stadio Benito Mussolini | Turin | Czechoslovakia | 3 | 2 | Switzerland | 12000.0 | 1 | 1 | BERANEK Alois (AUT) | MOHAMED Youssuf (EGY) | BAERT Jacques (FRA) | 418 | 1143 | TCH | SUI | |
27 | 1934 | 31 May 1934 - 16:30 | Quarter-finals | San Siro | Milan | Germany | 2 | 1 | Sweden | 3000.0 | 0 | 0 | BARLASSINA Rinaldo (ITA) | MERCET Rene (SUI) | VAN MOORSEL Johannes (NED) | 418 | 1129 | GER | SWE | |
28 | 1934 | 31 May 1934 - 16:30 | Quarter-finals | Giovanni Berta | Florence | Italy | 1 | 1 | Spain | 35000.0 | 0 | 0 | BAERT Louis (BEL) | ZENISEK Bohumil (TCH) | IVANCSICS Mihaly (HUN) | 418 | 1122 | ITA | ESP | |
29 | 1934 | 31 May 1934 - 16:30 | Quarter-finals | Littorale | Bologna | Austria | 2 | 1 | Hungary | 23000.0 | 1 | 0 | MATTEA Francesco (ITA) | ESCARTIN Pedro (ESP) | BIRLEM Alfred (GER) | 418 | 1106 | AUT | HUN | |
30 | 1934 | 01 Jun 1934 - 16:30 | Quarter-finals | Giovanni Berta | Florence | Italy | 1 | 0 | Spain | 43000.0 | 1 | 0 | MERCET Rene (SUI) | IVANCSICS Mihaly (HUN) | ZENISEK Bohumil (TCH) | 418 | 1123 | ITA | ESP | |
31 | 1934 | 03 Jun 1934 - 16:30 | Semi-finals | San Siro | Milan | Italy | 1 | 0 | Austria | 35000.0 | 1 | 0 | EKLIND Ivan (SWE) | BAERT Louis (BEL) | ZENISEK Bohumil (TCH) | 3492 | 1107 | ITA | AUT | |
32 | 1934 | 03 Jun 1934 - 16:30 | Semi-finals | Nazionale PNF | Rome | Czechoslovakia | 3 | 1 | Germany | 15000.0 | 1 | 0 | BARLASSINA Rinaldo (ITA) | BERANEK Alois (AUT) | ESCARTIN Pedro (ESP) | 3492 | 1130 | TCH | GER | |
33 | 1934 | 07 Jun 1934 - 18:00 | Match for third place | Giorgio Ascarelli | Naples | Germany | 3 | 2 | Austria | 7000.0 | 3 | 1 | CARRARO Albino (ITA) | CAIRONI Camillo (ITA) | ESCARTIN Pedro (ESP) | 3491 | 1105 | GER | AUT | |
34 | 1934 | 10 Jun 1934 - 17:30 | Final | Nazionale PNF | Rome | Italy | 2 | 1 | Czechoslovakia | Italy win after extra time | 55000.0 | 0 | 0 | EKLIND Ivan (SWE) | BAERT Louis (BEL) | IVANCSICS Mihaly (HUN) | 3490 | 1134 | ITA | TCH |
35 | 1938 | 04 Jun 1938 - 17:00 | First round | Parc des Princes | Paris | Switzerland | 1 | 1 | Germany | 27152.0 | 0 | 0 | LANGENUS Jean (BEL) | MARENCO Paul (FRA) | VAN MOORSEL Johannes (NED) | 206 | 1165 | SUI | GER | |
36 | 1938 | 05 Jun 1938 - 17:00 | First round | Velodrome Municipale | Reims | Hungary | 6 | 0 | Dutch East Indies | 9000.0 | 4 | 0 | CONRIE Roger (FRA) | DE LA SALLE Charles (FRA) | WEINGARTNER Karl (AUT) | 206 | 1173 | HUN | INH | |
37 | 1938 | 05 Jun 1938 - 17:00 | First round | Stade Olympique | Colombes | France | 3 | 1 | Belgium | 30454.0 | 2 | 1 | WUETHRICH Hans (SUI) | KRIST Gustav (TCH) | BIRLEM Alfred (GER) | 206 | 1146 | FRA | BEL | |
38 | 1938 | 05 Jun 1938 - 17:00 | First round | Stade Municipal | Toulouse | Cuba | 3 | 3 | Romania | 7000.0 | 0 | 0 | SCARPI Giuseppe (ITA) | VALPREDE Ferdinand (FRA) | MERKCX Jean (FRA) | 206 | 1156 | CUB | ROU | |
39 | 1938 | 05 Jun 1938 - 17:00 | First round | Stade V�lodrome | Marseilles | Italy | 2 | 1 | Norway | Italy win after extra time | 19000.0 | 0 | 0 | BERANEK Alois (AUT) | BOUTOURE D. (FRA) | TREHOU D. (FRA) | 206 | 1179 | ITA | NOR |
40 | 1938 | 05 Jun 1938 - 17:30 | First round | Stade de la Meinau | Strasbourg | Brazil | 6 | 5 | Poland | Brazil win after extra time | 13452.0 | 0 | 0 | EKLIND Ivan (SWE) | POISSANT Louis (FRA) | KISSENBERGER Ernest (FRA) | 206 | 1150 | BRA | POL |
41 | 1938 | 05 Jun 1938 - 18:30 | First round | Cavee Verte | Le Havre | Czechoslovakia | 3 | 0 | Netherlands | Czechoslovakia win after extra time | 11000.0 | 0 | 0 | LECLERCQ Lucien (FRA) | OLIVE D. (FRA) | SDEZ Victor (FRA) | 206 | 1172 | TCH | NED |
42 | 1938 | 09 Jun 1938 - 18:00 | First round | Stade Municipal | Toulouse | Cuba | 2 | 1 | Romania | 8000.0 | 0 | 1 | BIRLEM Alfred (GER) | CAPDEVILLE Pierre (FRA) | MARENCO Paul (FRA) | 206 | 1157 | CUB | ROU | |
43 | 1938 | 09 Jun 1938 - 18:00 | First round | Parc des Princes | Paris | Switzerland | 4 | 2 | Germany | 20025.0 | 1 | 2 | EKLIND Ivan (SWE) | BAERT Louis (BEL) | VAN MOORSEL Johannes (NED) | 206 | 1166 | SUI | GER | |
44 | 1938 | 12 Jun 1938 - 17:00 | Quarter-finals | Stade du Parc Lescure | Bordeaux | Brazil | 1 | 1 | Czechoslovakia | 22021.0 | 0 | 0 | VON HERTZKA Pal (HUN) | SCARPI Giuseppe (ITA) | DE LA SALLE Charles (FRA) | 429 | 1152 | BRA | TCH | |
45 | 1938 | 12 Jun 1938 - 17:00 | Quarter-finals | Victor Boucquey | Lille | Hungary | 2 | 0 | Switzerland | 15000.0 | 1 | 0 | BARLASSINA Rinaldo (ITA) | BERANEK Alois (AUT) | BOUTOURE D. (FRA) | 429 | 1175 | HUN | SUI | |
46 | 1938 | 12 Jun 1938 - 17:00 | Quarter-finals | Fort Carree | Antibes | Sweden | 8 | 0 | Cuba | 7000.0 | 4 | 0 | KRIST Gustav (TCH) | WEINGARTNER Karl (AUT) | SDEZ Victor (FRA) | 429 | 1158 | SWE | CUB | |
47 | 1938 | 12 Jun 1938 - 17:00 | Quarter-finals | Stade Olympique | Colombes | Italy | 3 | 1 | France | 58455.0 | 1 | 1 | BAERT Louis (BEL) | WUETHRICH Hans (SUI) | EKLIND Ivan (SWE) | 429 | 1164 | ITA | FRA | |
48 | 1938 | 14 Jun 1938 - 18:00 | Quarter-finals | Stade du Parc Lescure | Bordeaux | Brazil | 2 | 1 | Czechoslovakia | 18141.0 | 0 | 1 | CAPDEVILLE Georges (FRA) | MARENCO Paul (FRA) | KISSENBERGER Ernest (FRA) | 429 | 1153 | BRA | TCH | |
49 | 1938 | 16 Jun 1938 - 18:00 | Semi-finals | Parc des Princes | Paris | Hungary | 5 | 1 | Sweden | 20000.0 | 3 | 1 | LECLERCQ Lucien (FRA) | VAN MOORSEL Johannes (NED) | SCARPI Giuseppe (ITA) | 3489 | 1176 | HUN | SWE |
# Goals per match: home-side goals plus away-side goals.
df["Total Goals"] = df["Home Team Goals"].add(df["Away Team Goals"])

# Box plot showing how per-match goal totals are distributed in each
# tournament year (one box per value of "Year").
sns.set_context("poster", font_scale=1.2)
sns.set_style("whitegrid")
f, ax = plt.subplots(figsize=(23, 12))
# Draw onto the axes created above; columns are looked up by name in `df`.
sns.boxplot(x="Year", y="Total Goals", data=df, palette="Spectral", ax=ax)
ax.set(
    ylabel="Total Goals Per Game",
    title="Distribution Of Goals Scored In World Cup Matches By Year",
)
[Text(0,0.5,'Total Goals Per Game'), Text(0.5,1,'Distribution Of Goals Scored In World Cup Matches By Year')]
# Goals per match: home-side goals plus away-side goals.
# (Recomputed here so this cell can be run on its own.)
df["Total Goals"] = df["Home Team Goals"].add(df["Away Team Goals"])

# Violin plot of the same per-match goal totals, one violin per
# tournament year, to show the shape of each year's distribution.
sns.set_context("poster", font_scale=1.2)
sns.set_style("whitegrid")
f, ax = plt.subplots(figsize=(23, 12))
# Draw onto the axes created above; columns are looked up by name in `df`.
sns.violinplot(x="Year", y="Total Goals", data=df, palette="Spectral", ax=ax)
ax.set(
    ylabel="Total Goals Per Game",
    title="Distribution Of Goals Scored In World Cup Matches By Year",
)
[Text(0,0.5,'Total Goals Per Game'), Text(0.5,1,'Distribution Of Goals Scored In World Cup Matches By Year')]
# Goals scored after the interval, per side: final score minus half-time score.
# NOTE(review): for matches that went to extra time this difference presumably
# also counts extra-time goals - confirm against the data before relying on it.
df["Second Half Home Goals"] = df["Home Team Goals"].sub(df["Half-time Home Goals"])
df["Second Half Away Goals"] = df["Away Team Goals"].sub(df["Half-time Away Goals"])
# Preview the first rows to check the new columns.
df.head()
Year | Datetime | Stage | Stadium | City | Home Team Name | Home Team Goals | Away Team Goals | Away Team Name | Win conditions | ... | Referee | Assistant 1 | Assistant 2 | RoundID | MatchID | Home Team Initials | Away Team Initials | Total Goals | Second Half Home Goals | Second Half Away Goals | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1930 | 13 Jul 1930 - 15:00 | Group 1 | Pocitos | Montevideo | France | 4 | 1 | Mexico | ... | LOMBARDI Domingo (URU) | CRISTOPHE Henry (BEL) | REGO Gilberto (BRA) | 201 | 1096 | FRA | MEX | 5 | 1 | 1 | |
1 | 1930 | 13 Jul 1930 - 15:00 | Group 4 | Parque Central | Montevideo | USA | 3 | 0 | Belgium | ... | MACIAS Jose (ARG) | MATEUCCI Francisco (URU) | WARNKEN Alberto (CHI) | 201 | 1090 | USA | BEL | 3 | 1 | 0 | |
2 | 1930 | 14 Jul 1930 - 12:45 | Group 2 | Parque Central | Montevideo | Yugoslavia | 2 | 1 | Brazil | ... | TEJADA Anibal (URU) | VALLARINO Ricardo (URU) | BALWAY Thomas (FRA) | 201 | 1093 | YUG | BRA | 3 | 0 | 1 | |
3 | 1930 | 14 Jul 1930 - 14:50 | Group 3 | Pocitos | Montevideo | Romania | 3 | 1 | Peru | ... | WARNKEN Alberto (CHI) | LANGENUS Jean (BEL) | MATEUCCI Francisco (URU) | 201 | 1098 | ROU | PER | 4 | 2 | 1 | |
4 | 1930 | 15 Jul 1930 - 16:00 | Group 1 | Parque Central | Montevideo | Argentina | 1 | 0 | France | ... | REGO Gilberto (BRA) | SAUCEDO Ulises (BOL) | RADULESCU Constantin (ROU) | 201 | 1085 | ARG | FRA | 1 | 1 | 0 |
5 rows × 23 columns
# Per-half totals for each match: home-side goals plus away-side goals.
df["Second Half Goals"] = df["Second Half Home Goals"] + df["Second Half Away Goals"]
df["First Half Goals"] = df["Half-time Home Goals"] + df["Half-time Away Goals"]
# The match total is the sum of BOTH halves, which keeps it equal to
# home goals + away goals. (Adding the first half to itself would double
# the first-half count and ignore the second half entirely.)
df["Total Goals"] = df["First Half Goals"] + df["Second Half Goals"]
# Narrow view with only the columns needed for the first/second-half analysis.
df_slice = df[["Year", "First Half Goals", "Second Half Goals", "Total Goals"]]
df_slice.head()
Year | First Half Goals | Second Half Goals | Total Goals | |
---|---|---|---|---|
0 | 1930 | 3 | 2 | 6 |
1 | 1930 | 2 | 1 | 4 |
2 | 1930 | 2 | 1 | 4 |
3 | 1930 | 1 | 3 | 2 |
4 | 1930 | 0 | 1 | 0 |
# Reshape the per-match half totals into long form: two rows per match
# (first half, then second half), so the half can be used as a plot hue.
half_columns = ["First Half Goals", "Second Half Goals"]
half_labels = ["First", "Second"]
goals_df = pd.DataFrame({
    # .values has shape (n_matches, 2); ravel() reads it row by row, so each
    # match's first-half count is immediately followed by its second-half count.
    "Goals": df_slice[half_columns].values.ravel(),
    # Repeat every match's year once per half so it stays aligned with "Goals".
    "Year": np.repeat(df_slice["Year"].values, len(half_labels)),
    # "First", "Second", "First", "Second", ... - one label pair per match.
    "Half": np.tile(half_labels, len(df_slice)),
})
goals_df.head()
Goals | Half | Year | |
---|---|---|---|
0 | 3 | First | 1930 |
1 | 2 | Second | 1930 |
2 | 2 | First | 1930 |
3 | 1 | Second | 1930 |
4 | 2 | First | 1930 |
# Split violin plot: for each tournament year, the distribution of goals
# scored in a single half, with first-half and second-half goals drawn as
# the two sides of the same violin.
sns.set_context("poster", font_scale=1.5)
sns.set_style("whitegrid")
f, ax = plt.subplots(figsize=(30, 15))
# Draw onto the axes created above; hue="Half" + split=True puts the two
# halves on opposite sides of each year's violin.
sns.violinplot(data=goals_df, x="Year", y="Goals", hue="Half", split=True, palette="Set1", ax=ax)
# Each plotted value is the goal count for one half of one match, so the
# axis is labelled per half rather than as a per-game total.
ax.set(ylabel = "Goals Per Half", title = "Distribution Of Goals Scored In World Cup Matches By Year-Split By First And Second Half Goals ")
[Text(0,0.5,'Total Goals Per Game'), Text(0.5,1,'Distribution Of Goals Scored In World Cup Matches By Year-Split By First And Second Half Goals ')]