In [2]:
# import libraries
import numpy as np
import matplotlib as mpl
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
In [3]:
# Import csv
df = pd.read_csv("WCD.csv")
df.head(50)
Out[3]:
Year Datetime Stage Stadium City Home Team Name Home Team Goals Away Team Goals Away Team Name Win conditions Attendance Half-time Home Goals Half-time Away Goals Referee Assistant 1 Assistant 2 RoundID MatchID Home Team Initials Away Team Initials
0 1930 13 Jul 1930 - 15:00 Group 1 Pocitos Montevideo France 4 1 Mexico 4444.0 3 0 LOMBARDI Domingo (URU) CRISTOPHE Henry (BEL) REGO Gilberto (BRA) 201 1096 FRA MEX
1 1930 13 Jul 1930 - 15:00 Group 4 Parque Central Montevideo USA 3 0 Belgium 18346.0 2 0 MACIAS Jose (ARG) MATEUCCI Francisco (URU) WARNKEN Alberto (CHI) 201 1090 USA BEL
2 1930 14 Jul 1930 - 12:45 Group 2 Parque Central Montevideo Yugoslavia 2 1 Brazil 24059.0 2 0 TEJADA Anibal (URU) VALLARINO Ricardo (URU) BALWAY Thomas (FRA) 201 1093 YUG BRA
3 1930 14 Jul 1930 - 14:50 Group 3 Pocitos Montevideo Romania 3 1 Peru 2549.0 1 0 WARNKEN Alberto (CHI) LANGENUS Jean (BEL) MATEUCCI Francisco (URU) 201 1098 ROU PER
4 1930 15 Jul 1930 - 16:00 Group 1 Parque Central Montevideo Argentina 1 0 France 23409.0 0 0 REGO Gilberto (BRA) SAUCEDO Ulises (BOL) RADULESCU Constantin (ROU) 201 1085 ARG FRA
5 1930 16 Jul 1930 - 14:45 Group 1 Parque Central Montevideo Chile 3 0 Mexico 9249.0 1 0 CRISTOPHE Henry (BEL) APHESTEGUY Martin (URU) LANGENUS Jean (BEL) 201 1095 CHI MEX
6 1930 17 Jul 1930 - 12:45 Group 2 Parque Central Montevideo Yugoslavia 4 0 Bolivia 18306.0 0 0 MATEUCCI Francisco (URU) LOMBARDI Domingo (URU) WARNKEN Alberto (CHI) 201 1092 YUG BOL
7 1930 17 Jul 1930 - 14:45 Group 4 Parque Central Montevideo USA 3 0 Paraguay 18306.0 2 0 MACIAS Jose (ARG) APHESTEGUY Martin (URU) TEJADA Anibal (URU) 201 1097 USA PAR
8 1930 18 Jul 1930 - 14:30 Group 3 Estadio Centenario Montevideo Uruguay 1 0 Peru 57735.0 0 0 LANGENUS Jean (BEL) BALWAY Thomas (FRA) CRISTOPHE Henry (BEL) 201 1099 URU PER
9 1930 19 Jul 1930 - 12:50 Group 1 Estadio Centenario Montevideo Chile 1 0 France 2000.0 0 0 TEJADA Anibal (URU) LOMBARDI Domingo (URU) REGO Gilberto (BRA) 201 1094 CHI FRA
10 1930 19 Jul 1930 - 15:00 Group 1 Estadio Centenario Montevideo Argentina 6 3 Mexico 42100.0 3 1 SAUCEDO Ulises (BOL) ALONSO Gualberto (URU) RADULESCU Constantin (ROU) 201 1086 ARG MEX
11 1930 20 Jul 1930 - 13:00 Group 2 Estadio Centenario Montevideo Brazil 4 0 Bolivia 25466.0 1 0 BALWAY Thomas (FRA) MATEUCCI Francisco (URU) VALLEJO Gaspar (MEX) 201 1091 BRA BOL
12 1930 20 Jul 1930 - 15:00 Group 4 Estadio Centenario Montevideo Paraguay 1 0 Belgium 12000.0 1 0 VALLARINO Ricardo (URU) MACIAS Jose (ARG) LOMBARDI Domingo (URU) 201 1089 PAR BEL
13 1930 21 Jul 1930 - 14:50 Group 3 Estadio Centenario Montevideo Uruguay 4 0 Romania 70022.0 4 0 REGO Gilberto (BRA) WARNKEN Alberto (CHI) SAUCEDO Ulises (BOL) 201 1100 URU ROU
14 1930 22 Jul 1930 - 14:45 Group 1 Estadio Centenario Montevideo Argentina 3 1 Chile 41459.0 2 1 LANGENUS Jean (BEL) CRISTOPHE Henry (BEL) SAUCEDO Ulises (BOL) 201 1084 ARG CHI
15 1930 26 Jul 1930 - 14:45 Semi-finals Estadio Centenario Montevideo Argentina 6 1 USA 72886.0 1 0 LANGENUS Jean (BEL) VALLEJO Gaspar (MEX) WARNKEN Alberto (CHI) 202 1088 ARG USA
16 1930 27 Jul 1930 - 14:45 Semi-finals Estadio Centenario Montevideo Uruguay 6 1 Yugoslavia 79867.0 3 1 REGO Gilberto (BRA) SAUCEDO Ulises (BOL) BALWAY Thomas (FRA) 202 1101 URU YUG
17 1930 30 Jul 1930 - 14:15 Final Estadio Centenario Montevideo Uruguay 4 2 Argentina 68346.0 1 2 LANGENUS Jean (BEL) SAUCEDO Ulises (BOL) CRISTOPHE Henry (BEL) 405 1087 URU ARG
18 1934 27 May 1934 - 16:30 Preliminary round Stadio Benito Mussolini Turin Austria 3 2 France Austria win after extra time 16000.0 0 0 VAN MOORSEL Johannes (NED) CAIRONI Camillo (ITA) BAERT Louis (BEL) 204 1104 AUT FRA
19 1934 27 May 1934 - 16:30 Preliminary round Giorgio Ascarelli Naples Hungary 4 2 Egypt 9000.0 2 2 BARLASSINA Rinaldo (ITA) DATTILO Generoso (ITA) SASSI Otello (ITA) 204 1119 HUN EGY
20 1934 27 May 1934 - 16:30 Preliminary round San Siro Milan Switzerland 3 2 Netherlands 33000.0 2 1 EKLIND Ivan (SWE) BERANEK Alois (AUT) BONIVENTO Ferruccio (ITA) 204 1133 SUI NED
21 1934 27 May 1934 - 16:30 Preliminary round Littorale Bologna Sweden 3 2 Argentina 14000.0 1 1 BRAUN Eugen (AUT) CARRARO Albino (ITA) TURBIANI Giuseppe (ITA) 204 1102 SWE ARG
22 1934 27 May 1934 - 16:30 Preliminary round Giovanni Berta Florence Germany 5 2 Belgium 8000.0 1 2 MATTEA Francesco (ITA) MELANDRI Ermenegildo (ITA) BAERT Jacques (FRA) 204 1108 GER BEL
23 1934 27 May 1934 - 16:30 Preliminary round Luigi Ferraris Genoa Spain 3 1 Brazil 21000.0 3 0 BIRLEM Alfred (GER) CARMINATI Ettore (ITA) IVANCSICS Mihaly (HUN) 204 1111 ESP BRA
24 1934 27 May 1934 - 16:30 Preliminary round Nazionale PNF Rome Italy 7 1 USA 25000.0 3 0 MERCET Rene (SUI) ESCARTIN Pedro (ESP) ZENISEK Bohumil (TCH) 204 1135 ITA USA
25 1934 27 May 1934 - 16:30 Preliminary round Littorio Trieste Czechoslovakia 2 1 Romania 9000.0 0 1 LANGENUS Jean (BEL) SCARPI Giuseppe (ITA) SCORZONI Raffaele (ITA) 204 1141 TCH ROU
26 1934 31 May 1934 - 16:30 Quarter-finals Stadio Benito Mussolini Turin Czechoslovakia 3 2 Switzerland 12000.0 1 1 BERANEK Alois (AUT) MOHAMED Youssuf (EGY) BAERT Jacques (FRA) 418 1143 TCH SUI
27 1934 31 May 1934 - 16:30 Quarter-finals San Siro Milan Germany 2 1 Sweden 3000.0 0 0 BARLASSINA Rinaldo (ITA) MERCET Rene (SUI) VAN MOORSEL Johannes (NED) 418 1129 GER SWE
28 1934 31 May 1934 - 16:30 Quarter-finals Giovanni Berta Florence Italy 1 1 Spain 35000.0 0 0 BAERT Louis (BEL) ZENISEK Bohumil (TCH) IVANCSICS Mihaly (HUN) 418 1122 ITA ESP
29 1934 31 May 1934 - 16:30 Quarter-finals Littorale Bologna Austria 2 1 Hungary 23000.0 1 0 MATTEA Francesco (ITA) ESCARTIN Pedro (ESP) BIRLEM Alfred (GER) 418 1106 AUT HUN
30 1934 01 Jun 1934 - 16:30 Quarter-finals Giovanni Berta Florence Italy 1 0 Spain 43000.0 1 0 MERCET Rene (SUI) IVANCSICS Mihaly (HUN) ZENISEK Bohumil (TCH) 418 1123 ITA ESP
31 1934 03 Jun 1934 - 16:30 Semi-finals San Siro Milan Italy 1 0 Austria 35000.0 1 0 EKLIND Ivan (SWE) BAERT Louis (BEL) ZENISEK Bohumil (TCH) 3492 1107 ITA AUT
32 1934 03 Jun 1934 - 16:30 Semi-finals Nazionale PNF Rome Czechoslovakia 3 1 Germany 15000.0 1 0 BARLASSINA Rinaldo (ITA) BERANEK Alois (AUT) ESCARTIN Pedro (ESP) 3492 1130 TCH GER
33 1934 07 Jun 1934 - 18:00 Match for third place Giorgio Ascarelli Naples Germany 3 2 Austria 7000.0 3 1 CARRARO Albino (ITA) CAIRONI Camillo (ITA) ESCARTIN Pedro (ESP) 3491 1105 GER AUT
34 1934 10 Jun 1934 - 17:30 Final Nazionale PNF Rome Italy 2 1 Czechoslovakia Italy win after extra time 55000.0 0 0 EKLIND Ivan (SWE) BAERT Louis (BEL) IVANCSICS Mihaly (HUN) 3490 1134 ITA TCH
35 1938 04 Jun 1938 - 17:00 First round Parc des Princes Paris Switzerland 1 1 Germany 27152.0 0 0 LANGENUS Jean (BEL) MARENCO Paul (FRA) VAN MOORSEL Johannes (NED) 206 1165 SUI GER
36 1938 05 Jun 1938 - 17:00 First round Velodrome Municipale Reims Hungary 6 0 Dutch East Indies 9000.0 4 0 CONRIE Roger (FRA) DE LA SALLE Charles (FRA) WEINGARTNER Karl (AUT) 206 1173 HUN INH
37 1938 05 Jun 1938 - 17:00 First round Stade Olympique Colombes France 3 1 Belgium 30454.0 2 1 WUETHRICH Hans (SUI) KRIST Gustav (TCH) BIRLEM Alfred (GER) 206 1146 FRA BEL
38 1938 05 Jun 1938 - 17:00 First round Stade Municipal Toulouse Cuba 3 3 Romania 7000.0 0 0 SCARPI Giuseppe (ITA) VALPREDE Ferdinand (FRA) MERKCX Jean (FRA) 206 1156 CUB ROU
39 1938 05 Jun 1938 - 17:00 First round Stade V�lodrome Marseilles Italy 2 1 Norway Italy win after extra time 19000.0 0 0 BERANEK Alois (AUT) BOUTOURE D. (FRA) TREHOU D. (FRA) 206 1179 ITA NOR
40 1938 05 Jun 1938 - 17:30 First round Stade de la Meinau Strasbourg Brazil 6 5 Poland Brazil win after extra time 13452.0 0 0 EKLIND Ivan (SWE) POISSANT Louis (FRA) KISSENBERGER Ernest (FRA) 206 1150 BRA POL
41 1938 05 Jun 1938 - 18:30 First round Cavee Verte Le Havre Czechoslovakia 3 0 Netherlands Czechoslovakia win after extra time 11000.0 0 0 LECLERCQ Lucien (FRA) OLIVE D. (FRA) SDEZ Victor (FRA) 206 1172 TCH NED
42 1938 09 Jun 1938 - 18:00 First round Stade Municipal Toulouse Cuba 2 1 Romania 8000.0 0 1 BIRLEM Alfred (GER) CAPDEVILLE Pierre (FRA) MARENCO Paul (FRA) 206 1157 CUB ROU
43 1938 09 Jun 1938 - 18:00 First round Parc des Princes Paris Switzerland 4 2 Germany 20025.0 1 2 EKLIND Ivan (SWE) BAERT Louis (BEL) VAN MOORSEL Johannes (NED) 206 1166 SUI GER
44 1938 12 Jun 1938 - 17:00 Quarter-finals Stade du Parc Lescure Bordeaux Brazil 1 1 Czechoslovakia 22021.0 0 0 VON HERTZKA Pal (HUN) SCARPI Giuseppe (ITA) DE LA SALLE Charles (FRA) 429 1152 BRA TCH
45 1938 12 Jun 1938 - 17:00 Quarter-finals Victor Boucquey Lille Hungary 2 0 Switzerland 15000.0 1 0 BARLASSINA Rinaldo (ITA) BERANEK Alois (AUT) BOUTOURE D. (FRA) 429 1175 HUN SUI
46 1938 12 Jun 1938 - 17:00 Quarter-finals Fort Carree Antibes Sweden 8 0 Cuba 7000.0 4 0 KRIST Gustav (TCH) WEINGARTNER Karl (AUT) SDEZ Victor (FRA) 429 1158 SWE CUB
47 1938 12 Jun 1938 - 17:00 Quarter-finals Stade Olympique Colombes Italy 3 1 France 58455.0 1 1 BAERT Louis (BEL) WUETHRICH Hans (SUI) EKLIND Ivan (SWE) 429 1164 ITA FRA
48 1938 14 Jun 1938 - 18:00 Quarter-finals Stade du Parc Lescure Bordeaux Brazil 2 1 Czechoslovakia 18141.0 0 1 CAPDEVILLE Georges (FRA) MARENCO Paul (FRA) KISSENBERGER Ernest (FRA) 429 1153 BRA TCH
49 1938 16 Jun 1938 - 18:00 Semi-finals Parc des Princes Paris Hungary 5 1 Sweden 20000.0 3 1 LECLERCQ Lucien (FRA) VAN MOORSEL Johannes (NED) SCARPI Giuseppe (ITA) 3489 1176 HUN SWE
In [4]:
# Add total goals column
df["Total Goals"] = df["Home Team Goals"] + df["Away Team Goals"]

# Plot average number of goals scored per game by year
sns.set_context("poster", font_scale=1.2)
sns.set_style("whitegrid")
f, ax = plt.subplots(figsize=(23,12))
sns.boxplot(data=df,x=df["Year"], y="Total Goals", palette="Spectral")

ax.set(ylabel = "Total Goals Per Game", title = "Distribution Of Goals Scored In World Cup Matches By Year")
Out[4]:
[Text(0,0.5,'Total Goals Per Game'),
 Text(0.5,1,'Distribution Of Goals Scored In World Cup Matches By Year')]
In [8]:
# Add total goals column
df["Total Goals"] = df["Home Team Goals"] + df["Away Team Goals"]

# Plot average number of goals scored per game by year
sns.set_context("poster", font_scale=1.2)
sns.set_style("whitegrid")
f, ax = plt.subplots(figsize=(23,12))
sns.violinplot(data=df,x=df["Year"], y="Total Goals", palette="Spectral")

ax.set(ylabel = "Total Goals Per Game", title = "Distribution Of Goals Scored In World Cup Matches By Year")
Out[8]:
[Text(0,0.5,'Total Goals Per Game'),
 Text(0.5,1,'Distribution Of Goals Scored In World Cup Matches By Year')]
In [9]:
# Half-time Home Goals
df["Second Half Home Goals"] = df["Home Team Goals"] - df["Half-time Home Goals"]
df["Second Half Away Goals"] = df["Away Team Goals"] - df["Half-time Away Goals"]


df.head()
Out[9]:
Year Datetime Stage Stadium City Home Team Name Home Team Goals Away Team Goals Away Team Name Win conditions ... Referee Assistant 1 Assistant 2 RoundID MatchID Home Team Initials Away Team Initials Total Goals Second Half Home Goals Second Half Away Goals
0 1930 13 Jul 1930 - 15:00 Group 1 Pocitos Montevideo France 4 1 Mexico ... LOMBARDI Domingo (URU) CRISTOPHE Henry (BEL) REGO Gilberto (BRA) 201 1096 FRA MEX 5 1 1
1 1930 13 Jul 1930 - 15:00 Group 4 Parque Central Montevideo USA 3 0 Belgium ... MACIAS Jose (ARG) MATEUCCI Francisco (URU) WARNKEN Alberto (CHI) 201 1090 USA BEL 3 1 0
2 1930 14 Jul 1930 - 12:45 Group 2 Parque Central Montevideo Yugoslavia 2 1 Brazil ... TEJADA Anibal (URU) VALLARINO Ricardo (URU) BALWAY Thomas (FRA) 201 1093 YUG BRA 3 0 1
3 1930 14 Jul 1930 - 14:50 Group 3 Pocitos Montevideo Romania 3 1 Peru ... WARNKEN Alberto (CHI) LANGENUS Jean (BEL) MATEUCCI Francisco (URU) 201 1098 ROU PER 4 2 1
4 1930 15 Jul 1930 - 16:00 Group 1 Parque Central Montevideo Argentina 1 0 France ... REGO Gilberto (BRA) SAUCEDO Ulises (BOL) RADULESCU Constantin (ROU) 201 1085 ARG FRA 1 1 0

5 rows × 23 columns

In [10]:
df["Second Half Goals"] = df["Second Half Home Goals"] + df["Second Half Away Goals"]
df["First Half Goals"] = df["Half-time Home Goals"] + df["Half-time Away Goals"]
df["Total Goals"] = df["First Half Goals"]+ df["First Half Goals"] 

df_slice = df[["Year", "First Half Goals", "Second Half Goals", "Total Goals" ]]
df_slice.head()
Out[10]:
Year First Half Goals Second Half Goals Total Goals
0 1930 3 2 6
1 1930 2 1 4
2 1930 2 1 4
3 1930 1 3 2
4 1930 0 1 0
In [11]:
# create a DataFrame with first half/ second half split

firstHalf_goals = []
secondHalf_goals = []
allGoals = []
homeVaway = []
year = []

for row_index,row in df_slice.iterrows():
    allGoals.append(row["First Half Goals"])
    homeVaway.append("First")
    allGoals.append(row["Second Half Goals"])
    homeVaway.append("Second")
    year.append(row["Year"])
    year.append(row["Year"])

# print(len(homeVaway))
# print(len(allGoals))
# print(len(year))

d = {
    
    "Goals": allGoals,
    "Year": year,
    "Half": homeVaway
}

goals_df = pd.DataFrame(d)
goals_df.head()
Out[11]:
Goals Half Year
0 3 First 1930
1 2 Second 1930
2 2 First 1930
3 1 Second 1930
4 2 First 1930
In [12]:
# plot average goals by year, split by home vs. away team
sns.set_context("poster", font_scale=1.5)
sns.set_style("whitegrid")
f, ax = plt.subplots(figsize=(30,15))
sns.violinplot(data=goals_df,x="Year", y="Goals", palette="Set1", hue="Half", split = True)

ax.set(ylabel = "Total Goals Per Game", title = "Distribution Of Goals Scored In World Cup Matches By Year-Split By First And Second Half Goals ")
Out[12]:
[Text(0,0.5,'Total Goals Per Game'),
 Text(0.5,1,'Distribution Of Goals Scored In World Cup Matches By Year-Split By First And Second Half Goals ')]