import pandas as pd
import matplotlib.pyplot as plt
# Read the statistics CSV into the DataFrame used by every later cell.
plData = pd.read_csv(filepath_or_buffer="PLstats.csv")

# Preview the first five rows (a bare expression is only displayed in an
# interactive/notebook session).
plData.head(n=5)

# Preview the last five rows.
plData.tail(n=5)

# Discard every row that contains at least one missing value.
plData.dropna(inplace=True)

# Remove the rows with fewer than 60 saves in a single call. The boolean mask
# is evaluated once, up front, so the frame is no longer mutated while its own
# index is being walked and there is no per-row DataFrame.drop (each of which
# rebuilds the frame).
low_save_rows = plData.index[plData["saves"] < 60]
plData.drop(index=low_save_rows, inplace=True)

# Report the (rows, columns) that survive cleaning.
print(plData.shape)

(100, 42)

# Key each row by its (team, season) pair, then show the resulting MultiIndex.
plData = plData.set_index(keys=['team', 'season'])
print(plData.index)

MultiIndex([(         'Manchester City', '2013-2014'),
            (               'Liverpool', '2013-2014'),
            (                 'Chelsea', '2013-2014'),
            (                 'Arsenal', '2013-2014'),
            (                 'Everton', '2013-2014'),
            (       'Tottenham Hotspur', '2013-2014'),
            (       'Manchester United', '2013-2014'),
            (        'Newcastle United', '2013-2014'),
            (             'Southampton', '2013-2014'),
            (          'Crystal Palace', '2013-2014'),
            (              'Stoke City', '2013-2014'),
            (            'Swansea City', '2013-2014'),
            (         'West Ham United', '2013-2014'),
            (             'Aston Villa', '2013-2014'),
            (               'Hull City', '2013-2014'),
            (              'Sunderland', '2013-2014'),
            (                  'Fulham', '2013-2014'),
            (            'Norwich City', '2013-2014'),
            (            'Cardiff City', '2013-2014'),
            (    'West Bromwich Albion', '2013-2014'),
            (                 'Chelsea', '2014-2015'),
            (         'Manchester City', '2014-2015'),
            (                 'Arsenal', '2014-2015'),
            (       'Manchester United', '2014-2015'),
            (       'Tottenham Hotspur', '2014-2015'),
            (               'Liverpool', '2014-2015'),
            (             'Southampton', '2014-2015'),
            (            'Swansea City', '2014-2015'),
            (              'Stoke City', '2014-2015'),
            (          'Crystal Palace', '2014-2015'),
            (                 'Everton', '2014-2015'),
            (         'West Ham United', '2014-2015'),
            (          'Leicester City', '2014-2015'),
            (    'West Bromwich Albion', '2014-2015'),
            (             'Aston Villa', '2014-2015'),
            (        'Newcastle United', '2014-2015'),
            (               'Hull City', '2014-2015'),
            (     'Queens Park Rangers', '2014-2015'),
            (                 'Burnley', '2014-2015'),
            (              'Sunderland', '2014-2015'),
            (          'Leicester City', '2015-2016'),
            (                 'Arsenal', '2015-2016'),
            (         'Manchester City', '2015-2016'),
            (       'Manchester United', '2015-2016'),
            (       'Tottenham Hotspur', '2015-2016'),
            (             'Southampton', '2015-2016'),
            (               'Liverpool', '2015-2016'),
            (         'West Ham United', '2015-2016'),
            (              'Stoke City', '2015-2016'),
            (                 'Chelsea', '2015-2016'),
            (            'Swansea City', '2015-2016'),
            (                 'Watford', '2015-2016'),
            (         'AFC Bournemouth', '2015-2016'),
            (          'Crystal Palace', '2015-2016'),
            (                 'Everton', '2015-2016'),
            (    'West Bromwich Albion', '2015-2016'),
            (        'Newcastle United', '2015-2016'),
            (            'Norwich City', '2015-2016'),
            (              'Sunderland', '2015-2016'),
            (             'Aston Villa', '2015-2016'),
            (                 'Chelsea', '2016-2017'),
            (       'Tottenham Hotspur', '2016-2017'),
            (                 'Arsenal', '2016-2017'),
            (         'Manchester City', '2016-2017'),
            (               'Liverpool', '2016-2017'),
            (       'Manchester United', '2016-2017'),
            (                 'Everton', '2016-2017'),
            (         'AFC Bournemouth', '2016-2017'),
            (          'Crystal Palace', '2016-2017'),
            (          'Leicester City', '2016-2017'),
            (             'Southampton', '2016-2017'),
            (            'Swansea City', '2016-2017'),
            (    'West Bromwich Albion', '2016-2017'),
            (         'West Ham United', '2016-2017'),
            (                 'Burnley', '2016-2017'),
            (              'Stoke City', '2016-2017'),
            (                 'Watford', '2016-2017'),
            (               'Hull City', '2016-2017'),
            (              'Sunderland', '2016-2017'),
            (           'Middlesbrough', '2016-2017'),
            (         'Manchester City', '2017-2018'),
            (       'Manchester United', '2017-2018'),
            (       'Tottenham Hotspur', '2017-2018'),
            (                 'Chelsea', '2017-2018'),
            (               'Liverpool', '2017-2018'),
            (                 'Arsenal', '2017-2018'),
            (                 'Burnley', '2017-2018'),
            (                 'Everton', '2017-2018'),
            (          'Leicester City', '2017-2018'),
            (        'Newcastle United', '2017-2018'),
            (         'AFC Bournemouth', '2017-2018'),
            (          'Crystal Palace', '2017-2018'),
            (                 'Watford', '2017-2018'),
            (         'West Ham United', '2017-2018'),
            ('Brighton and Hove Albion', '2017-2018'),
            (       'Huddersfield Town', '2017-2018'),
            (            'Swansea City', '2017-2018'),
            (             'Southampton', '2017-2018'),
            (              'Stoke City', '2017-2018'),
            (    'West Bromwich Albion', '2017-2018')],
           names=['team', 'season'])

# Conversion rate = goals divided by total scoring attempts, one value per row
# of plData.
conv_rate_frame = plData.loc[:, ['goals', 'total_scoring_att']].copy()
conversion_rate = conv_rate_frame['goals'].div(conv_rate_frame['total_scoring_att'])
conv_rate_frame = conv_rate_frame.assign(conv_rate=conversion_rate)

# Highest conversion rate first, so the bars read left to right in rank order.
conv_rate_frame = conv_rate_frame.sort_values(by='conv_rate', ascending=False)

# One wide bar chart of the conversion rate for every row.
fig, ax = plt.subplots(figsize=(18, 8))
conv_rate_frame.plot.bar(y='conv_rate', ax=ax)

ax.set_xlabel('Teams and Seasons')
ax.set_ylabel('Conversion Rate')
ax.set_title('Premier League Goal Conversion Rates from 2013/14 to 2017/18')

fig.tight_layout()

plt.show()

# Scatter of goals (y) against total scoring attempts (x), annotated with the
# correlation coefficient of the two columns.
attempts = conv_rate_frame['total_scoring_att']
scored = conv_rate_frame['goals']

fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(attempts, scored)

ax.set_xlabel('Total Scoring Attempts')
ax.set_ylabel('Goals')
ax.set_title('Scatter Plot of Goals vs Total Scoring Attempts')

ax.grid(True)
fig.tight_layout()

# Series.corr with default arguments; the value is printed near the top
# centre of the axes.
correlation = attempts.corr(scored)
ax.annotate(
    f'Correlation: {correlation:.4f}',
    xy=(0.5, 0.95),
    xycoords='axes fraction',
    ha='center',
    fontsize=12,
)

plt.show()

# Penalty columns copied out of plData, plus a derived "missed" column.
pk_result_frame = plData.loc[:, ['penalty_conceded', 'pen_goals_conceded', 'penalty_save']].copy()

# Penalties conceded that were neither saved nor scored are counted as missed.
pen_miss_against = (
    plData['penalty_conceded']
    .sub(plData['penalty_save'])
    .sub(plData['pen_goals_conceded'])
)
pk_result_frame['pen_miss_ag'] = pen_miss_against

# Slice labels and their source columns, kept in matching order.
labels = ['Penalties Scored', 'Penalties Saved', 'Penalties Missed']
sizes = [
    pk_result_frame[column].sum()
    for column in ('pen_goals_conceded', 'penalty_save', 'pen_miss_ag')
]

fig, ax = plt.subplots()
ax.pie(
    sizes,
    labels=labels,
    autopct='%1.1f%%',
    colors=['lime', 'orange', 'red'],
    wedgeprops={'edgecolor': 'black', 'linewidth': 1},
)

# Equal aspect ratio keeps the pie circular.
ax.axis('equal')
ax.set_title('Distribution of Penalty Outcomes')

plt.show()

# Per-row discipline and results metrics. Each is computed once here and
# reused for the frame columns below instead of being recalculated.
tot_cards = plData['total_yel_card'] + plData['total_red_card']
# NOTE(review): a row with zero losses would yield an infinite ratio here --
# confirm no such row survives cleaning.
wl_ratio = plData['wins'] / plData['losses']

wl_card_frame = plData[['wins', 'losses', 'total_yel_card', 'total_red_card']].copy()
wl_card_frame['tot_cards'] = tot_cards
wl_card_frame['wl_ratio'] = wl_ratio

# Most-carded rows first, so the bars read left to right in rank order.
wl_card_frame.sort_values(by='tot_cards', ascending=False, inplace=True)

# One wide bar chart of total cards for every row.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(18, 8))
wl_card_frame.plot.bar(ax=ax, y='tot_cards')

ax.set_xlabel('Teams and Seasons')
ax.set_ylabel('Total Cards')
ax.set_title('Premier League Yellow and Red Cards from 2013/14 to 2017/18')

fig.tight_layout()

plt.show()

# Scatter of win-loss ratio (y) against total cards (x), annotated with the
# correlation coefficient of the two columns.
cards = wl_card_frame['tot_cards']
ratios = wl_card_frame['wl_ratio']

fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(cards, ratios)

ax.set_xlabel('Total Cards')
ax.set_ylabel('Win-Loss Ratio')
ax.set_title('Scatter Plot of Win-Loss Ratio vs Total Cards')

ax.grid(True)
fig.tight_layout()

# Series.corr with default arguments; the value is printed near the top
# centre of the axes.
correlation = cards.corr(ratios)
ax.annotate(
    f'Correlation: {correlation:.4f}',
    xy=(0.5, 0.95),
    xycoords='axes fraction',
    ha='center',
    fontsize=12,
)

plt.show()

# Columns that each count one scoring method, in pie-slice order.
goal_columns = ['att_hd_goal', 'att_pen_goal', 'att_freekick_goal', 'goal_fastbreak', 'own_goals']
goal_method_frame = plData[['goals'] + goal_columns].copy()

# Whatever remains of `goals` after subtracting every listed method, one
# column at a time, is labelled "other".
# NOTE(review): this treats the listed columns as disjoint subsets of `goals`
# -- confirm against the dataset's column definitions.
other_goals = plData['goals']
for column in goal_columns:
    other_goals = other_goals - plData[column]
goal_method_frame['other_goals'] = other_goals

labels = ['Headed Goals', 'Penalty Goals', 'Freekick Goals', 'Fast Break Goals','Own Goals','Other Goals']
sizes = [goal_method_frame[name].sum() for name in goal_columns + ['other_goals']]

fig, ax = plt.subplots()
ax.pie(
    sizes,
    labels=labels,
    autopct='%1.2f%%',
    startangle=90,
    colors=['dodgerblue', 'lime', 'gold', 'darkorange', 'firebrick', 'mediumorchid'],
    wedgeprops={'edgecolor': 'black', 'linewidth': 1},
)
# Equal aspect ratio keeps the pie circular.
ax.axis('equal')

ax.set_title('Distribution of Scoring Methods')

plt.show()

# Saves and clean sheets copied out of plData, ordered by saves (highest first).
save_frame = plData.loc[:, ['saves', 'clean_sheet']].copy()
save_frame = save_frame.sort_values(by='saves', ascending=False)

# One wide bar chart of saves for every row.
fig, ax = plt.subplots(figsize=(18, 8))
save_frame.plot.bar(y='saves', ax=ax)

ax.set_xlabel('Teams and Seasons')
ax.set_ylabel('Total Saves')
ax.set_title('Premier League Saves from 2013/14 to 2017/18')

fig.tight_layout()

plt.show()

# Scatter of clean sheets (y) against saves (x), annotated with the
# correlation coefficient of the two columns.
save_counts = save_frame['saves']
clean_sheets = save_frame['clean_sheet']

fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(save_counts, clean_sheets)

ax.set_xlabel('Saves')
ax.set_ylabel('Clean Sheets')
ax.set_title('Scatter Plot of Clean Sheets vs Goalkeeper Saves')

ax.grid(True)
fig.tight_layout()

# Series.corr with default arguments; the value is printed near the top
# centre of the axes.
correlation = save_counts.corr(clean_sheets)
ax.annotate(
    f'Correlation: {correlation:.4f}',
    xy=(0.5, 0.95),
    xycoords='axes fraction',
    ha='center',
    fontsize=12,
)

plt.show()

# Columns that each count one pass type, in pie-slice order.
pass_columns = ['total_through_ball', 'total_long_balls', 'backward_pass', 'total_cross']
pass_frame = plData[['total_pass'] + pass_columns].copy()

# Whatever remains of `total_pass` after subtracting every listed pass type,
# one column at a time, is labelled "other".
# NOTE(review): this treats the listed columns as disjoint subsets of
# `total_pass` -- confirm against the dataset's column definitions.
other = plData['total_pass']
for column in pass_columns:
    other = other - plData[column]
pass_frame['other_pass'] = other

labels = ['Through Balls', 'Long Balls', 'Back Passes', 'Crosses','Other Passes']
sizes = [pass_frame[name].sum() for name in pass_columns + ['other_pass']]

fig, ax = plt.subplots()
ax.pie(
    sizes,
    labels=labels,
    autopct='%1.2f%%',
    # Labels and percentages are both placed outside the pie radius.
    labeldistance=1.35,
    pctdistance=1.2,
    startangle=180,
    colors=['#FF5733', '#33FF57', '#3373FF', '#FF33C7', '#FFD700'],
    wedgeprops={'edgecolor': 'black', 'linewidth': 1},
)
# Equal aspect ratio keeps the pie circular.
ax.axis('equal')
ax.set_title('Distribution of Passes')

plt.show()

# Attempts that were neither on target nor off the woodwork count as off frame.
off_frame = (
    plData['total_scoring_att']
    .sub(plData['ontarget_scoring_att'])
    .sub(plData['hit_woodwork'])
)
hit_frame = plData['hit_woodwork'].copy()
goals = plData['goals'].copy()
# NOTE(review): 'saved' and 'blocked' are read from plData's `saves` and
# `outfielder_block` columns -- confirm these describe the same shots that
# are counted in `total_scoring_att`.
saved = plData['saves'].copy()
blocked = plData['outfielder_block'].copy()

# Attach each outcome series to the frame under its own column name, in
# pie-slice order.
score_frame = plData[['total_scoring_att']].copy()
outcome_columns = {
    'off_frame': off_frame,
    'hit_frame': hit_frame,
    'goals': goals,
    'saved': saved,
    'blocked': blocked,
}
for column_name, values in outcome_columns.items():
    score_frame[column_name] = values

labels = ['Off Frame', 'Hit Woodwork', 'Goals', 'Saved','Blocked']
sizes = [score_frame[name].sum() for name in outcome_columns]

fig, ax = plt.subplots()
ax.pie(
    sizes,
    labels=labels,
    autopct='%1.2f%%',
    startangle=180,
    colors=['#92B5E7', '#7F8BC0', '#64B590', '#E48696', '#DFA163'],
    wedgeprops={'edgecolor': 'black', 'linewidth': 1},
)
# Equal aspect ratio keeps the pie circular.
ax.axis('equal')
ax.set_title('Distribution of Scoring Attempts')

plt.show()

	team	wins	losses	goals	total_yel_card	total_red_card	total_scoring_att	ontarget_scoring_att	hit_woodwork	att_hd_goal	...	total_cross	corner_taken	touches	big_chance_missed	clearance_off_line	dispossessed	penalty_save	total_high_claim	punches	season
0	Manchester United	28.0	5.0	83.0	60.0	1.0	698.0	256.0	21.0	12.0	...	918.0	258.0	25686.0	NaN	1.0	NaN	2.0	37.0	25.0	2006-2007
1	Chelsea	24.0	3.0	64.0	62.0	4.0	636.0	216.0	14.0	16.0	...	897.0	231.0	24010.0	NaN	2.0	NaN	1.0	74.0	22.0	2006-2007
2	Liverpool	20.0	10.0	57.0	44.0	0.0	668.0	214.0	15.0	8.0	...	1107.0	282.0	24150.0	NaN	1.0	NaN	0.0	51.0	27.0	2006-2007
3	Arsenal	19.0	8.0	63.0	59.0	3.0	638.0	226.0	19.0	10.0	...	873.0	278.0	25592.0	NaN	1.0	NaN	0.0	88.0	27.0	2006-2007
4	Tottenham Hotspur	17.0	12.0	57.0	48.0	3.0	520.0	184.0	6.0	5.0	...	796.0	181.0	22200.0	NaN	2.0	NaN	0.0	51.0	24.0	2006-2007

	team	wins	losses	goals	total_yel_card	total_red_card	total_scoring_att	ontarget_scoring_att	hit_woodwork	att_hd_goal	...	total_cross	corner_taken	touches	big_chance_missed	clearance_off_line	dispossessed	penalty_save	total_high_claim	punches	season
235	Huddersfield Town	9.0	19.0	28.0	62.0	3.0	362.0	109.0	8.0	5.0	...	765.0	165.0	22619.0	21.0	6.0	416.0	2.0	31.0	24.0	2017-2018
236	Swansea City	8.0	21.0	28.0	51.0	1.0	338.0	103.0	8.0	3.0	...	694.0	150.0	22775.0	26.0	1.0	439.0	3.0	44.0	15.0	2017-2018
237	Southampton	7.0	16.0	37.0	63.0	2.0	450.0	145.0	15.0	7.0	...	800.0	227.0	24639.0	37.0	4.0	379.0	1.0	29.0	13.0	2017-2018
238	Stoke City	7.0	19.0	35.0	62.0	1.0	384.0	132.0	8.0	8.0	...	598.0	136.0	20368.0	33.0	3.0	402.0	0.0	27.0	14.0	2017-2018
239	West Bromwich Albion	6.0	19.0	31.0	73.0	1.0	378.0	114.0	7.0	10.0	...	784.0	176.0	20552.0	28.0	3.0	446.0	0.0	40.0	5.0	2017-2018

DS220 Final Project¶

About the Dataset¶

Importing dataset into DataFrame¶

Cleaning the Data¶

Answering the questions¶

Which team has the highest conversion rate per scoring attempt and is there a correlation between scoring attempts and goals scored?¶

What is the outcome of penalty kicks against a team?¶

Which team has had the highest number of cards per game, and is there a relationship between win-loss ratio and discipline?¶

How do teams typically score their goals?¶

Which team has the highest number of saves, and is there a correlation between the number of saves and the number of clean sheets?¶

What is the distribution of types of passes for every team?¶

What is the distribution of the results of a team's scoring attempts?¶

Conclusion¶