# Load the match-level dataset; the rest of the analysis refers to it as `df`.
# (The file name was previously split across two lines, which left the string
# literal unterminated.)
df = pd.read_csv('matches.csv')
df.head()
# Number of distinct values in every column.
df.nunique()
# Combined number of wins of the three teams with the most wins.
df.winner.value_counts().head(3).sum()
# Number of matches with a recorded winner (count() skips missing values).
total = df.winner.count()
total
df.winner.unique()
df.winner.value_counts()
#Q8
# Matches per venue, most frequent first.
df['venue'].value_counts().sort_values(ascending=False)
# Some stadiums appear under more than one spelling. Map each variant to one
# canonical name so that every stadium is counted once.
# NOTE(review): the 'M. A. Chidambaram Stadium' and 'ACA-VDCA Stadium' literals
# were split across lines in the original; they are rejoined here on the
# assumption that the break replaced a space / followed the hyphen. Confirm
# against the spellings present in df['venue'].
dict_st = {
    'Feroz Shah Kotla Ground': 'Feroz Shah Kotla',
    'MA Chidambaram Stadium, Chepauk': 'M. A. Chidambaram Stadium',
    'M. Chinnaswamy Stadium': 'M Chinnaswamy Stadium',
    'Rajiv Gandhi International Stadium, Uppal': 'Rajiv Gandhi Intl. Cricket Stadium',
    'ACA-VDCA Stadium': 'Dr. Y.S. Rajasekhara Reddy ACA-VDCA Cricket Stadium',
}
# Venue names not listed in dict_st are copied through unchanged.
df['stadium'] = df['venue'].replace(dict_st)
df['stadium'].value_counts().sort_values(ascending=False)
# Number of distinct stadiums after normalisation.
df['stadium'].nunique()
#Q9
df.head()
# Total matches per umpire across both umpire columns.
# A plain `+` aligns the two counts on the umpire name and yields NaN for
# anyone who appears in only one of the columns; Series.add with fill_value=0
# treats the missing side as zero instead. The alignment produces floats, so
# the totals are cast back to integers.
ump = (
    df['umpire1'].value_counts()
    .add(df['umpire2'].value_counts(), fill_value=0)
    .astype(int)
)
# Combined number of matches of the three umpires with the most appearances.
ump.sort_values(ascending=False).head(3).sum()
#Q10
# Same totals as a two-column table (umpire name, match count).
ump.reset_index()
#Q11
# Join the match table to df1 on the match identifier.
# NOTE(review): df1 is defined outside this section; presumably the
# ball-by-ball deliveries table -- confirm.
df_j = pd.merge(df, df1, left_on='id', right_on='match_id')
df_j.head()
# Runs that did not come from fours or sixes: all runs minus the batsman runs
# of deliveries worth exactly 4 or exactly 6. The expression is parenthesised
# so it can span several lines (it previously ended a line on a dangling `-`).
(
    df_j['total_runs'].sum()
    - df_j[df_j['batsman_runs'] == 4]['batsman_runs'].sum()
    - df_j[df_j['batsman_runs'] == 6]['batsman_runs'].sum()
)
#Q12
# Per-season runs that did not come from fours or sixes: season total of
# total_runs minus the season totals of batsman runs on deliveries worth
# exactly 4 and exactly 6. The three per-season series are aligned on the
# season label. Parenthesised so the chain can span lines; it was previously
# cut after `.groupby('season')`, which left the column selection on its own
# line as a bare list.
(
    df_j.groupby('season')['total_runs'].sum()
    - df_j[df_j['batsman_runs'] == 4].groupby('season')['batsman_runs'].sum()
    - df_j[df_j['batsman_runs'] == 6].groupby('season')['batsman_runs'].sum()
)
#Q 13
# Largest win-by-runs margin of each season.
season_max_margin = df.groupby('season')['win_by_runs'].max()
# Sum of those per-season maxima.
max_win_margin_runs = season_max_margin.sum()
max_win_margin_runs
#Q14
# Season with the largest of the per-season maxima: sort ascending and keep
# the last entry.
df.groupby('season')['win_by_runs'].max().sort_values().iloc[-1:]
#Q 15
# Matches between Mumbai Indians and Chennai Super Kings. Either side can be
# listed as team1 or team2, so both columns are tested against the same pair
# of names rather than one fixed ordering.
rivals = ['Mumbai Indians', 'Chennai Super Kings']
head_to_head = df[(df['team1'].isin(rivals)) & (df['team2'].isin(rivals))]
# Number of those matches with a recorded player of the match.
head_to_head['player_of_match'].count()
#Q16
# Player-of-the-match awards per player within those matches.
df[(df['team1'].isin(rivals)) & (df['team2'].isin(rivals))].groupby(
    'player_of_match'
)['player_of_match'].count()
#Q17
# Runs scored by each batsman in each match, highest score first.
# The whole chain is wrapped in parentheses; it was previously cut after
# groupby(...), which bound temp_df to the GroupBy object and left the column
# selection on the next line as a bare list.
temp_df = (
    df1.groupby(['batsman', 'match_id'])['batsman_runs']
    .agg('sum')
    .reset_index()
    .sort_values(by='batsman_runs', ascending=False)
    .reset_index(drop=True)
)
# Number of batsman-match scores of at least 50 but below 100.
temp_df[(temp_df['batsman_runs'] >= 50) & (temp_df['batsman_runs'] < 100)]['batsman_runs'].count()
#Q18
# Number of distinct batsmen with at least one such score.
temp_df[(temp_df['batsman_runs'] >= 50) & (temp_df['batsman_runs'] < 100)]['batsman'].nunique()
#Q19
# Number of batsman-match scores of 100 or more.
temp_df[temp_df['batsman_runs'] >= 100]['batsman_runs'].count()
#Q20
# Runs scored by each batsman for each batting team in each match, highest
# score first. Parenthesised so the chain can span lines; it was previously
# cut after groupby(...), which bound team_100 to the GroupBy object.
team_100 = (
    df1.groupby(['batsman', 'batting_team', 'match_id'])['batsman_runs']
    .agg('sum')
    .reset_index()
    .sort_values(by='batsman_runs', ascending=False)
    .reset_index(drop=True)
)
# Number of 100+ scores per (team, batsman); the count lands in 'match_id'.
hundreds_by_teams = (
    team_100[team_100['batsman_runs'] >= 100]
    .groupby(['batting_team', 'batsman'])['match_id']
    .count()
    .reset_index()
)
hundreds_by_teams
# Total 100+ scores per team, then the number of teams with more than 10.
total_hundreds_by_teams = hundreds_by_teams.groupby('batting_team')['match_id'].sum()
total_hundreds_by_teams[total_hundreds_by_teams > 10].count()