# Data Science Example - Olympics
import pandas as pd
import cufflinks as cf
# Switch cufflinks to offline mode before any .iplot() call is made.
cf.go_offline()

# The dataset is read straight from the Callysto course repository.
# A local copy can be used instead: pd.read_csv('./olympics.csv')
OLYMPICS_CSV_URL = 'https://raw.githubusercontent.com/callysto/online-courses/master/CallystoAndDataScience/olympics.csv'
olympics = pd.read_csv(OLYMPICS_CSV_URL)
# Both charts in this section share the same axis labels.
year_axis_titles = dict(yTitle='Number of Athletes', xTitle='Year')

# Number of rows per Games year across the whole dataset.
# NOTE(review): size() counts rows, not distinct people - presumably the CSV has
# one row per athlete per event; confirm before reading this as unique athletes.
rows_per_year = olympics.groupby(["Year"]).size()
# athletes_by_year holds whatever iplot() returns, not the grouped counts.
athletes_by_year = rows_per_year.iplot(
    title='Number of Athletes per Year (Olympics)',
    **year_axis_titles,
)

# Same count, restricted to rows whose Season is 'Winter'.
winter_rows = olympics[olympics['Season'] == 'Winter']
winter_rows.groupby(["Year"]).size().iplot(
    title='Number of Athletes per Year (Winter Olympics)',
    **year_axis_titles,
)
# Keep only the rows that have a value in the Medal column.
medals = olympics.dropna(subset=["Medal"])

# Narrow the medal rows down to the Winter season.
is_winter_medal = medals["Season"] == "Winter"
medals_winter = medals[is_winter_medal]

# Count rows per region, order by the Medal count (largest first) and chart
# the first 20 regions as a bar chart.
winter_counts_by_region = (
    medals_winter.groupby('region')
    .count()
    .sort_values('Medal', ascending=False)
)
winter_counts_by_region['Medal'].head(20).iplot(
    kind='bar',
    title='Medals per Country (Winter Olympics, Top 20 Countries)',
    yTitle='Number of Medals',
)
# Medal rows whose region is Canada.
canada_medals = medals[medals['region'] == 'Canada']

# Medal count per sport, largest first. Computed once and used for both the
# chart and the table below.
canada_medals_by_sport = (
    canada_medals.groupby('Sport')
    .count()
    .sort_values('Medal', ascending=False)['Medal']
)
canada_medals_by_sport.iplot(kind='bar', title='Canadian Olympic Medals per Sport')

# Tabular form of the same counts, with the column labelled 'Medals'.
df_canada_medals = pd.DataFrame(canada_medals_by_sport).rename(columns={'Medal': 'Medals'})
# Bare expression: shows the table when this runs as the last line of a notebook cell.
df_canada_medals
# Medal rows per athlete name, largest first; keep the 20 highest counts.
medal_counts_by_name = medals.groupby('Name').count().sort_values('Medal', ascending=False)
medals_per_athlete = pd.DataFrame(medal_counts_by_name.head(20)['Medal'])

# One region per name (drop_duplicates keeps the first row seen for each Name),
# used to label the bars.
name_to_region = olympics[['Name', 'region']].drop_duplicates('Name')

# Attach the region to each of the top-20 names and draw a horizontal bar chart.
medals_per_athlete.merge(name_to_region, on='Name').iplot(
    kind='barh',
    title='Number of Olympic Medals per Athlete (Top 20)',
    y='Medal',
    x='Name',
    xTitle='Number of Medals',
    text='region',
)