import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px  ### for plotting the data on world map
import seaborn as sns
### Load a CSV from the CSSEGISandData/COVID-19 GitHub repository
### (csse_covid_19_data directory) into a DataFrame, then print its schema
### and preview the first rows.
### NOTE(review): the URL literal below is cut off in this export (there is no
### closing quote). The Last_Update values shown in the captured output
### suggest a daily report from around 2020-05-25/26 -- restore the full path
### from the original notebook and confirm before running.
path = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid
df = pd.read_csv(path)
### Column names, non-null counts and dtypes.
df.info()
df.head()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3436 entries, 0 to 3435
Data columns (total 12 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 FIPS 3008 non-null float64
1 Admin2 3011 non-null object
2 Province_State 3255 non-null object
3 Country_Region 3436 non-null object
4 Last_Update 3436 non-null object
5 Lat 3365 non-null float64
6 Long_ 3365 non-null float64
7 Confirmed 3436 non-null int64
8 Deaths 3436 non-null int64
9 Recovered 3436 non-null int64
10 Active 3436 non-null int64
11 Combined_Key 3436 non-null object
dtypes: float64(3), int64(4), object(5)
memory usage: 322.2+ KB
FIPS Admin2 Province_State Country_Region Last_Update Lat Long_ Confirm
2020-05-26
0 45001.0 Abbeville South Carolina US 34.223334 -82.461707
02:32:35
2020-05-26
1 22001.0 Acadia Louisiana US 30.295065 -92.414197 2
02:32:35
2020-05-26
2 51001.0 Accomack Virginia US 37.767072 -75.632346 7
02:32:35
2020-05-26
3 16001.0 Ada Idaho US 43.452658 -116.241552 7
02:32:35
### Use the shorter label "Country" for the country column.
df.rename(columns={'Country_Region': "Country"}, inplace=True)
### Keep only the country, the coordinates and the four case-count columns;
### the identifier and timestamp columns are removed in place.
df.drop(
    columns=['FIPS', 'Admin2', 'Last_Update', 'Province_State', 'Combined_Key'],
    inplace=True,
)
df.head()
Country Lat Long_ Confirmed Deaths Recovered Active
0 US 34.223334 -82.461707 35 0 0 35
1 US 30.295065 -92.414197 299 18 0 281
2 US 37.767072 -75.632346 731 11 0 720
3 US 43.452658 -116.241552 796 23 0 773
4 US 41.330756 -94.471059 8 0 0 8
### Collapse the per-location rows into one row of summed counts per country.
### The value columns are selected with a list (double brackets): indexing a
### GroupBy with a bare tuple of keys is the deprecated form that raises the
### FutureWarning and is rejected by newer pandas releases.
world = (
    df.groupby("Country")[['Confirmed', 'Active', 'Recovered', 'Deaths']]
    .sum()
    .reset_index()  ### turn the "Country" group key back into a regular column
)
world.head()
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:1: FutureWarning:
Indexing with multiple keys (implicitly converted to a tuple of keys) will be depr
Country Confirmed Active Recovered Deaths
0 Afghanistan 11180 9860 1097 223
1 Albania 1004 177 795 32
2 Algeria 8503 3147 4747 609
3 Andorra 763 49 663 51
4 Angola 70 48 18 4
### Find top 20 countries with maximum number of confirmed cases
top_20 = world.sort_values(by=['Confirmed'], ascending=False).head(20)

### Generate a Barplot: confirmed counts on x, one bar per country on y.
### x and y are passed by keyword because newer seaborn releases no longer
### accept them positionally.
plt.figure(figsize=(12, 10))
plot = sns.barplot(x=top_20['Confirmed'], y=top_20['Country'])

### Annotate each bar with its thousands-separated count. Bars sit at integer
### positions 0..n-1 on the y axis, so the bar index (with a small offset)
### is used as the text's y coordinate and the count as its x coordinate.
for i, value in enumerate(top_20['Confirmed']):
    plot.text(value, i - 0.05, f'{value:,.0f}', size=10)
plt.show()
/usr/local/lib/python3.7/dist-packages/seaborn/_decorators.py:43: FutureWarning:
Pass the following variables as keyword args: x, y. From version 0.12, the only va
### Top 5 countries by confirmed cases (head() defaults to 5 rows).
top_5 = world.sort_values(by=['Confirmed'], ascending=False).head()

### Generate a Barplot: recovered (green) is drawn after confirmed (red) on
### the same figure. x and y are passed by keyword because newer seaborn
### releases no longer accept them positionally.
plt.figure(figsize=(15, 5))
confirmed = sns.barplot(x=top_5['Confirmed'], y=top_5['Country'],
                        color='red', label='Confirmed')
recovered = sns.barplot(x=top_5['Recovered'], y=top_5['Country'],
                        color='green', label='Recovered')

### Add Texts for Barplots: each bar gets its thousands-separated count,
### positioned at the bar's value (x) and the bar's index (y).
for i, value in enumerate(top_5['Confirmed']):
    confirmed.text(value, i - 0.05, f'{value:,.0f}', size=9)
for i, value in enumerate(top_5['Recovered']):
    recovered.text(value, i - 0.05, f'{value:,.0f}', size=9)

plt.legend(loc=4)  ### loc=4 is the lower-right corner
plt.show()
/usr/local/lib/python3.7/dist-packages/seaborn/_decorators.py:43: FutureWarning:
Pass the following variables as keyword args: x, y. From version 0.12, the only va
/usr/local/lib/python3.7/dist-packages/seaborn/_decorators.py:43: FutureWarning:
Pass the following variables as keyword args: x, y. From version 0.12, the only va
### World choropleth coloured by the 'Confirmed' column, with rows matched to
### map regions by the country names in the 'Country' column.
### NOTE(review): this call is cut off in the export after `hov` (presumably a
### hover_* argument; further arguments and the closing parenthesis are
### missing) -- restore the full call from the original notebook before running.
figure = px.choropleth(world,locations='Country', locationmode='country names', color='Confirmed', hov
figure.show()
Countries with Confirmed cases
check 0s completed at 1:10 PM