import pandas as pd
# Read the air-quality measurements from the CSV (relative path) and
# preview the first ten rows.
AQI_df = pd.read_csv('AIR QUALITY INDEX.csv')
AQI_df.head(n=10)
Place Date PM 2.5 PM10 NO2 NH3
SO2 CO \
0 RABINDRA BHARATI UNIVERSITY 17-09-2019 48 58 17 3
7 10
1 RABINDRA BHARATI UNIVERSITY 19-09-2019 38 31 13 3
6 10
2 RABINDRA BHARATI UNIVERSITY 19-09-2019 28 32 16 3
6 8
3 RABINDRA BHARATI UNIVERSITY 20-09-2019 0 0 0 0
0 0
4 RABINDRA BHARATI UNIVERSITY 21-09-2019 72 86 43 3
16 20
5 RABINDRA BHARATI UNIVERSITY 22-09-2019 63 66 34 3
9 12
6 RABINDRA BHARATI UNIVERSITY 23-09-2019 33 42 19 3
7 14
7 RABINDRA BHARATI UNIVERSITY 24-09-2019 25 28 14 3
7 8
8 RABINDRA BHARATI UNIVERSITY 25-09-2019 0 0 0 0
0 0
9 RABINDRA BHARATI UNIVERSITY 26-09-2019 54 55 28 4
8 7
O3 Air Quality Index
0 11 58
1 10 38
2 24 32
3 0 0
4 35 86
5 40 66
6 45 45
7 23 28
8 0 0
9 28 55
import matplotlib.pyplot as plt
import seaborn as sn
%matplotlib inline
import numpy as np
Matplotlib is building the font cache; this may take a moment.
from statsmodels.tsa.seasonal import seasonal_decompose
# Additive decomposition of the AQI column, using a cycle length of 5
# observations. The column is passed as a plain NumPy array.
ts_decompse = seasonal_decompose(
    np.array(AQI_df['Air Quality Index']),
    model='additive',
    period=5,
)

# Plot the decomposed time series components.
ts_plot = ts_decompse.plot()

# Store the seasonal and trend components alongside the raw readings.
AQI_df['seasonal'] = ts_decompse.seasonal
AQI_df['trend'] = ts_decompse.trend
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
# Correlograms of the raw AQI column: autocorrelation up to lag 10 and
# partial autocorrelation up to lag 5.
acf_plot = plot_acf(x=AQI_df['Air Quality Index'], lags=10)
pacf_plot = plot_pacf(x=AQI_df['Air Quality Index'], lags=5)
from statsmodels.tsa.arima.model import ARIMA
# AR(1) model, i.e. ARIMA order (1, 0, 0), fitted on the rows selected by
# the 0:30 slice of the AQI column.
arima = ARIMA(
    endog=AQI_df['Air Quality Index'][:30],
    order=(1, 0, 0),
)
ar_model = arima.fit()
ar_model.summary()
<class 'statsmodels.iolib.summary.Summary'>
"""
SARIMAX Results
======================================================================
========
Dep. Variable: Air Quality Index No. Observations:
15
Model: ARIMA(1, 0, 0) Log Likelihood
-70.723
Date: Thu, 12 Sep 2024 AIC
147.447
Time: 10:06:07 BIC
149.571
Sample: 0 HQIC
147.424
- 15
Covariance Type: opg
======================================================================
========
coef std err z P>|z| [0.025
0.975]
----------------------------------------------------------------------
--------
const 51.0224 10.691 4.772 0.000 30.068
71.977
ar.L1 0.1519 0.336 0.452 0.651 -0.506
0.810
sigma2 728.0467 330.856 2.200 0.028 79.580
1376.513
======================================================================
=============
Ljung-Box (L1) (Q): 0.01 Jarque-Bera (JB):
0.35
Prob(Q): 0.92 Prob(JB):
0.84
Heteroskedasticity (H): 0.68 Skew:
-0.15
Prob(H) (two-sided): 0.69 Kurtosis:
2.31
======================================================================
=============
Warnings:
[1] Covariance matrix calculated using the outer product of gradients
(complex-step).
"""
# Predicted mean of the AR(1) model for index positions 30 through 36.
# NOTE(review): the fit summary recorded with this notebook reports 15
# observations, so these positions lie well beyond the end of the sample;
# confirm the intended forecast window.
forecast_31_37 = ar_model.predict(start=30, end=36)
forecast_31_37
30 51.022355
31 51.022355
32 51.022355
33 51.022355
34 51.022355
35 51.022355
36 51.022355
Name: predicted_mean, dtype: float64
## Plot the raw Air Quality Index series
plt.figure(figsize=(10, 4))
plt.plot(AQI_df['Air Quality Index'])
plt.xlabel("Date")
plt.ylabel("Air Quality Index");
from statsmodels.tsa.stattools import adfuller
def adfuller_test(ts):
    """Run an Augmented Dickey-Fuller test on ``ts`` and print the result.

    The first four entries returned by ``adfuller`` are labelled as test
    statistic, p-value, lags used and number of observations used, and the
    labelled Series is printed. Automatic lag selection is switched off
    (``autolag=None``). Nothing is returned.
    """
    labels = [
        'Test Statistic',
        'p-value',
        'Lags Used',
        'Number of Observations Used',
    ]
    leading_stats = adfuller(ts, autolag=None)[:4]
    print(pd.Series(leading_stats, index=labels))


adfuller_test(AQI_df['Air Quality Index'])
Test Statistic 0.090290
p-value 0.965392
Lags Used 5.000000
Number of Observations Used 9.000000
dtype: float64
# First-order difference: each reading minus the one before it. The first
# row has no predecessor, so its difference is NaN.
AQI_df['AQI_diff'] = AQI_df['Air Quality Index'].diff()
AQI_df.head(10)
Place Date PM 2.5 PM10 NO2 NH3
SO2 CO \
0 RABINDRA BHARATI UNIVERSITY 17-09-2019 48 58 17 3
7 10
1 RABINDRA BHARATI UNIVERSITY 19-09-2019 38 31 13 3
6 10
2 RABINDRA BHARATI UNIVERSITY 19-09-2019 28 32 16 3
6 8
3 RABINDRA BHARATI UNIVERSITY 20-09-2019 0 0 0 0
0 0
4 RABINDRA BHARATI UNIVERSITY 21-09-2019 72 86 43 3
16 20
5 RABINDRA BHARATI UNIVERSITY 22-09-2019 63 66 34 3
9 12
6 RABINDRA BHARATI UNIVERSITY 23-09-2019 33 42 19 3
7 14
7 RABINDRA BHARATI UNIVERSITY 24-09-2019 25 28 14 3
7 8
8 RABINDRA BHARATI UNIVERSITY 25-09-2019 0 0 0 0
0 0
9 RABINDRA BHARATI UNIVERSITY 26-09-2019 54 55 28 4
8 7
O3 Air Quality Index seasonal trend AQI_diff
0 11 58 10.02 NaN NaN
1 10 38 13.52 NaN -20.0
2 24 32 -6.68 42.8 -6.0
3 0 0 -40.98 44.4 -32.0
4 35 86 24.12 45.8 86.0
5 40 66 10.02 45.0 -20.0
6 45 45 13.52 45.0 -21.0
7 23 28 -6.68 38.8 -17.0
8 0 0 -40.98 36.6 -28.0
9 28 55 24.12 46.0 55.0
# Drop only the rows where the first difference is missing. A bare dropna()
# would also discard rows whose only NaN is in another column (e.g. 'trend').
AQI_diff_df = AQI_df.dropna(subset=['AQI_diff'])

# Plot the differenced series itself so the curve matches the y-axis label.
plt.figure(figsize=(10, 4))
plt.xlabel("Date")
plt.ylabel("First Order Differences")
plt.plot(AQI_diff_df['AQI_diff']);
# Autocorrelation of the first-differenced series up to lag 10. dropna()
# removes the leading NaN that differencing produces.
# The result is an ACF figure; the variable name is kept unchanged so any
# later reference to it still resolves.
pacf_plot = plot_acf(AQI_df['AQI_diff'].dropna(), lags=10)
# Split at row 100: earlier rows train the model, later rows are held out.
# NOTE(review): the fit summary recorded with this notebook reports 15
# observations, which suggests the frame has fewer than 100 rows and that
# AQI_test ends up empty; confirm the split point.
AQI_train = AQI_df.iloc[:100]
AQI_test = AQI_df.iloc[100:]

# ARIMA(1, 1, 1) on the training portion of the AQI column.
arima = ARIMA(endog=AQI_train['Air Quality Index'], order=(1, 1, 1))
arima_model = arima.fit()
arima_model.summary()
<class 'statsmodels.iolib.summary.Summary'>
"""
SARIMAX Results
======================================================================
========
Dep. Variable: Air Quality Index No. Observations:
15
Model: ARIMA(1, 1, 1) Log Likelihood
-67.644
Date: Thu, 12 Sep 2024 AIC
141.288
Time: 10:43:22 BIC
143.206
Sample: 0 HQIC
141.111
- 15
Covariance Type: opg
======================================================================
========
coef std err z P>|z| [0.025
0.975]
----------------------------------------------------------------------
--------
ar.L1 0.2266 0.465 0.488 0.626 -0.684
1.137
ma.L1 -0.9366 0.934 -1.003 0.316 -2.767
0.893
sigma2 825.5921 630.430 1.310 0.190 -410.029
2061.213
======================================================================
=============
Ljung-Box (L1) (Q): 0.03 Jarque-Bera (JB):
0.62
Prob(Q): 0.87 Prob(JB):
0.73
Heteroskedasticity (H): 0.74 Skew:
0.27
Prob(H) (two-sided): 0.75 Kurtosis:
2.12
======================================================================
=============
Warnings:
[1] Covariance matrix calculated using the outer product of gradients
(complex-step).
"""
# Correlograms of the ARIMA(1, 1, 1) residuals: ACF up to lag 10 and PACF
# up to lag 5.
acf_plot = plot_acf(x=arima_model.resid, lags=10)
pacf_plot = plot_pacf(x=arima_model.resid, lags=5)
# Forecast the ten steps that follow the end of the training data.
AQI_predict = arima_model.forecast(10)
AQI_predict
15 60.258577
16 54.651737
17 53.381129
18 53.093187
19 53.027934
20 53.013147
21 53.009796
22 53.009036
23 53.008864
24 53.008825
Name: predicted_mean, dtype: float64
import numpy as np
def get_mape(actual, predicted):
    """Return the mean absolute percentage error (MAPE), in percent.

    The inputs are compared element by element in positional order. Both are
    converted to plain float arrays first, so two pandas Series with
    different index labels (for example a slice of observations and a
    forecast) are not re-aligned by label, which would leave no overlapping
    rows and produce NaN.

    Entries whose actual value is zero are left out of the average because
    the percentage error is undefined there.

    Args:
        actual: Observed values (array-like of numbers).
        predicted: Predicted values, with the same shape as ``actual`` or
            broadcastable to it.

    Returns:
        The MAPE as a float percentage, or NaN when no entry has a non-zero
        actual value.

    Raises:
        ValueError: If the two inputs cannot be broadcast to a common shape.
    """
    actual_arr, predicted_arr = np.broadcast_arrays(
        np.asarray(actual, dtype=float),
        np.asarray(predicted, dtype=float),
    )
    # Only positions with a non-zero actual have a defined percentage error.
    usable = actual_arr != 0
    if not usable.any():
        return np.nan
    errors = (actual_arr[usable] - predicted_arr[usable]) / actual_arr[usable]
    return np.mean(np.abs(errors)) * 100
# Score the forecast against the AQI readings from row 5 onward.
# NOTE(review): the forecast printed above is labelled 15-24 while the
# actuals start at row 5, so the two arguments appear to cover different
# periods; confirm which observations the forecast should be compared with.
get_mape(actual=AQI_df['Air Quality Index'][5:], predicted=AQI_predict)
nan