[go: up one dir, main page]

0% found this document useful (0 votes)
21 views12 pages

Untitled 11

Uploaded by

Tanmoy Bir
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
21 views12 pages

Untitled 11

Uploaded by

Tanmoy Bir
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 12

import pandas as pd

# Load the air-quality measurements from the CSV file into a DataFrame.
AQI_df = pd.read_csv( 'AIR QUALITY INDEX.csv' )


# Preview the first ten rows.
AQI_df.head(10)

Place Date PM 2.5 PM10 NO2 NH3


SO2 CO \
0 RABINDRA BHARATI UNIVERSITY 17-09-2019 48 58 17 3
7 10
1 RABINDRA BHARATI UNIVERSITY 19-09-2019 38 31 13 3
6 10
2 RABINDRA BHARATI UNIVERSITY 19-09-2019 28 32 16 3
6 8
3 RABINDRA BHARATI UNIVERSITY 20-09-2019 0 0 0 0
0 0
4 RABINDRA BHARATI UNIVERSITY 21-09-2019 72 86 43 3
16 20
5 RABINDRA BHARATI UNIVERSITY 22-09-2019 63 66 34 3
9 12
6 RABINDRA BHARATI UNIVERSITY 23-09-2019 33 42 19 3
7 14
7 RABINDRA BHARATI UNIVERSITY 24-09-2019 25 28 14 3
7 8
8 RABINDRA BHARATI UNIVERSITY 25-09-2019 0 0 0 0
0 0
9 RABINDRA BHARATI UNIVERSITY 26-09-2019 54 55 28 4
8 7

O3 Air Quality Index


0 11 58
1 10 38
2 24 32
3 0 0
4 35 86
5 40 66
6 45 45
7 23 28
8 0 0
9 28 55

import matplotlib.pyplot as plt


import seaborn as sn
%matplotlib inline
import numpy as np

Matplotlib is building the font cache; this may take a moment.

from statsmodels.tsa.seasonal import seasonal_decompose


# Decompose the AQI series with an additive model, treating every
# 5 consecutive observations as one seasonal cycle.
ts_decompse = seasonal_decompose(
    np.array(AQI_df['Air Quality Index']),
    model='additive',
    period=5,
)

# Plot the decomposed time series components.
ts_plot = ts_decompse.plot()

# Store the seasonal and trend components next to the raw readings so they
# can be inspected row by row.
AQI_df['seasonal'] = ts_decompse.seasonal
AQI_df['trend'] = ts_decompse.trend

#AQI_df[['seasonal','trend']][0:]

from statsmodels.graphics.tsaplots import plot_acf, plot_pacf


# Show the autocorrelation of the raw AQI series up to lag 10.
acf_plot = plot_acf(AQI_df['Air Quality Index'], lags=10)
# Show the partial autocorrelation of the same series up to lag 5.
pacf_plot = plot_pacf(AQI_df['Air Quality Index'],
lags=5 )
from statsmodels.tsa.arima.model import ARIMA

# Build an ARIMA model of order (1, 0, 0) on rows 0-29 of the AQI column.
# NOTE(review): the recorded summary below reports only 15 observations, so
# the 0:30 slice appears to select the whole series -- confirm the intent.
arima = ARIMA(AQI_df['Air Quality Index'][0:30], order = (1,0,0))


# Estimate the model parameters.
ar_model = arima.fit()

# Display the fitted coefficients and diagnostic statistics.
ar_model.summary()

<class 'statsmodels.iolib.summary.Summary'>
"""
SARIMAX Results

======================================================================
========
Dep. Variable: Air Quality Index No. Observations:
15
Model: ARIMA(1, 0, 0) Log Likelihood
-70.723
Date: Thu, 12 Sep 2024 AIC
147.447
Time: 10:06:07 BIC
149.571
Sample: 0 HQIC
147.424
- 15
Covariance Type: opg

======================================================================
========
coef std err z P>|z| [0.025
0.975]
----------------------------------------------------------------------
--------
const 51.0224 10.691 4.772 0.000 30.068
71.977
ar.L1 0.1519 0.336 0.452 0.651 -0.506
0.810
sigma2 728.0467 330.856 2.200 0.028 79.580
1376.513
======================================================================
=============
Ljung-Box (L1) (Q): 0.01 Jarque-Bera (JB):
0.35
Prob(Q): 0.92 Prob(JB):
0.84
Heteroskedasticity (H): 0.68 Skew:
-0.15
Prob(H) (two-sided): 0.69 Kurtosis:
2.31
======================================================================
=============

Warnings:
[1] Covariance matrix calculated using the outer product of gradients
(complex-step).
"""

# Predict positions 30 through 36 (both ends included) with the fitted model.
forecast_31_37 = ar_model.predict(30, 36)

# Display the predicted values.
forecast_31_37

30 51.022355
31 51.022355
32 51.022355
33 51.022355
34 51.022355
35 51.022355
36 51.022355
Name: predicted_mean, dtype: float64

## Plotting the raw Air Quality Index series as a line chart (not an ACF plot)
plt.figure( figsize=(10,4))
plt.xlabel( "Date" )
plt.ylabel( "Air Quality Index" )
# NOTE(review): the Series is drawn against its own index (the DataFrame row
# labels), not the 'Date' column, even though the x-axis is labelled "Date".
plt.plot( AQI_df['Air Quality Index'] );
from statsmodels.tsa.stattools import adfuller

def adfuller_test(ts):
    """Run the Augmented Dickey-Fuller test on *ts* and print the key results.

    The first four values returned by ``adfuller`` are labelled and printed
    as a pandas Series: test statistic, p-value, number of lags used and
    number of observations used. ``autolag=None`` is passed, so no
    information-criterion based lag selection is requested.

    Args:
        ts: The time series to test, in any form accepted by ``adfuller``.

    Returns:
        None. The labelled results are printed.
    """
    adfuller_result = adfuller(ts, autolag=None)
    adfuller_out = pd.Series(
        adfuller_result[0:4],
        index=[
            'Test Statistic',
            'p-value',
            'Lags Used',
            'Number of Observations Used',
        ],
    )
    print(adfuller_out)

# Run the Augmented Dickey-Fuller test on the raw (undifferenced) AQI series.
adfuller_test(AQI_df['Air Quality Index'])

Test Statistic 0.090290


p-value 0.965392
Lags Used 5.000000
Number of Observations Used 9.000000
dtype: float64

# First-order difference: each reading minus the previous row's reading.
# The first row has no predecessor, so its difference is NaN.
AQI_df['AQI_diff'] = (
    AQI_df['Air Quality Index'] - AQI_df['Air Quality Index'].shift(1)
)

AQI_df.head(10)

Place Date PM 2.5 PM10 NO2 NH3


SO2 CO \
0 RABINDRA BHARATI UNIVERSITY 17-09-2019 48 58 17 3
7 10
1 RABINDRA BHARATI UNIVERSITY 19-09-2019 38 31 13 3
6 10
2 RABINDRA BHARATI UNIVERSITY 19-09-2019 28 32 16 3
6 8
3 RABINDRA BHARATI UNIVERSITY 20-09-2019 0 0 0 0
0 0
4 RABINDRA BHARATI UNIVERSITY 21-09-2019 72 86 43 3
16 20
5 RABINDRA BHARATI UNIVERSITY 22-09-2019 63 66 34 3
9 12
6 RABINDRA BHARATI UNIVERSITY 23-09-2019 33 42 19 3
7 14
7 RABINDRA BHARATI UNIVERSITY 24-09-2019 25 28 14 3
7 8
8 RABINDRA BHARATI UNIVERSITY 25-09-2019 0 0 0 0
0 0
9 RABINDRA BHARATI UNIVERSITY 26-09-2019 54 55 28 4
8 7

O3 Air Quality Index seasonal trend AQI_diff


0 11 58 10.02 NaN NaN
1 10 38 13.52 NaN -20.0
2 24 32 -6.68 42.8 -6.0
3 0 0 -40.98 44.4 -32.0
4 35 86 24.12 45.8 86.0
5 40 66 10.02 45.0 -20.0
6 45 45 13.52 45.0 -21.0
7 23 28 -6.68 38.8 -17.0
8 0 0 -40.98 36.6 -28.0
9 28 55 24.12 46.0 55.0

# Drop only the rows whose first-order difference is undefined. A bare
# dropna() would also discard rows that are NaN in unrelated columns.
AQI_diff_df = AQI_df.dropna(subset=['AQI_diff'])

# Plot the differenced series, which is what the axis label describes.
plt.figure(figsize=(10, 4))
plt.xlabel("Date")
plt.ylabel("First Order Differences")
plt.plot(AQI_diff_df['AQI_diff']);

# Autocorrelation of the differenced series up to lag 10.
# The result keeps its original name `pacf_plot`, although plot_acf draws an
# ACF plot, so that the module-level name is unchanged.
pacf_plot = plot_acf(AQI_df['AQI_diff'].dropna(), lags=10)

# Use the first 100 rows for training and the remainder for testing.
# NOTE(review): the recorded summary below reports 15 observations, in which
# case the training slice is the whole frame and AQI_test is empty -- confirm
# the split point.
AQI_train = AQI_df[0:100]
AQI_test = AQI_df[100:]

# Fit an ARIMA model of order (1, 1, 1) on the training AQI series.
arima = ARIMA( AQI_train['Air Quality Index'],


order = (1,1,1))
arima_model = arima.fit()
# Display the fitted coefficients and diagnostic statistics.
arima_model.summary()

<class 'statsmodels.iolib.summary.Summary'>
"""
SARIMAX Results

======================================================================
========
Dep. Variable: Air Quality Index No. Observations:
15
Model: ARIMA(1, 1, 1) Log Likelihood
-67.644
Date: Thu, 12 Sep 2024 AIC
141.288
Time: 10:43:22 BIC
143.206
Sample: 0 HQIC
141.111
- 15

Covariance Type: opg

======================================================================
========
coef std err z P>|z| [0.025
0.975]
----------------------------------------------------------------------
--------
ar.L1 0.2266 0.465 0.488 0.626 -0.684
1.137
ma.L1 -0.9366 0.934 -1.003 0.316 -2.767
0.893
sigma2 825.5921 630.430 1.310 0.190 -410.029
2061.213
======================================================================
=============
Ljung-Box (L1) (Q): 0.03 Jarque-Bera (JB):
0.62
Prob(Q): 0.87 Prob(JB):
0.73
Heteroskedasticity (H): 0.74 Skew:
0.27
Prob(H) (two-sided): 0.75 Kurtosis:
2.12
======================================================================
=============

Warnings:
[1] Covariance matrix calculated using the outer product of gradients
(complex-step).
"""

# Plot the autocorrelation (up to lag 10) and partial autocorrelation
# (up to lag 5) of the fitted model's residuals.
acf_plot = plot_acf(arima_model.resid,
lags = 10)
pacf_plot = plot_pacf(arima_model.resid,
lags = 5)
# Forecast the 10 steps that follow the end of the training series.
AQI_predict = arima_model.forecast(steps = 10)
# Display the forecast values.
AQI_predict

15 60.258577
16 54.651737
17 53.381129
18 53.093187
19 53.027934
20 53.013147
21 53.009796
22 53.009036
23 53.008864
24 53.008825
Name: predicted_mean, dtype: float64

import numpy as np
def get_mape(actual, predicted):
    """Return the mean absolute percentage error (MAPE), in percent.

    Both inputs are converted to plain NumPy float arrays first, so values
    are paired by position. Without this, two pandas Series with different
    index labels (for example observed rows versus a forecast labelled with
    future steps) are aligned by label, every pair becomes NaN, and the
    result is NaN.

    Args:
        actual: Observed values (array-like).
        predicted: Predicted values (array-like), broadcast-compatible with
            ``actual`` and in the same order.

    Returns:
        The mean of ``|actual - predicted| / |actual|`` multiplied by 100.
        The error is divided by ``actual`` without any zero check, so zero
        entries in ``actual`` yield ``inf`` or ``nan`` terms.
    """
    actual_values = np.asarray(actual, dtype=float)
    predicted_values = np.asarray(predicted, dtype=float)
    relative_errors = (actual_values - predicted_values) / actual_values
    return np.mean(np.abs(relative_errors)) * 100
# Score the forecast against the AQI readings from row 5 onward.
# NOTE(review): the recorded forecast is labelled 15-24, while this slice is
# taken from AQI_df starting at row 5 -- confirm that these are the rows the
# forecast should be compared with.
get_mape(AQI_df['Air Quality Index'][5:],
AQI_predict )

nan

You might also like