24. 6. 8. 오후 8:46 time_series_1.
ipynb - Colab
# Colab-specific setup: mount Google Drive and move into the project folder.
from google.colab import drive
drive.mount('/content/drive')

%cd /content/drive/MyDrive/Colab Notebooks/time_series

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm

# Load Apple daily price data; AD is the list of raw closing prices
# used throughout the notebook.
AAPL = pd.read_csv('AAPL.csv')
AAPL_D = AAPL[['Date','Open','High','Low','Close','Adj Close','Volume']]
AD = AAPL_D['Close'].tolist()
add 코드 add 텍스트
# Quick look at the raw closing-price series.
plt.plot(AD,color='red')
plt.show()
def LogReturn(D):
    """Plot the log-return series of price list D (explicit-loop version)."""
    returns = []
    # log return at time t: log(D[t] / D[t-1])
    for prev, cur in zip(D, D[1:]):
        returns.append(np.log(cur / prev))
    plt.plot(returns, color='blue')
    plt.show()

LogReturn(AD)
# Log returns without an explicit loop (vectorized version).
def LogReturn(D):
    """Vectorized log returns of price series D.

    Plots the return series and its histogram, then returns the
    returns as a NumPy array.  (Fix: removed the unused local `n`.)
    """
    D = np.asarray(D, dtype=float)
    LogR = np.log(D[1:] / D[:-1])
    plt.plot(LogR, 'b')
    plt.show()
    plt.hist(LogR, bins = 50, color = 'c')
    plt.show()
    return LogR

LRAD = LogReturn(AD)
# Sanity check: first five log returns.
LRAD[:5]
# ECDF: empirical cumulative distribution function
def Indicator(X,x):
    """Indicator of the event {X <= x}: 1 when X <= x, otherwise 0."""
    return 1 if X <= x else 0
def ECDF(D):
    """Empirical CDF of sample D on a 200-point grid between min(D) and max(D).

    Plots the ECDF in red and returns the list of Fn values.
    (Fix: the original returned nothing, but ECDF_N_CDF below assigns its
    result; also replaced the O(n*grid) double loop with a vectorized count.)
    """
    X = np.asarray(D, dtype=float)
    n = len(X)
    mm, MM = X.min(), X.max()
    x = np.linspace(mm, MM, 200)
    print(mm, MM)
    # Fn(x_i) = #{t : X_t <= x_i} / n  -- fraction of sample at or below x_i
    Fn_list = [np.mean(X <= xi) for xi in x]
    plt.plot(x, Fn_list, 'r')
    #plt.show()
    return Fn_list

ECDF(LRAD)
https://colab.research.google.com/drive/1XzGySyzP_Z5BsYuPRjy_n65lDNmyl3od#scrollTo=G6a1R-iJHy5X 1/10
24. 6. 8. 오후 8:46 time_series_1.ipynb - Colab
def Hist_M_density(D):
    """Density histogram of D overlaid with a normal pdf fitted by moments."""
    grid = np.linspace(min(D), max(D), 200)
    plt.hist(D, bins=50, density=True, color='c')
    mu_hat = np.mean(D)
    var_hat = np.var(D)
    # norm.pdf takes the standard deviation, hence the square root
    plt.plot(grid, norm.pdf(grid, mu_hat, var_hat**0.5), color='b')
    #plt.show()

Hist_M_density(LRAD)
def ECDF_N_CDF(D): # D = log return
    """Overlay the empirical CDF of D with the fitted normal CDF.

    ECDF(D) draws the red empirical curve on the current axes; the blue
    curve is the N(mean, var) CDF.  (Fix: removed the unused `Fn` binding —
    the call is kept only for its plotting side effect.)
    """
    x = np.linspace(min(D), max(D), 200)
    ECDF(D)  # draws the empirical CDF
    mean = np.mean(D)
    var = np.var(D)
    y = norm.cdf(x, mean, var**0.5)  # args: mean, standard deviation
    plt.plot(x, y, color='b')
    plt.show()

ECDF_N_CDF(LRAD)
keyboard_arrow_down AR(1) 모형
def AR_1(phi0, phi1, ss, n):
    """Simulate an AR(1) path: X_t = phi0 + phi1*X_{t-1} + e_t, e_t ~ N(0, ss).

    Returns the last n-100 values; the first 100 draws are discarded so the
    arbitrary initial value X_0 = 0 has no influence (burn-in).
    """
    samples = []
    prev = 0
    for _ in range(n):  # generate n draws in total
        cur = phi0 + phi1 * prev + np.random.normal(0, ss**0.5)
        samples.append(cur)
        prev = cur  # the new value becomes the lag for the next step
    return samples[100:]
10
11
# Simulated AR(1) path: true parameters (phi0, phi1) = (1, 0.5), noise variance 1;
# n=800 with 100 burn-in draws discarded leaves 700 observations.
D1 = AR_1(1,0.5,1,800)

def Gr(D,T): # T = title
    # Simple plotting helper: draw series D in blue with title T.
    plt.plot(D, 'b')
    plt.title(T)
    plt.show()

Gr(D1,'AR(1) model')
keyboard_arrow_down AR(2) 모형
def AR_2(phi0, phi1, phi2, ss, n):
    """Simulate an AR(2) path: X_t = phi0 + phi1*X_{t-1} + phi2*X_{t-2} + e_t.

    e_t ~ N(0, ss).  The first 100 draws are discarded (burn-in) so the zero
    initial state does not matter; the remaining n-100 values are returned.
    """
    path = []
    lag1 = lag2 = 0  # initial values X_0 = X_{-1} = 0
    for _ in range(n):
        x = phi0 + phi1 * lag1 + phi2 * lag2 + np.random.normal(0, ss**0.5)
        path.append(x)
        lag1, lag2 = x, lag1  # shift the lags forward
    return path[100:]
11
# AR(2) sample path with true parameters (1, 0.5, -0.2), noise variance 1.
D2 = AR_2(1,0.5,-0.2, 1,800)
Gr(D2, 'AR(2) model')

# phi1 = 1 is a unit root: the path is a random walk with drift (nonstationary).
D11 = AR_1(1,1,1,500)
Gr(D11, 'Nonstationary')
keyboard_arrow_down 중심극한 정리
def CLT_AR_1_MU(phi0, phi1, ss, n, M): # M = number of replications
    """CLT check for the AR(1) sample mean.

    Simulates M independent AR(1) paths, computes sqrt(N)*(mu_hat - mu) for
    each, and plots the histogram (should look Gaussian for stationary phi1).

    Fix: the theoretical mean was computed as `phi0/1 - phi1` (operator
    precedence bug); the stationary mean of AR(1) is phi0 / (1 - phi1).
    Also removed a duplicated `for` line introduced by a page break.
    """
    L = []
    for j in range(M):
        D = AR_1(phi0, phi1, ss, n)
        N = n - 100           # effective sample size after burn-in
        mu_hat = np.mean(D)
        mu = phi0 / (1 - phi1)  # theoretical (stationary) mean
        clt_value = N**0.5 * (mu_hat - mu)
        L.append(clt_value)

    plt.hist(L, bins = 50, color = 'b')
    plt.show()

CLT_AR_1_MU(1,0.5,1,500,300)
과제 AR(3) 모델 함수 만들기
우리 데이터를 적용한 그래프찍어보기
keyboard_arrow_down OLS, Yule walker
def OLSE_AR_1(D):
    """OLS estimator for AR(1): returns (phi0_hat, phi1_hat).

    The data are centered, phi1_hat = sum(X_t * X_{t-1}) / sum(X_{t-1}^2)
    over t = 1..n-1, and phi0_hat = mean(D) * (1 - phi1_hat) from
    phi0 = mu * (1 - phi1).  (Improvement: the accumulation loop is
    replaced by vectorized dot products — same values, O(n) in C.)
    """
    X = np.asarray(D, dtype=float) - np.mean(D)  # centered: mean of X is zero
    hat_phi1 = np.dot(X[1:], X[:-1]) / np.dot(X[:-1], X[:-1])
    hat_phi0 = np.mean(D) * (1 - hat_phi1)  # phi0 = mu * (1 - phi1)
    return hat_phi0, hat_phi1
13
# Estimates should be close to the true parameters (1, 0.5).
print(OLSE_AR_1(D1)) # true values: 1, 0.5
def CLT_OLSE(phi0, phi1, ss, n, M):
    """Histogram of sqrt(N)*(phi1_hat - phi1) over M simulated AR(1) paths."""
    values = []
    for _ in range(M):
        sample = AR_1(phi0, phi1, ss, n)
        estimate = OLSE_AR_1(sample)[1]  # OLSE of phi1
        N = n - 100                      # effective sample size after burn-in
        values.append(N**0.5 * (estimate - phi1))
    plt.hist(values, bins=50, color = 'b')
    plt.show()

CLT_OLSE(1,0.8,1,500,600)
def FORE_AR1(D):
    """1-, 2-, and 3-step-ahead forecasts from an AR(1) fit of D.

    Uses the recursion X_{t+k} = mu + phi1 * (X_{t+k-1} - mu), starting from
    the last observation.  Prints mu and phi1_hat as a side effect and
    returns the three forecasts.  (Fix: removed the unused centered `X`.)
    """
    mu = np.mean(D)
    print(mu)
    olse = OLSE_AR_1(D)[1]
    print(olse)
    Xt = D[-1]                     # the last observation
    Xt1 = mu + olse * (Xt - mu)    # one-step forecast
    Xt2 = mu + olse * (Xt1 - mu)   # two-step forecast
    Xt3 = mu + olse * (Xt2 - mu)   # three-step forecast
    return Xt1, Xt2, Xt3

print(FORE_AR1(D1))
def Graph_Fore_AR1(D):
    """Plot the 1..20-step-ahead AR(1) forecasts of D.

    Each forecast feeds the next, so the path decays geometrically toward
    the sample mean.  Returns the list of 20 forecasts.
    (Fix: removed the unused centered `X`; the loop counter was unused.)
    """
    mu = np.mean(D)
    print(mu)
    olse = OLSE_AR_1(D)[1]
    print(olse)
    Xt = D[-1]  # start the recursion from the last observation
    L = []
    for _ in range(20):  # k = 1..20 step-ahead forecasts
        fore_k = mu + olse * (Xt - mu)
        L.append(fore_k)
        Xt = fore_k  # the forecast becomes the next step's lag
    plt.plot(L, 'r', marker='*')
    return L

Graph_Fore_AR1(D1)
https://colab.research.google.com/drive/1XzGySyzP_Z5BsYuPRjy_n65lDNmyl3od#scrollTo=G6a1R-iJHy5X 3/10
24. 6. 8. 오후 8:46 time_series_1.ipynb - Colab
### one step error

def ERROR_one_step_forecast(D,m): # m = out of sample size
    """Rolling one-step-ahead AR(1) backtest over the last m observations.

    Refits on each n-point window, forecasts the next value, and prints
    MAE/RMSE before plotting real vs forecast values.
    """
    T = len(D)
    n = T - m  # in-sample window length
    reals, fores, errs = [], [], []
    for i in range(m):
        window = D[i:i+n]        # the n in-sample observations
        actual = D[i+n]          # the value being forecast
        mu = np.mean(window)
        centered = np.array(window) - mu
        phi1_hat = OLSE_AR_1(centered)[1]
        forecast = mu + phi1_hat * (window[-1] - mu)  # one-step forecast
        reals.append(actual)
        fores.append(forecast)
        errs.append(actual - forecast)
    errs = np.array(errs)
    MAE = np.mean(np.abs(errs))
    RMSE = (np.mean(errs**2))**0.5
    print('MAE:{0}, RMSE :{1}'.format(MAE, RMSE))
    plt.figure(figsize =(10,8))
    plt.plot(reals, 'b', label = 'Real value', marker = '*')
    plt.plot(fores, 'r', label = 'Forecast value', marker='o')
    plt.legend()
    plt.show()

ERROR_one_step_forecast(D1,30)
### one step error
from sklearn.metrics import mean_absolute_percentage_error

def ERROR2_one_step_forecast(D,m): # m = out of sample size
    """Rolling one-step AR(1) backtest reporting MAE, MAPE and RMSE."""
    n = len(D) - m  # in-sample window length
    reals, fores, errs = [], [], []
    for i in range(m):
        window = D[i:i+n]    # the n in-sample observations
        actual = D[i+n]      # the value being forecast
        mu = np.mean(window)
        phi1_hat = OLSE_AR_1(np.array(window) - mu)[1]
        forecast = mu + phi1_hat * (window[-1] - mu)  # one-step forecast
        reals.append(actual)
        fores.append(forecast)
        errs.append(actual - forecast)
    errs = np.array(errs)
    reals = np.array(reals)
    fores = np.array(fores)
    MAE = np.mean(np.abs(errs))
    # mean_absolute_percentage_error(reals, fores) would compute the same value
    MAPE = np.mean(np.abs(errs / reals))
    RMSE = (np.mean(errs**2))**0.5
    print('MAE:{0}, MAPE:{1},RMSE :{2}'.format(MAE, MAPE, RMSE))
    plt.figure(figsize =(10,8))
    plt.plot(reals, 'b', label = 'Real value', marker = '*')
    plt.plot(fores, 'r', label = 'Forecast value', marker='o')
    plt.legend()
    plt.show()

ERROR2_one_step_forecast(D1,30)
keyboard_arrow_down Yule walker
https://colab.research.google.com/drive/1XzGySyzP_Z5BsYuPRjy_n65lDNmyl3od#scrollTo=G6a1R-iJHy5X 4/10
24. 6. 8. 오후 8:46 time_series_1.ipynb - Colab
def ACF(D, max_lag=20):
    """Sample autocorrelation function of D at lags 0..max_lag.

    Generalization: the lag count (hard-coded 21 values before) is now a
    parameter; the default max_lag=20 reproduces the original behavior.
    Plots the SACF and returns it as an array (Sacf[0] is always 1).
    """
    X = np.asarray(D, dtype=float)
    n = len(X)
    Xc = X - X.mean()
    # autocovariance at lag h: average of (X_{t+h}-mu)(X_t-mu) over t
    ACov = np.array([np.mean(Xc[h:] * Xc[:n - h]) for h in range(max_lag + 1)])
    Sacf = ACov / ACov[0]  # normalize by the lag-0 autocovariance (variance)
    plt.plot(Sacf, marker='^')
    plt.show()
    return Sacf

ACF(D1)
def YW_AR_1(D):
    """Yule-Walker estimator for AR(1).

    phi1_hat = rho(1) (the lag-1 sample autocorrelation) and
    phi0_hat = mean(D) * (1 - phi1_hat).  (Fix: removed the unused `n`.)
    """
    sacf = ACF(D)
    hat_phi1 = sacf[1]
    hat_phi0 = np.mean(D) * (1 - hat_phi1)
    return hat_phi0, hat_phi1

print(YW_AR_1(D1))
def OLSE_AR2(D):
    """OLS estimator for AR(2) via the regression normal equations.

    With Z_t = (X_{t-1}, X_{t-2}), the coefficient estimate is
    (Z'Z)^{-1} (Z'x); phi0 is recovered from phi0 = mu*(1 - phi1 - phi2).
    Returns (phi0, phi1, phi2).
    """
    mean_D = np.mean(D)
    X = np.array(D) - mean_D  # centered data
    rows, targets = [], []
    for t in range(2, len(X)):
        rows.append([X[t-1], X[t-2]])
        targets.append(X[t])
    Z = np.array(rows)
    x_vec = np.array(targets)
    normal_matrix = np.dot(Z.T, Z)
    moment_vector = np.dot(Z.T, x_vec)
    phi1, phi2 = np.dot(np.linalg.inv(normal_matrix), moment_vector)
    phi0 = mean_D * (1 - phi1 - phi2)
    return phi0, phi1, phi2
print(OLSE_AR2(D2)) # true parameters: 1, 0.5, -0.2
def YW_AR_2(D):
    """Yule-Walker estimator for AR(2).

    Solves A * phi = B where A[i][j] = rho(|i-j|) and B[i] = rho(i+1).
    Fixes: `np.dot(A_inv, B)` is a 2x1 column, so the original unpack gave
    1-element arrays for phi1/phi2 — now flattened to scalars, matching
    YW_AR_3 below.  Also removed the unused centered `X`.
    """
    rho = ACF(D)  # sample autocorrelations; solve A X = B
    B = np.zeros((2,1)) # 2 by 1 column
    A = np.zeros((2,2)) # 2 by 2 matrix
    for i in range(2): # i = 0, 1
        B[i][0] = rho[i+1]
        for j in range(2):
            A[i][j] = rho[np.abs(i-j)]
    A_inv = np.linalg.inv(A)
    phi = np.dot(A_inv, B).flatten()  # flatten -> scalar coefficients
    phi1, phi2 = phi
    phi0 = np.mean(D) * (1 - phi1 - phi2)
    return phi0, phi1, phi2

print(YW_AR_2(D2))
https://colab.research.google.com/drive/1XzGySyzP_Z5BsYuPRjy_n65lDNmyl3od#scrollTo=G6a1R-iJHy5X 5/10
24. 6. 8. 오후 8:46 time_series_1.ipynb - Colab
def ERROR_one_AR2(D, m):
    """Rolling one-step backtest of the OLS-fitted AR(2); prints MAE/RMSE."""
    n = len(D) - m  # in-sample window length
    Lr, Lf, Le = [],[],[]
    for i in range(m):
        window = D[i:i+n]
        actual = D[i+n]
        est = OLSE_AR2(window)  # (phi0, phi1, phi2)
        x2, x1 = window[-2], window[-1]
        forecast = est[0] + est[1] * x1 + est[2] * x2
        Lr.append(actual)
        Lf.append(forecast)
        Le.append(actual - forecast)
    Le = np.array(Le)
    MAE = np.mean(np.abs(Le))
    RMSE = (np.mean(Le**2))**0.5
    print(f'MAE:{MAE}, RMSE:{RMSE}')
    plt.figure(figsize =(10,8))
    plt.plot(Lr, 'b', label = 'Real value', marker = '*')
    plt.plot(Lf, 'r', label = 'Forecast value', marker='o')
    plt.legend()
    plt.show()

ERROR_one_AR2(D2, 5)
def ERROR_one_AR2_YW(D, m):
    """Rolling one-step backtest of AR(2) with Yule-Walker parameters."""
    n = len(D) - m  # in-sample window length
    Lr, Lf, Le = [], [], []
    for i in range(m):
        INS = D[i:i+n]
        actual = D[i+n]
        # Estimate the AR(2) parameters by Yule-Walker on the window
        hat_phi0, hat_phi1, hat_phi2 = YW_AR_2(INS)
        if len(INS) < 2:
            raise ValueError("INS 길이가 2 미만입니다. AR(2) 모델에는 최소 2개의 데이터 포인트가 필요합니다.")
        Xt = INS[-1]    # most recent value
        Xt_1 = INS[-2]  # value before that
        # One-step-ahead AR(2) forecast
        forecast = hat_phi0 + hat_phi1 * Xt + hat_phi2 * Xt_1
        Lr.append(actual)
        Lf.append(forecast)
        Le.append(actual - forecast)
    Le = np.array(Le)
    MAE = np.mean(np.abs(Le))
    RMSE = (np.mean(Le**2))**0.5
    print(f'MAE: {MAE}, RMSE: {RMSE}')
    plt.figure(figsize=(10, 8))
    plt.plot(Lr, 'b', label='Real value', marker='*')
    plt.plot(Lf, 'r', label='Forecast value', marker='o')
    plt.legend()
    plt.show()
keyboard_arrow_down AR(3) Model
def AR_3(phi0, phi1, phi2, phi3, ss,n):
    """Simulate an AR(3) path.

    X_t = phi0 + phi1*X_{t-1} + phi2*X_{t-2} + phi3*X_{t-3} + e_t with
    e_t ~ N(0, ss).  The first 100 draws are discarded (burn-in) so the
    zero initial values have no influence; n-100 values are returned.
    """
    path = []
    lag1 = lag2 = lag3 = 0  # initial values X_0 = X_{-1} = X_{-2} = 0
    for _ in range(n):
        x = phi0 + phi1 * lag1 + phi2 * lag2 + phi3 * lag3 + np.random.normal(0, ss**0.5)
        path.append(x)
        lag1, lag2, lag3 = x, lag1, lag2  # shift the three lags forward
    return path[100:]
12
13
# AR(3) sample path with true parameters (1, 0.5, -0.2, 0.3), noise variance 1.
D3 = AR_3(1, 0.5, -0.2, 0.3, 1, 800)

def Gr(D,T): # T = title
    # Plotting helper (re-defined in this cell): series D in blue, title T.
    plt.plot(D, 'b')
    plt.title(T)
    plt.show()

Gr(D3,'AR(3) model')
https://colab.research.google.com/drive/1XzGySyzP_Z5BsYuPRjy_n65lDNmyl3od#scrollTo=G6a1R-iJHy5X 6/10
24. 6. 8. 오후 8:46 time_series_1.ipynb - Colab
AR(3) 모형의 OLSE
def OLSE_AR3(D):
    """OLS estimator for AR(3) via the regression normal equations.

    Regresses the centered X_t on (X_{t-1}, X_{t-2}, X_{t-3}) and recovers
    phi0 from phi0 = mu*(1 - phi1 - phi2 - phi3).
    Returns (phi0, phi1, phi2, phi3).
    """
    mean_D = np.mean(D)
    X = np.array(D) - mean_D  # center the data
    design, response = [], []
    for t in range(3, len(X)):  # use the three previous time points
        design.append([X[t-1], X[t-2], X[t-3]])
        response.append(X[t])
    Z = np.array(design)
    y = np.array(response)
    # OLSE = (Z'Z)^{-1} (Z'y)
    coeffs = np.dot(np.linalg.inv(np.dot(Z.T, Z)), np.dot(Z.T, y))
    phi1, phi2, phi3 = coeffs
    phi0 = mean_D * (1 - phi1 - phi2 - phi3)
    return phi0, phi1, phi2, phi3
19
print(OLSE_AR3(D3)) # true parameters: 1, 0.5, -0.2, 0.3
21
AR(3) 모형의 Yule Walker estimator
def ACF(D, max_lag=20):
    """Sample autocorrelation function of D at lags 0..max_lag (no plotting).

    Generalization: the lag count (hard-coded 21 values before) is now a
    parameter; the default max_lag=20 reproduces the original behavior.
    Returns the autocovariances normalized by the lag-0 value (variance),
    so the result always starts with 1.
    """
    X = np.asarray(D, dtype=float)
    n = len(X)
    Xc = X - X.mean()
    # autocovariance at lag h: average of (X_{t+h}-mu)(X_t-mu) over t
    ACov = np.array([np.mean(Xc[h:] * Xc[:n - h]) for h in range(max_lag + 1)])
    Sacf = ACov / ACov[0]
    #plt.plot(Sacf, marker='^')
    #plt.show()
    return Sacf
18
19
def YW_AR_3(D):
    """Yule-Walker estimator for AR(3).

    Solves A * phi = B where A[i][j] = rho(|i-j|) and B[i] = rho(i+1),
    then recovers phi0 = mu*(1 - phi1 - phi2 - phi3).
    (Fix: removed the unused centered `X`.)
    """
    rho = ACF(D)  # sample autocorrelations
    B = np.zeros((3, 1)) # 3 by 1 vector
    A = np.zeros((3, 3)) # 3 by 3 matrix
    for i in range(3): # i = 0, 1, 2
        B[i][0] = rho[i+1]
        for j in range(3):
            A[i][j] = rho[abs(i-j)]
    A_inv = np.linalg.inv(A)
    phi = np.dot(A_inv, B).flatten()  # flatten -> scalar coefficients
    phi1, phi2, phi3 = phi
    phi0 = np.mean(D) * (1 - phi1 - phi2 - phi3)
    return phi0, phi1, phi2, phi3

print(YW_AR_3(D3))
https://colab.research.google.com/drive/1XzGySyzP_Z5BsYuPRjy_n65lDNmyl3od#scrollTo=G6a1R-iJHy5X 7/10
24. 6. 8. 오후 8:46 time_series_1.ipynb - Colab
def ERROR_one_AR3(D, m):
    """Rolling one-step backtest of the OLS-fitted AR(3); prints MAE/RMSE."""
    n = len(D) - m  # in-sample window length
    Lr, Lf, Le = [],[],[]
    for i in range(m):
        window = D[i:i+n]
        actual = D[i+n]
        est = OLSE_AR3(window)  # (phi0, phi1, phi2, phi3)
        x3, x2, x1 = window[-3], window[-2], window[-1]
        forecast = est[0] + est[1] * x1 + est[2] * x2 + est[3] * x3
        Lr.append(actual)
        Lf.append(forecast)
        Le.append(actual - forecast)
    Le = np.array(Le)
    MAE = np.mean(np.abs(Le))
    RMSE = (np.mean(Le**2))**0.5
    print(f'MAE:{MAE}, RMSE:{RMSE}')
    #plt.figure(figsize =(10,8))
    plt.plot(Lr, 'b', label = 'Real value', marker = '*')
    plt.plot(Lf, 'r', label = 'Forecast value', marker='o')
    plt.legend()
    plt.show()

m = 30 # out-of-sample size
ERROR_one_AR3(D3, m)
def ERROR2_one_AR3(D, m):
    """Rolling one-step backtest of AR(3) with Yule-Walker parameters."""
    n = len(D) - m  # in-sample window length
    Lr, Lf, Le = [],[],[]
    for i in range(m):
        window = D[i:i+n]
        actual = D[i+n]
        params = YW_AR_3(window)  # Yule-Walker (phi0, phi1, phi2, phi3)
        x3, x2, x1 = window[-3], window[-2], window[-1]
        forecast = params[0] + params[1] * x1 + params[2] * x2 + params[3] * x3
        Lr.append(actual)
        Lf.append(forecast)
        Le.append(actual - forecast)
    Le = np.array(Le)
    MAE = np.mean(np.abs(Le))
    RMSE = (np.mean(Le**2))**0.5
    print(f'MAE:{MAE}, RMSE:{RMSE}')
    #plt.figure(figsize =(10,8))
    plt.plot(Lr, 'b', label = 'Real value', marker = '*')
    plt.plot(Lf, 'r', label = 'Forecast value', marker='o')
    plt.legend()
    plt.show()

m = 30 # out-of-sample size
ERROR2_one_AR3(D3, m)
https://colab.research.google.com/drive/1XzGySyzP_Z5BsYuPRjy_n65lDNmyl3od#scrollTo=G6a1R-iJHy5X 8/10
24. 6. 8. 오후 8:46 time_series_1.ipynb - Colab
import numpy as np
import matplotlib.pyplot as plt
# Note: only these two sklearn metrics are imported here; MAPE below is computed by hand.
from sklearn.metrics import mean_absolute_error, mean_squared_error
4
# YW_AR_1: Yule-Walker AR(1) estimator (re-definition of the earlier cell)
def YW_AR_1(D):
    """Return (phi0_hat, phi1_hat): phi1_hat = rho(1), phi0_hat = mean*(1-phi1_hat)."""
    rho1 = ACF(D)[1]  # lag-1 sample autocorrelation (needs the ACF helper)
    return np.mean(D) * (1 - rho1), rho1
12
# One-step forecast backtest using Yule-Walker parameters
def ERROR2_one_step_forecast_YW(D, m): # m = out of sample size
    """Rolling one-step AR(1) forecast with YW-estimated phi1; prints MAE/MAPE/RMSE."""
    n = len(D) - m  # n in-sample points produce m out-of-sample forecasts
    reals, fores, errs = [], [], []
    for i in range(m):
        window = D[i:i+n]   # the n in-sample observations
        actual = D[i+n]     # the value being forecast
        # Estimate parameters by Yule-Walker on the window
        hat_phi0, hat_phi1 = YW_AR_1(window)
        mu = np.mean(window)
        # One-step-ahead forecast around the sample mean
        forecast = mu + hat_phi1 * (window[-1] - mu)
        reals.append(actual)
        fores.append(forecast)
        errs.append(actual - forecast)
    errs = np.array(errs)
    reals = np.array(reals)
    fores = np.array(fores)
    MAE = np.mean(np.abs(errs))
    MAPE = np.mean(np.abs(errs / reals))
    RMSE = np.sqrt(np.mean(errs**2))
    print('MAE:{0}, MAPE:{1}, RMSE:{2}'.format(MAE, MAPE, RMSE))
    plt.figure(figsize=(10, 8))
    plt.plot(reals, 'b', label='Real value', marker='*')
    plt.plot(fores, 'r', label='Forecast value', marker='o')
    plt.legend()
    plt.show()

# Example usage (needs a real dataset):
# D1 = np.random.rand(100)
# ERROR2_one_step_forecast_YW(D1, 30)
47
내가 가진 데이터로 정상성 여부 판단.
# Load the Seoul weather dataset and peek at the first rows.
df = pd.read_csv('seoul_temp.csv')
df.head(3)

# Count missing values per column.
df.isnull().sum()
데이터 중 타겟 값인 평균 습도만 활용하고, 최근 1096개의 데이터만을 활용
# Keep only the target column (mean humidity) and the most recent 1096 rows.
df1 = df[['평균습도']]
recent_1000 = df1.tail(1096)
recent_1000 = recent_1000.reset_index(drop=True)
recent_1000.head(3)

len(recent_1000)

# Quick look at the humidity series.
plt.plot(recent_1000['평균습도'],color='red')
plt.show()
https://colab.research.google.com/drive/1XzGySyzP_Z5BsYuPRjy_n65lDNmyl3od#scrollTo=G6a1R-iJHy5X 9/10
24. 6. 8. 오후 8:46 time_series_1.ipynb - Colab
import pandas as pd
from statsmodels.tsa.stattools import adfuller

# ADF (Augmented Dickey-Fuller) unit-root test on the humidity series.
# (Fix: this cell was garbled by PDF extraction — the print statements were
# interleaved with the duplicated ECDF cell below; reconstructed here.)
result = adfuller(recent_1000['평균습도'])
print('ADF Statistic: %f' % result[0])
print('p-value: %f' % result[1])
print('Critical Values:')
for key, value in result[4].items():
    print('\t%s: %.3f' % (key, value))

# Stationarity decision at the 5% significance level
if result[1] > 0.05:
    print('시계열 데이터가 정상 시계열이 아님 (단위근이 있음)')
else:
    print('시계열 데이터가 정상 시계열임 (단위근이 없음)')

# --- re-dump of the earlier ECDF helper cell (continues below) ---
# ECDF: empirical cumulative distribution function
def Indicator(X,x):
    # 1 if X <= x else 0 (indicator of the event {X <= x})
    if X <= x:
        ind = 1
    else:
        ind = 0
    return ind
def ECDF(D):
    """Empirical CDF of D on a 200-point grid; plots it and returns the values.

    (Fix: the original returned nothing although ECDF_N_CDF assigns its
    result; the double loop over the grid and sample is vectorized.)
    """
    X = np.asarray(D, dtype=float)
    n = len(X)
    mm, MM = X.min(), X.max()
    x = np.linspace(mm, MM, 200)
    print(mm, MM)
    # Fn(x_i) = #{t : X_t <= x_i} / n for each grid point x_i
    Fn_list = [np.mean(X <= xi) for xi in x]
    plt.plot(x, Fn_list, 'r')
    #plt.show()
    return Fn_list
26 def ECDF_N_CDF(D): #D=log return
27
28 x=np.linspace(min(D),max(D),200)
29
30 Fn=ECDF(D)
31
32 mean=np.mean(D)
33
34 var=np.var(D)
35
36 y=norm.cdf(x,mean,var**0.5) # 평균, 표준편차
37
38 plt plot(x y color='b')
https://colab.research.google.com/drive/1XzGySyzP_Z5BsYuPRjy_n65lDNmyl3od#scrollTo=G6a1R-iJHy5X 10/10