Data Science
Non-stationary price dataset

Non-stationary price dataset

import os
# Switch into the project folder so the relative CSV path used later in the
# script resolves, then echo the directory actually in effect as a sanity check.
os.chdir(r"C:\Users\haderer\Desktop\priceindex")
cwd = os.getcwd()
print("Current working directory is:", cwd)

import pandas as pd
import pandas_datareader
from pandas_datareader import data
import seaborn as sns
from statsmodels.graphics.tsaplots import plot_acf
import numpy as np
from matplotlib import pyplot as plt
from matplotlib.dates import DateFormatter
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.arima_model import ARIMA

import matplotlib.pyplot as plt

# Load the raw price table. 'Date' is deliberately read as plain text and
# parsed further down with an explicit format: letting read_csv guess the
# format (parse_dates) can misread month-year labels such as "Jan 20" as a
# day of the month, after which the explicit to_datetime(format=...) call
# would be a silent no-op on the already-converted column.
df = pd.read_csv('msciemret.csv')

# Strip thousands separators (e.g. "1,234.56") so Price can be cast to float.
df = df.replace(',', '', regex=True)
df['Price'] = df['Price'].astype(float)
print(df['Price'].std())

# Convert the month-year labels ("%b %y", e.g. "Jan 20") to datetime.
df['Date'] = pd.to_datetime(df['Date'], format='%b %y')
df.info()

# Index by date and put the observations in chronological order.
df = df.set_index('Date')
df = df.sort_index()

print(df)
# Drop rows with missing values.
df = df.dropna()
print(df.index)

Dataset:

# Price column as a single series; this is the input to the stationarity check below.
timeseries=df['Price']

from statsmodels.tsa.stattools import adfuller

def test_stationarity(timeseries, window=12):
    """Plot rolling statistics and print Augmented Dickey-Fuller test results.

    The series is drawn together with its rolling mean and rolling standard
    deviation, then ``adfuller`` is run with AIC-based lag selection and the
    leading entries of its result are printed.

    Args:
        timeseries: Series to examine; it is passed unchanged to
            ``rolling()``, ``plt.plot()`` and ``adfuller()``.
        window: Number of observations in each rolling window. Defaults to
            12, the value that was previously hard-coded.

    Returns:
        None. All output is produced via the plot and ``print``.
    """
    # Rolling statistics
    movingAverage = timeseries.rolling(window=window).mean()
    movingSTD = timeseries.rolling(window=window).std()

    # The Line2D handles returned by plt.plot are not needed; the legend is
    # built from the labels.
    plt.plot(timeseries, color='blue', label='Original')
    plt.plot(movingAverage, color='red', label='Rolling Mean')
    plt.plot(movingSTD, color='black', label='Rolling STD')
    plt.legend(loc='best')
    plt.show(block=False)

    # Augmented Dickey-Fuller test; the lag order is chosen by AIC.
    dftest = adfuller(timeseries, autolag='AIC')
    adf_stat, p_value, used_lags, n_obs, critical_values = dftest[:5]
    print("1.ADF : ", adf_stat)
    print("2.P-Value:", p_value)
    print("3. num of lags:", used_lags)
    print("4. Num of observations used for adfregression and critical value calc ", n_obs)
    print("5.Critical Values:")
    for key, val in critical_values.items():
        print("\t", key, ": ", val)

# test_stationarity() reports through its own print/plot calls and has no
# return value, so wrapping the call in print() only emitted a stray "None".
test_stationarity(timeseries)
timeseries.plot()

Output graph / ADF results:

# Autocorrelation plot of the price column.
plot_acf(df['Price'])
# Autocorrelations that decay only slowly across lags are an indicator of a non-stationary series.

Output graph: