There are fives parts in this document:
#############################################################
# Part I. Some real time series data;
#############################################################
# I.1 monthly temperatures data
library(TSA) # install.packages("TSA")
##
## Attaching package: 'TSA'
## The following objects are masked from 'package:stats':
##
## acf, arima
## The following object is masked from 'package:utils':
##
## tar
data(tempdub) # access this data from TSA into R
plot(tempdub,ylab='Temperature',type='o') # Time plot of data
# I.2 color property data
data(color) # access this data from TSA into R
plot(color,ylab='Color Property',type='o') # Time plot of data
# I.3 Oil prices data
data(oil.price) # access this data from TSA into R
plot(oil.price, ylab='Price per Barrel',type='l') # Time plot of data
# I.4 US electricity**
par(mfrow=c(1,3),mai=c(0.8,0.8,0.8,0.1))
# release the dataset "electricity" from package "TSA", and plot it
data(electricity); plot(electricity);
#plot the sequence after log transformation
log.electricity <- log(electricity); plot(log.electricity);
#plot the sequence after log transformation and differencing
plot(diff(log.electricity))
# I.5 Air Passengers**
par(mfrow=c(1,2),mai=c(0.8,0.8,0.3,0.1))
data(AirPassengers)
plot(AirPassengers); plot(log(AirPassengers));
par(mfrow=c(1,1),mai=c(0.3,0.3,0.3,0.1))
# I.6 generate random walk time plots
n <- 100;
e <- rnorm(n); z <- as.numeric(); z[1] <- e[1]
for(i in 2:n){
z[i]=z[i-1]+e[i]
}
plot(z); lines(z);
#############################################################
# Part II. Model identification;
#############################################################
library(TSA)
# II.1 Moving average (MA) models
# II.1.1 Time plot of an MA model
n <- 100; e <- rnorm(n+1)
z <- as.numeric(); z[1] <- e[2]-0.5*e[1]
for(i in 2:n){
z[i]=e[i+1]-0.5*e[i]
}
par(mfrow=c(2,2),mai=c(0.8,0.8,0.8,0.1))
plot(z); lines(z)
# II.1.2 Population ACF and PACF of an MA model
coef.ma <- c(-0.5,-0.3);
acf.ma <- ARMAacf(ma=coef.ma,lag.max=20, pacf=FALSE);
plot(y=acf.ma,x=c(1:length(acf.ma)-1), ylab='ACF', xlab='Lag'); lines(y=acf.ma,x=c(1:length(acf.ma)-1),type='h'); abline(h=0)
# II.1.3 Sample ACF and sample PACF of an MA model
acf(z,ci.type='ma')
pacf(z)
par(mfrow=c(1,1),mai=c(0.8,0.8,0.8,0.1))
# II.2 Autoregressive (AR) models
# II.2.1 Time plot of an AR model
n <- 1000; order.p <- 1; n.offset <- 10;
e <- rnorm(n+n.offset)
z <- as.numeric(); z <- e[1:order.p]
for(i in (order.p+1):(n+n.offset)){
z[i]=0.5*z[i-1]+e[i]
}
par(mfrow=c(2,2),mai=c(0.8,0.8,0.8,0.1))
plot(z); lines(z)
# II.2.2 ACF and PACF of an AR model
coef.ar <- c(-0.5,-0.25);
acf.ar <- ARMAacf(ar=coef.ar,lag.max=20, pacf=FALSE);
plot(y=acf.ar,x=c(1:length(acf.ar)-1), ylab='ACF', xlab='Lag'); lines(y=acf.ar,x=c(1:length(acf.ar)-1),type='h'); abline(h=0)
# II.2.3 Sample ACF and sample PACF of an MA model
acf(z,ci.type='ma')
pacf(z)
par(mfrow=c(1,1),mai=c(0.8,0.8,0.8,0.1))
# II.3 Nonstationary time series
# II.3.1 Analyzing the oil prices
# II.3.1.1 The original sequence together with its ACF and PACF
data(oil.price)
plot(oil.price);
par(mfrow=c(1,3),mai=c(0.8,0.8,0.8,0.1))
plot(oil.price); acf(oil.price,main=''); pacf(oil.price)
# II.3.1.2 The logged sequence together with its ACF and PACF
l.oil.price <- log(oil.price)
plot(l.oil.price); acf(l.oil.price); pacf(l.oil.price)
# II.3.1.3 The log and differenced sequence together with its ACF and PACF
dl.oil.price <- diff(l.oil.price)
plot(dl.oil.price); acf(dl.oil.price); pacf(dl.oil.price)
par(mfrow=c(1,1))
# II.3.2 ADF tests
library(aTSA)
##
## Attaching package: 'aTSA'
## The following object is masked from 'package:graphics':
##
## identify
adf.test(oil.price, nlag=5)
## Augmented Dickey-Fuller Test
## alternative: stationary
##
## Type 1: no drift no trend
## lag ADF p.value
## [1,] 0 1.53 0.968
## [2,] 1 1.35 0.955
## [3,] 2 1.74 0.979
## [4,] 3 1.96 0.988
## [5,] 4 2.36 0.990
## Type 2: with drift no trend
## lag ADF p.value
## [1,] 0 0.904 0.990
## [2,] 1 0.260 0.976
## [3,] 2 0.806 0.990
## [4,] 3 1.144 0.990
## [5,] 4 1.916 0.990
## Type 3: with drift and trend
## lag ADF p.value
## [1,] 0 -0.661 0.973
## [2,] 1 -0.932 0.948
## [3,] 2 -0.369 0.987
## [4,] 3 -0.037 0.990
## [5,] 4 0.616 0.990
## ----
## Note: in fact, p.value = 0.01 means p.value <= 0.01
#############################################################
# Part III Parameter estimation;
#############################################################
# III.1 Generate an AR(1) process
n <- 100; order.p <- 1; n.offset <- 10;
e <- rnorm(n+n.offset)
z <- as.numeric(); z <- e[1:order.p]
for(i in (order.p+1):(n+n.offset)){
z[i]=0.5*z[i-1]+e[i]
}
# III.2 Draw time plot, sample ACF and sample PACF
par(mfrow=c(1,3),mai=c(0.8,0.8,0.8,0.1))
plot(z); lines(z)
acf(z,ci.type='ma')
pacf(z)
par(mfrow=c(1,1),mai=c(0.8,0.8,0.8,0.1))
# III.3 Fit an ARIMA(0,0,1) model with ML method
m.fit <- arima(z, order=c(0,0,1), method='ML')
# III.3.1 We may try different etimation method for different models
arima(z, order=c(0,0,1), method='CSS') # CSS method for ARIMA(0,0,1)
##
## Call:
## arima(x = z, order = c(0, 0, 1), method = "CSS")
##
## Coefficients:
## ma1 intercept
## 0.4626 0.1230
## s.e. 0.0691 0.1443
##
## sigma^2 estimated as 1.079: part log likelihood = -160.28
arima(z, order=c(16,0,1), method='ML') # ML method for ARIMA(16,0,1)
##
## Call:
## arima(x = z, order = c(16, 0, 1), method = "ML")
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ar6 ar7 ar8
## 0.6531 -0.0074 -0.0068 -0.0890 -0.0429 0.1777 -0.1059 0.1324
## s.e. 0.3235 0.2027 0.1140 0.1117 0.1107 0.1166 0.1221 0.1206
## ar9 ar10 ar11 ar12 ar13 ar14 ar15 ar16
## -0.1437 0.1577 0.0950 -0.2007 -0.0262 -0.1146 0.2580 -0.2528
## s.e. 0.1179 0.1195 0.1207 0.1253 0.1334 0.1245 0.1265 0.1090
## ma1 intercept
## -0.0762 0.1790
## s.e. 0.3305 0.1639
##
## sigma^2 estimated as 0.8359: log likelihood = -147.54, aic = 331.09
arima(z, order=c(0,0,1)) # the default is to use CSS to find starting values, then ML.
##
## Call:
## arima(x = z, order = c(0, 0, 1))
##
## Coefficients:
## ma1 intercept
## 0.4620 0.1249
## s.e. 0.0696 0.1444
##
## sigma^2 estimated as 1.079: log likelihood = -160.37, aic = 324.73
#############################################################
# Part IV Model diagnostics;
#############################################################
# IV.1 Time plot of residuals
plot(m.fit$residuals, ylab='Residuals'); abline(h=0)
# IV.2 Normality checking
qqnorm(m.fit$residuals); qqline(m.fit$residuals);
hist(m.fit$residuals)
# IV.3 Autocorrelation checking to check residual autocorrelations individually
par(mfrow=c(1,2),mai=c(0.8,0.8,0.8,0.1))
acf(m.fit$residuals); pacf(m.fit$residuals)
par(mfrow=c(1,1),mai=c(0.8,0.8,0.8,0.1))
# IV.4 Box-Pierce or Ljung-Box test to check residual autocorrelations jointly
Box.test(m.fit$residuals, lag = 10, type = c("Ljung-Box"), fitdf = 1)
##
## Box-Ljung test
##
## data: m.fit$residuals
## X-squared = 15.416, df = 9, p-value = 0.08013
#Box.test(m.fit$residuals, lag = 10, type = c("Box-Pierce"), fitdf = 1)
# IV.4.1 A useful function
library(TSA)
tsdiag(m.fit,gof=15)
#############################################################
# Part V. Forecasting;
#############################################################
# V.1 Generate AR(1) process
n <- 100; order.p <- 1; n.offset <- 10;
e <- rnorm(n+n.offset)
z <- as.numeric(); z <- e[1:order.p]
for(i in (order.p+1):(n+n.offset)){
z[i]=0.5*z[i-1]+e[i]
}
z <- z[(n.offset+1):(n.offset+n)]
# V.2 Draw time plot, Sample ACF and sample PACF, and then fit a model
par(mfrow=c(1,3),mai=c(0.8,0.8,0.8,0.1))
plot(z); lines(z)
acf(z,ci.type='ma')
pacf(z)
par(mfrow=c(1,1),mai=c(0.8,0.8,0.8,0.1))
ar <- arima(z, order=c(1,0,0), method='ML'); ar;
##
## Call:
## arima(x = z, order = c(1, 0, 0), method = "ML")
##
## Coefficients:
## ar1 intercept
## 0.4865 -0.1761
## s.e. 0.0879 0.1876
##
## sigma^2 estimated as 0.9444: log likelihood = -139.17, aic = 282.33
# V.3 Predict the next 6 observations
n.head <- 6
ar.pre <- predict(ar,n.ahead=n.head)
# V.4 Present the prediction on the plot
plot(y=z,x=1:n,type='l',xlim=c(-1,(n+n.head+2)))
lines(y=c(z[n],ar.pre$pred),x=c((n):(n+n.head)),col=4)
points(y=ar.pre$pred,x=c((n+1):(n+n.head)),col=4)
lines(y=ar.pre$pred+0.96*ar.pre$se,x=c((n+1):(n+n.head)),col=4)
points(y=ar.pre$pred+0.96*ar.pre$se,x=c((n+1):(n+n.head)),col=4)
lines(y=ar.pre$pred-0.96*ar.pre$se,x=c((n+1):(n+n.head)),col=4)
points(y=ar.pre$pred-0.96*ar.pre$se,x=c((n+1):(n+n.head)),col=4)
#############################################################
# Part VI. Conditional heteroscedastic time series models;
#############################################################
# Set the physical path
setwd("C:/Users/ligd/Google Drive/Presenations/Teaching/Rcode/")
# Read the file into R software
hsi <- read.csv(file = 'HSI.csv'); head(hsi)
## Date Close return
## 1 11/18/2010 23637.39 NA
## 2 11/19/2010 23605.71 -0.000582448
## 3 11/22/2010 23524.02 -0.001505553
## 4 11/23/2010 22896.14 -0.011749248
## 5 11/24/2010 23023.86 0.002415840
## 6 11/25/2010 23054.68 0.000580969
hsi.return <- hsi$return[2:length(hsi$return)]
# Time plot of daily closing prices
plot(hsi$Close, type='l', ylab='Price', xlab='Time')
# Time plot of log returns
plot(hsi.return, type='l', ylab='Log Return', xlab='Time')
# Sample ACF and PACF of log returns
par(mfrow=c(1,2),mai=c(0.8,0.8,0.1,0.1))
acf(hsi.return, lag=100, main=''); pacf(hsi.return, lag=100, main='')
par(mfrow=c(1,1),mai=c(0.8,0.8,0.1,0.1))
# Sample ACF and PACF of squared log returns
hsi.return2 <- hsi.return*hsi.return
par(mfrow=c(1,2),mai=c(0.8,0.8,0.1,0.1))
acf(hsi.return2, lag=100, main=''); pacf(hsi.return2, lag=100, main='')
par(mfrow=c(1,1),mai=c(0.8,0.8,0.1,0.1))
# Fitting an GARCH model (need the package of "fGarch")
library(fGarch)
## NOTE: Packages 'fBasics', 'timeDate', and 'timeSeries' are no longer
## attached to the search() path when 'fGarch' is attached.
##
## If needed attach them yourself in your R script by e.g.,
## require("timeSeries")
gfit <- garchFit(~garch(1,1), data=hsi.return, trace=FALSE, cond.dist = c("norm"),
include.mean = FALSE)
summary(gfit)
##
## Title:
## GARCH Modelling
##
## Call:
## garchFit(formula = ~garch(1, 1), data = hsi.return, cond.dist = c("norm"),
## include.mean = FALSE, trace = FALSE)
##
## Mean and Variance Equation:
## data ~ garch(1, 1)
## <environment: 0x000000002112e2b0>
## [data = hsi.return]
##
## Conditional Distribution:
## norm
##
## Coefficient(s):
## omega alpha1 beta1
## 3.8186e-07 4.7856e-02 9.3711e-01
##
## Std. Errors:
## based on Hessian
##
## Error Analysis:
## Estimate Std. Error t value Pr(>|t|)
## omega 3.819e-07 1.119e-07 3.413 0.000642 ***
## alpha1 4.786e-02 6.922e-03 6.914 4.71e-12 ***
## beta1 9.371e-01 9.433e-03 99.344 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Log Likelihood:
## 9627.317 normalized: 3.921514
##
## Description:
## Sun Nov 13 10:12:43 2022 by user: ligd
##
##
## Standardised Residuals Tests:
## Statistic p-Value
## Jarque-Bera Test R Chi^2 273.2475 0
## Shapiro-Wilk Test R W 0.9844884 1.012076e-15
## Ljung-Box Test R Q(10) 13.23267 0.2109525
## Ljung-Box Test R Q(15) 16.94404 0.3222205
## Ljung-Box Test R Q(20) 20.02622 0.4562906
## Ljung-Box Test R^2 Q(10) 10.92054 0.3637425
## Ljung-Box Test R^2 Q(15) 11.40044 0.7237261
## Ljung-Box Test R^2 Q(20) 16.33553 0.6956028
## LM Arch Test R TR^2 11.20496 0.5114401
##
## Information Criterion Statistics:
## AIC BIC SIC HQIC
## -7.840584 -7.833489 -7.840587 -7.838006
# Predict the next 10 values
predict(gfit, n.ahead = 10, plot=TRUE, conf=.95, nx=100)
## meanForecast meanError standardDeviation lowerInterval upperInterval
## 1 0 0.004849328 0.004849328 -0.009504508 0.009504508
## 2 0 0.004852237 0.004852237 -0.009510210 0.009510210
## 3 0 0.004855100 0.004855100 -0.009515822 0.009515822
## 4 0 0.004857919 0.004857919 -0.009521347 0.009521347
## 5 0 0.004860694 0.004860694 -0.009526785 0.009526785
## 6 0 0.004863426 0.004863426 -0.009532139 0.009532139
## 7 0 0.004866115 0.004866115 -0.009537409 0.009537409
## 8 0 0.004868762 0.004868762 -0.009542597 0.009542597
## 9 0 0.004871367 0.004871367 -0.009547705 0.009547705
## 10 0 0.004873933 0.004873933 -0.009552733 0.009552733