There are six parts in this document:

Part I. Some real time series data;

#############################################################
#  Part I. Some real time series data;
#############################################################
# I.1 monthly temperatures data
library(TSA)  # install.packages("TSA")
## 
## Attaching package: 'TSA'
## The following objects are masked from 'package:stats':
## 
##     acf, arima
## The following object is masked from 'package:utils':
## 
##     tar
data(tempdub) # access this data from TSA into R
plot(tempdub,ylab='Temperature',type='o') # Time plot of data

# I.2 color property data
data(color) # access this data from TSA into R
plot(color,ylab='Color Property',type='o') # Time plot of data

# I.3 Oil prices data
data(oil.price) # access this data from TSA into R
plot(oil.price, ylab='Price per Barrel',type='l') # Time plot of data

# I.4 US electricity
par(mfrow=c(1,3),mai=c(0.8,0.8,0.8,0.1))
# release the dataset "electricity" from package "TSA", and plot it
data(electricity); plot(electricity); 
#plot the sequence after log transformation
log.electricity <- log(electricity); plot(log.electricity);
#plot the sequence after log transformation and differencing
plot(diff(log.electricity)) 

# I.5 Air Passengers
par(mfrow=c(1,2),mai=c(0.8,0.8,0.3,0.1))
data(AirPassengers)
plot(AirPassengers); plot(log(AirPassengers));

par(mfrow=c(1,1),mai=c(0.3,0.3,0.3,0.1))
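
# The log transform stabilizes the variance of AirPassengers but leaves the trend and
# seasonality in place. As an additional sketch (not part of the original code, following
# the same idea as the electricity example in I.4), a first difference combined with a
# seasonal difference at lag 12 is a common way to remove both for monthly data.
plot(diff(diff(log(AirPassengers)), lag=12), ylab='Differenced log AirPassengers')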

# I.6 generate random walk time plots
n <- 100; 
e <- rnorm(n); z <- numeric(n); z[1] <- e[1]
for(i in 2:n){
 z[i]=z[i-1]+e[i]
}
plot(z); lines(z);
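
# The loop above builds the random walk recursively. An equivalent vectorized sketch
# (same model, z_t = z_{t-1} + e_t with z_1 = e_1): cumsum() accumulates the innovations,
# so cumsum(e) reproduces z exactly for the same e.
z.alt <- cumsum(e)
plot(z.alt, type='l')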

Part II. Model identification;

#############################################################
#  Part II. Model identification;
#############################################################
library(TSA)
# II.1   Moving average (MA) models
# II.1.1 Time plot of an MA model
n <- 100; e <- rnorm(n+1)
z <- numeric(n); z[1] <- e[2]-0.5*e[1]
for(i in 2:n){
  z[i]=e[i+1]-0.5*e[i]
}
par(mfrow=c(2,2),mai=c(0.8,0.8,0.8,0.1))
plot(z); lines(z)

# II.1.2 Population ACF and PACF of an MA model
coef.ma <- c(-0.5,-0.3);
acf.ma <- ARMAacf(ma=coef.ma,lag.max=20, pacf=FALSE);
plot(y=acf.ma,x=0:(length(acf.ma)-1), ylab='ACF', xlab='Lag'); lines(y=acf.ma,x=0:(length(acf.ma)-1),type='h'); abline(h=0)

# II.1.3 Sample ACF and sample PACF of an MA model
acf(z,ci.type='ma')
pacf(z)

par(mfrow=c(1,1),mai=c(0.8,0.8,0.8,0.1))
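
# As an alternative to the explicit loop in II.1.1, arima.sim() from the stats package
# can simulate the same MA(1) process; ma = -0.5 corresponds to z_t = e_t - 0.5*e_{t-1}
# in R's parameterization. This is a sketch for comparison, not part of the original code.
z.ma <- arima.sim(model=list(ma=-0.5), n=100)
par(mfrow=c(1,3),mai=c(0.8,0.8,0.8,0.1))
plot(z.ma); acf(z.ma,ci.type='ma'); pacf(z.ma)
par(mfrow=c(1,1),mai=c(0.8,0.8,0.8,0.1))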

# II.2   Autoregressive (AR) models
# II.2.1 Time plot of an AR model
n <- 1000; order.p <- 1; n.offset <- 10;
e <- rnorm(n+n.offset)
z <- e[1:order.p]   # initialize with the first innovation
for(i in (order.p+1):(n+n.offset)){
  z[i]=0.5*z[i-1]+e[i]
}
z <- z[(n.offset+1):(n.offset+n)]   # discard the burn-in observations

par(mfrow=c(2,2),mai=c(0.8,0.8,0.8,0.1))
plot(z); lines(z)

# II.2.2 ACF and PACF of an AR model
coef.ar <- c(-0.5,-0.25);
acf.ar <- ARMAacf(ar=coef.ar,lag.max=20, pacf=FALSE);
plot(y=acf.ar,x=0:(length(acf.ar)-1), ylab='ACF', xlab='Lag'); lines(y=acf.ar,x=0:(length(acf.ar)-1),type='h'); abline(h=0)

# II.2.3 Sample ACF and sample PACF of an AR model
acf(z,ci.type='ma')
pacf(z)

par(mfrow=c(1,1),mai=c(0.8,0.8,0.8,0.1))
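
# For comparison with the sample PACF above, the population PACF of the simulated AR(1)
# (phi = 0.5, matching the loop in II.2.1) can be obtained with ARMAacf(..., pacf=TRUE);
# it cuts off after lag 1, which the sample PACF should approximate. A sketch:
pacf.ar1 <- ARMAacf(ar=0.5, lag.max=20, pacf=TRUE)
plot(y=pacf.ar1, x=1:20, ylab='PACF', xlab='Lag', type='h'); abline(h=0)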

# II.3  Nonstationary time series
# II.3.1 Analyzing the oil prices
# II.3.1.1 The original sequence together with its ACF and PACF
data(oil.price)
plot(oil.price); 

par(mfrow=c(1,3),mai=c(0.8,0.8,0.8,0.1))
plot(oil.price); acf(oil.price,main=''); pacf(oil.price)

# II.3.1.2 The logged sequence together with its ACF and PACF
l.oil.price <- log(oil.price)
plot(l.oil.price); acf(l.oil.price); pacf(l.oil.price)

# II.3.1.3 The log and differenced sequence together with its ACF and PACF
dl.oil.price <- diff(l.oil.price)
plot(dl.oil.price); acf(dl.oil.price); pacf(dl.oil.price)

par(mfrow=c(1,1))

# II.3.2 ADF tests
library(aTSA)
## 
## Attaching package: 'aTSA'
## The following object is masked from 'package:graphics':
## 
##     identify
adf.test(oil.price, nlag=5)   
## Augmented Dickey-Fuller Test 
## alternative: stationary 
##  
## Type 1: no drift no trend 
##      lag  ADF p.value
## [1,]   0 1.53   0.968
## [2,]   1 1.35   0.955
## [3,]   2 1.74   0.979
## [4,]   3 1.96   0.988
## [5,]   4 2.36   0.990
## Type 2: with drift no trend 
##      lag   ADF p.value
## [1,]   0 0.904   0.990
## [2,]   1 0.260   0.976
## [3,]   2 0.806   0.990
## [4,]   3 1.144   0.990
## [5,]   4 1.916   0.990
## Type 3: with drift and trend 
##      lag    ADF p.value
## [1,]   0 -0.661   0.973
## [2,]   1 -0.932   0.948
## [3,]   2 -0.369   0.987
## [4,]   3 -0.037   0.990
## [5,]   4  0.616   0.990
## ---- 
## Note: in fact, p.value = 0.01 means p.value <= 0.01
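
# None of the tests above rejects the unit root for oil.price, consistent with the
# nonstationary appearance of the series. As a follow-up sketch, the same test applied
# to the differenced log series from II.3.1.3 would be expected to reject the unit root;
# the exact statistics depend on the data and the chosen lags.
adf.test(diff(log(oil.price)), nlag=5)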

Part III. Parameter estimation;

#############################################################
#  Part III. Parameter estimation;
#############################################################
# III.1 Generate an AR(1) process
n <- 100; order.p <- 1; n.offset <- 10;
e <- rnorm(n+n.offset)
z <- e[1:order.p]   # initialize with the first innovation
for(i in (order.p+1):(n+n.offset)){
  z[i]=0.5*z[i-1]+e[i]
}
z <- z[(n.offset+1):(n.offset+n)]   # discard the burn-in observations

# III.2 Draw time plot, sample ACF and sample PACF
par(mfrow=c(1,3),mai=c(0.8,0.8,0.8,0.1))
plot(z); lines(z)
acf(z,ci.type='ma')
pacf(z)

par(mfrow=c(1,1),mai=c(0.8,0.8,0.8,0.1))

# III.3 Fit an ARIMA(0,0,1) model with ML method
m.fit <- arima(z, order=c(0,0,1), method='ML')

# III.3.1 We may try different estimation methods for different models
arima(z, order=c(0,0,1), method='CSS') # CSS method for ARIMA(0,0,1)
## 
## Call:
## arima(x = z, order = c(0, 0, 1), method = "CSS")
## 
## Coefficients:
##          ma1  intercept
##       0.4626     0.1230
## s.e.  0.0691     0.1443
## 
## sigma^2 estimated as 1.079:  part log likelihood = -160.28
arima(z, order=c(16,0,1), method='ML') # ML method for ARIMA(16,0,1)
## 
## Call:
## arima(x = z, order = c(16, 0, 1), method = "ML")
## 
## Coefficients:
##          ar1      ar2      ar3      ar4      ar5     ar6      ar7     ar8
##       0.6531  -0.0074  -0.0068  -0.0890  -0.0429  0.1777  -0.1059  0.1324
## s.e.  0.3235   0.2027   0.1140   0.1117   0.1107  0.1166   0.1221  0.1206
##           ar9    ar10    ar11     ar12     ar13     ar14    ar15     ar16
##       -0.1437  0.1577  0.0950  -0.2007  -0.0262  -0.1146  0.2580  -0.2528
## s.e.   0.1179  0.1195  0.1207   0.1253   0.1334   0.1245  0.1265   0.1090
##           ma1  intercept
##       -0.0762     0.1790
## s.e.   0.3305     0.1639
## 
## sigma^2 estimated as 0.8359:  log likelihood = -147.54,  aic = 331.09
arima(z, order=c(0,0,1)) # the default is to use CSS to find starting values, then ML.
## 
## Call:
## arima(x = z, order = c(0, 0, 1))
## 
## Coefficients:
##          ma1  intercept
##       0.4620     0.1249
## s.e.  0.0696     0.1444
## 
## sigma^2 estimated as 1.079:  log likelihood = -160.37,  aic = 324.73
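
# A small sketch (not in the original code) for comparing candidate orders by AIC;
# arima() reports the AIC for method='ML' and the default 'CSS-ML', and smaller is better.
candidate.orders <- list(c(1,0,0), c(0,0,1), c(1,0,1))
sapply(candidate.orders, function(ord) arima(z, order=ord, method='ML')$aic)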

Part IV. Model diagnostics;

#############################################################
#  Part IV. Model diagnostics;
#############################################################
# IV.1 Time plot of residuals
plot(m.fit$residuals, ylab='Residuals'); abline(h=0)

# IV.2 Normality checking
qqnorm(m.fit$residuals); qqline(m.fit$residuals); 

hist(m.fit$residuals)

# IV.3 Autocorrelation checking: examine residual autocorrelations individually
par(mfrow=c(1,2),mai=c(0.8,0.8,0.8,0.1))
acf(m.fit$residuals); pacf(m.fit$residuals) 

par(mfrow=c(1,1),mai=c(0.8,0.8,0.8,0.1))

# IV.4 Box-Pierce or Ljung-Box test to check residual autocorrelations jointly
Box.test(m.fit$residuals, lag = 10, type = c("Ljung-Box"), fitdf = 1)
## 
##  Box-Ljung test
## 
## data:  m.fit$residuals
## X-squared = 15.416, df = 9, p-value = 0.08013
#Box.test(m.fit$residuals, lag = 10, type = c("Box-Pierce"), fitdf = 1)

# IV.4.1 A useful function
library(TSA)
tsdiag(m.fit, gof.lag=15)
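
# Similar in spirit to the bottom panel of tsdiag(): a sketch computing Ljung-Box
# p-values over a range of lags (here 2 to 15), with fitdf = 1 for the single MA
# coefficient in m.fit. Points below the 0.05 line would indicate remaining autocorrelation.
lb.pvalues <- sapply(2:15, function(k)
  Box.test(m.fit$residuals, lag=k, type="Ljung-Box", fitdf=1)$p.value)
plot(y=lb.pvalues, x=2:15, ylim=c(0,1), ylab='p-value', xlab='Lag'); abline(h=0.05, lty=2)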

Part V. Forecasting;

#############################################################
#  Part V. Forecasting;
#############################################################
# V.1 Generate AR(1) process
n <- 100; order.p <- 1; n.offset <- 10;
e <- rnorm(n+n.offset)
z <- e[1:order.p]
for(i in (order.p+1):(n+n.offset)){
  z[i]=0.5*z[i-1]+e[i]
}
z <- z[(n.offset+1):(n.offset+n)]

# V.2 Draw time plot, Sample ACF and sample PACF, and then fit a model
par(mfrow=c(1,3),mai=c(0.8,0.8,0.8,0.1))
plot(z); lines(z)
acf(z,ci.type='ma')
pacf(z)

par(mfrow=c(1,1),mai=c(0.8,0.8,0.8,0.1))
ar <- arima(z, order=c(1,0,0), method='ML'); ar;
## 
## Call:
## arima(x = z, order = c(1, 0, 0), method = "ML")
## 
## Coefficients:
##          ar1  intercept
##       0.4865    -0.1761
## s.e.  0.0879     0.1876
## 
## sigma^2 estimated as 0.9444:  log likelihood = -139.17,  aic = 282.33
# V.3 Predict the next 6 observations
n.head <- 6
ar.pre <- predict(ar,n.ahead=n.head)

# V.4 Present the prediction on the plot
plot(y=z,x=1:n,type='l',xlim=c(-1,(n+n.head+2)))
lines(y=c(z[n],ar.pre$pred),x=c((n):(n+n.head)),col=4)
points(y=ar.pre$pred,x=c((n+1):(n+n.head)),col=4)
# approximate 95% prediction limits: forecast +/- 1.96 standard errors
lines(y=ar.pre$pred+1.96*ar.pre$se,x=c((n+1):(n+n.head)),col=4)
points(y=ar.pre$pred+1.96*ar.pre$se,x=c((n+1):(n+n.head)),col=4)
lines(y=ar.pre$pred-1.96*ar.pre$se,x=c((n+1):(n+n.head)),col=4)
points(y=ar.pre$pred-1.96*ar.pre$se,x=c((n+1):(n+n.head)),col=4)
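
# For an AR(1) model, the h-step-ahead forecast has the closed form mu + phi^h * (z_n - mu),
# where mu is the process mean (reported as 'intercept' by arima()) and phi is the AR
# coefficient. The sketch below should reproduce ar.pre$pred up to rounding.
phi <- ar$coef['ar1']; mu <- ar$coef['intercept']
mu + phi^(1:n.head) * (z[n] - mu)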

Part VI. Conditional heteroscedastic time series models;

#############################################################
# Part VI. Conditional heteroscedastic time series models;
#############################################################

# Set the working directory (change this path to the folder containing HSI.csv)
setwd("C:/Users/ligd/Google Drive/Presenations/Teaching/Rcode/")
# Read the file into R software
hsi <- read.csv(file = 'HSI.csv'); head(hsi)
##         Date    Close       return
## 1 11/18/2010 23637.39           NA
## 2 11/19/2010 23605.71 -0.000582448
## 3 11/22/2010 23524.02 -0.001505553
## 4 11/23/2010 22896.14 -0.011749248
## 5 11/24/2010 23023.86  0.002415840
## 6 11/25/2010 23054.68  0.000580969
hsi.return <- hsi$return[2:length(hsi$return)]
# Time plot of daily closing prices
plot(hsi$Close, type='l', ylab='Price', xlab='Time')

# Time plot of log returns
plot(hsi.return, type='l', ylab='Log Return', xlab='Time')

# Sample ACF and PACF of log returns
par(mfrow=c(1,2),mai=c(0.8,0.8,0.1,0.1))
acf(hsi.return, lag.max=100, main=''); pacf(hsi.return, lag.max=100, main='')

par(mfrow=c(1,1),mai=c(0.8,0.8,0.1,0.1))

# Sample ACF and PACF of squared log returns
hsi.return2 <- hsi.return*hsi.return
par(mfrow=c(1,2),mai=c(0.8,0.8,0.1,0.1))
acf(hsi.return2, lag.max=100, main=''); pacf(hsi.return2, lag.max=100, main='')

par(mfrow=c(1,1),mai=c(0.8,0.8,0.1,0.1))
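
# If, as is typical for daily index returns, the squared returns show much stronger
# serial correlation than the returns themselves, this points to conditional
# heteroscedasticity. A quick sketch of a formal check (the lag of 10 is an arbitrary
# choice): a Ljung-Box test on the squared returns.
Box.test(hsi.return2, lag=10, type="Ljung-Box")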

# Fitting a GARCH model (requires the "fGarch" package)
library(fGarch)
## NOTE: Packages 'fBasics', 'timeDate', and 'timeSeries' are no longer
## attached to the search() path when 'fGarch' is attached.
## 
## If needed attach them yourself in your R script by e.g.,
##         require("timeSeries")
gfit <- garchFit(~garch(1,1), data=hsi.return, trace=FALSE, cond.dist = c("norm"),
include.mean = FALSE)
summary(gfit)
## 
## Title:
##  GARCH Modelling 
## 
## Call:
##  garchFit(formula = ~garch(1, 1), data = hsi.return, cond.dist = c("norm"), 
##     include.mean = FALSE, trace = FALSE) 
## 
## Mean and Variance Equation:
##  data ~ garch(1, 1)
## <environment: 0x000000002112e2b0>
##  [data = hsi.return]
## 
## Conditional Distribution:
##  norm 
## 
## Coefficient(s):
##      omega      alpha1       beta1  
## 3.8186e-07  4.7856e-02  9.3711e-01  
## 
## Std. Errors:
##  based on Hessian 
## 
## Error Analysis:
##         Estimate  Std. Error  t value Pr(>|t|)    
## omega  3.819e-07   1.119e-07    3.413 0.000642 ***
## alpha1 4.786e-02   6.922e-03    6.914 4.71e-12 ***
## beta1  9.371e-01   9.433e-03   99.344  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Log Likelihood:
##  9627.317    normalized:  3.921514 
## 
## Description:
##  Sun Nov 13 10:12:43 2022 by user: ligd 
## 
## 
## Standardised Residuals Tests:
##                                 Statistic p-Value     
##  Jarque-Bera Test   R    Chi^2  273.2475  0           
##  Shapiro-Wilk Test  R    W      0.9844884 1.012076e-15
##  Ljung-Box Test     R    Q(10)  13.23267  0.2109525   
##  Ljung-Box Test     R    Q(15)  16.94404  0.3222205   
##  Ljung-Box Test     R    Q(20)  20.02622  0.4562906   
##  Ljung-Box Test     R^2  Q(10)  10.92054  0.3637425   
##  Ljung-Box Test     R^2  Q(15)  11.40044  0.7237261   
##  Ljung-Box Test     R^2  Q(20)  16.33553  0.6956028   
##  LM Arch Test       R    TR^2   11.20496  0.5114401   
## 
## Information Criterion Statistics:
##       AIC       BIC       SIC      HQIC 
## -7.840584 -7.833489 -7.840587 -7.838006
# Predict the next 10 values
predict(gfit, n.ahead = 10, plot=TRUE, conf=.95, nx=100)

##    meanForecast   meanError standardDeviation lowerInterval upperInterval
## 1             0 0.004849328       0.004849328  -0.009504508   0.009504508
## 2             0 0.004852237       0.004852237  -0.009510210   0.009510210
## 3             0 0.004855100       0.004855100  -0.009515822   0.009515822
## 4             0 0.004857919       0.004857919  -0.009521347   0.009521347
## 5             0 0.004860694       0.004860694  -0.009526785   0.009526785
## 6             0 0.004863426       0.004863426  -0.009532139   0.009532139
## 7             0 0.004866115       0.004866115  -0.009537409   0.009537409
## 8             0 0.004868762       0.004868762  -0.009542597   0.009542597
## 9             0 0.004871367       0.004871367  -0.009547705   0.009547705
## 10            0 0.004873933       0.004873933  -0.009552733   0.009552733
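
# Beyond the point forecasts, the fitted conditional volatility can be inspected in-sample.
# A sketch, assuming the volatility() extractor from fGarch (equivalently the gfit@sigma.t slot):
plot(volatility(gfit), type='l', ylab='Conditional SD', xlab='Time')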