# ANALYSIS OF Nile DATA

# GET HELP ON SOMETHING CALLED 'nile' - DOESN'T WORK
?nile
# THIS WORKS
??nile

# -------------------- DOWNLOAD -------------------- 
# HOW LEE CREATED THE TEXT FILE DOWLOADED IN NEXT STEP
# NOT RUN...
if (F) print(
  as.data.frame(Nile), 
  row.names = F
)

# DOWNLOAD THE DATA
nile <- read.table('http://ldecola.net/files/nile.txt')
# GIVES A DATA.FRAME
head(nile)
# WE WANT FIRST (AND ONLY) COLUMN
# CONVERT TO A 'SMALLER' NUMBER (SEE Nile HELP PAGE)
nile.ts <- ts(
  nile[,1] / 100,
  1871
)

# ------------------ HISTOGRAMS ------------------ 
# A TIME SERIES
str(nile.ts)
# LABEL FOR GRAPHICS
label <- 'billion cubic meters'
# SUMMARY
summary(nile.ts)
# DEFAULT HISTOGRAM
hist(nile.ts)
# TITLE FOR PLOTS
title <- 'Annual flow of the Nile at Aswan'
# CHARACTER EXPANSION
par(cex = 1.3)
# BETTER HISTO
hist(
  nile.ts, 
  xlim = c(0, 15),
  main = title, 
  xlab = label,
  ylab = 'years'
)

# ---------------------- PLOTS ----------------------
# DEFAULT PLOT
plot(nile.ts)
# GRAPHIC LIMITS FOR 'Y' VARIABLE
ylim <-c(0, 15)
# AND FOR 'X' VARIABLE (TIME)
xlim <- c(1860, 1980)
# DESIGNED PLOT
plot(
  nile.ts,
  type = 'o', pch = 20, lwd = 2,
#  col = 'blue', # DOES COLOR HELP?
  las = 1,
  xlim = xlim,  ylim = ylim,
  ylab = label,
  xlab = paste(
    range(time(nile.ts)), 
    collapse = ' to '
  ),
  main = title
)
# DOES THIS HELP?
axis(1, seq(xlim[1], xlim[2], 10))

ptype <- c(
  default = 'l',
  points = 'p',
  overplot = 'o',
  nothing = 'n'
)
# FUNCTION
myplot <- function(t = 'l') plot(
  nile.ts,
  ylim = ylim,
  type = t,
  main = paste('type =', t),
  xlab = '', ylab = ''
)
par(mfrow = c(2, 2))
for(t in c('l', 'p', 'o', 'n')) myplot(t)

# WHY WOULD WE WANT NOTHING?
par(mfrow = c(1, 1))
# FRAME FOR THE DATA
myplot('n')
# LINES AT 4 LEVELS
abline(h = 5 * 0:3, col = 'gray', lwd = 2, lty = 2)
# ADD DATA _OVER_ THE LINES
lines(nile.ts, lwd = 3, col = 'blue')
# TITLE IN THE PLOT AREA
text(1920, 14, title, col = 'blue', cex = 1.5)

# -------------------- FILTER --------------------
plot(
  nile.ts, type = 'p', 
  main = 'data', 
  pch = 20, cex = 1.3,
  ylim = ylim
)
width <- 5 # HALF-DECADE - TRY OTHERS 1, 10, 
f <- filter(nile.ts,  rep(1, width) / width)
str(f)
plot(
  f,
  col = 'red', 
  lwd = 2,
  ylim = ylim
)
title(paste(width, 'year moving average'), col.main = 'red')

plot(nile.ts, type = 'p', pch = 20, cex = 1.3,
     ylim = ylim
)
lines(
  f,
  col = 'red', 
  lwd = 2
)
title(
  paste(width, 'year moving average'), 
  col.main = 'red'
)
# GO BACK AND RUN THE 3 FILTER PLOTS AS 'MULTI-FRAME'
par(mfrow = c(3, 1))

# ------------------ REGRESSION ------------------ 
# time() IS A TIME SERIES
time(nile.ts)
summary(time(nile.ts))
# ITS VALUE AT EACH YEAR IS THE INTEGER
plot(time(nile.ts))
# REGRESSION
??regression
# LINEAR (REGRESSION) MODEL
?lm
# TIME (YEAR) AS 'INDEPENDENT' VARIABLE
lm(nile.ts ~ time(nile.ts))
# STORE THE RESULT
fit <- lm(nile.ts ~ time(nile.ts))
# A BRIEF REPORT
fit
# WHAT'S IN IT
names(fit)
# THE COEFFICIENTS
fit$coeff
# PREDICTED CHANGE OVER PERIOD
fit$coef[2] * length(nile.ts)
# ANOTHER WAY - WHY NOT EXACTLY THE SAME ANSWER?
diff(fit$fitted[c(1, length(nile.ts))])
# FULLER REPORT - NOTE R-squared
summary(fit)

# PLOT AS POINTS
plot(
  nile.ts, 
  type = 'p', xlab = '',
  ylab = 'billion cubic feet'
)
abline(fit, col = 'red', lty = 2, lwd = 2)
title('linear regression', 1, col.main = 'red')
text(
  1920, 12,
  paste('R-SQUARE', round(summary(fit)$r.sq, 2)),
  col = 'red'
)
# RESIDUALS (HOW TO MAKE A TIME SERIES)
resid.ts <- ts(
  fit$resid,
  start(nile.ts)
)
par(mfrow = c(2, 1))
hist(nile.ts - mean(nile.ts), 
  xlim = 5 * c(-1, 1)
)
hist(resid.ts, xlim = 5 * c(-1, 1))
par(mfrow = c(1, 1))
plot(
  resid.ts,
  ylim = 5 * c(-1, 1),
  lwd = 2,
  type = 'h', main = 'residuals', xlab = '',
  ylab = 'meters'
)
abline(h = 0)
points(nile.ts - mean(nile.ts), pch = 20, col = 'red')

# -------------------- FORECAST -------------------- 
# NEED PACKAGE
library(forecast)
fore <- forecast(nile.ts)
# THE POINT FORECAST - DOESN'T CHANGE
fore
# IT PLOTS WITH THE DATA
plot(fore)
# ADD THE MEAN
abline(h = mean(nile.ts))
# AND THE REGRESSION LINE
abline(fit)

# *** END ***