# SIMULATE A SIMPLE TIME SERIES AND RECOVER ITS MODEL
# USED IN LAB FOR UMBC 2017 GES 679-01
# ------------------ CALCULATOR ------------------
# A NUMBER
6378
# SQUARE IT
6378^2
# MULTIPLY IT
6378^2 * 4 / 3
# BUILT-IN
pi
# WHAT IS THIS?
6378^2 * pi
# AND WHAT IS THIS?
6378^2 * 4 / 3 * pi

# VARIABLE: RADIUS OF EARTH IN KM
r <- 6378
# 'PRINT' IT
r
# CALCULATOR
r^2
# R AS A CALCULATOR
4 / 3 * pi * r^2
# UP-ARROW, HOME
a <- 4 / 3 * pi * r^2
# AREA OF EARTH IN KM^2
a
# 100 MILLION KM^2
log10(a)
# SIDE OF A 'SQUARE' EARTH TO NEAREST THOUSAND
round(sqrt(a), -3)

# ------------------ SIMULATION ------------------
n <- 2^6
# VECTOR
1:n
# GUESS WHAT THIS WILL BE
x <- 1949 + 1:n 
# 'PRINT' x
x
# LENGTH
length(x)
# WHAT IS x?
class(x)
# STRUCTURE
str(x)
# TOO VAGUE
# ?normal
# WE WANT 'Normal' NEAR BOTTOM
# ??normal
# SIMULATION
e <- rnorm(length(x)) 
# WHAT IS THIS?
e
# COMPARE RESULTS
summary(e)

# ----------------- VISUALIZATION ----------------- 
# POINTS IN THE ORDER OF THE VECTOR
plot(e)
# AS A LINE - ANYTHING GOING ON?
plot(e, type = 'l')
# ADD POINTS
points(e)
# ADD ESTHETICS
points(e, col = 'red', pch = 20, cex = 2)
# MEAN CLOSE TO 0
mean(e)
# MIN, Q1, MD, Q3, MAX 
quantile(e)
# ADD LINES TO THE PLOT
abline(
  h = quantile(e), 
  col = 'green', lwd = 2, lty = 3
)

# GET HELP
# ?hist
# HISTOGRAM
hist(e)
# EXPECTED FROM MU(0, 1)?
qnorm(1/n)
# NEW LIMITS
hist(
  e, 
  xlim = c(-3, 3),
  ylim = c(0, 20)
)
# ADD TO HISTOGRAM
abline(v = mean(e), col = 'blue', lwd = 4)

# SIMULATION MODEL + ERROR
# WHAT DOES THIS 'SAY'?
y <- -3000 + 2 * x + 16 * e 
plot(y)
# Y BY X
plot(x, y, main = 'simulation')
# SAVE COMMANDS (FIND & OPEN IT)
savehistory('history.R')

#  ------------------ DATA FRAME ------------------ 
# FUNDAMENTAL R OBJECT
dat <- data.frame(x, y)
# PRINT IT - TOO MANY LINES!
dat 
# INSPECT FIRST (6) ROWS
head(dat)
# THROW OUT 'NOISE'
dat$y <- round(dat$y)
# PRETTIER
head(dat)
# STRUCTURE
str(dat)
# STATISTICS
summary(dat)

# -------------------- FUNCTION -------------------
# PLOT (SHOULDN'T CHANGE MUCH FROM EARLIER...)
plot(dat)
# A FUNCTION - NOTE INDENTING
myplot <- function(data) plot(
  data, 
  pch = 16,
  las = 1,
  main = 'simulated time series'
)
# WHAT IS IT? 
myplot
# WHAT KIND OF OBJECT?
class(myplot)
# RUN WITHOUT ARGUMENT GIVES ERROR
myplot()
# OK - AND SEE WHAT HAPPENS IF WINDOW IS RESIZED
myplot(dat)
# ESTIMATE 'CHANGE' PER 'YEAR' (SLOPE)
# (1960, 920) TO (2010:1030) = 110 / 50 = 2.2 ~ 2

# ------------------ DIFFERENCES ------------------
# LINES CAN SUGEST CHANGES
myplot(dat) ; lines(dat)
# LOOK UP diff
# ?diff
# APPLY TO THE DATA
diff(dat$y)
# LENGTH OF y 
length(dat$y)
# LENGTH OF diff(dat$y)
length(diff(dat$y))
# ANYTHING HERE?
summary(diff(dat$y))
hist(diff(dat$y))
# -------------------- SPLINE --------------------
# SPLINE
# ?spline
sp <- spline(dat)
# 3 * LENGTH(x)
str(sp)
# ADD THE SPLINE
myplot(dat)
lines(sp, col = 'green', lwd = 3)
# BRING THE POINTS FORWARD
points(dat, pch = 16)
# GOOD VISUALIZATION, BUT NOT ANALYSIS...

# ------------------ REGRESSION ------------------ 
# WRITE DOWN THE CORRELATION
cor(dat)
# LINEAR MODEL
# ?lm
# PRINTS THE RESULT
lm(y ~ x, dat)
# SAVE THE RESULT
fit <- lm(y ~ x, dat)
# COMPARE TO THE ORIGINAL CREATION OF y ABOVE
fit
# CONTAINS A LOT OF STUFF
str(fit)
# WHAT IS IN IT?
names(fit)
# RECOVERED OUR PARAMETER (CAN ABBREVIATE NAMES)
fit$coeff
# THE IMPORTANT STUFF
summary(fit)
# PLOT THE DATA
myplot(dat) 
# ADD PREDICTION
abline(
  fit, 
  col = 'red', lwd = 4, lty = 2
)

# -------------- TIME SERIES ANALYSIS --------------
# TIME SERIES (SUFFIX IS OPTIONAL, TO REMIND ME)
dat.ts <- ts(
  dat$y,
  x[1]
)
# A NEW KIND OF OBJECT - WITH METADATA
dat.ts
# WHAT IS IT?
# ?ts
# PLOTS WITH LINE SEGMENTS AND TIME LABEL
plot(dat.ts)
# CAN ADD POINTS
points(dat.ts)
# OR JUST PLOT POINTS
plot(dat.ts, type = 'p')
# REGRESSION IS THE SAME
fit1 <- lm(dat.ts ~ time(dat.ts))
fit1
abline(fit1)
summary(fit1)
plot(fit1$residuals)
# AUTOCORRELATION (ADVANCED!)
par(mfrow = c(2, 1))
acf(dat.ts)
acf(fit$resid)
plot(dat.ts)
# FILTER BY 'DECADE'
f <- filter(dat.ts, filter = rep(1, 10) / 10)
lines(f, col = 'red', lwd = 3)
# *** END ***