Time Series Analysis

April 27, 2018
Time Series Data Seminar

Load libraries

# .libPaths("P:/RLibrary2")
knitr::opts_chunk$set(echo = TRUE)

# install.packages(c("forecast","lmtest","tseries"))

library(lubridate)
library(tidyverse)
library(forecast)
library(lmtest)
library(tseries)
library(scales)
library(gridExtra)

Load data

id1 = "1lgjg9EpSgt6pFUGCNiuio3APgKZZ4un1"
id2 = "1rVzQ-kkFsRMC-IM3jDdqjOLjY1qnIRRW"

spop<- read_csv(sprintf("https://docs.google.com/uc?id=%s&export=download", id1))
## Parsed with column specification:
## cols(
##   ReleaseDate = col_integer(),
##   StateAbbreviation = col_character(),
##   None = col_double(),
##   D0 = col_double(),
##   D1 = col_double(),
##   D2 = col_double(),
##   D3 = col_double(),
##   D4 = col_double(),
##   ValidStart = col_character(),
##   ValidEnd = col_character(),
##   StatisticFormatID = col_integer()
## )
glimpse(spop)
## Observations: 366
## Variables: 11
## $ ReleaseDate       <int> 20171226, 20171219, 20171212, 20171205, 2017...
## $ StateAbbreviation <chr> "CA", "CA", "CA", "CA", "CA", "CA", "CA", "C...
## $ None              <dbl> 36.64, 36.64, 36.64, 39.44, 41.24, 44.78, 44...
## $ D0                <dbl> 63.36, 63.36, 63.36, 60.56, 58.76, 55.22, 55...
## $ D1                <dbl> 36.92, 36.92, 34.84, 34.84, 27.63, 27.63, 27...
## $ D2                <dbl> 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0....
## $ D3                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ D4                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ ValidStart        <chr> "12/26/2017", "12/19/2017", "12/12/2017", "1...
## $ ValidEnd          <chr> "1/1/2018", "12/25/2017", "12/18/2017", "12/...
## $ StatisticFormatID <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
news<- read_csv(sprintf("https://docs.google.com/uc?id=%s&export=download", id2))
## Parsed with column specification:
## cols(
##   Date = col_date(format = ""),
##   Headline = col_character(),
##   Source = col_character(),
##   Country = col_character(),
##   NAME = col_character(),
##   year = col_integer(),
##   month = col_integer(),
##   yrmo = col_date(format = "")
## )
glimpse(news)
## Observations: 157,165
## Variables: 8
## $ Date     <date> 2011-01-31, 2011-01-31, 2011-01-31, 2011-01-31, 2011...
## $ Headline <chr> "Biz Buzz: Citizen of the Year\u0092s passion is enha...
## $ Source   <chr> "The Tribune", "FOX40", "KQED's News Fix", "Crooks an...
## $ Country  <chr> "United States", "United States", "United States", "U...
## $ NAME     <chr> "California", "California", "California", "California...
## $ year     <int> 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011,...
## $ month    <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ yrmo     <date> 2011-01-01, 2011-01-01, 2011-01-01, 2011-01-01, 2011...

Summarise and look at the data

news %>% 
  group_by(yrmo) %>% 
  summarise(stories = n()) -> newsMo


plotb<-ggplot(data = newsMo) +
  geom_col(aes(x = yrmo, y = stories), fill = "dodgerblue", color = "black") + 
  theme_classic()

Plot mentions of drought in the news

spop %>% 
  mutate(valDate = ymd(ReleaseDate),
         wsum = D0 + D1 + D2 + D3 + D4,
         year = year(valDate),
         month = month(valDate),
         yrmo = paste0(year,"",month),
         yrmo = parse_date_time(yrmo, "ym"),
         yrmo = as.Date(yrmo)) -> spopmo

head(spopmo$yrmo)
## [1] "2017-12-01" "2017-12-01" "2017-12-01" "2017-12-01" "2017-11-01"
## [6] "2017-11-01"
spopmo %>% 
  group_by(yrmo) %>% 
  summarise(drought = max(wsum)) -> spopmo


plota<- ggplot(data = spopmo) +
          geom_col(aes(x = yrmo, y = drought), fill = muted("red"), color = "black") + 
          theme_classic()



grid.arrange(plota,plotb, ncol = 1)

full_join(spopmo,newsMo, by = "yrmo") %>% 
  filter(!is.na(stories)) -> fullData
ts(fullData$stories,
   start = 2011,
   frequency = 12) -> stories.ts

plot(stories.ts)

decomNews <- decompose(stories.ts, type = "multi")
plot(decomNews)

stlStories<-stl(stories.ts, s.window = "periodic")

seasonadjStories<-forecast::seasadj(stlStories)

plot(seasonadjStories)

acf(stories.ts)

pacf(stories.ts)

adf.test(stories.ts)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  stories.ts
## Dickey-Fuller = -2.1587, Lag order = 4, p-value = 0.5108
## alternative hypothesis: stationary
kpss.test(stories.ts)
## Warning in kpss.test(stories.ts): p-value smaller than printed p-value
## 
##  KPSS Test for Level Stationarity
## 
## data:  stories.ts
## KPSS Level = 0.94297, Truncation lag parameter = 2, p-value = 0.01
## Granger causality

lmtest::grangertest( fullData$stories, fullData$drought,order = 4)
## Granger causality test
## 
## Model 1: fullData$drought ~ Lags(fullData$drought, 1:4) + Lags(fullData$stories, 1:4)
## Model 2: fullData$drought ~ Lags(fullData$drought, 1:4)
##   Res.Df Df      F Pr(>F)
## 1     71                 
## 2     75 -4 0.4306  0.786

Dose Response

April 27, 2018
Dose Response Data Seminar

Spatial Autocorrelation

April 18, 2018
Spatial autocorrelation Data Seminar

Partial Least Squares Regression

April 13, 2018
Refelectance Partial Least Squares Regression Data Seminar
comments powered by Disqus