6.4 Solutions to Exercises

6.4.1 Exercise 1: Jussy air temperature

library(xts)
library(lubridate)

# Jussy air temperature: read the semicolon-separated file, treating the
# "######" token as missing, and keep only the first two columns.
jussy <- read.csv(
    "http://sma.epfl.ch/~lbelzile/math342/jussy_temp.csv",
    header = TRUE, sep = ";", na.strings = "######", stringsAsFactors = FALSE
)
jussy <- jussy[, 1:2]
names(jussy) <- c("time", "temp")

# Temperature series indexed by timestamps parsed as year-month-day-hour.
jussy_xts <- xts(jussy$temp, order.by = ymd_h(jussy$time))

# Daily maximum temperature.
jussy_max <- apply.daily(jussy_xts, max)

# Number of days within `serie` whose daily minimum is below zero. A day whose
# minimum is NA (some reading missing) is dropped from the count.
count_freezing_days <- function(serie) {
    daily_min <- apply.daily(serie, min)
    sum(daily_min < 0, na.rm = TRUE)
}
number_freeze <- apply.yearly(jussy_xts, count_freezing_days)

# Average yearly count, leaving out the last (incomplete) year.
n_years <- length(number_freeze)
mean(number_freeze[-n_years])
[1] 85.53846
# Positions of the days meeting both heatwave criteria: daily minimum above 18
# and daily maximum above 30.
min_above_18 <- which(apply.daily(jussy_xts, min) > 18)
max_above_30 <- which(apply.daily(jussy_xts, max) > 30)
heatwave_ind <- intersect(min_above_18, max_above_30)

# Consecutive qualifying days are one position apart, so each spell appears as
# a run of 1s in the differenced positions.
y <- rle(diff(heatwave_ind))
spell_runs <- y$lengths[y$values == 1]

# A run of k ones spans k + 1 consecutive days, so `> 3` keeps spells lasting
# at least five days.
# NOTE(review): confirm this threshold against the exercise's heatwave
# definition.
sum(spell_runs > 3)
[1] 2
# 2 `heatwaves` as per the definition. Unsurprisingly, they occur in June and
# August 2003.

6.4.2 Exercise 2: Tyne river flow

# Tyne at Bywell river flow: skip the 16-line file header, name the three
# columns explicitly, and keep only the time stamp and the measurement.
tyne <- read.csv(
    file = "http://sma.epfl.ch/~lbelzile/math342/23001-Tyne_at_Bywell.csv",
    header = FALSE, sep = ",", skip = 16,
    col.names = c("time", "height", "flag"),
    as.is = TRUE, na.strings = "NA"
)[, 1:2]
library(imputeTS)

# Row positions of the missing measurements. Test the `height` column itself:
# is.na() on the whole data frame yields a two-column logical matrix, and
# which() would then return column-major linear indices (row + nrow(tyne) for
# the second column) rather than positions within the dataset.
missing_rows <- which(is.na(tyne$height))
plot(missing_rows, ylab = "Index of missing value within dataset",
    xlab = "Index within the missing value")

# Build a `ts` object with a 365-observation cycle whose origin is the year
# and day-of-year of the first record.
# The closing parentheses matter here: `start` must be the pair
# c(year, day-of-year) and `frequency` must be an argument of ts(). With
# `frequency = 365` nested inside c(), ts() silently falls back to
# frequency = 1 and the day-of-year is discarded by as.numeric().
tyne_ts <- with(tyne, ts(
    data = height,
    start = c(as.numeric(substr(time[1], 1, 4)), lubridate::yday(time[1])),
    frequency = 365
))

# Show where the missing values fall along the series.
plotNA.distribution(tyne_ts)

# Print summary statistics on the missing values (counts, bins, gap sizes).
statsNA(tyne_ts)
[1] "Length of time series:"
[1] 20819
[1] "-------------------------"
[1] "Number of Missing Values:"
[1] 196
[1] "-------------------------"
[1] "Percentage of Missing Values:"
[1] "0.941%"
[1] "-------------------------"
[1] "Stats for Bins"
[1] "  Bin 1 (5205 values from 1 to 5205) :      154 NAs (2.96%)"
[1] "  Bin 2 (5205 values from 5206 to 10410) :      0 NAs (0%)"
[1] "  Bin 3 (5205 values from 10411 to 15615) :      42 NAs (0.807%)"
[1] "  Bin 4 (5204 values from 15616 to 20819) :      0 NAs (0%)"
[1] "-------------------------"
[1] "Longest NA gap (series of consecutive NAs)"
[1] "93 in a row"
[1] "-------------------------"
[1] "Most frequent gap size (series of consecutive NA series)"
[1] "93 NA in a row (occuring 1 times)"
[1] "-------------------------"
[1] "Gap size accounting for most NAs"
[1] "93 NA in a row (occuring 1 times, making up for overall 93 NAs)"
[1] "-------------------------"
[1] "Overview NA series"
[1] "  20 NA in a row: 1 times"
[1] "  22 NA in a row: 1 times"
[1] "  61 NA in a row: 1 times"
[1] "  93 NA in a row: 1 times"