Exercise 1: Jussy air temperature
# Download the Jussy temperature file (semicolon-separated, with a header row).
# Cells containing "######" are read as missing values.
jussy <- read.csv(
  file = "http://sma.epfl.ch/~lbelzile/math342/jussy_temp.csv",
  header = TRUE,
  sep = ";",
  na.strings = "######",
  stringsAsFactors = FALSE
)
# Only the first two columns are used in what follows
jussy <- jussy[, 1:2]
library(xts)
library(lubridate)
# Label the two retained columns
names(jussy) <- c("time", "temp")
# Temperature series indexed by the time stamps, parsed as year-month-day hour
jussy_xts <- xts(jussy$temp, order.by = ymd_h(jussy$time))
# Daily maximum temperature; max() is called without na.rm, so a day that
# contains a missing reading yields NA
jussy_max <- apply.daily(jussy_xts, FUN = max)
# Number of days per year whose minimum temperature is below 0.
# Days whose minimum is NA are dropped from the count (na.rm = TRUE).
number_freeze <- apply.yearly(jussy_xts, FUN = function(year_temps) {
  daily_min <- apply.daily(year_temps, FUN = min)
  sum(daily_min < 0, na.rm = TRUE)
})
# Average over every year except the last one, which is incomplete
mean(number_freeze[-nrow(number_freeze)])
[1] 85.53846
# Positions of the days on which both constraints hold: daily minimum above 18
# and daily maximum above 30. Days with a missing minimum or maximum are never
# selected because which() drops NA.
heatwave_ind <- which(
  as.vector(apply.daily(jussy_xts, FUN = min) > 18) &
    as.vector(jussy_max > 30)
)
# Run lengths of the gaps between successive selected days; a run of k gaps
# equal to 1 corresponds to k + 1 consecutive days
y <- rle(diff(heatwave_ind))
# Count the runs of consecutive days made of more than 3 unit gaps
# NOTE(review): "> 3" gaps means spells of at least 5 days; confirm this is the
# intended heatwave definition
sum(y$values == 1 & y$lengths > 3)
[1] 2
# There are 2 heatwaves as per the definition. Unsurprisingly, they occur in
# June and August 2003.
Exercise 2: Tyne river flow
# Download the Tyne at Bywell file: comma-separated, no header row, and the
# first 16 lines are skipped. Columns are named on the fly and character
# columns are kept as character (as.is = TRUE).
tyne <- read.csv(
  file = "http://sma.epfl.ch/~lbelzile/math342/23001-Tyne_at_Bywell.csv",
  header = FALSE,
  sep = ",",
  skip = 16,
  col.names = c("time", "height", "flag"),
  as.is = TRUE,
  na.strings = "NA"
)
# Drop the flag column
tyne <- tyne[, c("time", "height")]
library(imputeTS)
# Row positions of the missing measurements, plotted in order of occurrence.
# is.na() is applied to the measurement column only: on the whole data frame it
# returns a matrix, and which() would then report column-major positions that
# are shifted by nrow(tyne) for entries of the second column.
plot(which(is.na(tyne$height)), ylab = "Index of missing value within dataset", xlab = "Index within the missing value")
# Regular time series of the measurements with 365 observations per year,
# starting at (year, day of year) of the first record.
# as.numeric() must wrap the year substring only, and `frequency` must be an
# argument of ts(): with the parentheses misplaced, the day of year is silently
# ignored, `start` becomes c(year, 365) and the series gets frequency 1.
# Assumes time[1] is a date string that lubridate::yday() can parse and whose
# first four characters are the year; verify against the file.
tyne_ts <- with(tyne, ts(
  data = height,
  start = c(as.numeric(substr(time[1], 1, 4)), lubridate::yday(time[1])),
  frequency = 365
))
# Location of the missing values along the series
# NOTE(review): newer imputeTS releases appear to rename this function to
# ggplot_na_distribution(); confirm against the installed version
plotNA.distribution(tyne_ts)
# Printed summary of the missing values (counts, bins, gap sizes)
statsNA(tyne_ts)
[1] "Length of time series:"
[1] 20819
[1] "-------------------------"
[1] "Number of Missing Values:"
[1] 196
[1] "-------------------------"
[1] "Percentage of Missing Values:"
[1] "0.941%"
[1] "-------------------------"
[1] "Stats for Bins"
[1] " Bin 1 (5205 values from 1 to 5205) : 154 NAs (2.96%)"
[1] " Bin 2 (5205 values from 5206 to 10410) : 0 NAs (0%)"
[1] " Bin 3 (5205 values from 10411 to 15615) : 42 NAs (0.807%)"
[1] " Bin 4 (5204 values from 15616 to 20819) : 0 NAs (0%)"
[1] "-------------------------"
[1] "Longest NA gap (series of consecutive NAs)"
[1] "93 in a row"
[1] "-------------------------"
[1] "Most frequent gap size (series of consecutive NA series)"
[1] "93 NA in a row (occuring 1 times)"
[1] "-------------------------"
[1] "Gap size accounting for most NAs"
[1] "93 NA in a row (occuring 1 times, making up for overall 93 NAs)"
[1] "-------------------------"
[1] "Overview NA series"
[1] " 20 NA in a row: 1 times"
[1] " 22 NA in a row: 1 times"
[1] " 61 NA in a row: 1 times"
[1] " 93 NA in a row: 1 times"