3.4 Solutions

3.4.1 Exercise 4.4

Consider the linear model \[\boldsymbol{y} = \mathbf{X}_1\boldsymbol{\beta}_1 +\mathbf{X}_2\boldsymbol{\beta}_1 + \boldsymbol{\varepsilon}\] and suppose that \(\mathbf{X}_1\) includes a column of ones.

data(Auto, package = "ISLR")
y <- Auto$mpg
X1 <- cbind(1, Auto$horsepower)
X2 <- Auto$weight

Form the projection matrices \(\mathbf{H}_{\mathbf{X}}\), \(\mathbf{H}_{1}\), \(\mathbf{H}_{2}\) and the complementary projection matrices (the functions cbind, %*%, solve and t may be useful).

data(Auto, package = "ISLR")
y <- Auto$mpg
X1 <- cbind(1, Auto$horsepower)
X2 <- Auto$weight
##Warning: output not displayed 
#Projection matrices
X <- cbind(X1, X2) 
#Helper functions
#Create a function: function(args){ ...}
#last line will be object returned (if not assigned)
#otherwise use `return( )`: see below for example
proj_mat <- function(x){
  x %*% solve(t(x) %*% x) %*% t(x)
}
coefs_vals <- function(x, y){
  coefs <- c(solve(t(x) %*% x) %*% t(x) %*% y) 
  return(coefs)
  #`c` transform output from n x 1 matrix to n-vector
}
resid_vals <- function(y, pmat){
  c((diag(1, length(y)) - pmat)  %*% y) 
  #diag(1, ...) creates identity matrix
}
fitted_vals <- function(y, pmat){
  c(pmat %*% y)
}

#Create projection matrices
H  <- proj_mat(X)
H1 <- proj_mat(X1)
H2 <- proj_mat(X2)
M  <- diag(nrow(X)) - H
M1 <- diag(nrow(X)) - H1
M2 <- diag(nrow(X)) - H2

Obtain the OLS estimates \(\widehat{\boldsymbol{\beta}}_1\), \(\widehat{\boldsymbol{\beta}}_2\)
Use the projection matrices to obtain the fitted values \(\widehat{\boldsymbol{y}}\) and the estimated residuals \(\widehat{\boldsymbol{\varepsilon}}\).

#OLS coefficients
beta_hat <- coefs_vals(X, y)
beta_hat
#Fitted values
fitted_vals(y, H)
#Residuals
resid_vals(y, H)

What happens to the residuals if your regressors do not include a vector of constants?

If a constant is not included, the residuals are not centered unless the columns of the design matrix and the response were centered, meaning they had expectation zero. This is why a column vector of ones is always included unless the mean is known (from theory or otherwise) to be zero.

#Removing the row of constants
res_uncentered <- resid_vals(y, proj_mat(X[,-1])) #subset [row, column]
#X[,-1] take all rows, all columns but first
mean(res_uncentered)

## [1] 3.32711

#Consequence of not centering is that residuals are not mean zero

Verify numerically Frisch–Waugh–Lovell’s theorem and test the different regression models from Exercice 2.4 to validate your answers.

#The null models regression has
coef_0 <- coefs_vals(x = X, y = y)[ncol(X)]
res_0  <- resid_vals (y = y, pmat = H)
#The following function checks equality of the coefficients
#beta 2 is the last coef (here 1d)
check_FWL <- function(xmat, yvec, coef0 = coef_0, res0 = res_0){
  c(isTRUE(all.equal(coefs_vals(x = xmat, y = yvec)[ncol(xmat)], coef0)),
    isTRUE(all.equal(resid_vals(y = yvec, pmat = proj_mat(xmat)), res0))
  )
}

#Check the results of 4.3
res_mat <- cbind(check_FWL(yvec = y, xmat = X2), #1
check_FWL(yvec = H1 %*% y, xmat = X2), #2
check_FWL(yvec = H1 %*% y, xmat = H1 %*% X2), #3
check_FWL(yvec = H %*% y, xmat = X), #4
check_FWL(yvec = H %*% y, xmat = X2), #5
check_FWL(yvec = M1 %*% y, xmat = X2), #6
check_FWL(yvec = M1 %*% y, xmat = M1 %*% X2), #7 and 9
check_FWL(yvec = M1 %*% y, xmat = cbind(X1, M1 %*% X2)), #8
check_FWL(yvec = H %*% y, xmat = M1 %*% X2)) #10
colnames(res_mat) <-  c("(1)", "(2)", "(3)", "(4)", "(5)", "(6)", "(7,9)","(8)","(10)")
rownames(res_mat) <- c("coefficients","residuals")
print(res_mat)

##                (1)   (2)   (3)   (4)   (5)   (6) (7,9)  (8)  (10)
## coefficients FALSE FALSE FALSE  TRUE FALSE FALSE  TRUE TRUE  TRUE
## residuals    FALSE FALSE FALSE FALSE FALSE FALSE  TRUE TRUE FALSE