R's basic lm() and summary.lm functions

Sat, May 11, 2013 12:55 PM

On Sat, 11 May 2013, ivo welch wrote:

But they should be easy enough to install...

Hmmm, I get the same results (up to machine precision):

## data and model
set.seed(1)
x <- rnorm(12); y <- rnorm(12); z <- rnorm(12); x[2] <-NA
m <- lm(y ~ x + z)

## standard errors
nw1 <- ols(y ~ x + z)$coefficients[, 6]
nw2 <- sqrt(diag(NeweyWest(m, lag = 0, prewhite = FALSE)))

And then I get:

R> nw1 - nw2
   (Intercept)             x             z
  0.000000e+00 -1.110223e-16 -2.775558e-17

By the way: With lag=0, these are equivalent to the basic sandwich 
standard errors and to the HC0 standard errors:

nw3 <- sqrt(diag(sandwich(m)))
nw4 <- sqrt(diag(vcovHC(m, type = "HC0")))

And then:

R> nw3 - nw2
(Intercept)           x           z
           0           0           0
R> nw4 - nw2
(Intercept)           x           z
           0           0           0

When running many regressions, it may still be useful to not run the 
summary every time, I guess.

Best,
Z

----
Ivo Welch (ivo.welch at gmail.com)



docs[["lm"]] <- c(
 author='ivo.welch at gmail.com',
 date='2013',
 description='add newey-west and standardized coefficients to lm(),
and return the summary.lm',
 usage='lm(..., newey.west=0, stdcoefs=TRUE)',
 arguments='',
 details='
   Note that when y or x have different observations, the
coef(lm(y~x))*sd(x)/sd(y)
   calculations are different from a scale(y) ~ scale(x) regression.

   Note that the NeweyWest statistics I get from the sandwich package
are different.
   could be my bug, or could be my problem specifying an additional parameter.
   Here is my code

     library(sandwich)
     library(lmtest)
     print(coeftest(lmo, vcov = NeweyWest, lag=0, prewhite=FALSE))
',
 seealso='stats:::lm, stats:::summary.lm',
 examples='
   x <- rnorm(12); y <- rnorm(12); z <- rnorm(12); x[2] <-NA;
   lm( y ~ x + z )
',
 test='eval(parse(text=(docs[["lm"]][["examples"]])))',
 changes= '',
 version='0.91'
)

################################################################

lm <- function (..., x = FALSE, newey.west=(0), stdcoefs=TRUE) {

 ## R is painfully error-tolerant. I prefer reasonable and immediate
error warnings.
 stopifnot( (is.vector(newey.west))&(length(newey.west)==1)|(is.numeric(newey.west))
)
 stopifnot( (is.vector(stdcoefs))&(length(stdcoefs)==1)|(is.logical(stdcoefs))
)
 stopifnot( (is.vector(x))&(length(x)==1)|(is.logical(x)) )
 ## I wish I could check lm()'s argument, but I cannot.

 lmo <- stats:::lm(..., x=TRUE)
 ## note that both the x matrix and the residuals from the model have
their NA's omitted by default

 if (newey.west>=0) {
   resids <- residuals(lmo)
   diagband.matrix <- function(m, ar.terms) {
     nomit <- m - ar.terms - 1
     mm <- matrix(TRUE, nrow = m, ncol = m)
     mm[1:nomit, (ncol(mm) - nomit + 1):ncol(mm)] <-
(lower.tri(matrix(TRUE, nrow = nomit, ncol = nomit)))
     mm[(ncol(mm) - nomit + 1):ncol(mm), 1:nomit] <-
(upper.tri(matrix(TRUE, nrow = nomit, ncol = nomit)))
     mm
   }
   invx <- chol2inv(chol(crossprod(lmo$x)))
   invx.x <- invx %*% t(lmo$x)
   if (newey.west==0)
     resid.matrix <- diag(resids^2)
   else {
     full <- resids %*% t(resids)
     maskmatrix <- diagband.matrix(length(resids), newey.west)
     resid.matrix <- full * maskmatrix
   }
   vmat <- invx.x %*% resid.matrix %*% t(invx.x)

   nw <- newey.west  ## the number of AR terms
   nw.se <- sqrt(diag(vmat))  ## the standard errors
 }

 if (stdcoefs) {
   xsd <- apply( lmo$x, 2, sd)
   ysd <- sd( lmo$model[,1] )
   stdcoefs.v <- lmo$coefficients*xsd/ysd
 }

 full.x.matrix <- if (x) lmo$x else NULL
 lmo <- stats:::summary.lm(lmo)  ## add the summary.lm object
 if (x) lmo$x <- full.x.matrix

 if (stdcoefs) {
   lmo$coefficients <- cbind(lmo$coefficients, stdcoefs.v )
   colnames(lmo$coefficients)[ncol(lmo$coefficients)] <- "stdcoefs"
 }

 if (newey.west>=0) {
   lmo$coefficients <- cbind(lmo$coefficients, nw.se)
   colnames(lmo$coefficients)[ncol(lmo$coefficients)] <-
paste0("se.nw(",newey.west,")")
   lmo$coefficients <- cbind(lmo$coefficients, lmo$coefficients[,1]/nw.se)
   colnames(lmo$coefficients)[ncol(lmo$coefficients)] <-
paste0("T.nw(",newey.west,")")
 }

 lmo

}

attr(lm, "original") <- stats:::lm

R's basic lm() and summary.lm functions

Thread (5 messages)