lmer(): Higher weight in case of more measurements

Hi Susanne,

I've fixed a few problems with your code.

There is a difference between the group size. That is what you expect as
the analysis weights the first group higher when it has 40 points and that
drags the line down.

For the second analysis I've changed the weight to 5000, as it seems
sensible and is less likely to give numerical problems. This does what
would be expected. It assumes that each observation in in the 1st group is
actually the mean of 5000 points. From this it obtains the variation of a
single observation which will be much higher than before as the variation
in group 1 is unchanged. It actually violates the assumptions as the
residual variation in each group is not equal, but you will see that the
estimate of the residual variance is much higher. This can be fixed by
changing the residual variance in the first group to reflect the lower
variation in each observation, and that code is at the end, and seems to do
the right thing.

Ken

## 10 groups with 10 datapoints each
# slope a and intercept b for lines
a <- c(0.3,0.5,0.8,1,1.3,1.5,1.8,2,2.3,2.5)
b <- c(0,0.3,0.5,0.8,1,1.3,1.5,1.8,2,2.3)

y.m <- x.m <- matrix(nrow=10,ncol=10)

# i th row is for ith group
for(i in 1:10){
  # generate randomly x values
  x.m[i,] <- runif(10,0,5)
  # y values
  y.m[i,] <- a[i]*x.m[i,] + b[i] + rnorm(10,0,0.01)
}

# convert to vectors with first row, then second row...
y <- as.vector(t(y.m))
x <- as.vector(t(x.m))
group <-
c(rep(1,10),rep(2,10),rep(3,10),rep(4,10),rep(5,10),rep(6,10),rep(7,10),rep(8,10),rep(9,10),rep(10,10))
res <- lmer(y ~ x + (x|group))

summary(res)

#####################################################################
## 10 groups. 1. has 40 datapoints, others have 10
# if it matters, that group 1 has more datapoints, the regression line
should have a smaller intercept and smaller slope
# slope a and intercept b for lines

a <- c(0.3,0.5,0.8,1,1.3,1.5,1.8,2,2.3,2.5)
b <- c(0,0.3,0.5,0.8,1,1.3,1.5,1.8,2,2.3)

y.m <- x.m <- matrix(nrow=10,ncol=10)
# i th row is for ith group
for(i in 1:10){
  # generate randomly x values
  x.m[i,] <- runif(10,0,5)

  # y values
  y.m[i,] <- a[i]*x.m[i,] + b[i] + rnorm(10,0,0.01)
}

x.m <- rbind(runif(10,0,5),x.m)
y.m <- rbind(a[1]*x.m[1,] + b[1] + rnorm(10,0,0.01),y.m)
x.m <- rbind(runif(10,0,5),x.m)
y.m <- rbind(a[1]*x.m[1,] + b[1] + rnorm(10,0,0.01),y.m)
x.m <- rbind(runif(10,0,5),x.m)
y.m <- rbind(a[1]*x.m[1,] + b[1] + rnorm(10,0,0.01),y.m)
x.m <- rbind(runif(10,0,5),x.m)
y.m <- rbind(a[1]*x.m[1,] + b[1] + rnorm(10,0,0.01),y.m)

# convert to vectors with first row, then second row...
y <- as.vector(t(y.m))
x <- as.vector(t(x.m))
group <-
c(rep(1,10),rep(1,10),rep(1,10),rep(1,10),rep(1,10),rep(2,10),rep(3,10),rep(4,10),rep(5,10),rep(6,10),rep(7,10),rep(8,10),rep(9,10),rep(10,10))
res <- lmer(y ~ x + (x|group))
summary(res)

###################################################################
# result:
# it's the same regression line for both
# => it doesn't matter that group 1 has more datapoints!!
##################################################################

# plot:
plot(x,y, pch=16, col=1, ylab="y",xlab="x",main=sprintf("plot"))
s <- summary(res)
# add linear regression lines
abline(s$coefficients[1,1],s$coefficients[2,1], lty=2, col = 1)

###################################################################
# check with weights:
##################################################################

# same as above:
# slope a and intercept b for lines
a <- c(0.3,0.5,0.8,1,1.3,1.5,1.8,2,2.3,2.5)
b <- c(0,0.3,0.5,0.8,1,1.3,1.5,1.8,2,2.3)
y.m <- x.m <- matrix(nrow=10,ncol=10)
# i th row is for ith group
for(i in 1:10){
  # generate randomly x values
  x.m[i,] <- runif(10,0,5)
  # y values
  y.m[i,] <- a[i]*x.m[i,] + b[i] + rnorm(10,0,0.01)
}
# convert to vectors with first row, then second row...
y <- as.vector(t(y.m))
x <- as.vector(t(x.m))
group <-
c(rep(1,10),rep(2,10),rep(3,10),rep(4,10),rep(5,10),rep(6,10),rep(7,10),rep(8,10),rep(9,10),rep(10,10))
## Add weights: 1. group has weight=5000, the others weight=1 #############
w <- c(rep(5000,10),rep(1,90))

res <- lmer(y ~ x + (x|group), weights=w)
summary(res)
#################################################################
# slope a and intercept b for lines
a <- c(0.3,0.5,0.8,1,1.3,1.5,1.8,2,2.3,2.5)
b <- c(0,0.3,0.5,0.8,1,1.3,1.5,1.8,2,2.3)
y.m <- x.m <- matrix(nrow=10,ncol=10)
# i th row is for ith group
for(i in 1:10){
  # generate randomly x values
  x.m[i,] <- runif(10,0,5)
  # y values
  y.m[i,] <- a[i]*x.m[i,] + b[i] + rnorm(10,0,0.01)
}

x.m <- rbind(runif(10,0,5),x.m)
y.m <- rbind(a[1]*x.m[1,] + b[1] + rnorm(10,0,0.01),y.m)
x.m <- rbind(runif(10,0,5),x.m)
y.m <- rbind(a[1]*x.m[1,] + b[1] + rnorm(10,0,0.01),y.m)
x.m <- rbind(runif(10,0,5),x.m)
y.m <- rbind(a[1]*x.m[1,] + b[1] + rnorm(10,0,0.01),y.m)
x.m <- rbind(runif(10,0,5),x.m)
y.m <- rbind(a[1]*x.m[1,] + b[1] + rnorm(10,0,0.01),y.m)

# convert to vectors with first row, then second row...
y <- as.vector(t(y.m))
x <- as.vector(t(x.m))
group <-
c(rep(1,10),rep(1,10),rep(1,10),rep(1,10),rep(1,10),rep(2,10),rep(3,10),rep(4,10),rep(5,10),rep(6,10),rep(7,10),rep(8,10),rep(9,10),rep(10,10))

## Add weights: 1. group has weight=5000, the others weight=1 #############
w <- c(rep(5000,40),rep(5000,100))

res <- lmer(y ~ x + (x|group), weights=w)
summary(res)

# correct variances for weights
# slope a and intercept b for lines
a <- c(0.3,0.5,0.8,1,1.3,1.5,1.8,2,2.3,2.5)
b <- c(0,0.3,0.5,0.8,1,1.3,1.5,1.8,2,2.3)
y.m <- x.m <- matrix(nrow=10,ncol=10)
# i th row is for ith group
for(i in 1:10){
  # generate randomly x values
  x.m[i,] <- runif(10,0,5)
  # y values
  if (i==1) y.m[i,] <- a[i]*x.m[i,] + b[i] + rnorm(10,0,0.01/sqrt(5000))
  else y.m[i,] <- a[i]*x.m[i,] + b[i] + rnorm(10,0,0.01)
}
# convert to vectors with first row, then second row...
y <- as.vector(t(y.m))
x <- as.vector(t(x.m))
group <-
c(rep(1,10),rep(2,10),rep(3,10),rep(4,10),rep(5,10),rep(6,10),rep(7,10),rep(8,10),rep(9,10),rep(10,10))
## Add weights: 1. group has weight=5000, the others weight=1 #############
w <- c(rep(5000,10),rep(1,90))

res <- lmer(y ~ x + (x|group), weights=w)
summary(res)