tildes/distributions correspond to the r* family of functions (random draws)
everything else is a little bit of matrix math
Simple Linear Regression
library("groundhog")
Loaded 'groundhog' (version:2.0.1) using R-4.2.0
Tips and troubleshooting: https://groundhogR.com
set.seed(42)form <- y ~1+ xbeta <-c(1, 42)sig <-3.14n <-100# we can start with mean zero and then add mu in laterdat <-data.frame(y=rnorm(n, sd=sig),x=runif(n, -10, 10) # NB: uniform )X <-model.matrix(form, dat)dat$y <- X %*% beta + dat$ysummary(lm(form, dat))
Call:
lm(formula = form, data = dat)
Residuals:
Min 1Q Median 3Q Max
-9.6190 -2.0023 0.4292 2.1287 6.8879
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.15594 0.33461 3.455 0.000816 ***
x 42.04546 0.05749 731.355 < 2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 3.276 on 98 degrees of freedom
Multiple R-squared: 0.9998, Adjusted R-squared: 0.9998
F-statistic: 5.349e+05 on 1 and 98 DF, p-value: < 2.2e-16
Multiple Linear Regression
set.seed(42)form <- y ~1+ x * z# intercept, x, z, x:zbeta <-c(1, 42, 27, 0.4)sig <-3.14n <-100# we can start with mean zero and then add mu in laterdat <-data.frame(y=rnorm(n, sd=sig),x=runif(n, -10, 10), # NB: uniformz=runif(n, 25, 75))X <-model.matrix(form, dat)dat$y <- X %*% beta + dat$ym2 <-lm(form, dat)summary(m2)
Call:
lm(formula = form, data = dat)
Residuals:
Min 1Q Median 3Q Max
-9.5903 -1.8976 0.5459 2.2200 7.0766
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.975166 1.202758 1.642 0.104
x 41.869442 0.218333 191.768 <2e-16 ***
z 26.983065 0.022726 1187.342 <2e-16 ***
x:z 0.403483 0.004046 99.728 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 3.283 on 96 degrees of freedom
Multiple R-squared: 1, Adjusted R-squared: 1
F-statistic: 9.614e+05 on 3 and 96 DF, p-value: < 2.2e-16
Not quite contrast coding, but the same problem
set.seed(42)form <- y ~1+ x * z# intercept, x, z, x:zbeta <-c(1, 42, 27, 0.4)sig <-3.14n <-100# we can start with mean zero and then add mu in laterdat <-data.frame(y=rnorm(n, sd=sig),x=runif(n, -10, 10), # NB: uniformz=runif(n, 25, 75))X <-model.matrix(form, dat)dat$y <- X %*% beta + dat$ym2c <-lm(y ~1+ x *scale(z, center=TRUE, scale=FALSE), dat)summary(m2c)
Call:
lm(formula = y ~ 1 + x * scale(z, center = TRUE, scale = FALSE),
data = dat)
Residuals:
Min 1Q Median 3Q Max
-9.5903 -1.8976 0.5459 2.2200 7.0766
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.355e+03 3.380e-01 4009.82 <2e-16
x 6.211e+01 5.838e-02 1063.82 <2e-16
scale(z, center = TRUE, scale = FALSE) 2.698e+01 2.273e-02 1187.34 <2e-16
x:scale(z, center = TRUE, scale = FALSE) 4.035e-01 4.046e-03 99.73 <2e-16
(Intercept) ***
x ***
scale(z, center = TRUE, scale = FALSE) ***
x:scale(z, center = TRUE, scale = FALSE) ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 3.283 on 96 degrees of freedom
Multiple R-squared: 1, Adjusted R-squared: 1
F-statistic: 9.614e+05 on 3 and 96 DF, p-value: < 2.2e-16
Effects plots are your friend I
groundhog.library("effects", "2022-07-14")
Loading required package: carData
lattice theme set by effectsTheme()
See ?effectsTheme for details.
Warning in Analyze.model(focal.predictors, mod, xlevels, default.levels, : the
predictors y, scale(z, center = TRUE, scale = FALSE) are one-column matrices
that were converted to vectors
Centering, scaling and contrast coding in linear regression
Linear transformations of the numeric representation of your predictors changes the meaning and interpretation of your model coefficients
Likewise, the way that categorical variables are changed into contrasts determines the meaning and interpretation of your model coefficients
You cannot understand what a model means without knowing the choice of contrast and any transformation of numeric predictors! (see also Brehm and Alday, JML, in press)
This is just as critical for simulation, if you want to simulate your hypotheses!
Doing this in Julia
usingDataFrames, GLM, StatsModels, Randomrng =MersenneTwister(42);form =@formula(y ~1+ x * z);# intercept, x, z, x&zbeta = [1, 42, 27, 0.4];sig =3.14;n =100;# we can start with mean zero and then add mu in laterdat =DataFrame(y=sig *randn(rng, n), # multiply by sd to scale x=20*rand(rng, n) .-10, # multiply by range and move left edge to correct postion z=50*rand(rng, n) .+25);# "hints" is the argument contrasts, etc.X =modelmatrix(form, dat);m2 =lm(form, dat)
The Real World
Imbalance
groundhog.library("car", "2022-07-14")
[36mSuccesfully attached 'car_3.1-0'[0m
set.seed(42)form <- y ~1+ xbeta <-c(1, 42, 42)sig <-3.14n <-100# we can start with mean zero and then add mu in laterdat <-data.frame(y=rnorm(n, sd=sig),x=sample(c("brown", "green", "orange"), n, replace=TRUE) # NB: uniform )dat$x <-factor(dat$x)contrasts(dat$x) <-contr.Sum(levels(dat$x))X <-model.matrix(form, dat)dat$y <- X %*% beta + dat$ysummary(lm(form, dat))
Call:
lm(formula = form, data = dat)
Residuals:
Min 1Q Median 3Q Max
-9.3230 -2.0847 0.3634 2.0419 6.6855
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.0739 0.3305 3.25 0.00159 **
x[S.brown] 42.4207 0.4517 93.91 < 2e-16 ***
x[S.green] 41.7285 0.4710 88.60 < 2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 3.288 on 97 degrees of freedom
Multiple R-squared: 0.9969, Adjusted R-squared: 0.9968
F-statistic: 1.548e+04 on 2 and 97 DF, p-value: < 2.2e-16
set.seed(42)form <- y ~1+ xbeta <-c(1, 42, 42)sig <-3.14n <-100# we can start with mean zero and then add mu in laterdat <-data.frame(y=rnorm(n, sd=sig),x=sample(c("brown", "green", "orange"), n, replace=TRUE, prob=c(19/30, 1/30, 1/3)) # NB: uniform )dat$x <-factor(dat$x)contrasts(dat$x) <-contr.Sum(levels(dat$x))X <-model.matrix(form, dat)dat$y <- X %*% beta + dat$ysummary(lm(form, dat))
Call:
lm(formula = form, data = dat)
Residuals:
Min 1Q Median 3Q Max
-9.1533 -2.2243 0.2046 2.2696 6.5829
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.9428 0.6780 2.866 0.0051 **
x[S.brown] 40.8121 0.7111 57.390 <2e-16 ***
x[S.green] 43.0412 1.2752 33.753 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 3.24 on 97 degrees of freedom
Multiple R-squared: 0.9961, Adjusted R-squared: 0.996
F-statistic: 1.23e+04 on 2 and 97 DF, p-value: < 2.2e-16
Heavy Tails
set.seed(42)form <- y ~1+ xbeta <-c(1, 42)sig <-3.14n <-100# we can start with mean zero and then add mu in laterdat <-data.frame(y=sig *rt(n, df=3),x=runif(n, -10, 10) # NB: uniform )X <-model.matrix(form, dat)dat$y <- X %*% beta + dat$ysummary(lm(form, dat))
Call:
lm(formula = form, data = dat)
Residuals:
Min 1Q Median 3Q Max
-8.4307 -2.6328 -0.8111 1.4640 29.5658
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.23958 0.48927 2.534 0.0129 *
x 41.83358 0.08632 484.656 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 4.89 on 98 degrees of freedom
Multiple R-squared: 0.9996, Adjusted R-squared: 0.9996
F-statistic: 2.349e+05 on 1 and 98 DF, p-value: < 2.2e-16
Outliers
set.seed(42)form <- y ~1+ xbeta <-c(1, 42)sig <-3.14n <-100dat <-data.frame(y=rnorm(n, sd=sig),x=runif(n, -10, 10) # NB: uniform )X <-model.matrix(form, dat)# in 10% of data points, we add in a potentially huge second source of errordat$y <- dat$y +rbinom(n, 1, prob=0.1) *rnorm(n, sd=100*sig)dat$y <- X %*% beta + dat$ysummary(lm(form, dat))
Call:
lm(formula = form, data = dat)
Residuals:
Min 1Q Median 3Q Max
-541.35 -13.20 0.63 13.58 664.77
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -8.038 12.774 -0.629 0.531
x 39.016 2.195 17.778 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 125.1 on 98 degrees of freedom
Multiple R-squared: 0.7633, Adjusted R-squared: 0.7609
F-statistic: 316 on 1 and 98 DF, p-value: < 2.2e-16
Mixed Models
Recall….
\[\begin{align*}
(Y | B = b ) &\sim \mathcal{N}( X\beta + Zb, \sigma^2 I ) \\
B &\sim \mathcal{N}(0, \sigma _\theta)
\end{align*}\]
tildes/distributions correspond to the r* family of functions (random draws)
form_fe <- y ~1+ xform <- y ~1+ x + (1| subj)beta <-c(1, 42)sig <-1# try 3.14n <-500# try 100, 200, 500, 1000n_subj <-20# try 10, 15, can't be more than 26# was it better to increase n or n_subj?subj_sd <-0.5b <-rnorm(n_subj, sd=subj_sd)dat <-data.frame(y=rnorm(n, sd=sig),x=runif(n, -10, 10),subj=sample(LETTERS[1:n_subj], n, replace=TRUE))X <-model.matrix(form_fe, dat)Z <-model.matrix(~0+ subj, dat)dat$y <- X %*% beta + Z %*% b + dat$y# what happens if you use y ~ 1 + x + (1 + x | subj)?mm1 <-lmer(form, dat, REML=FALSE)summary(mm1)
Linear mixed model fit by maximum likelihood ['lmerMod']
Formula: y ~ 1 + x + (1 | subj)
Data: dat
AIC BIC logLik deviance df.resid
1440.1 1456.9 -716.0 1432.1 496
Scaled residuals:
Min 1Q Median 3Q Max
-3.07161 -0.67603 -0.00644 0.66172 3.04554
Random effects:
Groups Name Variance Std.Dev.
subj (Intercept) 0.4708 0.6862
Residual 0.9253 0.9619
Number of obs: 500, groups: subj, 20
Fixed effects:
Estimate Std. Error t value
(Intercept) 1.051725 0.159618 6.589
x 42.011144 0.007593 5532.653
Correlation of Fixed Effects:
(Intr)
x 0.006
set.seed(42)form_fe <- y ~1+ xform <- y ~1+ x + (1+ x| subj)beta <-c(1, 42)sig <-1.414n <-1000n_subj <-26# can't be more than 26 ;)subj_sd <-c(0.5, 0.3)# for now, we just let the RE correlations be zerob <-c(rnorm(n_subj, sd=subj_sd[1]), rnorm(n_subj, sd=subj_sd[2]))dat <-data.frame(y=rnorm(n, sd=sig),x=runif(n, -10, 10),subj=rep_len(LETTERS[1:n_subj], n))X <-model.matrix(form_fe, dat)Z_int <-model.matrix(~0+ subj, dat)Z_slope <- ... # I can't be bothered ot actually do write this out....Z <-cbind(Z_int, Z_slope)dat$y <- X %*% beta + Z %*% b + dat$ymm2 <-lmer(form, dat, REML=FALSE)summary(mm1)
Let lme4 do the hard work for you
set.seed(42)form <- y ~1+ x + (1+ x| subj)beta <-c(1, 42)sig <-1.414n <-1000n_subj <-26# can't be more than 26 ;)# for now, we just let the RE correlations be zerosubj_sd <-c(0.5, 0.3)dat <-data.frame(y=rnorm(n, sd=sig),x=runif(n, -10, 10),subj=rep_len(LETTERS[1:n_subj], n))mm2 <-lmer(form, dat, REML=FALSE)
boundary (singular) fit: see help('isSingular')
# this is garbage, but that's fine!summary(mm2)
Linear mixed model fit by maximum likelihood ['lmerMod']
Formula: y ~ 1 + x + (1 + x | subj)
Data: dat
AIC BIC logLik deviance df.resid
3545.3 3574.8 -1766.7 3533.3 994
Scaled residuals:
Min 1Q Median 3Q Max
-3.1872 -0.6613 -0.0058 0.6801 3.5238
Random effects:
Groups Name Variance Std.Dev. Corr
subj (Intercept) 0.0122816 0.11082
x 0.0001533 0.01238 1.00
Residual 1.9893385 1.41044
Number of obs: 1000, groups: subj, 26
Fixed effects:
Estimate Std. Error t value
(Intercept) -0.034332 0.049646 -0.692
x -0.002603 0.008101 -0.321
Correlation of Fixed Effects:
(Intr)
x 0.117
optimizer (nloptwrap) convergence code: 0 (OK)
boundary (singular) fit: see help('isSingular')
simr fixef I
groundhog.library("simr", "2022-07-14")
Attaching package: 'simr'
The following object is masked from 'package:lme4':
getData
[36mSuccesfully attached 'simr_1.0.6'[0m
fixef(mm2) <- beta# note that the estimates have been overridden, but nothing else updatedsummary(mm2)
Linear mixed model fit by maximum likelihood ['lmerMod']
Formula: y ~ 1 + x + (1 + x | subj)
Data: dat
AIC BIC logLik deviance df.resid
3545.3 3574.8 -1766.7 3533.3 994
Scaled residuals:
Min 1Q Median 3Q Max
-3.1872 -0.6613 -0.0058 0.6801 3.5238
Random effects:
Groups Name Variance Std.Dev. Corr
subj (Intercept) 0.0122816 0.11082
x 0.0001533 0.01238 1.00
Residual 1.9893385 1.41044
Number of obs: 1000, groups: subj, 26
Fixed effects:
Estimate Std. Error t value
(Intercept) 1.000000 0.049646 20.14
x 42.000000 0.008101 5184.58
Correlation of Fixed Effects:
(Intr)
x 0.117
optimizer (nloptwrap) convergence code: 0 (OK)
boundary (singular) fit: see help('isSingular')
simr fixef II
summary(refit(mm2, simulate(mm2)[,1]))
boundary (singular) fit: see help('isSingular')
Linear mixed model fit by maximum likelihood ['lmerMod']
Formula: y ~ 1 + x + (1 + x | subj)
Data: dat
AIC BIC logLik deviance df.resid
3546.4 3575.9 -1767.2 3534.4 994
Scaled residuals:
Min 1Q Median 3Q Max
-2.8279 -0.6810 -0.0193 0.6511 3.6094
Random effects:
Groups Name Variance Std.Dev. Corr
subj (Intercept) 0.0143537 0.119807
x 0.0000646 0.008037 -1.00
Residual 1.9925556 1.411579
Number of obs: 1000, groups: subj, 26
Fixed effects:
Estimate Std. Error t value
(Intercept) 1.030736 0.050464 20.43
x 41.999333 0.007885 5326.45
Correlation of Fixed Effects:
(Intr)
x -0.111
optimizer (nloptwrap) convergence code: 0 (OK)
boundary (singular) fit: see help('isSingular')
simr sig
sigma(mm2) <- sigsummary(mm2)
Linear mixed model fit by maximum likelihood ['lmerMod']
Formula: y ~ 1 + x + (1 + x | subj)
Data: dat
AIC BIC logLik deviance df.resid
3545.3 3574.8 -1766.7 3533.3 994
Scaled residuals:
Min 1Q Median 3Q Max
-3.1792 -0.6596 -0.0057 0.6784 3.5150
Random effects:
Groups Name Variance Std.Dev. Corr
subj (Intercept) 0.0122816 0.11082
x 0.0001533 0.01238 1.00
Residual 1.9993960 1.41400
Number of obs: 1000, groups: subj, 26
Fixed effects:
Estimate Std. Error t value
(Intercept) 1.000000 0.049771 20.09
x 42.000000 0.008121 5171.53
Correlation of Fixed Effects:
(Intr)
x 0.117
optimizer (nloptwrap) convergence code: 0 (OK)
boundary (singular) fit: see help('isSingular')
VarCorr: the variance-covariance of the random effects
vc <-VarCorr(mm2)print(vc)
Groups Name Std.Dev. Corr
subj (Intercept) 0.110822
x 0.012383 1.000
Residual 1.414000
simr random effects I
# this is on the variance/covariance scaleVarCorr(mm2) <-diag(subj_sd^2)summary(mm2)
Linear mixed model fit by maximum likelihood ['lmerMod']
Formula: y ~ 1 + x + (1 + x | subj)
Data: dat
AIC BIC logLik deviance df.resid
3545.3 3574.8 -1766.7 3533.3 994
Scaled residuals:
Min 1Q Median 3Q Max
-3.1792 -0.6596 -0.0057 0.6784 3.5150
Random effects:
Groups Name Variance Std.Dev. Corr
subj (Intercept) 0.250 0.500
x 0.090 0.300 0.00
Residual 1.999 1.414
Number of obs: 1000, groups: subj, 26
Fixed effects:
Estimate Std. Error t value
(Intercept) 1.000000 0.049771 20.09
x 42.000000 0.008121 5171.53
Correlation of Fixed Effects:
(Intr)
x 0.117
optimizer (nloptwrap) convergence code: 0 (OK)
boundary (singular) fit: see help('isSingular')
simr random effects II
# this is on the variance/covariance scalesdcovar <-diag(subj_sd)sdcovar[1,2] <-0.1# only need to fill in the upper triangle for simr# sdcor2cov is an experimental function in lme4VarCorr(mm2) <-sdcor2cov(sdcovar)summary(mm2)
Linear mixed model fit by maximum likelihood ['lmerMod']
Formula: y ~ 1 + x + (1 + x | subj)
Data: dat
AIC BIC logLik deviance df.resid
3545.3 3574.8 -1766.7 3533.3 994
Scaled residuals:
Min 1Q Median 3Q Max
-3.1792 -0.6596 -0.0057 0.6784 3.5150
Random effects:
Groups Name Variance Std.Dev. Corr
subj (Intercept) 0.250 0.500
x 0.090 0.300 0.10
Residual 1.999 1.414
Number of obs: 1000, groups: subj, 26
Fixed effects:
Estimate Std. Error t value
(Intercept) 1.000000 0.049771 20.09
x 42.000000 0.008121 5171.53
Correlation of Fixed Effects:
(Intr)
x 0.117
optimizer (nloptwrap) convergence code: 0 (OK)
boundary (singular) fit: see help('isSingular')
Linear mixed model fit by maximum likelihood ['lmerMod']
Formula: y ~ 1 + x + (1 + x | subj)
Data: dat
AIC BIC logLik deviance df.resid
3747.4 3776.9 -1867.7 3735.4 994
Scaled residuals:
Min 1Q Median 3Q Max
-3.4317 -0.6577 0.0009 0.6404 3.1423
Random effects:
Groups Name Variance Std.Dev. Corr
subj (Intercept) 0.1669 0.4086
x 0.1048 0.3237 0.45
Residual 2.1342 1.4609
Number of obs: 1000, groups: subj, 26
Fixed effects:
Estimate Std. Error t value
(Intercept) 1.0141 0.0927 10.94
x 42.0402 0.0640 656.84
Correlation of Fixed Effects:
(Intr)
x 0.388
What about when we have more complex data?
listeners (subjects) listened to speakers make either a positive or a negative statement
each statement (item) appeared either as a positive or a negative statement
we only had a small number of speakers, so modelling speaker-age or speaker-idiosyncraticities (by-speaker random effects) probably won’t be meaningful or useful
however, we had more than one speaker of each gender, so speaker-gender and speaker-idiosyncraticity isn’t completely confounded
we had a fairly large number of speakers
we had a fairly large number of items
how do speaker attributes, listener attributes, and the type of statement interact to influence listener response times in some task?
set.seed(42)# ==== Generate the Design ===# generate our sentencesn_sentences <-60sentences <-data.frame(sentence=sprintf("sent%02d",1:n_sentences),# balanced: half positive, half negativepositive_statement=sample(c("yes", "no"), n_sentences,replace=TRUE, prob=c(0.5, 0.5)))# generate our speakers and determine which speakers spoke which sentencesn_spkrs <-4# all speakers produced all sentencesspkrs_sentences <-expand.grid(spkr=sprintf("spkr%02d",1:n_spkrs),sentence=sprintf("sent%02d",1:n_sentences))# speakers only produced half of all sentencesspkrs_sentences_1 <-expand.grid(spkr=sprintf("spkr%02d",1:(n_spkrs/2)),sentence=sprintf("sent%02d",1:(n_sentences/2)))spkrs_sentences_2 <-expand.grid(spkr=sprintf("spkr%02d",(n_spkrs/2+1):n_spkrs),sentence=sprintf("sent%02d",(n_sentences/2+1):n_sentences))spkrs_sentences <-rbind(spkrs_sentences_1, spkrs_sentences_2)spkrs <-data.frame(spkr=sprintf("spkr%02d",1:n_spkrs),spkr_age=round(runif(n_spkrs,min=18,max=50)),# keeping it simple for nowspkr_gender=sample(c("female", "male"), n_spkrs,replace=TRUE, prob=c(0.5, 0.5)))spkrs_sentences <-left_join(spkrs, spkrs_sentences, by="spkr")stimuli <-left_join(spkrs_sentences, sentences, by="sentence")# generate our subjects and match them to speakersn_subjs <-35# odd number! need integer division (%/%) latersubjs <-data.frame(subj=sprintf("subj%02d",1:n_subjs),subj_age=round(runif(n_subjs,min=18,max=28)),# keeping it simple for nowsubj_gender=sample(c("female", "male"), n_subjs,replace=TRUE, prob=c(0.5, 0.5)))subjs_sentences_1 <-expand.grid(spkr=sprintf("spkr%02d",1:(n_spkrs/2)),subj=sprintf("subj%02d",1:(n_subjs %/%2)))subjs_sentences_2 <-expand.grid(spkr=sprintf("spkr%02d",(n_spkrs/2+1):n_spkrs),subj=sprintf("subj%02d",(n_subjs %/%2+1):n_subjs))subjs_sentences <-rbind(subjs_sentences_1, subjs_sentences_2)subjs_sentences <-left_join(subjs, subjs_sentences, by="subj")experiment <-left_join(subjs_sentences, stimuli, by="spkr")# === The Ground Truth Model ===true_form <- rt ~1+ positive_statement * spkr_gender * subj_age * subj_gender +# listeners vary in their response to speaker attributes and statement polarity (1+ positive_statement * spkr_age * spkr_gender| subj) +# sentences have some idiosyncratic aspects that lead to different base RTs# but don't otherwise differ based on speaker/listener features (1| sentence) # +# speakers vary in the RT they elicit for a particular polarity, e.g.,# by having different levels of associated affect/intonation# but with so few speakers, we can't even begin to model this# (1 + positive_statement | spkr)# set our contrasts, because nothing make sense without it!experiment$positive_statement <-factor(experiment$positive_statement)contrasts(experiment$positive_statement) <-contr.Sum(levels(experiment$positive_statement))experiment$subj_gender <-factor(experiment$subj_gender)contrasts(experiment$subj_gender) <-contr.Sum(levels(experiment$subj_gender))experiment$spkr_gender <-factor(experiment$spkr_gender)contrasts(experiment$spkr_gender) <-contr.Sum(levels(experiment$spkr_gender))# let's re-center our ages so that we're modelling the effect at age 25# instead of at age 0, when we look at the slopes and the interceptsexperiment$subj_age <- experiment$subj_age -25experiment$spkr_age <- experiment$spkr_age -25# fixed effects, to get the coefficient names in the right order, use:mmat <-model.matrix(~1+ positive_statement * spkr_gender * subj_age * subj_gender, data=experiment)print(colnames(mmat)) # 16 coefficients!
# everything in millisecondsbeta <-c(300, # (Intercept) 300ms base RT+25, # positive_statement[S.no] 25ms slower RT than average for negative statements (=50ms slower than positive)+10, # spkr_gender[S.female] 10ms slower RT than average for female speakers5, # subj_age 5ms/year slower RT at age 26 (center+1) vs age 25 (center)7, # subj_gender[S.female] 7ms slower RT than average for female listeners3, # positive_statement[S.no]:spkr_gender[S.female]1, # positive_statement[S.no]:subj_age2, # spkr_gender[S.female]:subj_age-4, # positive_statement[S.no]:subj_gender[S.female]1, # spkr_gender[S.female]:subj_gender[S.female]-1, # subj_age:subj_gender[S.female]3, # positive_statement[S.no]:spkr_gender[S.female]:subj_age-2, # positive_statement[S.no]:spkr_gender[S.female]:subj_gender[S.female]-3, # positive_statement[S.no]:subj_age:subj_gender[S.female]-5, # spkr_gender[S.female]:subj_age:subj_gender[S.female]0.5# positive_statement[S.no]:spkr_gender[S.female]:subj_age:subj_gender[S.female])# residual standard deviation is 25ms# this means that most trials are within ±50ms of the meansig <-25# we don't need to define n explicitly -- this emerged naturally from our design!# for now, we just let the RE correlations be zero because that makes our lives a lot# easier and I have no idea what plausible values for the correlations are# subject RE: (1 + positive_statement * spkr_age * spkr_gender| subj)subj_mmat <-model.matrix(~1+ positive_statement * spkr_age * spkr_gender, experiment)print(colnames(subj_mmat))
subj_sd <-c(20, # (Intercept)5, # positive_statement[S.no]1, # spkr_age2, # spkr_gender[S.female]1, # positive_statement[S.no]:spkr_age1, # positive_statement[S.no]:spkr_gender[S.female]1, # spkr_age:spkr_gender[S.female]1# positive_statement[S.no]:spkr_age:spkr_gender[S.female]))subj_sd <-diag(subj_sd)# sentence RE: (1|sentence)sentence_sd <-as.matrix(5)# speaker RE: (1 + positive_statement | spkr)spkr_sd <-c(5, # (Intercept)3# positive_statement[S.no])spkr_sd <-diag(spkr_sd)# there's a bug in simr -- if you get an error or things don't align,# then try re-arranging the order so that things are ordered by decreasing number of levels:# sentences (60), subjects (35), speakers (4)# can't include speakers here because there are just too few to do anything meaningfulmm_sim <-makeLmer(true_form, beta,list(sentence=sdcor2cov(sentence_sd),subj=sdcor2cov(subj_sd)),#spkr=sdcor2cov(spkr_sd)), sig, experiment)# check to make sure things match up with your settings aboveprint(VarCorr(mm_sim))
# note if you change your design/setup, you can use the newdata argument to# simulate with the model you have instead of needing to use makeLmermod againexperiment$rt <-simulate(mm_sim)[, 1]