*******************************************************************************
**** ****
**** Stata-program (do-file) associated with the lectures on day four. ****
**** ****
*******************************************************************************
* Specifying the path to where the data are, for example
cd "C:\ANOVA\"
******* Comparison of ear thickness of wild-type and knockout mice. *******
* Reading in the data.
use mouse.dta, clear
* Making a scatter plot of the data and connecting the points corresponding to
* the same mouse.
scatter thickness ear, connect(L) sort(mouse ear) by(type)
* Random effects model can be fitted using 'mixed'. Before the '||' the fixed
* effects are specified. Random effects are specified after the '||' (in case
* of more than one random effect they would be separated by '||'). It is also
* possible to have covariates associated with a random effect as we saw in the
* situation with the random coefficient model. In that case they would be
* written after the ':'. In this example we do not have any. The option 'var'
* asks Stata to print the estimated variances (instead of the estimated
* standard deviations). The option 'dfmethod(kroger)' along with the 'small'
* option to 'pwcompare' secures that we get exact F-test instead of the
* approximate chi-square-tests.
mixed thickness bn.type, nocons || mouse: , reml var dfmethod(kroger)
pwcompare type, eff small
* The correlation between observations corresponding to two ears on the same
* mouse is calculated using 'estat icc'.
estat icc
* Analysis of mouse means. In order to calculate the mean for each mouse, we
* create one variable for each ear ('thickness1' and 'thickness2').
reshape wide thickness, i(mouse) j(ear)
generate meanthick = 0.5*(thickness1 + thickness2)
* The mouse means for the two types can be compared using a two-sample t-test.
ttest meanthick, by(type)
******* 5-repetition sit-to-stand test in MS patients. *******
use sts.dta, clear
* Making a scatter plot of the data and connecting the points corresponding to
* the same patient on the same day.
scatter time trial, connect(L) sort(patient trial) by(day)
* Plotting the mean curves.
egen groupmean = mean(time), by(day trial)
sort trial day patient
twoway (connected groupmean day if trial==1, msymbol(Oh)) ///
(connected groupmean day if trial==2, msymbol(O)), ///
legend(label(1 "Trial 1") label(2 "Trial 2")) ///
ytitle("Mean 5STS-time (sec)")
* Analysis of the data using a random effects analysis. Including two '||'
* means that we get a random effect of patient and day within patient. The
* option 'dfmethod(kroger)' along with the 'small' option to 'testparm' secures
* that we get exact F-test instead of the approximate chi-square-tests.
mixed time bn.day##bn.trial, nocons || patient: || day: , reml dfmethod(kroger)
testparm bn.day#bn.trial, small
* As the interaction between day and trial was not really significant, we
* proceed with the additive model
mixed time bn.day bn.trial || patient: || day: , reml dfmethod(kroger)
testparm bn.day, small
testparm bn.trial, small
* Model validation plotting the standardized residuals against the fitted
* values and making a QQ-plot og the standardized residuals.
predict fit, fitted
predict res, rstandard
scatter res fit, name(g1,replace)
qnorm res, name(g2,replace)
graph combine g1 g2
* Analysis on the log-scale
generate ltime = log(time)
* The same analysis as before now just on the log-scale
mixed ltime bn.day##bn.trial, nocons || patient: || day: , reml dfmethod(kroger)
testparm bn.day#bn.trial, small
drop fit res
predict fit, fitted
predict res, rstandard
scatter res fit, name(g1,replace)
qnorm res, name(g2,replace)
graph combine g1 g2
margins day#trial
marginsplot
mixed ltime bn.day bn.trial || patient: || day: , reml dfmethod(kroger)
testparm bn.day, small
testparm bn.trial, small
* Pairwise comparison of the three days and a comparison of the two trials.
pwcompare day trial, eff eform small
****** Cardiac output for CHF patients and healthy during exercise. ********
* Reading in the data.
use cardiac.dta, clear
* Plotting the individual curves.
scatter co workload, connect(L) sort(workload) by(group subject)
* Plotting the individual curves grouped by group
scatter co workload, connect(L) sort(subject workload) by(group)
* Plotting the mean curves.
egen groupmean = mean(co), by(workload group)
sort group workload subject
twoway (connected groupmean workload if group==1, msymbol(Oh)) ///
(connected groupmean workload if group==2, msymbol(O)), ///
legend(label(1 "Healthy") label(2 "CHF")) ///
ytitle("Mean cardiac output (l/min)")
* The standard deviations and correlations in the two groups.
drop groupmean
reshape wide co, i(group subject) j(workload)
bysort group: pwcorr co0 co30 co60 co90 co120
tabstat co0 co30 co60 co90 co120, nototal by(group) stat(sd)
* The multivariate repeated measurements analysis using mixed so
* that Stata does not throw away any subjects. The cov(un) specifies
* an unstructured correlation matrix corresponding to measurements
* on the same subject.
reshape long co, i(group subject) j(workload)
* Multivariate model with different curves in the two groups. The
* model is saved so that we can compare it to other models using
* a likelihood ratio test. subj is a unique identifier of the subject.
egen subj = group(group subject)
xi: mixed co bn.workload#bn.group, nocons || subj: i.workload, mle cov(un) ///
technique(bfgs)
estimates store model1
* Multivariate model with parallel curves in the two groups.
xi: mixed co bn.workload bn.group || subj: i.workload, mle cov(un) ///
technique(bfgs)
estimates store model2
* Likelihood ratio test for parallel curves in the two groups (Test 1).
lrtest model1 model2
* QQ-plot of the standardized residuals.
xi: mixed co bn.workload#bn.group, nocons || subj: i.workload, cov(un) ///
technique(bfgs)
predict res if e(sample), rstandard
qnorm res
*** The following test hypotheses about equal sd's and correlations in the
*** two groups
mixed co bn.workload#bn.group, nocons || subj: , nocons ///
residual(un, t(workload) by(group)) reml stddev
estimates store m1a
xi: mixed co bn.workload#bn.group, nocons || subj: i.workload, cov(un) reml
estimates store m2a
lrtest m1a m2a
* Analysis without workload 0
* Testing the hypothesis that the sd's and correlations are the same in the
* two groups
mixed co bn.workload#bn.group if workload>0 || subj: , nocons ///
residual(un, t(workload) by(group)) reml
estimates store m1b
xi: mixed co bn.workload#bn.group if workload>0 || subj: i.workload, ///
cov(un) technique(bfgs) reml
estimates store m2b
lrtest m1b m2b
* The multivariate repeated measurements model with parallel curves
xi: mixed co bn.workload#bn.group if workload>0 || subj: i.workload, ///
cov(un) technique(bfgs)
estimates store m4b
xi: mixed co bn.workload bn.group if workload>0 || subj: i.workload, ///
cov(un) technique(bfgs)
estimates store m5b
lrtest m4b m5b
* The multivariate model with unequal standard errors and correlations.
mixed co bn.workload#bn.group, nocons || subj: , nocons ///
residual(un, t(workload) by(group))
estimates store m1
* The multivariate model with unequal standard errors and correlations but
* with parallel mean curves
mixed co bn.workload bn.group || subj: , nocons ///
residual(un, t(workload) by(group))
estimates store m2
lrtest m1 m2
* Analyses with unequal standard deviations and correlations
mixed co bn.workload#bn.group, nocons || subj: , nocons ///
residual(un, t(workload) by(group)) reml stddev
* Estimated means with 95%-CI in a plot
margins workload#group
marginsplot
* All pairwise comparisons
pwcompare workload#group
* Alternatively, estimates of group differences at each workload.
contrast group@workload, eff
* Estimated differences between group differences
lincom (30.workload#1.group - 30.workload#2.group) - ///
(0.workload#1.group - 0.workload#2.group)
lincom (60.workload#1.group - 60.workload#2.group) - ///
(30.workload#1.group - 30.workload#2.group)
lincom (90.workload#1.group - 90.workload#2.group) - ///
(60.workload#1.group - 60.workload#2.group)
lincom (120.workload#1.group - 120.workload#2.group) - ///
(90.workload#1.group - 90.workload#2.group)
** Estimates of the successive workload differences for each group
contrast a.workload@group, eff
* Estimated increase (from rest) in cardiac output in a plot
contrast workload@group, eff
margins r.workload@group
marginsplot