****************************************************************************************
**** ****
**** Stata-program (do-file) associated with the lectures on day three. ****
**** ****
****************************************************************************************
* Specifying the path to where the data are, for example
cd "C:\ANOVA\"
****** Growth of preadolescent girls from 6 to 10 years of age ******
* Reading in the data.
use "height.dta", clear
* Plotting the individual curves.
scatter height age, connect(L) sort(age) by(girl)
scatter height age, connect(L) sort(girl age) by(mother)
* Plotting the mean curves.
egen groupmean = mean(height), by(mother age)
sort mother age girl
twoway (connected groupmean age if mother==1, msymbol(Oh)) ///
(connected groupmean age if mother==2, msymbol(O)) ///
(connected groupmean age if mother==3, msymbol(+)), ///
legend(label(1 "Short") label(2 "Medium") label(3 "Tall")) ///
ytitle("Mean height (cm)")
* The random coefficient model. The systematic part of the model (before the ||)
* includes group specific intercepts and slopes. The random part (after
* the ||) specifies that for each subject (uniquely identified by girl)
* we want a random shift of the intercept and slope. Note that it is
* not necessary to indicate the intercept explicitly.
mixed height bn.mother bn.mother#c.age, nocons || girl: age, cov(un) reml
* Testing the hypothesis of equal slopes in all three groups.
test 1.mother#c.age = 2.mother#c.age = 3.mother#c.age
* Pairwise comparisons of slopes
mixed height bn.mother bn.mother#c.age, nocons || girl: age, cov(un) reml
pwcompare mother#c.age, eff
* Plotting the individual curves with the best straight line.
twoway (scatter height age, sort(age) by(girl)) (lfit height age, sort(age) by(girl))
* Plotting the standardized residuals against the fitted values and a QQ-plot
* for the standardized residuals.
predict fit, fitted
predict res, rstandard
scatter res fit, name(g1,replace)
qnorm res, name(g2,replace)
graph combine g1 g2
* Correlations between the height measurements for each group.
drop groupmean fit res
reshape wide height, i(girl) j(age)
bysort mother: corr height6 height7 height8 height9 height10, means
* corr is used since we have no missing observations. If there
* are missing observations use pwcorr and tabstat.
* The random coefficient model with age - 6 as the covariate.
reshape long height, i(girl) j(age)
generate age6 = age - 6
mixed height bn.mother bn.mother#c.age6, nocons || girl: age6, cov(un) reml
pwcompare mother, eff
****** Orthodontic measurement over time for boys and girls. **********
* Reading in the data.
use "distance.dta", clear
* In order to do the random coefficient model we need the data in a "long" format.
reshape long dist, i(id2) j(age)
* Plotting the individual curves.
scatter dist age, connect(L) sort(age) by(id2)
scatter dist age, connect(L) sort(sex id2 age) by(sex)
* Plotting the mean curves.
egen groupmean = mean(dist), by(sex age)
sort sex age id
twoway (connected groupmean age if sex==1, msymbol(Oh)) ///
(connected groupmean age if sex==2, msymbol(O)), ///
legend(label(1 "Boys") label(2 "Girls")) ///
ytitle("Mean distance (mm)")
* Plotting the mean curve with the best straight line for each sex
twoway (scatter groupmean age if sex==1, msymbol(Oh)) ///
(scatter groupmean age if sex==2, msymbol(O)) ///
(lfit groupmean age if sex==1) ///
(lfit groupmean age if sex==2), ///
legend(label(1 "Boys") label(2 "Girls")) ///
ytitle("Mean distance (mm)")
* The random coefficient model (with the same between subject variation
* for boys and girls). The systematic part of the model (before the ||)
* includes sex specific intercepts and slopes. The random part (after
* the ||) specifies that for each subject (uniquely identified by id2)
* we want a random shift of the intercept and slope. Finally, we specify
* different within subject variation for boys and girls.
generate age11 = age - 11
mixed dist bn.sex bn.sex#c.age11, nocons || id2: age11, residual(independent, t(age11) by(sex)) reml
* Plotting the individual curves with the best straight line.
twoway (scatter dist age, sort(age) by(id2)) (lfit dist age, sort(age) by(id2))
* Plotting the standardized residuals against the fitted values and a QQ-plot
* for the standardized residuals.
predict fit, fitted
predict res, rstandard
scatter res fit, name(g1,replace)
qnorm res, name(g2,replace)
graph combine g1 g2
****** Diet and plasma glucose for diabetic patients. **********
* Reading in the data.
use "diet.dta", clear
* Plotting the individual curves.
scatter pgluc time, connect(L) sort(breakfast time) by(pt) msy(i)
scatter pgluc time, connect(L) sort(breakfast pt time) by(breakfast)
* Plotting the mean curves.
egen groupmean = mean(pgluc), by(breakfast time)
sort breakfast time pt
twoway (connected groupmean time if breakfast==0, msymbol(Oh)) ///
(connected groupmean time if breakfast==1, msymbol(O)), ///
legend(label(1 "No breakfast") label(2 "Breakfast")) ///
ytitle("Mean plasma glucose (mmol/l)")
* Analysis of summary statistics (the mean and sd)
egen mpgluc = mean(pgluc), by(pt day)
egen sdpgluc = sd(pgluc), by(pt day)
drop groupmean
reshape wide pgluc, i(pt day) j(time)
anova mpgluc order/pt day breakfast
pwcompare breakfast, eff
anova sdpgluc order/pt day breakfast
pwcompare breakfast, eff
* Before lunch
use diet.dta, clear
keep if time<5
egen mpglucbl = mean(pgluc), by(pt day)
egen sdpglucbl = sd(pgluc), by(pt day)
reshape wide pgluc, i(pt day) j(time)
anova mpglucbl order/pt day breakfast
pwcompare breakfast, eff
anova sdpglucbl order/pt day breakfast
pwcompare breakfast, eff
* After lunch
use diet.dta, clear
keep if time>4
egen mpglucal = mean(pgluc), by(pt day)
egen sdpglucal = sd(pgluc), by(pt day)
reshape wide pgluc, i(pt day) j(time)
anova mpglucal order/pt day breakfast
pwcompare breakfast, eff
anova sdpglucal order/pt day breakfast
pwcompare breakfast, eff
** Analysis of all the data
use diet.dta, clear
anova pgluc order /pt day breakfast /pt|day time breakfast#time, bse(pt#day) repeated(time)
predict res, residual
qnorm res
* Test for reduction to the univariate repeated measurement model
drop res
reshape wide pgluc, i(pt day) j(time)
mvtest cov pgluc0 pgluc1 pgluc2 pgluc3 pgluc4 pgluc5 pgluc6 pgluc7 pgluc8 pgluc9 pgluc10 pgluc11 pgluc12 pgluc19 if breakfast==1, comp
mvtest cov pgluc0 pgluc1 pgluc2 pgluc3 pgluc4 pgluc5 pgluc6 pgluc7 pgluc8 pgluc9 pgluc10 pgluc11 pgluc12 pgluc19 if breakfast==0, comp
* The analysis using mixed
reshape long pgluc, i(pt day) j(time)
mixed pgluc bn.order bn.day bn.breakfast bn.time bn.breakfast#bn.time || pt: || day: , reml
* Test for the same development over time in the two diet groups.
mixed pgluc bn.order bn.day bn.breakfast bn.time bn.breakfast#bn.time || pt: || day:
estimates store m1
mixed pgluc bn.order bn.day bn.breakfast bn.time || pt: || day:
estimates store m2
lrtest m1 m2