************************************************* * Linear and Logistic Regression * Ph. D course , Aarhus University * copyright: Morten Frydenberg * Department of Biostatistics * * Most of the STATA code used at * DAY 1 lecture 2 * Stat11 ************************************************* cd E:\KURSER\PostReg\Version6\Lectures\Day1 capture log close log using v11RegDay1_2.log,text replace use lung,clear ************************************************** * Finding the estimates ************************************************** regress PEFR height ************************************************** * Finding the fitted values and the residuals * based on the last model fitted and * the subdata set it was fitted "if e(sample)" ************************************************** predict PEFR_hat if e(sample),xb predict PEFR_res if e(sample),resid ************************************************** * Residuals versus fitted PLOT01 ************************************************** scatter PEFR_res PEFR_hat /// , mco(blue) msym(O) yline(0) ylabel(-300(100)300) graph export Reg1_2_plot01.wmf,replace ************************************************** * Residuals versus independent variable PLOT02 ************************************************** scatter PEFR_res height /// , mco(blue) msym(O) yline(0) ylabel(-300(100)300) graph export Reg1_2_plot02.wmf,replace ************************************************** * Histgram and qqplot of the residuals * The two plotss are saved and combined into one graph * PLOT03 ************************************************** hist PEFR_res,norm freq name(p1, replace) qnorm PEFR_res,mco(black) name(p2, replace) graph combine p1 p2 graph export Reg1_2_plot03.wmf,replace ************************************************** * A new data set ************************************************** use gfrdata, clear ************************************************** * GFR versus Cr * Observe data and fitted lien PLOT04 ************************************************** twoway /// (scatter gfr cr, mco(blue) msym(O)) /// (lfit gfr cr,clpat(1)) /// ,legend(off) ytit("GFR (ml/min)") graph export Reg1_2_plot04.wmf,replace ************************************************* * Plot of diagnostics PLOT05 ************************************************* regress gfr cr predict gfr_hat1 if e(sample),xb predict gfr_res1 if e(sample),resid hist gfr_res1,norm freq name(p1, replace) qnorm gfr_res1,mco(black) name(p2, replace) scatter gfr_res1 gfr_hat1, mco(blue) msym(O) /// yline(0) ylabel(-100(50)100) name(p3, replace) scatter gfr_res1 cr, mco(blue) msym(O) /// yline(0) ylabel(-100(50)100) name(p4, replace) graph combine p1 p2 p3 p4 graph export Reg1_2_plot05.wmf,replace ************************************************** * GFR versus 1/Cr * ************************************************** generate invCr=1/cr label variable invCr " 1/Cr (100ml/mg) " twoway /// (scatter gfr invCr, mco(blue) msym(O)) /// (lfit gfr invCr,clpat(1)) /// ,legend(off) ytit("GFR (ml/min)") graph export Reg1_2_plot06.wmf,replace regress gfr invCr predict gfr_hat2 if e(sample),xb predict gfr_res2 if e(sample),resid hist gfr_res2,norm freq name(p1, replace) qnorm gfr_res2,mco(black) name(p2, replace) scatter gfr_res2 gfr_hat2, mco(blue) msym(O) /// yline(0) ylabel(-100(50)100) name(p3, replace) scatter gfr_res2 invCr, mco(blue) msym(O) /// yline(0) ylabel(-100(50)100) name(p4, replace) graph combine p1 p2 p3 p4 graph export Reg1_2_plot07.wmf,replace ************************************************** * back to the PEFR data set * Fitted values, standardized residuals and leverage ************************************************** use lung,clear regress PEFR height predict PEFR_hat if e(sample),xb predict PEFR_zres if e(sample),rstand predict PEFR_lev if e(sample),leve twoway /// (scatter PEFR_lev PEFR_zres if e(sample), mco(blue) msym(O)) /// (scatter PEFR_lev PEFR_zres if e(sample)&number==83, mco(red) msym(O)) /// , ylabel(0 (0.25)1) xlabel(-4(1)4) xline(0) legend(off) name(p2, replace) graph export Reg1_2_plot13.wmf,replace ************************************************** * analysis of PEFR data set excluding no 83 ************************************************** use lung,clear regress PEFR height if number!=83 predict PEFR_hat if e(sample),xb predict PEFR_zres if e(sample),rstandard hist PEFR_zres,norm freq name(p1, replace) qnorm PEFR_zres,mco(black) name(p2, replace) scatter PEFR_zres PEFR_hat /// , mco(blue) msym(O) yline(0) ylabel(-3(1)3) name(p3, replace) scatter PEFR_zres height /// , mco(blue) msym(O) yline(0) ylabel(-3(1)3) name(p4, replace) graph combine p1 p2 p3 p4 graph export Reg1_2_plot14.wmf,replace ************************************************** * A scatter plot with fitted line and PI ************************************************** twoway /// (scatter PEFR height, mco(blue) msym(O)) /// (lfitci PEFR height, stdf clpat(1) cip(rline) ) /// ,legend(off) ytit("PEFR (l/min)") graph export Reg1_2_plot15.wmf,replace log close