version 8 capture log close set matsize 500 set more off set scheme s2mono log using st8ch4, text replace // * // * RM4STATA Ch 4: Models for Binary Outcomes - 5/26/2003 // * // * Section 4.2: estimation using -logit- and -probit- use binlfp2, clear desc lfp k5 k618 age wc hc lwg inc summarize lfp k5 k618 age wc hc lwg inc logit lfp k5 k618 age wc hc lwg inc, nolog estimates store logit probit lfp k5 k618 age wc hc lwg inc, nolog estimates store probit estimates table logit probit, b(%9.3f) t label varwidth(30) // * Section 4.2.1: predicting perfectly (using artificial data) use science2, clear gen college = 1-mmale gen vote = pub1>10 tab vote college logit vote college phd, nolog // * Section 4.3.1: testing individual coefficients use binlfp2, clear logit lfp k5 k618 age wc hc lwg inc, nolog * Wald test test k5 display sqrt(55.14) * LR test logit lfp k5 k618 age wc hc lwg inc, nolog estimates store fmodel logit lfp k618 age wc hc lwg inc, nolog estimates store nmodel lrtest fmodel nmodel // * Section 4.3.2: testing multiple coefficients logit lfp k5 k618 age wc hc lwg inc, nolog * Wald tests test hc wc test hc=wc * LR tests logit lfp k5 k618 age wc hc lwg inc, nolog estimates store fmodel logit lfp k5 k618 age lwg inc, nolog estimates store nmodel lrtest fmodel nmodel logit lfp, nolog estimates store intercept_only lrtest fmodel intercept_only // * Section 4.4: residuals and influence using -predict- * create artificial data clear set seed 315 set obs 34 gen y = (uniform()*4) in 1/30 gen x = (4 - (y)) + (uniform()) in 1/30 * change one point replace y = 9 in 31 sum x sca meanx = r(mean) replace x = meanx in 31 * compute the regression reg y x * change two points replace x = 0 in 32 replace x = 5 in 33 * compute predictions predict yhat in 32/33 predict res in 32/33, res gen y3 = 9 in 31 set textsize 150 drop yhat res predict yhat predict res , res local yout = y[31] local xout = x[31] * graph outlier that is not influential graph twoway (scatter y x) (line yhat x) /// (scatteri `yout' `xout' (3) "outlier"), /// title("Large outlier that is not influential") /// legend( order( 2 ) label(2 "Regression line") ) /// ytitle("y") xlabel( 0 5 10) name(reg1, replace) * change the data replace y = 6 in 31 replace x = 29.5 in 31 replace x = 30 in 34 local yout = y[31] local xout = x[31] * compute the regression reg y x predict yhat2 replace yhat2 = . in 2/31 * graph the results graph twoway (scatter y x) (line yhat2 x, sort clpattern(dash)) /// (line yhat x if x<25, sort) /// (scatteri `yout' `xout' (9) "influential observation ", /// msymbol(S) mlabgap(*3)), /// title("Smaller outlier that is influential") /// legend( order( 2 3) label(2 "New Regression") /// label(3 "Old regression") ) ytitle("y") name(reg2,replace) graph combine reg1 reg2, col(1) ysize(4.31) xsize(3.287) iscale(*.9) graph export 04residstata.eps, replace // * Section 4.4.1: residuals use binlfp2, clear logit lfp k5 k618 age wc hc lwg inc, nolog predict rstd, rs label var rstd "Standardized Residual" sort inc, stable generate index = _n label var index "Observation Number" * index plot of std pearson residuals graph twoway scatter rstd index, xlabel(0(200)800) ylabel(-4(2)4) /// xtitle("Observation Number") yline(0) msymbol(Oh) /// ysize(2.7051) xsize(4.0413) graph export 04rstd.eps , replace * index plot of std pearson residuals labeled with the index graph twoway scatter rstd index, xlabel(0(200)800) ylabel(-4(2)4) /// xtitle("Observation Number") yline(0) /// msymbol(none) mlabel(index) mlabposition(0) /// ysize(2.7051) xsize(4.0413) graph export 04rstdcase.eps , replace * list a single point list in 142, noobs * list large outliers list rstd index if rstd>2.5 | rstd<-2.5 // * Section 4.4.2: influential cases predict cook,dbeta label var cook "Cook's Statistic" graph twoway scatter cook index, xlabel(0(200)800) ylabel(0(.1).3) /// xtitle("Observation Number") yline(.1 .2) /// msymbol(none) mlabel(index) mlabposition(0) /// ysize(2.7051) xsize(4.0413) graph export 04cookcase.eps , replace // * Section 4.5: scalar measures of fit quietly logit lfp k5 k618 age wc hc lwg inc, nolog estimates store model1 quietly fitstat, save gen agesq = age*age logit lfp k5 age agesq wc inc, nolog estimates store model2 estimates table model1 model2, b(%9.3f) t fitstat, dif // * Section 4.6.1: predicted probabilities with predict * predictions from -logit- use binlfp2, clear logit lfp k5 k618 age wc hc lwg inc, nolog predict prlogit summarize prlogit label var prlogit "Logit: Pr(lfp)" dotplot prlogit, ylabel(0(.2)1) /// ysize(2.7051) xsize(4.0413) graph export 04dotpredict.eps, replace * comparing -logit- and -probit- predictions use binlfp2, clear logit lfp k5 k618 age wc hc lwg inc, nolog predict prlogit label var prlogit "Logit: Pr(lfp)" probit lfp k5 k618 age wc hc lwg inc, nolog predict prprobit label var prprobit "Probit: Pr(lfp)" pwcorr prlogit prprobit * graphing predicted probabilities from -logit- and -probit- graph twoway scatter prlogit prprobit, /// xlabel(0(.25)1) ylabel(0(.25)1) /// xline(.25(.25)1) yline(.25(.25)1) /// plotregion(margin(zero)) msymbol(Oh) /// ysize(4.0413) xsize(4.0413) graph export 04logitprobit.eps, replace // * Section 4.6.2: individual predicted probabilities with -prvalue- logit lfp k5 k618 age wc hc lwg inc, nolog * young, low income, low education families with young children. prvalue, x(age=35 k5=2 wc=0 hc=0 inc=15) rest(mean) * highly education families with no children at home. prvalue, x(age=50 k5=0 k618=0 wc=1 hc=1) rest(mean) * an average person prvalue, rest(mean) // * Section 4.6.3: tables of predicted probabilities with -prtab- logit lfp k5 k618 age wc hc lwg inc, nolog * using -prvalue- prvalue, x(k5=0 wc=0) rest(mean) brief prvalue, x(k5=1 wc=0) rest(mean) brief prvalue, x(k5=2 wc=0) rest(mean) brief prvalue, x(k5=3 wc=0) rest(mean) brief * or use -prtab- prtab k5 wc, rest(mean) * compute the differences: di 0.6069-0.7758 di 0.2633-0.4449 di 0.0764-0.1565 di 0.0188-0.0412 // * Section 4.6.4: graphing predicted probabilities with -prgen- * compute predictions at different ages * age 30 prgen inc, from(0) to(100) generate(p30) x(age=30) rest(mean) n(11) label var p30p1 "Age 30" * age 40 prgen inc, from(0) to(100) generate(p40) x(age=40) rest(mean) n(11) label var p40p1 "Age 40" * age 50 prgen inc, from(0) to(100) generate(p50) x(age=50) rest(mean) n(11) label var p50p1 "Age 50" * age 60 prgen inc, from(0) to(100) generate(p60) x(age=60) rest(mean) n(11) label var p60p1 "Age 60" * -list- and -graph- predictions list p30p1 p40p1 p50p1 p60p1 p60x in 1/11 graph twoway connected p30p1 p40p1 p50p1 p60p1 p60x, /// ytitle("Pr(In Labor Force)") ylabel(0(.25)1) /// xtitle("Income") /// ysize(2.7051) xsize(4.0413) graph export 04ageincome.eps, replace * another example of -prgen- comparing those who do and do not attend college // This example is not in the book prgen age, from(30) to(60) generate(wc1) x(wc=1) rest(mean) n(13) label var wc1p1 "Attended College" prgen age, from(30) to(60) generate(wc0) x(wc=0) rest(mean) n(13) label var wc0p1 "Did Not Attend College" graph twoway connected wc1p1 wc0p1 wc1x, /// xtitle("Age") ytitle("Pr(In Labor Force)") /// ylabel(0(.25)1) xlabel(30(10)60) // * Section 4.6.5: changes in predicted probabilities * discrete change with -prchange- prchange age, x(wc=1 age=40) mfx compute, at(wc=1 age=40) prchange, help prchange k5 age wc lwg inc, fromto * discrete change using -prvalue- prvalue, x(age=30) save brief prvalue, x(age=40) dif brief * discrete change using -prchange- with -delta- and -uncentered- options prchange age, x(age=30) uncentered delta(10) rest(mean) brief // * Section 4.7: odds ratios using -listcoef- listcoef, help listcoef, reverse listcoef, percent capture log close