/************************************************************************************** Stata script for "Regression Models for Binary Dependent Variables" and http://www.indiana.edu/~statmath/stat/all/cdvm/index.html Created on 09/17/2005 Last Modified on September 15, 2009 Written by Hun Myoung Park, Ph.D. Statistical Software Analyst and Programmer UITS Center for Statistical and Mathematical Computing, Indiana University 535 West Michigan Street IT417 Indianapolis, IN 46202 317-274-0573 kucc625@indiana.edu http://www.masil.org University Information Technology Services, Indiana University Center for Statistical and Mathematical Computing http://www.indiana.edu/~statmath First, change the default directory **************************************************************************************/ cd c:\temp\cdvm net from http://www.indiana.edu/~jslsoc/stata/ net install spost9_ado, replace net get spost9_do, replace use http://www.indiana.edu/~statmath/stat/all/cdvm/gss_cdvm.dta, clear sum trust belief educate income age male www, sep(10) tab trust male, miss tab trust www, miss tab male www, miss tab belief male, miss tab belief www, miss // Binary logit model use http://www.indiana.edu/~statmath/stat/all/cdvm/gss_cdvm.dta, clear logistic trust educate income age male www logit logit trust educate income age male www predict resid, residual test educate mfx, dydx at(mean educate=16 male=0 www=1) fitstat di 2*(-326.755 - (-357.926)) // factor changes in the odds listcoef, help listcoef, reverse listcoef, percent help // predicted probability prtab male www, x(educate=16 male=0 www=1) rest(mean) prvalue, x(educate=16 male=0 www=1) rest(mean) // marginal effects and discrete changes prchange, x(educate=16 male=0 www=1) rest(mean) // Draw a plot of predicted probability // use http://www.indiana.edu/~statmath/stat/all/cdvm/gss_cdvm.dta, clear quietly logit trust educate income age male www global points = 20 global group1 = "Logit_ed11" global group2 = "Logit_ed10" global group3 = "Logit_ed01" global group4 = "Logit_ed00" prgen educate, from(0) to(20) ncases($points) x(male=1 www=1) rest(mean) gen($group1) prgen educate, from(0) to(20) ncases($points) x(male=1 www=0) rest(mean) gen($group2) prgen educate, from(0) to(20) ncases($points) x(male=0 www=1) rest(mean) gen($group3) prgen educate, from(0) to(20) ncases($points) x(male=0 www=0) rest(mean) gen($group4) rename ${group1}x x gen bygroup = 1 if x !=. replace bygroup = 0 if (_n > $points) & (x== .) replace x = x[_n-$points] if _n > $points forvalues v = 1/4 { gen cat`v' = ${group`v'}p1 } drop if _n > $points*2 gen y1 = cat1 gen y2 = cat3 replace y1=cat2[_n-$points] if y1==. replace y2=cat4[_n-$points] if y2==. label define bygroup 1 "WWW Users" 0 "WWW Non-users" label values bygroup bygroup label var y1 "Men" label var y2 "Women" label var x "Education (Years)" global min = 0 global max = 1 global step = .2 twoway /// (connected y1 x, lcolor(maroon) lpattern(solid) lwidth(medthick) connect(direct) msymbol(dh) mcolor(maroon) ) /// (connected y2 x, lcolor(blue) lpattern(solid) lwidth(medium) connect(direct) msymbol(X) mcolor(blue) ) if _n<=40, /// by(bygroup) xscale(range(0 20)) xlabel(1(2)20, grid) yscale(range($min $max)) yline(.5) ylabel($min($step)$max, grid) /// legend(region(lpattern(blank))) xtitle("Education (Years)") ytitle("Predicted Probabilities") // Binary probit model use http://www.indiana.edu/~statmath/stat/all/cdvm/gss_cdvm.dta, clear probit trust educate income age male www di _pi/sqrt(3)*.0907207 di 1.7*.0907207 probit trust educate income age male www, robust fitstat // standardized effects listcoef, help // marginal effects and discrete changes mfx, at(mean educate=16 male=0 www=1) prchange, x(educate=16 male=0 www=1) rest(mean) // predicted probabilities prtab male www, x(educate=16 male=0 www=1) rest(mean) prvalue, x(educate=16 male=0 www=1) rest(mean) // Draw a plot of predicted probabilities //use http://www.indiana.edu/~statmath/stat/all/cdvm/gss_cdvm.dta, clear probit trust educate income age male www global points = 20 global group1 = "Probit_ed11" global group2 = "Probit_ed10" global group3 = "Probit_ed01" global group4 = "Probit_ed00" prgen educate, from(0) to(20) ncases($points) x(male=1 www=1) rest(mean) gen($group1) prgen educate, from(0) to(20) ncases($points) x(male=1 www=0) rest(mean) gen($group2) prgen educate, from(0) to(20) ncases($points) x(male=0 www=1) rest(mean) gen($group3) prgen educate, from(0) to(20) ncases($points) x(male=0 www=0) rest(mean) gen($group4) rename ${group1}x x gen bygroup = 1 if x !=. replace bygroup = 0 if (_n > $points) & (x== .) replace x = x[_n-$points] if _n > $points forvalues v = 1/4 { gen cat`v' = ${group`v'}p1 // replace cat`v'=${group2}s`v'[_n-$points] if cat`v'==. } drop if _n > $points*2 gen y1 = cat1 gen y2 = cat3 replace y1=cat2[_n-$points] if y1==. replace y2=cat4[_n-$points] if y2==. label define bygroup 1 "WWW Users" 0 "WWW Non-users" label values bygroup bygroup label var y1 "Men" label var y2 "Women" label var x "Education (Years)" global min = 0 global max = 1 global step = .2 twoway /// (connected y1 x, lcolor(maroon) lpattern(solid) lwidth(medthick) connect(direct) msymbol(dh) mcolor(maroon) ) /// (connected y2 x, lcolor(blue) lpattern(solid) lwidth(medium) connect(direct) msymbol(X) mcolor(blue) ) if _n<=40, /// by(bygroup) xscale(range(0 20)) xlabel(1(2)20, grid) yscale(range($min $max)) yline(.5) ylabel($min($step)$max, grid) /// legend(region(lpattern(blank))) xtitle("Education (Years)") ytitle("Predicted Probabilities") // Bivariate probit model use http://www.indiana.edu/~statmath/stat/all/cdvm/gss_cdvm.dta, clear quietly probit www educate income age male biprobit (trust = educate income age male) (www = educate income age male) // to get AIC and BIC estat ic // conditional marginal effects mfx, predict(pcond1) at(mean educate=16 male=0 www=1) // marginal effects mfx, predict(pmarg1) at(mean educate=16 male=0 www=1) // recursive bivariate probit model biprobit (trust = educate income age male www) (www = educate income age male) // to get AIC and BIC estat ic // conditional marginal effects mfx, predict(pcond1) at(mean educate=16 male=0 www=1) // marginal effects mfx, predict(pmarg1) at(mean educate=16 male=0 www=1) mfx, predict(pmarg2) at(mean educate=16 male=0 www=1) // compute direct and indirect effects manually biprobit (trust = educate income age male www) (www = educate income age male) global rho=e(rho) // correlation coefficient of disturbances global n1 = 6 // the number of parameters in equation 1 global n2 = 5 // the number of parameters in equation 2 // get the reference points //tabstat $rhs, stat(mean) col(variable) save tabstat $rhs, stat(mean) col(stat) save matrix ref1 = r(StatTotal),I(1) // reference points for equation 1 //matrix ref1[1,1]=16 // education (college graduation) //matrix ref1[1,4]=0 // female //matrix ref1[1,5]=1 // WWW use matrix ref0 = ref1[1,1..$n2] // for display matrix ref2 = ref1[1,1..$n2] // reference points for equation 2 matrix ref2[1,$n2]=1 // get parameter estimates matrix b0=e(b) matrix b1=b0[1,1..$n1] // parameter estimates for equation 1 matrix b2=b0[1,$n1+1..$n1+$n2] // parameter estimates for equation 2 matrix xb1=b1*ref1' // compute xb1 of equation 1 matrix xb2=b2*ref2' // compute xb2 of equation 2 global xb1=xb1[1,1] // put xb1 into a global macro for computation global xb2=xb2[1,1] // put xb1 into a global macro for computation // compute the predicted probability at the reference points di binormal($xb1, $xb2, $rho)/normal($xb2) // compute direct effects global g1=normalden($xb1)*normal(($xb2-($rho)*$xb1)/sqrt(1-($rho)^2)) matrix directE=$g1/normal($xb2)*b1 matrix directE=directE[1,1..$n2] // compute indirect effects global g2=normalden($xb2)*normal(($xb1-($rho)*$xb2)/sqrt(1-($rho)^2)) matrix indirectE=($g2/normal($xb2)-(binormal($xb1,$xb2,$rho)*normalden($xb2))/(normal($xb2)^2))*b2 matrix indirectE[1,$n2]=0 // compute overall effects matrix Overall=directE+indirectE //mfx, predict(pcond1) at(mean www=1) //mfx, predict(pcond1) at(mean www=0) // to compute discrete changes // list the location of binary variables foreach bin of numlist 4 5 { matrix ref10=ref1 matrix ref10[1,`bin']=0 matrix ref11=ref1 matrix ref11[1,`bin']=1 matrix ref20=ref2 if `bin' != 5 { // if the endogenous variable, do not change the value matrix ref20[1,`bin']=0 } matrix ref21=ref2 if `bin' != 5 { // if the endogenous variable, do not change the value matrix ref21[1,`bin']=1 } matrix xb10=b1*ref10' // compute xb1 of equation 1 when d=0 matrix xb20=b2*ref20' // compute xb2 of equation 2 when d=0 matrix xb11=b1*ref11' // compute xb1 of equation 1 when d=1 matrix xb21=b2*ref21' // compute xb2 of equation 2 when d=1 global xb10=xb10[1,1] // put xb10 to a global macro xb10 global xb20=xb20[1,1] global xb11=xb11[1,1] global xb21=xb21[1,1] //di binormal($xb11, $xb21, $rho)/normal($xb21) // predicted probability when d=1 //di binormal($xb10, $xb20, $rho)/normal($xb20) // predicted probability when d=0 // discrete changes matrix dchange = binormal($xb11, $xb21, $rho)/normal($xb21)- binormal($xb10, $xb20, $rho)/normal($xb20) //di binormal($xb11, $xb21, $rho)/normal($xb21)- binormal($xb10, -($xb21), -($rho))/(1-normal($xb21)) matrix Overall[1,`bin']=dchange } // foreach // join matrices vertically matrix Marginal=ref0\directE\indirectE\Overall // add row and column names matrix coleq Marginal="" // remove equation names matrix colnames Marginal = Education Income Age Male WWW matrix rownames Marginal = Reference Direct Indirect Overall // display marginal effects and discrete changes (direct and indirect effects) matrix list Marginal // to compare with LIMDEP (Section 4.4) mfx, predict(pcond1) at(mean male=.450596 www=.785349) // end of Stata script