/*******************************************************
  Linear Regression Models for Panel Data using Stata

  Data: R&D Expenditure of IT firms (OECD 2002)
        Airline cost data (Greene 2003)

  Tested on Stata 11
  Created on June 21, 2004
  Last Modified on 09/05/2009; November 23, 2005
  Author: Hun Myoung Park (Jeeshim and KUCC625)
          kucc625 at indiana.edu
          http://www.masil.org and http://mypage.iu.edu/~kucc625
********************************************************/

cd c:\temp\stata
set more off
// Close any log left open by a previous (possibly aborted) run so that
// -log using- below does not fail; -capture- ignores the error when no log is open.
capture log close
log using panel.log, replace

//**************************************************************************
// Chapter 1
//**************************************************************************

use http://www.indiana.edu/~statmath/stat/all/panel/airline.dta, clear
list airline year load cost output fuel in 1/20, sep(20)

// Demonstrate long -> wide -> long reshaping of the panel
keep airline year load cost output fuel
reshape wide cost output fuel load, i(airline) j(year)
reshape long cost output fuel load, i(airline) j(year)

//**************************************************************************
// Chapter 2
//**************************************************************************

/***************************************************/
// R&D expenditure of IT firm (OECD 2002)
/***************************************************/

use http://www.indiana.edu/~statmath/stat/all/panel/rnd2002.dta, clear
format rnd income %10.2fc
list firm rnd income type d1 d2, noobs

// pooled regression
regress rnd income

// Dummy variable model
regress rnd income d1                   // include d1 only (d2 is the omitted dummy)
regress rnd income d2                   // include d2 only (d1 is the omitted dummy)
regress rnd income d1 d2, noconstant    // suppress the intercept

constraint 1 d1 + d2 = 0                // impose a restriction
cnsreg rnd income d1 d2, constraint(1)

// Figure 2.2
twoway ///
    || function y=1482.697+.223*x, range(0 2500) lpattern(solid) lwidth(thick) lcolor(maroon) lstyle(foreground) ///
    || function y=2140.205+.218*x, range(0 2500) lpattern(dash) lwidth(medthick) lcolor(blue) lstyle(foreground) ///
    || function y=1133.579+.218*x, range(0 2500) lpattern(dash) lwidth(medthick) lcolor(green) lstyle(foreground) ///
    ||, ylabel(0(500)2500) xlabel(0(500)2500) legend(off) ///
    ytitle(R&D (USD Millions)) xtitle(Income (USD Millions)) ///
    note(Source: OECD Information Technology Outlook 2004. http://thesius.sourceoecd.org/) ///
    title("2002 R&D Investment of OECD IT Firms", position(12))

/***************************************************/
// Cost of U.S. Airlines (Greene 2003)
/***************************************************/

use http://www.indiana.edu/~statmath/stat/all/panel/airline, clear
keep airline year cost0 output0 fuel0 load

// log transformation
gen cost=ln(cost0)
gen output=ln(output0)
gen fuel=ln(fuel0)

// create dummy variables
forvalues i= 1(1)6 { // create dummies for groups (g1-g6)
    gen byte g`i'=0
    replace g`i'=1 if airline==`i'
    tab g`i' airline
}

forvalues j= 1(1)15 { // create dummies for time (t1-t15)
    gen byte t`j'=0
    replace t`j'=1 if year==`j'
    tab t`j' year
}

label data "Cost of U.S. Airlines (Greene 2003)"
label var airline "Airline name"
label var year "Year"
label var output0 "Output in revenue passenger miles, index number"
label var cost0 "Total cost in $1000"
label var fuel0 "Fuel price"
// NOTE(review): cost, output and fuel are the natural logs created above,
// but they carry the same labels as the raw variables.
label var output "Output in revenue passenger miles, index number"
label var cost "Total cost in $1000"
label var fuel "Fuel price"
label var load "Load factor, the average capacity utilization of the fleet"

save "airline.dta", replace

//**************************************************************************
// Chapter 4
//**************************************************************************

use http://www.indiana.edu/~statmath/stat/all/panel/airline.dta, clear
describe airline year cost output fuel load
tsset airline year
xtsum cost output fuel load

use airline.dta, clear

/***************************************************/
// Pooled OLS: No group and time effect
regress cost output fuel load           // pooled regression
predict resid, resid                    // pooled residuals, used later in the LM tests

/***************************************************/
// One way fixed group effect model

// LSDV1: Without one dummy variable
regress cost g1-g5 output fuel load     // LSDV1 dropping g6
regress cost g2-g6 output fuel load     // LSDV1 dropping g1
xi: regress cost i.airline output fuel load

quietly regress cost g1-g5 output fuel load
test g1 g2 g3 g4 g5                     // test group effects

// LSDV2: Without an intercept
regress cost g1-g6 output fuel load, noc    // LSDV2

// LSDV3: With a restriction
constraint define 1 g1 + g2 + g3 + g4 + g5 +g6 = 0
cnsreg cost g1-g6 output fuel load, constraint(1)

/***************************************************/
// within effect model (group effect)
egen gm_cost=mean(cost), by(airline)    // compute group means
egen gm_output=mean(output), by(airline)
egen gm_fuel=mean(fuel), by(airline)
egen gm_load=mean(load), by(airline)

gen gw_cost = cost - gm_cost            // transform variables
gen gw_output = output - gm_output
gen gw_fuel = fuel - gm_fuel
gen gw_load = load - gm_load

regress gw_cost gw_output gw_fuel gw_load, noc  // within effect using .regress
predict ge, resid                       // within residuals, used in the random group effect section

quietly tsset airline year              // specify group and time
xtreg cost output fuel load, fe i(airline)  // within effect using .xtreg
areg cost output fuel load, absorb(airline)

save "airline.dta", replace

/***************************************************/
// Between effect model
xtreg cost output fuel load, be i(airline)  // between effect using .xtreg

// compute group means (replaces the data in memory with one row per airline)
collapse (mean) gm_cost=cost (mean) gm_output=output (mean) gm_fuel=fuel (mean) gm_load=load, by(airline)
list, sep(10) noobs                     // list group means of variables
regress gm_cost gm_output gm_fuel gm_load   // between effect using .regress

//**************************************************************************
// Chapter 5
//**************************************************************************

/***************************************************/
// One way fixed time effect model
/***************************************************/

use "airline.dta", clear

// LSDV1: Without one dummy variable
regress cost t1-t14 output fuel load
predict te, resid                       // time-effect LSDV residuals, used in the random time effect section
test t1 t2 t3 t4 t5 t6 t7 t8 t9 t10 t11 t12 t13 t14

// LSDV2: Without an intercept
regress cost t1-t15 output fuel load, noc

// LSDV3: With a restriction
constraint define 3 t1+t2+t3+t4+t5+t6+t7+t8+t9+t10+t11+t12+t13+t14+t15=0
cnsreg cost t1-t15 output fuel load, constraint(3)

save "airline.dta", replace

/***************************************************/
// within effect model
egen tm_cost = mean(cost), by(year)     // compute time means
egen tm_output = mean(output), by(year)
egen tm_fuel = mean(fuel), by(year)
egen tm_load = mean(load), by(year)

gen tw_cost = cost - tm_cost            // transform variables
gen tw_output = output - tm_output
gen tw_fuel = fuel - tm_fuel
gen tw_load = load - tm_load

save "airline.dta", replace

regress tw_cost tw_output tw_fuel tw_load, noc  // within effect using .regress

tsset year airline                      // treat year as the panel (group) variable
iis year
xtreg cost output fuel load, fe i(year) // within effect using .xtreg

/***************************************************/
// Between effect model
xtreg cost output fuel load, be i(year) // between effect using .xtreg

// compute time means (replaces the data in memory with one row per year)
collapse (mean) tm_cost=cost (mean) tm_output=output (mean) tm_fuel=fuel (mean) tm_load=load, by(year)
list, sep(10) noobs                     // list time means of variables
regress tm_cost tm_output tm_fuel tm_load   // between effect using .regress

//**************************************************************************
// Chapter 6
//**************************************************************************

/***************************************************/
// Two way fixed group and time effect model
/***************************************************/

use "airline.dta", clear

/***************************************************/
// LSDV1: Without two dummy variables
regress cost g1-g5 t1-t14 output fuel load  // LSDV1
predict gte, resid
test g1 g2 g3 g4 g5 t1 t2 t3 t4 t5 t6 t7 t8 t9 t10 t11 t12 t13 t14

// LSDV1 + LSDV2
regress cost g1-g5 t1-t15 output fuel load, noc
regress cost g1-g6 t1-t14 output fuel load, noc

// LSDV1 + LSDV3: Dropping a dummy and imposing a restriction
// The group restriction is defined as constraint 2 because the cnsreg calls
// below refer to constraint(2); it was previously (re)defined as constraint 1,
// leaving constraint 2 undefined in this script.
constraint define 2 g1 + g2 + g3 + g4 + g5 + g6 = 0
cnsreg cost g1-g6 t1-t14 output fuel load, constraint(2)

constraint define 3 t1+t2+t3+t4+t5+t6+t7+t8+t9+t10+t11+t12+t13+t14+t15=0
cnsreg cost g1-g5 t1-t15 output fuel load, constraint(3)

// LSDV3: With restrictions on both group and time dummies
// (uses constraints 2 and 3 as defined just above)
cnsreg cost g1-g6 t1-t15 output fuel load, constraint(2 3)

/***************************************************/
// Within Effect Model
egen m_cost = mean(cost)                // compute overall means
egen m_output = mean(output)
egen m_fuel = mean(fuel)
egen m_load = mean(load)

tabstat cost output fuel load, stat(mean)

// data transformation: subtract group and time means, add back the overall mean
gen w_cost = cost - gm_cost - tm_cost + m_cost
gen w_output = output - gm_output - tm_output + m_output
gen w_fuel = fuel - gm_fuel - tm_fuel + m_fuel
gen w_load = load - gm_load - tm_load + m_load

save "airline.dta", replace

regress w_cost w_output w_fuel w_load, noc  // within effect using .regress

sum cost output fuel load
sum cost output fuel load if airline==3
sum cost output fuel load if year==9

//**************************************************************************
// Chapter 7
//**************************************************************************

/***************************************************/
// One way random group effect model
/***************************************************/

use "airline.dta", clear

egen m_ge=mean(ge), by(airline)         // compute group error means
tab airline m_ge

gen d_ge2=(ge-m_ge)^2                   // compute squared deviations from the group error means
tabstat d_ge2, stat(sum)                // sum of the squared deviations
// this returns .2926229 that is equivalent to SSE of LSDV1 and the within effect model

di "sigma_error2: " .292622872/(6*15-6-3)                   // from SSE (ee) of LSDV1 (n*T-n-k)
di "sigma_between2: " .031675926/(6-4)- .00361263/15        // from SSE of the between effect
di "theta: " 1-sqrt((.292622872/(6*15-6-3))/(15*(.031675926/(6-4))))
// We get the theta of .87668488

gen rg_cost = cost - .87668488*gm_cost  // transform variables
gen rg_output = output - .87668488*gm_output
gen rg_fuel = fuel - .87668488*gm_fuel
gen rg_load = load - .87668488*gm_load
gen rg_int = 1 - .87668488              // for the intercept

save "airline.dta", replace

regress rg_cost rg_int rg_output rg_fuel rg_load, noc   // one-way random group effect

iis airline
xtreg cost output fuel load, re theta
xttest0

xtmixed cost output fuel load || airline:,
xtreg cost output fuel load, re mle
xtmixed cost output fuel load || airline:, mle

xtgls cost output fuel load, i(airline) panels(hetero) corr(independent)

// testing one-way random group effect (LM test computed by hand from pooled residuals)
collapse (mean) gm_resid=resid, by(airline)
gen gm_resid2=gm_resid^2
list, sep(10) noobs                     // list group means of the pooled residuals
tabstat gm_resid2, stat(sum)            //.0665147
di (6*15)/(2*(15-1))*(15^2*.0665147/1.33544153-1)^2        // 334.84958
di chi2tail(1, 334.84958)               // .0000

use "airline.dta", clear
quietly xtreg cost output fuel load, re i(airline)
xttest0

/***************************************************/
// One way random time effect model
// ================================================

use "airline.dta", clear

egen m_te=mean(te), by(year)            // compute time error means
tab year m_te                           // m_te varies by year, so tabulate it against year

gen d_te2=(te-m_te)^2                   // compute squared deviations from the time error means
tabstat d_te2, stat(sum)                // sum of the squared deviations
// this returns 1.08819 that is equivalent to SSE of LSDV1 and the within effect model

di "sigma_error2: " 1.08819022/(6*15-15-3)                  // from SSE (ee) of LSDV1 (n*T-T-k)
di "sigma_between2: " .005590631/(15-4)- .01511375/6        // from SSE of the between effect
di "theta: " 1-sqrt((1.08819022/(6*15-15-3))/(6*(.005590631/(15-4))))
// We get the theta of -1.226263

gen rt_cost = cost - (-1.226263)*tm_cost    // transform variables
gen rt_output = output - (-1.226263)*tm_output
gen rt_fuel = fuel - (-1.226263)*tm_fuel
gen rt_load = load - (-1.226263)*tm_load
gen rt_int = 1 - (-1.226263)            // for the intercept

save "airline.dta", replace

regress rt_cost rt_int rt_output rt_fuel rt_load, noc   // one-way random time effect

tsset year airline
xtreg cost output fuel load, re i(year) theta

xtmixed cost output fuel load || airline: || year:, mle

// testing one-way random time effect (LM test computed by hand from pooled residuals)
collapse (mean) tm_resid=resid, by(year)
gen tm_n_r2=(6*tm_resid)^2
list, sep(20) noobs                     // list time means of the pooled residuals
tabstat tm_n_r2, stat(sum)              //.7817371
di (15*6)/(2*(6-1))*(.7817371/1.33544153-1)^2              // 1.5472082
di chi2tail(1, 1.5472121)               // .21354748

use "airline.dta", clear
quietly xtreg cost output fuel load, re i(year)
xttest0

/***************************************************/
// Hausman Test
/***************************************************/
tsset airline year
quietly xtreg cost output fuel load, fe
estimates store fixed_group
quietly xtreg cost output fuel load, re
hausman fixed_group .
/***************************************************/
// Poolability Test
// ================================================
// F test of the pooled model against separate regressions, first by group
// (airline) and then by time (year). 1.33544153 is the hard-coded SSE used
// for the pooled model; $sse accumulates e(rss) of the separate regressions.
// Each regression estimates 4 parameters (intercept, output, fuel, load).

use "airline.dta", clear

regress cost output fuel load           // pooled OLS

global sse = 0
forvalues i= 1(1)6 { // run group by group regression
    display "OLS regression for group " `i'
    regress cost output fuel load if airline==`i'
    global sse = $sse + e(rss)
}

//di .006798918 + .007587838 + .022869767 + .034752343 + .012986435 + .015663323
di $sse

//di (1.33544153-.10065862)/((6-1)*4)/(.10065862)*(6*(15-4))
di (1.33544153-$sse)/((6-1)*4)/($sse)*(6*(15-4))
di "df1: " (6-1)*4 " df2: " 6*(15-4)
di Ftail(20,66, 40.481219)

global sse = 0
forvalues t= 1(1)15 { // run time by time regression
    display "OLS regression for time " `t'
    regress cost output fuel load if year==`t'
    global sse = $sse + e(rss)
}

/*
di .044807673 + .023093978 + .016506613 + .012170358 + .014104542 + ///
   .000469826 + .063648817 + .085430285 + .049329439 + .077112957 + ///
   .029913538 + .087240016 + .143348297 + .066075346 + .037256216
*/
di $sse

//di (1.33544153-.7505079)/((15-1)*4)/(.7505079)*(15*(6-4))
di (1.33544153-$sse)/((15-1)*4)/($sse)*(15*(6-4))
// Numerator df is (T-1)*k = (15-1)*4 = 56, matching the divisor in the F
// statistic above (it was previously shown and tested as (15-1)*6 = 84).
di "df1: " (15-1)*4 " df2: " 15*(6-4)
di Ftail(56,30, .41752699)

log close