#delimit ;

***********************************************************************************

this is the stata file that makes the answers for problem set 2

february 18, 2014
february 19, 2014
february 20, 2014
february 21, 2014
march 16, 2014
march 19, 2014
march 24, 2014
february 12, 2015
february 13, 2015
march 18, 2015

ps2_2015v03x.do

***********************************************************************************;

***********************************************************************************
****** 1. iv: ipums-census data *******************************************************
***********************************************************************************;

* every command and every star comment in this file ends with a semicolon
  because of the delimit setting on the first line *;

****** A. prep stuff **********************************************************;

clear all;
pause on;
set more off;

* set todays date;
* dateo is a user-written command found through the extra ado path below.
  NOTE(review) it presumably defines the global named date that the output
  file names in this file rely on -- confirm against the ado file *;
adopath ++ /home/lfbrooks/ado;
dateo;

****** B. load data ***********************************************************;

* switch for which sample we use *;
* big   = load the full extract, draw a random subsample, save it as the small file
  small = load the previously saved small file *;
local sample big;
* local sample small;

* load the big sample *;
if "`sample'" == "big" {;

  * load census data, keeping only variables that are of interest *;
  use /groups/brooksgrp/census/1980census/ipums/pppa6022_spring2014/c1980_ipums_20140220;

  * take a smaller random sample *;
  * fix the seed first so that the draw, and therefore the saved small
    dataset, is identical on every run instead of changing each time *;
  set seed 20140220;
  gen double rand_samp = runiform();
  * keeps the observations whose uniform draw is above 0.9 *;
  keep if rand_samp > 0.9;

  * save this as a junk dataset to load *;
  save /groups/brooksgrp/census/1980census/ipums/pppa6022_spring2014/c1980_ipums_20140220_small, replace;

};

if "`sample'" == "small" {;

  * bring in the smaller data *;
  use /groups/brooksgrp/census/1980census/ipums/pppa6022_spring2014/c1980_ipums_20140220_small;

};

/*

****** C.
set up data *********************************************************;

* NOTE this whole section sits inside a block comment and does not run *;

** keep as in A and K **;
drop if birthyr > 1959;
drop if birthyr < 1930;

* drop women *;
drop if sex == 2;

tab educ;
tab educd;

* make birth decade markers *;
* cohorts born 1950 to 1959 survive the drops above and keep b_decade equal to 0 *;
gen b_decade = 0;
replace b_decade = 1930 if birthyr >= 1930 & birthyr < 1940;
replace b_decade = 1940 if birthyr >= 1940 & birthyr < 1950;

* make in smsa marker *;
gen in_smsa = 0;
replace in_smsa = 1 if metro == 2 | metro == 3 | metro == 4;

* make marital status indicator *;
gen married = 0;
replace married = 1 if marst == 1 | marst == 2;

* make quarterly dummies to match a & k's table *;
tab birthq, gen(bqdum);

* make birth year dummies for 1930s *;
tab birthyr if b_decade == 1930, gen(bydum);

* make log of wages *;
* this is going to effectively drop people with zero wages *;
* this is quite a few observations *;
gen ln_incwage = ln(incwage);

* make interaction of quarter*birth year *;
* eqdum collects the interaction names separated by equals signs so the
  first stage can hand the whole list to the test command *;
local eqdum "";
forvalues q=1/3 {;
  forvalues y=1/9 {;
    gen bdum_q`q'y`y' = bqdum`q'*bydum`y';
    local eqdum "`eqdum' bdum_q`q'y`y' =";
  };
};

* make age squared *;
gen age2 = age*age;

** make education into a continuous variable **;
* start from missing rather than zero so that any educ value outside 0-11
  (or a missing educ) is left out instead of being counted as zero years *;
gen yrs_educ = .;
replace yrs_educ = 0  if educ == 0;
replace yrs_educ = 4  if educ == 1;
replace yrs_educ = 8  if educ == 2;
replace yrs_educ = 9  if educ == 3;
replace yrs_educ = 10 if educ == 4;
replace yrs_educ = 11 if educ == 5;
replace yrs_educ = 12 if educ == 6;
replace yrs_educ = 13 if educ == 7;
replace yrs_educ = 14 if educ == 8;
replace yrs_educ = 15 if educ == 9;
replace yrs_educ = 16 if educ == 10;
replace yrs_educ = 17 if educ == 11;

** other sample limits **;
* NOTE(review) reg_keep is built here but no regression in the visible
  code conditions on it -- confirm whether it was meant to be used *;
gen reg_keep = 1;
replace reg_keep = 0 if incwage < 0;

* keep only positive weeks worked and positive wage and salary -- but i didnt get weeks worked *;

****** D.
Table 1, first two rows ***************************************************;

* NOTE this whole section sits inside a block comment and does not run *;

sort b_decade;
by b_decade: summ yrs_educ;

* education on quarter of birth dummies, one regression per birth decade.
  the joint F test and its p-value are attached to each stored result *;
regress yrs_educ bqdum1-bqdum3 if b_decade == 1930;
test bqdum1=bqdum2=bqdum3=0;
estadd scalar f_qs_eq = r(F);
estadd scalar p_qs_eq = r(p);
estimates store t1a;

regress yrs_educ bqdum1-bqdum3 if b_decade == 1940;
test bqdum1=bqdum2=bqdum3=0;
estadd scalar f_qs_eq = r(F);
estadd scalar p_qs_eq = r(p);
estimates store t1b;

****** E. first stage ***************************************************************;

* set covariates *;
local covs "in_smsa married i.region age age2";

* quarters only *;
xi: regress yrs_educ `covs' if b_decade == 1930 & ln_incwage != .;
estimates store fs1nbq;

xi: regress yrs_educ `covs' bqdum1-bqdum3 if b_decade == 1930 & ln_incwage != .;
* NOTE(review) this tests that the three quarter coefficients are equal to
  each other, whereas Table 1 above tests that they are all zero. confirm
  which one the F test row in the output table is meant to report *;
test bqdum1=bqdum2=bqdum3;
estadd scalar f_qs_eq = r(F);
estadd scalar p_qs_eq = r(p);
predict xhat1, xb;
estimates store fs1;

* quarters * year of birth *;
xi: regress yrs_educ `covs' if b_decade == 1930 & ln_incwage != .;
estimates store fs2nbq;

xi: regress yrs_educ `covs' bdum_q*y* if b_decade == 1930 & ln_incwage != .;
predict xhat2, xb;
test `eqdum'=0;
estadd scalar f_qs_eq = r(F);
estadd scalar p_qs_eq = r(p);
estimates store fs2;

****** F. second stage **************************************************************;

* just quarters as instruments *;
* version a plugs in the first stage fitted values by hand, version b lets
  ivregress do both stages *;
regress ln_incwage `covs' xhat1 if b_decade == 1930 & ln_incwage != .;
estimates store ss1a;

ivregress 2sls ln_incwage `covs' (yrs_educ=bqdum1-bqdum3) if b_decade == 1930 & ln_incwage != .;
estimates store ss1b;

* quarters*years of birth *;
* use the fitted values and the instruments from the quarter by birth year
  first stage. the previous version repeated xhat1 and bqdum1-bqdum3 here,
  which made ss2a and ss2b exact copies of ss1a and ss1b *;
regress ln_incwage `covs' xhat2 if b_decade == 1930 & ln_incwage != .;
estimates store ss2a;

ivregress 2sls ln_incwage `covs' (yrs_educ=bdum_q*y*) if b_decade == 1930 & ln_incwage != .;
estimates store ss2b;

****** G.
output results **************************************************************;

estout * using "/home/lfbrooks/pppa6022/2015/stataout/problem_set_2/${date}_prob2_ivregs.txt", replace
  varwidth(12)
  varlabels(_cons Constant)
  cells(b(star fmt(%12.3f)) se(par fmt(%12.3f)))
  stats(r2 N f_qs_eq p_qs_eq,
        fmt(%9.3f %9.0g %9.3f %9.3f)
        labels("R-squared" "Observations" "F test: instruments" "p-value of F test"));

estimates clear;

*/

***********************************************************************************
****** 2. regression discontinuity ************************************************
***********************************************************************************;

****** A. load and clean data *****************************************************;

* start from a clean session and load the 1940 person-level extract *;
clear all;
set more off;
pause on;
dateo;

* these data are created in /home/lfbrooks/pppa6022/2015/stataprg/problem_set_2/usa_00006.do;
use /home/lfbrooks/pppa6022/2015/datasets/ipums1940_20150212;

** make education into a continuous variable **;
* start from missing rather than zero so that any educ value outside 0-11
  (or a missing educ) drops out of the means computed below instead of
  being counted as zero years of schooling *;
gen yrs_educ = .;
replace yrs_educ = 0  if educ == 0;
replace yrs_educ = 4  if educ == 1;
replace yrs_educ = 8  if educ == 2;
replace yrs_educ = 9  if educ == 3;
replace yrs_educ = 10 if educ == 4;
replace yrs_educ = 11 if educ == 5;
replace yrs_educ = 12 if educ == 6;
replace yrs_educ = 13 if educ == 7;
replace yrs_educ = 14 if educ == 8;
replace yrs_educ = 15 if educ == 9;
replace yrs_educ = 16 if educ == 10;
replace yrs_educ = 17 if educ == 11;

** replace incwage missing code with missing *;
* NOTE(review) only 999999 is recoded here. the ipums codebook may define a
  second special code (999998) for incwage -- confirm and recode it too if so *;
replace incwage = . if incwage == 999999;
summ incwage, detail;

****** B.
make regression discontinuity charts ************************************;

* the person-level data is preserved here and is still preserved when this
  section ends. section C issues the matching restore *;
preserve;

* find average years of education and wage by birthplace state birth year and sex *;
collapse (mean) yrs_educ incwage, by(bpl birthyr sex);
sort bpl birthyr sex;
save /home/lfbrooks/pppa6022/2015/datasets/${date}_mnbpl, replace;

* find average years of education and wage but w/o college or more *;
* NOTE(review) the filter below keeps people with yrs_educ below 10, which
  is a much lower cutoff than the no-college wording suggests. confirm the
  intended threshold before relying on the _noc charts *;
restore;
preserve;
collapse (mean) yrs_educ_noc=yrs_educ incwage_noc = incwage if yrs_educ < 10, by(bpl birthyr sex);
sort bpl birthyr sex;

* merge two datasets together *;
* after this the data in memory has one row per birthplace, birth year and
  sex with both the overall and the _noc means *;
merge 1:1 bpl birthyr sex using /home/lfbrooks/pppa6022/2015/datasets/${date}_mnbpl;

* set up for graphs *;
graph set eps orientation landscape;

* program to run graphs *;
* graphit draws one scatter of a cell mean against birth year for a single
  birthplace and exports it as eps.
    stab    short state tag used in the output file name
    stnum   bpl code of the state to plot
    styear  x-axis position of the vertical reference line
    stname  state name, shown as the chart title
    dv      variable to plot on the y axis *;
capture program drop graphit;
program define graphit;

  syntax, stab(string) stnum(string) styear(string) stname(string) dv(string);

  * both series plot the same variable, so without explicit legend labels
    the two legend entries are identical. sex == 2 is treated as women
    elsewhere in this file (presumably sex == 1 is men -- confirm) *;
  graph twoway
    (scatter `dv' birthyr if bpl == `stnum' & sex == 2, mcolor(pink))
    (scatter `dv' birthyr if bpl == `stnum' & sex == 1, mcolor(blue)),
    xline(`styear')
    title("`stname'")
    legend(order(1 "Women" 2 "Men"))
    xsize(11) ysize(8.5);

  graph export "/home/lfbrooks/pppa6022/2015/statafig/problem_set_2/${date}_`stab'_`dv'_vs_yob.eps", replace;

end;

** set year the policy starts *;
* these two locals are also passed to the regressions in section C *;
local layear = 1893;
local ilyear = 1867;

** 1(a) charts for overall regression discontinuity **;
graphit, stab(la) stnum(22) styear(`layear') stname(Louisiana) dv(yrs_educ);
graphit, stab(la) stnum(22) styear(`layear') stname(Louisiana) dv(incwage);
*graphit, stab(md) stnum(24) styear(1902) stname(Maryland) dv(yrs_educ);
*graphit, stab(md) stnum(24) styear(1902) stname(Maryland) dv(incwage);
graphit, stab(il) stnum(17) styear(`ilyear') stname(Illinois) dv(yrs_educ);
graphit, stab(il) stnum(17) styear(`ilyear') stname(Illinois) dv(incwage);

** 1(b) charts omitting college graduates ***;
graphit, stab(la) stnum(22) styear(`layear') stname(Louisiana) dv(yrs_educ_noc);
graphit, stab(la) stnum(22) styear(`layear') stname(Louisiana) dv(incwage_noc);
*graphit, stab(md) stnum(24) styear(1902) stname(Maryland) dv(yrs_educ_noc);
*graphit, stab(md) stnum(24) styear(1902) stname(Maryland) dv(incwage_noc);
graphit, stab(il) stnum(17) styear(`ilyear') stname(Illinois) dv(yrs_educ_noc);
graphit, stab(il) stnum(17) styear(`ilyear') stname(Illinois) dv(incwage_noc);

****** C. do regression discontinuity regressions ****************************;

* rdreg runs two cubic regression discontinuity regressions of incwage for
  one birthplace and stores them as r<stnum>_1 (all cohorts) and r<stnum>_2
  (birthyr below 1920).
    stnum   bpl code of the state
    styear  policy year x. the cutoff birth cohort is x - 16
  it creates its regressors on the data in memory and drops them again
  before returning, so it can be called repeatedly *;
capture program drop rdreg;
program define rdreg;

  syntax, stnum(string) styear(string);

  * need to make a flexible time before, flexible time after *;
  * if the policy is passed in year x, it affects people born in year y or later, where x = y + 16, so y = x - 16;

  * make X-c as in Lee and Lemieux *;
  * yr is zero for the first affected cohort, born in x - 16 *;
  gen yr = (birthyr + 16) - `styear';

  * make dummy for being after the policy *;
  * switch on exactly where yr reaches zero, that is birthyr >= x - 16.
    the previous cutoff of x - 15 left the first affected cohort coded as
    untreated, one year out of line with yr and with the note above *;
  gen after = 0;
  replace after = 1 if yr >= 0;

  * make X-c * after *;
  gen yr_after = yr*after;

  * make similar values for squared and cubed *;
  forvalues j=2/3 {;
    gen yr_`j' = yr^`j';
    gen yr_`j'_after = yr_`j' * after;
  };

  * regression *;
  regress incwage after yr yr_2 yr_3 yr_after yr_2_after yr_3_after if bpl == `stnum';
  estimates store r`stnum'_1;

  * restricted to people who are 20+ in 1940 *;
  regress incwage after yr yr_2 yr_3 yr_after yr_2_after yr_3_after if bpl == `stnum' & birthyr < 1920;
  estimates store r`stnum'_2;

  * output regressions *;

  * drop variables ill want to create again *;
  drop after yr yr_*;

end;

* bring back the person-level dataset we started with *;
restore;

* run the rd regressions *;
rdreg, stnum(22) styear(`layear');
*rdreg, stnum(24) styear(1902);
rdreg, stnum(17) styear(`ilyear');

* output regression results *;
* one format per reported statistic *;
estout * using "/home/lfbrooks/pppa6022/2015/stataout/problem_set_2/${date}_prob2_rdregs.txt", replace
  varwidth(12)
  varlabels(_cons Constant)
  cells(b(star fmt(%12.3f)) se(par fmt(%12.3f)))
  stats(r2 N,
        fmt(%9.3f %9.0g)
        labels("R-squared" "Observations"));

estimates clear;