* ---------------------------------------------------------------------------
* (i) Preamble: fix the interpreter version, reset the session, open a dated
*     log file, and seed the random-number generator for reproducibility.
* ---------------------------------------------------------------------------
version 12
clear all
set more off
set maxvar 10000
capture log close
* the log name ends in the current date with blanks turned into dashes
log using SMIF-Weighting-Kolenikov-Multiple-Levels-`=subinstr( trim("`c(current_date)'"), " ", "-", . )', replace
set seed 112233

* ---------------------------------------------------------------------------
* (ii) Load the ACS 2012 extract
* ---------------------------------------------------------------------------
use ACS2012_mult_level_raking_example, clear

* (serial, pernum) must uniquely identify a record
isid serial pernum

* restrict to adults (age 18+) with relate codes up to 12
* (the adult non-institutionalized population, per the original author's note)
keep if age >= 18 & relate <= 12

* ---------------------------------------------------------------------------
* (iii) Derived variables used as raking margins
* ---------------------------------------------------------------------------

* number of retained (adult) records within each household
bysort serial (pernum): generate int num_adults = _N
label variable num_adults "HH size (# of adults)"

* household size top-coded at 4
recode num_adults (1=1) (2=2) (3=3) (4/20=4 "4 or more"), generate(num_adults4)
label variable num_adults4 "HH size (# of adults), capped at 4"

* race collapsed to three groups
recode race (1=1 "White only") (2=2 "Black/African American only") (3/9=3 "Other"), generate(race3)
label variable race3 "Race, 3 categories"

* detailed education collapsed to five groups
recode educd ///
    (1/61=1 "Below high school") ///
    (63/64=2 "High school/GED") ///
    (65/99=3 "Some college") ///
    (100/110=4 "Bachelor's degree") ///
    (111/116=5 "Graduate/professional degree"), generate( educ5 )
label variable educ5 "Education, 5 categories"

* household income collapsed to five brackets
recode hhincome ///
    (-20000/19999=1 "Under $20,000") ///
    (20000/39999=2 "$20,000 to under $40,000") ///
    (40000/64999=3 "$40,000 to under $65,000") ///
    (65000/99999=4 "$65,000 to under $100,000") ///
    (100000/1e9=5 "$100,000 and above"), generate( hhincome5 )
label variable hhincome5 "Household income, 5 categories"

* age collapsed to five groups
recode age ///
    (18/29=1 "18-29") ///
    (30/44=2 "30-44") ///
    (45/54=3 "45-54") ///
    (55/64=4 "55-64") ///
    (65/100=5 "65+"), generate(age5)
label variable age5 "Age, 5 categories"

* ---------------------------------------------------------------------------
* (iv) Household-level exclusions
*      - one-adult households whose only adult has relate == 12
*      - households in which no record has relate == 1 (no HH head present)
* ---------------------------------------------------------------------------
drop if num_adults == 1 & relate == 12

* after sorting by relate within household, the first record should be the head
bysort serial (relate pernum): generate byte _nohead = (relate[1] != 1)
drop if _nohead
drop _nohead

* confirm that every remaining household starts with a relate == 1 record
bysort serial (relate pernum): assert relate[1] == 1

* totals of the household-size variables over household heads
total num_adults* if relate==1
matrix list e(b), format(%12.2f)

* ---------------------------------------------------------------------------
* (v) Review the analysis variables
* ---------------------------------------------------------------------------
describe serial pernum relate hhincome num_adults age sex race educd num_adults4 hhincome5 any_hisp_hh any_migrated lingisol race3 educ5 age5 married empstat
* (vi) checks: none of the analysis variables may contain missing values
foreach x of varlist hhincome num_adults age sex race educd num_adults4 hhincome5 any_hisp_hh any_migrated lingisol race3 educ5 age5 married empstat {
    assert !mi(`x')
}

* (vii) define calibration targets for ipfraking
* unit weights, so that -total- and -svy: tab- return plain counts
gen byte _one = 1
svyset _n [pw=_one]

* HH targets
foreach x of varlist num_adults4 hhincome5 any_hisp_hh any_migrated lingisol {

    * capture run tries to recover the saved estimates, if any;
    * if none are found, the target matrices are created from scratch
    cap run target_ACS2012_hh_`x'.do
    if _rc {
        * scaled at household level: # of households of this kind
        total _one if relate==1, over(`x', nolab)
        matrix ACS2012_hh_`x' = e(b)
        matrix rownames ACS2012_hh_`x' = `x'
        mat2do ACS2012_hh_`x' using target_ACS2012_hh_`x'.do, replace
    }

    cap run target_ACS2012_per_`x'.do
    if _rc {
        * scaled at individual level: # of people who live in the households of this kind
        total _one, over(`x', nolab)
        matrix ACS2012_per_`x' = e(b)
        matrix rownames ACS2012_per_`x' = `x'
        mat2do ACS2012_per_`x' using target_ACS2012_per_`x'.do, replace
    }

    * for later tabulations: full-data counts and proportions over HH heads
    svy : tab `x' if relate==1, count format(%10.2f) se
    est store hh0_`x'_count
    est save hh0_`x'_count, replace
    svy : tab `x' if relate==1, format(%6.4f) se
    est store hh0_`x'_prop
    est save hh0_`x'_prop, replace
}

* individual targets
foreach x of varlist sex race3 educ5 age5 married empstat {

    * Try the cached person-level target first. The HH-scaled target is
    * rebuilt whenever the person-level one is rebuilt (as before), and
    * additionally whenever its own cached do-file cannot be run, so a
    * missing or broken hhsc file no longer aborts the script.
    cap run target_ACS2012_per_`x'
    local make_per = (_rc != 0)
    local make_hhsc = `make_per'
    if !`make_per' {
        cap run target_ACS2012_hhsc_`x'.do
        local make_hhsc = (_rc != 0)
    }

    if `make_hhsc' {
        total _one , over(`x', nolab)

        if `make_per' {
            * scaled at individual level
            matrix ACS2012_per_`x' = e(b)
            matrix rownames ACS2012_per_`x' = `x'
            * -replace- (as in the HH loop) so that a stale or unreadable
            * cached file does not stop the export
            mat2do ACS2012_per_`x' using target_ACS2012_per_`x'.do, replace
        }

        * scaled at HH level: use num_adults4 as multiplier
        matrix ACS2012_hhsc_`x' = e(b)
        matrix rownames ACS2012_hhsc_`x' = `x'
        matrix coleq ACS2012_hhsc_`x' = num_adults4
        mat2do ACS2012_hhsc_`x' using target_ACS2012_hhsc_`x'.do, replace
    }

    * for later tabulations: full-data counts and proportions over persons
    svy : tab `x', count format(%10.2f) se
    est store per0_`x'_count
    est save per0_`x'_count, replace
    svy : tab `x', format(%6.4f) se
    est store per0_`x'_prop
    est save per0_`x'_prop, replace
}

* (viii) sample
* seed was set on top of the file

* sample households: order household heads last, in random order, and take
* the final 5000 records (asserted to be heads) as the sampled households
gen byte hh_head = relate == 1
gen rr = uniform()
sort hh_head rr serial pernum
assert hh_head == 1 if _n > _N - 5000
gen byte sampled_hh = (_n > _N - 5000)
* propagate the head's selection flag to every member of the household
bysort serial (relate) : replace sampled_hh = sampled_hh[1]

* sample one person per household: the member with the smallest random draw
gen rr2 = uniform()
bysort serial (rr2 pernum): gen byte sampled_person = (_n==1) * sampled_hh
count if sampled_person == 1
assert r(N) == 5000

* household base weight = (# of household heads in the frame) / (sample size)
count if hh_head == 1
gen baseweight_hh = r(N)/5000

* the actual data sampling step
keep if sampled_person

* remove intermediate sampling variables
drop sampled* rr* hh_head

* person base weight: HH weight times the (capped) number of adults
gen baseweight_per = baseweight_hh * num_adults4

* (ix) non-response: logistic response propensity, then a Bernoulli draw
* NOTE(review): ln(hhincome+20000) is missing when hhincome <= -20000; a
* missing propensity compares as larger than any uniform draw, so such a
* record would be flagged as responded -- confirm hhincome never goes that low
gen response_propensity = 1/(1+exp( -( -0.3 + 0.25*1.race3 - 0.4*1.educ5 + 0.1*4.educ5 + 0.3*5.educ5 - 0.1*ln(hhincome+20000) ) ) )
gen byte responded = (uniform() < response_propensity)

* (x) bootstrap weights: bsw1-bsw500 from -bsweights-; bsw0 is the
* all-ones "replicate" that stands for the original sample
svyset [pw=_one], strata( _one )
gen byte bsw0 = 1
bsweights bsw, n(-1) rep(500) balanced

* (xi) prepare for raking
local ipfreportopts by(num_adults4 hhincome5 sex race3 educ5 age5) xls replace
gen weight_hh_base = .
gen weight_per_base = .
* Main loop over replicates: 0 uses bsw0 (the original sample), 1-500 use the
* bootstrap replicate weights. Each pass produces HH- and person-level raked
* weights under three approaches. Raking reports are written for replicate 0
* only.
forvalues rep = 0(1)500 {

    * weighted response rate for this replicate, used to adjust base weights
    summarize responded [aw=bsw`rep']
    scalar response_rate = r(mean)

    * ------------------------------------------------------------------
    * (xii.a) approach one: two passes -- HH weights first, then person
    *         weights raked from the HH result
    * ------------------------------------------------------------------
    replace weight_hh_base = bsw`rep' * baseweight_hh / scalar(response_rate) if responded
    ipfraking [pw=weight_hh_base], gen( weight1_hh_final`rep' ) meta nograph ///
        ctotal(ACS2012_hh_num_adults4 ACS2012_hh_hhincome5)

    * second pass starts from the raked HH weight times the capped HH size
    replace weight_per_base = weight1_hh_final`rep' * num_adults4
    ipfraking [pw=weight_per_base], gen(weight1_per_final`rep') meta nograph ///
        ctotal(ACS2012_per_sex ACS2012_per_race3 ACS2012_per_educ5 ACS2012_per_age5)

    if `rep' == 0 {
        quietly ipfraking_report using weight1_hh_raked , raked_weight(weight1_hh_final0) `ipfreportopts'
        quietly ipfraking_report using weight1_per_raked, raked_weight(weight1_per_final0) `ipfreportopts'
    }

    * ------------------------------------------------------------------
    * (xii.b) approach two: single pass -- person weights raked to
    *         person-level targets; HH weight obtained by division
    * ------------------------------------------------------------------
    replace weight_per_base = bsw`rep' * baseweight_per / scalar(response_rate) if responded
    ipfraking [pw=weight_per_base], gen(weight2_per_final`rep') meta nograph ///
        ctotal(ACS2012_per_num_adults4 ACS2012_per_hhincome5 ACS2012_per_sex ACS2012_per_race3 ACS2012_per_educ5 ACS2012_per_age5)

    if `rep' == 0 {
        * the replicate suffix expands to 0 here, so the report file name
        * ends in "raked0", unlike the other reports
        quietly ipfraking_report using weight2_per_raked`rep', raked_weight(weight2_per_final0) `ipfreportopts'
    }
    generate weight2_hh_final`rep' = weight2_per_final`rep' / num_adults4

    * ------------------------------------------------------------------
    * (xii.c) approach three: single pass -- HH weights raked to HH targets
    *         plus person targets entered with num_adults4 multipliers
    * ------------------------------------------------------------------
    replace weight_hh_base = bsw`rep' * baseweight_hh / scalar(response_rate) if responded
    ipfraking [pw=weight_hh_base], gen( weight3_hh_final`rep' ) meta nograph ///
        ctotal(ACS2012_hh_num_adults4 ACS2012_hh_hhincome5 ACS2012_hhsc_sex ACS2012_hhsc_race3 ACS2012_hhsc_educ5 ACS2012_hhsc_age5)

    if `rep' == 0 {
        quietly ipfraking_report using weight3_hh_raked, raked_weight(weight3_hh_final0) `ipfreportopts'
    }
    generate weight3_per_final`rep' = weight3_hh_final`rep' * num_adults4
}

* the scratch base-weight variables are no longer needed
drop weight_hh_base weight_per_base

save ACS2012_mult_level_raking_example_weighted, replace

* (xx) all done
log close
exit