* Open the run log. Close any log a previous (possibly aborted) run left
* open first; otherwise -log using- stops with r(604) "log file already open".
capture log close
log using "/home/projects/sehs/replication/logs/exitentry.log", replace

* Program:	EXITENTRY.DO
* Date:		09 feb 2018
* Written by:	LB
*
* Notes:	Explore the characteristics of those who are not in our data.
*
* Input Files:	
*	/home/projects/sehs/data/applications_20180120.dta
*	/home/projects/sehs/data/cutscores.csv
*	/home/projects/sehs/replication/data/analytic_replication.dta

* Scratch-file handles, plus a global macro holding a variable list.
tempfile temp1
tempfile temp2
global grade8 black8 hisp8 male8 frpl8 assignedsch8


* -- Count the number of complete applications that never get assigned a sid.
* -- We lose these students completely.
* -clear- keeps -use- from aborting with r(4) when unsaved data are already
* in memory (the analytic file further down is loaded the same way).
use "/home/projects/sehs/data/applications_20180120.dta", clear
drop dupsid
* An application counts as complete when all three point components are present.
gen completeapp=0
replace completeapp=1 if pointsexam!=. & pointstest7!=. & pointsgrades!=.
keep if completeapp==1
* Restrict to applicants with no student id.
keep if sid==.
* Tabulate every cohort first, then drop cohorts before 2010.
tab cohort9
drop if cohort9<2010
* Stash the no-sid applicants so the cutscores can be merged onto them.
save `temp1'
clear
* Read the cutscores and attach them, by tier and cohort, to the no-sid
* applicants saved in temp1.
insheet using "/home/projects/sehs/data/cutscores.csv"
rename lindbloom_cut lindblom_cut

merge 1:m tier cohort9 using `temp1'
* Discard cutscore rows that matched no applicant, then the merge marker.
keep if _merge!=1
drop _merge

* Center the application score around each cutscore.
foreach cutvar of varlist brooks_cut-young_cut {
        gen `cutvar'_centered = pointstotal - `cutvar'
}

* Run -destring- over every variable whose name ends in "rank".
destring *rank, replace

* These applicants have no sid, so the row number stands in as an id.
* NOTE(review): confirm these values cannot coincide with real sids in the
* file this is appended to -- the later by-sid steps would mix such students.
replace sid = _n
save `temp1', replace


* -- sidtype by cohort
use "/home/projects/sehs/replication/data/analytic_replication.dta", clear

summarize hasapp completeapp
keep if completeapp==1
* Add the complete applications that never received a sid (built in temp1).
append using `temp1'
* Need to adjust below for students missing sids
replace sidtype = "neither" if sidtype == ""
tabulate sidtype cohort9

* Count the dsped8==1 observations by cohort, then remove them.
tabulate cohort9 if dsped8==1
drop if dsped8==1
* Remove observations whose tier is 0 or missing.
drop if inlist(tier, 0, .)

* 1 when offerrule is non-empty, 0 otherwise.
gen selectruleany = (offerrule != "")

* Make six copies of every record and number the copies within sid, so that
* copy k can carry the k-th ranked school choice.
expand 6
bysort sid: gen numobs = _n

gen schoice = schoice1 if numobs==1
forvalues k = 2/6 {
        replace schoice = schoice`k' if numobs==`k'
}

drop if schoice=="Skinner North"
drop if schoice==""

* Pick up the centered score belonging to the school named in schoice.
* The two lists below are parallel: variable stub <-> school name.
local stubs   brooks jones king lane lindblom northside payton southshore westing young
local schools `" "Brooks" "Jones" "King" "Lane" "Lindblom" "Northside" "Payton" "South Shore" "Westinghouse" "Young" "'
gen centered = .
forvalues i = 1/10 {
        local stub   : word `i' of `stubs'
        local school : word `i' of `schools'
        replace centered = `stub'_cut_centered if schoice=="`school'"
}
drop if centered==.

* Within sid keep the choice with the highest centered score; among ties,
* keep the row with the largest numobs.
bysort sid: egen maxcenter = max(centered)
keep if centered==maxcenter
by sid: egen maxrank = max(numobs)
keep if numobs==maxrank

* admit is 1 when the retained centered score is at or above zero.
gen admit = (centered>=0)

* Bandwidth markers: standard deviation of centered, plus half and a quarter of it.
summarize centered
gen sdcentered = r(sd)
gen halfsdcent = sdcentered/2
gen qrtrsdcent = sdcentered/4

* sdcent starts at 100 for complete applications and is then overwritten
* with 1, .5 and .25 for progressively narrower bands around zero, so the
* narrowest band an observation falls in is the value that remains.
gen sdcent = 100 if completeapp==1
local widths sdcentered halfsdcent qrtrsdcent
local codes  1 .5 .25
forvalues j = 1/3 {
        local width : word `j' of `widths'
        local code  : word `j' of `codes'
        replace sdcent = `code' if centered>=-`width' & centered<=`width'
}
label variable sdcent "indicator for sd away from cutpoint"

* sidtype by cohort: everyone, then only those within half an sd of zero
* (sdcent equal to .25 or .5).
tabulate sidtype cohort9, column
tabulate sidtype cohort9 if inlist(sdcent, .25, .5), column

* enrollcps is 1 for sidtype "both" or "only9", 0 otherwise.
gen enrollcps = inlist(sidtype, "both", "only9")
label variable enrollcps "Enroll in CPS in grade 9"
label variable exit "Exit CPS in grade 9"

* Tabulate exit: overall, by admit status, and by tier among the non-admitted.
tabulate exit
foreach a in 1 0 {
        tabulate exit if admit==`a'
}
tabulate tier exit if admit==0, row

* Same tabulations restricted to sdcent of .25 or .5, with tier breakdowns
* for both admit groups.
local near inlist(sdcent, .25, .5)
foreach a in 1 0 {
        tabulate exit if admit==`a' & `near'
}
foreach a in 0 1 {
        tabulate tier exit if admit==`a' & `near', row
}
